[med-svn] [iqtree] 01/05: New upstream version 1.5.2+dfsg
Kevin Murray
daube-guest at moszumanska.debian.org
Sun Dec 4 01:29:03 UTC 2016
This is an automated email from the git hooks/post-receive script.
daube-guest pushed a commit to branch master
in repository iqtree.
commit 80d358298be5121dd976ba4e053538dd27f18f3c
Author: Kevin Murray <kdmfoss at gmail.com>
Date: Sun Dec 4 11:25:31 2016 +1100
New upstream version 1.5.2+dfsg
---
CMakeLists.txt | 416 ++++--
MPIHelper.cpp | 560 +++++++
MPIHelper.h | 305 ++++
ObjectStream.cpp | 113 ++
ObjectStream.h | 82 ++
README.md | 71 +-
TreeCollection.cpp | 56 +
TreeCollection.h | 63 +
alignment.cpp | 21 +-
alignment.h | 7 +-
candidateset.cpp | 584 +++++---
candidateset.h | 260 ++--
checkpoint.cpp | 120 +-
checkpoint.h | 20 +-
constrainttree.cpp | 211 +++
constrainttree.h | 81 ++
example/example.nex | 6 +-
example/example.phy | 64 +-
gsl/gauss.cpp | 12 +
hashsplitset.h | 19 +-
iqtree.cpp | 2064 +++++++++++++++++---------
iqtree.h | 378 +++--
iqtree_config.h.in | 3 +
lpwrapper.c | 2 +-
memslot.cpp | 254 ++++
memslot.h | 111 ++
mexttree.cpp | 73 +-
mexttree.h | 10 +
model/modelcodon.cpp | 8 +
model/modelfactory.cpp | 34 +-
model/modelmixture.cpp | 26 +-
model/modelmixture.h | 11 +
model/modelpomo.cpp | 3 +-
model/modelset.cpp | 136 +-
model/modelset.h | 7 +-
model/modelsubst.h | 6 +
model/partitionmodel.cpp | 6 +-
model/ratefree.cpp | 23 +-
model/rategamma.cpp | 8 +-
model/rategamma.h | 8 +-
model/rategammainvar.cpp | 3 +-
model/rateheterogeneity.h | 12 +-
model/rateinvar.cpp | 11 +-
model/rateinvar.h | 2 +-
mtree.cpp | 391 +++--
mtree.h | 123 +-
mtreeset.cpp | 1 +
ngs.cpp | 2 +-
node.cpp | 6 +-
node.h | 20 +-
optimization.cpp | 29 +-
pda.cpp | 196 ++-
pdtree.cpp | 5 +-
phyloanalysis.cpp | 369 +++--
phyloanalysis.h | 6 +-
phylokernel.h | 904 +++++++-----
phylokernelavx512.cpp | 120 ++
phylokernelfma.cpp | 164 +++
phylokernelmixrate.h | 2 +-
phylokernelmixture.h | 2 +-
phylokernelnew.h | 2802 ++++++++++++++++++++++++++++++++++++
phylokernel.h => phylokernelsafe.h | 1155 +++++++++------
phylokernelsitemodel.cpp | 6 +-
phylokernelsse.cpp | 169 +++
phylonode.cpp | 49 +-
phylonode.h | 22 +-
phylosupertree.cpp | 76 +-
phylosupertree.h | 7 +-
phylosupertreeplen.cpp | 164 ++-
phylosupertreeplen.h | 9 +-
phylotesting.cpp | 316 +++-
phylotesting.h | 21 +-
phylotree.cpp | 874 ++++++-----
phylotree.h | 400 +++--
phylotreeavx.cpp | 203 +--
phylotreepars.cpp | 354 +++--
phylotreesse.cpp | 2647 ++++++++++------------------------
pllnni.cpp | 53 +-
pllnni.h | 22 +-
quartet.cpp | 2 +-
split.cpp | 6 +-
split.h | 7 +-
splitgraph.cpp | 2 +-
splitgraph.h | 2 +-
splitset.cpp | 6 +-
splitset.h | 6 +-
stoprule.cpp | 44 +-
stoprule.h | 19 +-
superalignment.cpp | 66 +-
superalignment.h | 19 +
test_scripts/README | 22 +-
test_scripts/compile.sh | 113 +-
test_scripts/gen_test_standard.py | 34 +-
test_scripts/generate_test_cmds.py | 97 --
test_scripts/run_tests.sh | 50 +
test_scripts/submit_jobs.sh | 8 +-
test_scripts/submitjob.sh | 2 -
test_scripts/test_configs.txt | 18 +-
test_scripts/test_data/d59_8.nex | 2 -
tools.cpp | 339 ++++-
tools.h | 180 ++-
vectorclass/changelog.txt | 32 +-
vectorclass/dispatch_example.cpp | 12 +-
vectorclass/instrset.h | 60 +-
vectorclass/instrset_detect.cpp | 18 +-
vectorclass/special.zip | Bin 34477 -> 34897 bytes
vectorclass/vectorclass.h | 10 +-
vectorclass/vectorclass.pdf | Bin 476370 -> 431608 bytes
vectorclass/vectorf128.h | 46 +-
vectorclass/vectorf256.h | 14 +-
vectorclass/vectorf256e.h | 13 +-
vectorclass/vectorf512.h | 22 +-
vectorclass/vectorf512e.h | 13 +-
vectorclass/vectori128.h | 140 +-
vectorclass/vectori256.h | 28 +-
vectorclass/vectori256e.h | 21 +-
vectorclass/vectori512.h | 31 +-
vectorclass/vectori512e.h | 25 +-
vectorclass/vectormath_common.h | 93 +-
vectorclass/vectormath_exp.h | 27 +-
vectorclass/vectormath_hyp.h | 9 +-
vectorclass/vectormath_lib.h | 103 +-
vectorclass/vectormath_trig.h | 204 +--
vectorf64.h | 377 +++++
124 files changed, 14411 insertions(+), 5890 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 73688ca..767f309 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
##################################################################
# IQ-TREE cmake build definition
-# Copyright (c) 2012-2015 Bui Quang Minh, Lam Tung Nguyen
+# Copyright (c) 2012-2015 Bui Quang Minh, Lam-Tung Nguyen
##################################################################
# Windows example usages:
@@ -18,12 +18,17 @@
# cmake -DIQTREE_FLAGS="m32" <source_dir> (32-bit sequential version)
# cmake -DIQTREE_FLAGS="m32 omp" <source_dir> (32-bit OpenMP version)
#
-
+# To compile with CLANG on Linux:
+# export CC=/usr/bin/clang
+# export CXX=/usr/bin/clang++
+# Best practices for setting up CMake for different compilers can be found here:
+# http://stackoverflow.com/questions/7031126/switching-between-gcc-and-clang-llvm-using-cmake
+#
# Mac OSX example usages:
#------------------------
# cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ <source_dir> (sequential version)
#
-# To build OpenMP version one needs to download Clang version 3.7 or later (as of November 2015)
+# To build OpenMP version one needs to download Clang version 3.7 or later (as of November 2015)
# Then assuming clang3.7 and clang++3.7 are the newly built compilers, then:
# cmake -DCMAKE_C_COMPILER=clang3.7 -DCMAKE_CXX_COMPILER=clang++3.7 -DIQTREE_FLAGS="omp" <source_dir> (OpenMP version)
#
@@ -44,23 +49,30 @@ project(iqtree)
add_definitions(-DIQ_TREE)
# The version number.
set (iqtree_VERSION_MAJOR 1)
-set (iqtree_VERSION_MINOR 4)
-set (iqtree_VERSION_PATCH "4")
+set (iqtree_VERSION_MINOR 5)
+set (iqtree_VERSION_PATCH "2")
set(BUILD_SHARED_LIBS OFF)
+if (CMAKE_C_COMPILER MATCHES "mpi")
+ set(IQTREE_FLAGS "${IQTREE_FLAGS} mpi")
+endif()
+
message("IQ-TREE flags : ${IQTREE_FLAGS}")
-if (NOT CMAKE_BUILD_TYPE)
+if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif()
-if (CMAKE_BUILD_TYPE STREQUAL "Release")
+if (CMAKE_BUILD_TYPE STREQUAL "Release")
message("Builde mode : Release")
endif()
-include_directories("${PROJECT_SOURCE_DIR}")
+if (CMAKE_GENERATOR MATCHES "Xcode")
+ set(CMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym")
+endif()
+include_directories("${PROJECT_SOURCE_DIR}")
##################################################################
# Detect target platforms
@@ -73,18 +85,18 @@ if (WIN32)
endif()
SET(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
add_definitions(-DWIN32)
-elseif (APPLE)
+elseif (APPLE)
message("Target OS : Mac OS X")
# to be compatible back to Mac OS X 10.6
- if (IQTREE_FLAGS MATCHES "oldmac")
- add_definitions("-mmacosx-version-min=10.5")
+ if (IQTREE_FLAGS MATCHES "oldmac")
+ add_definitions("-mmacosx-version-min=10.5")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.5")
else()
- add_definitions("-mmacosx-version-min=10.6")
+ add_definitions("-mmacosx-version-min=10.6")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.6")
endif()
SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
-elseif (UNIX)
+elseif (UNIX)
message("Target OS : Unix")
# build as static binary to run on most machines
if (NOT IQTREE_FLAGS MATCHES "static")
@@ -105,38 +117,48 @@ set(GCC "FALSE") # GNU compiler
set(CLANG "FALSE") # Clang compiler
set(ICC "FALSE") # Intel compiler
set(VCC "FALSE") # MS Visual C Compiler, note that it is different from MSVC variable
+# using C++11 standard
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
-if (CMAKE_COMPILER_IS_GNUCXX)
+if (CMAKE_COMPILER_IS_GNUCXX)
message("Compiler : GNU Compiler (gcc)")
set(GCC "TRUE")
# set(COMBINED_FLAGS "-Wall -Wno-unused-function -Wno-sign-compare -pedantic -D_GNU_SOURCE -fms-extensions -Wno-deprecated")
-# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++98")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g")
set(CMAKE_C_FLAGS_RELEASE "-O3 -g")
- # require at least gcc 4.6
- if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.6)
- message(FATAL_ERROR "GCC version must be at least 4.6!")
+ # require at least gcc 4.8
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
+ message(FATAL_ERROR "GCC version must be at least 4.8!")
+ endif()
+ if (WIN32)
+ # disable AVX on Windows due to memory alignment
+ set(IQTREE_FLAGS "${IQTREE_FLAGS} novx")
+ message("WARNING: AVX is disabled on Windows as GCC does not properly suport memory alignment")
endif()
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
message("Compiler : Clang")
set(CLANG "TRUE")
# set(COMBINED_FLAGS "-Wall -Wno-unused-function -Wno-sign-compare -pedantic -D_GNU_SOURCE -Wno-nested-anon-types")
+ #if (APPLE AND NOT CMAKE_BUILD_TYPE MATCHES "Debug")
+ # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+ # set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
+ #endif()
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
- set(CMAKE_C_FLAGS_RELEASE "-O3")
+ set(CMAKE_C_FLAGS_RELEASE "-O3")
elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(VCC "TRUE")
message("Compiler : MS Visual C++ Compiler")
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Intel")
message("Compiler : Intel C++ Compiler (icc)")
set(ICC "TRUE")
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99")
+ #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99")
else()
message("Compiler : Unknown and untested yet")
endif()
set(EXE_SUFFIX "")
-if (MSVC)
+if (MSVC)
# MS Visual Studio environment
message("Exporting MS Visual Studio projects...")
add_definitions(/MP) # enable multi-processor compilation
@@ -145,11 +167,29 @@ if (MSVC)
if (VCC)
add_definitions(/O2)
elseif (ICC)
+ #add_definitions(/O3)
add_definitions(/O3)
endif()
endif()
endif()
+##################################################################
+# configure MPI compilation
+##################################################################
+
+if (IQTREE_FLAGS MATCHES "mpi")
+ add_definitions(-D_IQTREE_MPI)
+ if (NOT CMAKE_CXX_COMPILER MATCHES "mpi")
+ # if not using the MPI compiler wrapper, set own options manually
+ find_package(MPI REQUIRED)
+ set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} ${MPI_CXX_COMPILE_FLAGS}")
+ set(CMAKE_C_COMPILE_FLAGS "${CMAKE_C_COMPILE_FLAGS} ${MPI_C_COMPILE_FLAGS}")
+ set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} ${MPI_CXX_LINK_FLAGS}")
+ set(CMAKE_C_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} ${MPI_C_LINK_FLAGS}")
+ include_directories(${MPI_C_INCLUDE_PATH})
+ include_directories(${MPI_CXX_INCLUDE_PATH})
+ endif()
+endif()
##################################################################
@@ -170,7 +210,7 @@ if(CMAKE_SIZEOF_VOID_P EQUAL 4 OR IQTREE_FLAGS MATCHES "m32")
if (CMAKE_GENERATOR MATCHES "Win64")
error("Both 32-bit and 64-bit mode cannot be specified")
endif()
- SET(EXE_SUFFIX "${EXE_SUFFIX}32")
+ #SET(EXE_SUFFIX "${EXE_SUFFIX}32")
if (GCC OR CLANG)
set(COMBINED_FLAGS "${COMBINED_FLAGS} -m32")
endif()
@@ -179,7 +219,7 @@ else()
message("Target binary : 64-bit")
endif()
-if(IQTREE_FLAGS MATCHES "novx")
+if(IQTREE_FLAGS MATCHES "novx")
add_definitions(-D__NOAVX__)
endif()
@@ -188,44 +228,52 @@ endif()
# change the executable name if compiled for OpenMP parallel version
##################################################################
if (IQTREE_FLAGS MATCHES "omp")
- message("Parallel : OpenMP/PThreads")
- SET(EXE_SUFFIX "${EXE_SUFFIX}-omp")
+ message("OpenMP : Yes")
+ SET(EXE_SUFFIX "${EXE_SUFFIX}-omp")
add_definitions(-D_USE_PTHREADS)
- if (MSVC)
+ if (MSVC)
add_definitions(/MT)
endif()
-
- if (VCC)
+
+ if (VCC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp")
- include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers
+ include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers
elseif (ICC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qopenmp")
if (WIN32)
include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers
- endif()
+ endif()
elseif (GCC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
- elseif (CLANG)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -pthread")
+ elseif (CLANG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp=libomp")
endif()
else()
- message("Parallel : None")
+ message("OpenMP : NONE")
+endif()
+
+
+if (IQTREE_FLAGS MATCHES "mpi")
+ message("MPI : Yes")
+ SET(EXE_SUFFIX "${EXE_SUFFIX}-mpi")
+else()
+ message("MPI : NONE")
endif()
##################################################################
# configure SSE/AVX/FMA instructions
##################################################################
-SET(AVX_FLAGS "-D__AVX")
+SET(AVX_FLAGS "-D__SSE3 -D__AVX")
if (VCC)
set(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
elseif (CLANG)
set(AVX_FLAGS "${AVX_FLAGS} -mavx")
elseif (GCC)
set(AVX_FLAGS "${AVX_FLAGS} -mavx -fabi-version=0")
-elseif (ICC)
+elseif (ICC)
if (WIN32)
set(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
else()
@@ -233,47 +281,67 @@ elseif (ICC)
endif()
endif()
-SET(SSE_FLAGS "")
+SET(SSE_FLAGS "-D__SSE3")
if (VCC)
- set(SSE_FLAGS "/arch:SSE2 -D__SSE3__")
+ set(SSE_FLAGS "${SSE_FLAGS} /arch:SSE2 -D__SSE3__")
elseif (GCC OR CLANG)
- set(SSE_FLAGS "-msse3")
+ set(SSE_FLAGS "${SSE_FLAGS} -msse3")
elseif (ICC)
if (WIN32)
- set(SSE_FLAGS "/arch:SSE3")
+ set(SSE_FLAGS "${SSE_FLAGS} /arch:SSE3")
else()
- set(SSE_FLAGS "-msse3")
+ set(SSE_FLAGS "${SSE_FLAGS} -msse3")
endif()
endif()
-if (IQTREE_FLAGS MATCHES "fma") # AVX+FMA instruction set
- message("Vectorization : AVX+FMA")
- add_definitions(-D__SSE3 -D__AVX) # define both SSE3 and AVX directive
- if (VCC)
- # Visual C++ has no /mfma flag!, FMA is only included in AVX2
- set(COMBINED_FLAGS "${COMBINED_FLAGS} /arch:AVX2")
- elseif (CLANG)
- set(COMBINED_FLAGS "${COMBINED_FLAGS} -mavx -mfma")
- elseif (GCC)
- set(COMBINED_FLAGS "${COMBINED_FLAGS} -mavx -fabi-version=0 -mfma")
- elseif (ICC)
- if (WIN32)
- set(COMBINED_FLAGS "${COMBINED_FLAGS} /arch:AVX /Qfma")
- else()
- set(COMBINED_FLAGS "${COMBINED_FLAGS} -mavx -mfma")
- endif()
+SET(FMA_FLAGS "-D__SSE3 -D__AVX")
+if (VCC)
+ set(FMA_FLAGS "${FMA_FLAGS} /arch:AVX2")
+elseif (CLANG)
+ set(FMA_FLAGS "${FMA_FLAGS} -mavx -mfma")
+elseif (GCC)
+ set(FMA_FLAGS "${FMA_FLAGS} -mavx -fabi-version=0 -mfma")
+elseif (ICC)
+ if (WIN32)
+ set(FMA_FLAGS "${FMA_FLAGS} /arch:AVX /Qfma")
+ else()
+ set(FMA_FLAGS "${FMA_FLAGS} -march=core-avx2")
endif()
+endif()
- SET(EXE_SUFFIX "${EXE_SUFFIX}-fma")
+SET(AVX512_FLAGS "-D__SSE3 -D__AVX")
+if (VCC)
+ message("AVX512 not available in Visual C++")
+ #set(AVX512_FLAGS "${AVX512_FLAGS} /arch:AVX512")
+elseif (CLANG)
+ set(AVX512_FLAGS "${AVX512_FLAGS} -mavx512f -mfma")
+elseif (GCC)
+ set(AVX512_FLAGS "${AVX512_FLAGS} -mavx512f -mfma")
+elseif (ICC)
+ if (WIN32)
+ set(AVX512_FLAGS "${AVX512_FLAGS} /arch:MIC-AVX512 /Qfma")
+ else()
+ set(AVX512_FLAGS "${AVX512_FLAGS} -xMIC-AVX512 -mfma")
+ endif()
+endif()
+
+# further flag to improve performance
+
+if (IQTREE_FLAGS MATCHES "fma") # AVX+FMA instruction set
+ message("Vectorization : AVX+FMA")
+ add_definitions(-D__SSE3 -D__AVX) # define both SSE3 and AVX directive
+ set(COMBINED_FLAGS "${COMBINED_FLAGS} ${FMA_FLAGS}")
+ #SET(EXE_SUFFIX "${EXE_SUFFIX}-fma")
elseif (IQTREE_FLAGS MATCHES "avx") # AVX instruction set
message("Vectorization : AVX")
add_definitions(-D__SSE3 -D__AVX) # define both SSE3 and AVX directive
set(COMBINED_FLAGS "${COMBINED_FLAGS} ${AVX_FLAGS}")
- SET(EXE_SUFFIX "${EXE_SUFFIX}-avx")
-else() #SSE intruction set
- message("Vectorization : SSE3")
- add_definitions(-D__SSE3)
+ #SET(EXE_SUFFIX "${EXE_SUFFIX}-avx")
+elseif (NOT IQTREE_FLAGS MATCHES "nosse") #SSE intruction set
+ message("Vectorization : SSE3/AVX/AVX2")
+ #add_definitions(-D__SSE3)
+ #set(COMBINED_FLAGS "${COMBINED_FLAGS} ${SSE_FLAGS}")
endif()
@@ -283,21 +351,26 @@ endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMBINED_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMBINED_FLAGS}")
+set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS} -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -fno-default-inline -fno-inline -O2 -fno-omit-frame-pointer -g")
+set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS} -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -O2 -fno-omit-frame-pointer -g")
if (CMAKE_BUILD_TYPE STREQUAL "Release")
- message("C flags : ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_RELEASE}")
- message("CXX flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
+ message("C flags : ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_RELEASE}")
+ message("CXX flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
endif()
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
- message("C flags : ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_DEBUG}")
- message("CXX flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}")
+ message("C flags : ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_DEBUG}")
+ message("CXX flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}")
+endif()
+
+if (CMAKE_BUILD_TYPE STREQUAL "Profile")
+ message("C flags : ${CMAKE_C_FLAGS_PROFILE} ")
+ message("CXX flags : ${CMAKE_CXX_FLAGS_PROFILE} ")
endif()
-set(CMAKE_CXX_FLAGS_PROFILE "-fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -fno-default-inline -fno-inline -O0 -fno-omit-frame-pointer -pg")
-set(CMAKE_C_FLAGS_PROFILE "-fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -O0 -fno-omit-frame-pointer -pg")
-if (GCC)
+if (GCC)
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline-functions-called-once -fno-default-inline -fno-inline")
set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline-functions-called-once -fno-default-inline -fno-inline")
set(CMAKE_CXX_FLAGS_MEM "-g -O1")
@@ -317,6 +390,7 @@ check_function_exists (gettimeofday HAVE_GETTIMEOFDAY)
check_function_exists (getrusage HAVE_GETRUSAGE)
check_function_exists (GlobalMemoryStatusEx HAVE_GLOBALMEMORYSTATUSEX)
check_function_exists (strndup HAVE_STRNDUP)
+find_package(Backtrace)
# configure a header file to pass some of the CMake settings
# to the source code
@@ -334,7 +408,7 @@ include_directories("${PROJECT_BINARY_DIR}")
##################################################################
-# subdirectories containing necessary libraries for the build
+# subdirectories containing necessary libraries for the build
##################################################################
add_subdirectory(pll)
add_subdirectory(ncl)
@@ -355,73 +429,88 @@ add_subdirectory(gsl)
# the main executable
##################################################################
+add_library(kernelsse phylokernelsse.cpp)
+
if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx")
-add_library(avxkernel phylotreeavx.cpp)
+add_library(kernelavx phylotreeavx.cpp)
+add_library(kernelfma phylokernelfma.cpp)
+if (IQTREE_FLAGS MATCHES "512")
+ add_library(kernelavx512 phylokernelavx512.cpp)
+ add_definitions(-DINCLUDE_AVX512)
+endif()
+endif()
+
+if (IQTREE_FLAGS MATCHES "mpi")
+ add_library(mympi TreeCollection.cpp ObjectStream.cpp)
endif()
add_executable(iqtree
-alignment.cpp
-alignmentpairwise.cpp
-circularnetwork.cpp
-eigendecomposition.cpp
-greedy.cpp
-gss.cpp
-#guidedbootstrap.cpp
-gurobiwrapper.cpp
-gzstream.cpp
-hashsplitset.cpp
-iqtree.cpp
-maalignment.cpp
-matree.cpp
-mexttree.cpp
-mpdablock.cpp
-msetsblock.cpp
-msplitsblock.cpp
-modelsblock.cpp
-mtree.cpp
-mtreeset.cpp
-ncbitree.cpp
-ngs.cpp
-node.cpp
-optimization.cpp
-parsmultistate.cpp
-pattern.cpp
+alignment.cpp alignment.h
+alignmentpairwise.cpp alignmentpairwise.h
+circularnetwork.cpp circularnetwork.h
+eigendecomposition.cpp eigendecomposition.h
+greedy.cpp greedy.h
+gss.cpp gss.h
+gurobiwrapper.cpp gurobiwrapper.h
+gzstream.cpp gzstream.h
+hashsplitset.cpp hashsplitset.h
+iqtree.cpp iqtree.h
+maalignment.cpp maalignment.h
+matree.cpp matree.h
+mexttree.cpp mexttree.h
+mpdablock.cpp mpdablock.h
+msetsblock.cpp msetsblock.h
+msplitsblock.cpp msplitsblock.h
+modelsblock.cpp modelsblock.h
+mtree.cpp mtree.h
+mtreeset.cpp mtreeset.h
+ncbitree.cpp ncbitree.h
+ngs.cpp ngs.h
+node.cpp node.h
+optimization.cpp optimization.h
+parsmultistate.cpp parsmultistate.h
+pattern.cpp pattern.h
pda.cpp
-pdnetwork.cpp
-pdtree.cpp
-pdtreeset.cpp
-phyloanalysis.cpp
-phylonode.cpp
-phylosupertree.cpp
-phylotree.cpp
-phylotreesse.cpp
+pdnetwork.cpp pdnetwork.h
+pdtree.cpp pdtree.h
+pdtreeset.cpp pdtreeset.h
+phyloanalysis.cpp phyloanalysis.h
+phylonode.cpp phylonode.h
+phylosupertree.cpp phylosupertree.h
+phylotree.cpp phylotree.h
+phylotreesse.cpp phylokernelnew.h
phylotreepars.cpp
-phylokernelsitemodel.cpp
-#phylotreeavx.cpp
-pruning.cpp
+pruning.cpp pruning.h
quartet.cpp
-split.cpp
-splitgraph.cpp
-splitset.cpp
-stoprule.cpp
-superalignment.cpp
-superalignmentpairwise.cpp
-supernode.cpp
-tinatree.cpp
-tools.cpp
-whtest_wrapper.cpp
-lpwrapper.c
-pllnni.cpp
-phylosupertreeplen.cpp
-phylotesting.cpp
-ecopd.cpp
-ecopdmtreeset.cpp
-graph.cpp
-candidateset.cpp
-checkpoint.cpp
-upperbounds.cpp
+split.cpp split.h
+splitgraph.cpp splitgraph.h
+splitset.cpp splitset.h
+stoprule.cpp stoprule.h
+superalignment.cpp superalignment.h
+superalignmentpairwise.cpp superalignmentpairwise.h
+supernode.cpp supernode.h
+tinatree.cpp tinatree.h
+tools.cpp tools.h
+whtest_wrapper.cpp whtest_wrapper.h
+lpwrapper.c lpwrapper.h
+pllnni.cpp pllnni.h
+phylosupertreeplen.cpp phylosupertreeplen.h
+phylotesting.cpp phylotesting.h
+ecopd.cpp ecopd.h
+ecopdmtreeset.cpp ecopdmtreeset.h
+graph.cpp graph.h
+candidateset.cpp candidateset.h
+checkpoint.cpp checkpoint.h
+constrainttree.cpp constrainttree.h
+MPIHelper.cpp MPIHelper.h
+memslot.cpp memslot.h
)
+if(Backtrace_FOUND)
+ include_directories(${Backtrace_INCLUDE_DIR})
+ target_link_libraries(iqtree ${Backtrace_LIBRARY})
+endif(Backtrace_FOUND)
+
if (NOT IQTREE_FLAGS MATCHES "nozlib")
find_package(ZLIB)
endif()
@@ -438,18 +527,25 @@ else(ZLIB_FOUND)
endif(ZLIB_FOUND)
if (NOT IQTREE_FLAGS MATCHES "avx" AND NOT IQTREE_FLAGS MATCHES "fma")
- set_target_properties(iqtree pll ncl lbfgsb whtest sprng vectorclass model PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
+ if (NOT IQTREE_FLAGS MATCHES "nosse")
+ set_target_properties(iqtree ncl lbfgsb whtest sprng vectorclass model PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
+ endif()
+ set_target_properties(kernelsse pll PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx")
- set_target_properties(avxkernel pllavx PROPERTIES COMPILE_FLAGS "${AVX_FLAGS}")
+ set_target_properties(kernelavx pllavx PROPERTIES COMPILE_FLAGS "${AVX_FLAGS}")
+ set_target_properties(kernelfma PROPERTIES COMPILE_FLAGS "${FMA_FLAGS}")
+ if (IQTREE_FLAGS MATCHES "512")
+ set_target_properties(kernelavx512 PROPERTIES COMPILE_FLAGS "${AVX512_FLAGS}")
+ endif()
endif()
-endif()
+endif()
##################################################################
# setup linking flags
##################################################################
# link special lib for WIN32
-if (WIN32)
+if (WIN32)
set(PLATFORM_LIB "ws2_32")
else()
set(PLATFORM_LIB "m")
@@ -460,10 +556,10 @@ if(CLANG AND WIN32 AND IQTREE_FLAGS MATCHES "static")
endif()
set(THREAD_LIB "")
-if (IQTREE_FLAGS MATCHES "omp")
+if (IQTREE_FLAGS MATCHES "omp")
link_directories(${PROJECT_SOURCE_DIR}/lib)
if (MSVC)
- if (BINARY32)
+ if (BINARY32)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${PROJECT_SOURCE_DIR}/lib32")
set(THREAD_LIB "pthreadVC2")
else()
@@ -471,9 +567,9 @@ if (IQTREE_FLAGS MATCHES "omp")
set(THREAD_LIB "pthreadVC2")
endif()
elseif(CLANG AND APPLE)
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/libmac")
+ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/libmac -fopenmp=libomp")
elseif(CLANG AND WIN32)
- if (BINARY32)
+ if (BINARY32)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/lib32 libiomp5md.dll")
else()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/lib libiomp5md.dll")
@@ -482,14 +578,30 @@ if (IQTREE_FLAGS MATCHES "omp")
endif()
endif()
-if (BINARY32 OR IQTREE_FLAGS MATCHES "novx")
- target_link_libraries(iqtree pll ncl lbfgsb whtest sprng vectorclass model gsl ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB})
-else()
- target_link_libraries(iqtree pll pllavx ncl lbfgsb whtest sprng vectorclass model avxkernel gsl ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB})
+# basic linking libraries
+target_link_libraries(iqtree pll ncl lbfgsb whtest sprng vectorclass model gsl ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB})
+
+if (NOT IQTREE_FLAGS MATCHES "nosse")
+ target_link_libraries(iqtree kernelsse)
endif()
-##################################################################
-# setup the executable name
+# MPI libraries
+if (IQTREE_FLAGS MATCHES "mpi")
+ target_link_libraries(iqtree mympi)
+ if (NOT CMAKE_CXX_COMPILER MATCHES "mpi")
+ target_link_libraries(iqtree ${MPI_CXX_LIBRARIES})
+ endif()
+endif()
+
+# SSE, AVX etc. libraries
+if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx")
+ target_link_libraries(iqtree pllavx kernelavx kernelfma)
+ if (IQTREE_FLAGS MATCHES "512")
+ target_link_libraries(iqtree kernelavx512)
+ endif()
+endif()
+
+# setup the executable name
##################################################################
set_target_properties(iqtree PROPERTIES OUTPUT_NAME "iqtree${EXE_SUFFIX}")
@@ -497,7 +609,7 @@ set_target_properties(iqtree PROPERTIES OUTPUT_NAME "iqtree${EXE_SUFFIX}")
if (CMAKE_BUILD_TYPE STREQUAL "Release" AND (GCC OR CLANG)) # strip is not necessary for MSVC
if (WIN32)
ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND strip $<TARGET_FILE:iqtree>)
- else()
+ elseif (NOT APPLE)
ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND ${CMAKE_STRIP} $<TARGET_FILE:iqtree>)
endif()
endif()
@@ -509,11 +621,11 @@ else()
endif()
if (WIN32)
- if (MSVC)
+ if (MSVC)
ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND copy "Release\\iqtree${EXE_SUFFIX}.exe" "Release\\iqtree${EXE_SUFFIX}-click.exe")
else()
ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND copy "iqtree${EXE_SUFFIX}.exe" "iqtree${EXE_SUFFIX}-click.exe")
- endif()
+ endif()
endif()
##############################################################
@@ -552,7 +664,7 @@ endif()
# build a CPack driven installer package
##############################################################
include (InstallRequiredSystemLibraries)
-set (CPACK_RESOURCE_FILE_LICENSE
+set (CPACK_RESOURCE_FILE_LICENSE
"${CMAKE_CURRENT_SOURCE_DIR}/License.txt")
set (CPACK_PACKAGE_VERSION_MAJOR "${iqtree_VERSION_MAJOR}")
set (CPACK_PACKAGE_VERSION_MINOR "${iqtree_VERSION_MINOR}")
@@ -572,25 +684,19 @@ set(CPACK_SOURCE_IGNORE_FILES
set (SYSTEM_NAME "${CMAKE_SYSTEM_NAME}")
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
- if (IQTREE_FLAGS MATCHES "oldmac")
+ if (IQTREE_FLAGS MATCHES "oldmac")
set (SYSTEM_NAME "MacOS10.5")
- else()
+ else()
set (SYSTEM_NAME "MacOSX")
endif()
endif()
if (BINARY32)
- set (PROJECT_NAME_SUFFIX "${EXE_SUFFIX}")
-else()
- set (PROJECT_NAME_SUFFIX "${EXE_SUFFIX}")
+ set (SYSTEM_NAME "${SYSTEM_NAME}32")
endif()
-#if (NOT IQTREE_FLAGS MATCHES "omp" AND NOT IQTREE_FLAGS MATCHES "avx" AND NOT IQTREE_FLAGS MATCHES "fma")
-# set (PROJECT_NAME_SUFFIX "${PROJECT_NAME_SUFFIX}-sse")
-#endif()
-
set(CPACK_PACKAGE_FILE_NAME
- "${CMAKE_PROJECT_NAME}${PROJECT_NAME_SUFFIX}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}-${SYSTEM_NAME}")
+ "${CMAKE_PROJECT_NAME}${EXE_SUFFIX}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}-${SYSTEM_NAME}")
set(CPACK_STRIP_FILES TRUE)
diff --git a/MPIHelper.cpp b/MPIHelper.cpp
new file mode 100644
index 0000000..a950e8b
--- /dev/null
+++ b/MPIHelper.cpp
@@ -0,0 +1,560 @@
+//
+// Created by tung on 6/18/15.
+//
+
+#include "MPIHelper.h"
+#include "timeutil.h"
+
+/**
+ * Return the single, lazily constructed MPIHelper object.
+ * When the code is built without _IQTREE_MPI the object is (re)configured on
+ * every call as a lone process with rank 0.
+ */
+MPIHelper& MPIHelper::getInstance() {
+    static MPIHelper singleton;
+#ifndef _IQTREE_MPI
+    singleton.setNumProcesses(1);
+    singleton.setProcessID(0);
+#endif
+    return singleton;
+}
+
+/**
+ * Post a non-blocking send of the given trees and scores to every other process.
+ * Each request/buffer pair is stored in sentMessages so that cleanUpMessages()
+ * can release it once the send has completed.
+ */
+void MPIHelper::distributeTrees(vector<string> &treeStrings, vector<double> &scores, int tag) {
+    if (getNumProcesses() == 1)
+        return;
+#ifdef _IQTREE_MPI
+    // label every tree with the rank of this (sending) process
+    vector<int> origin(scores.size(), getProcessID());
+    TreeCollection outgoing(treeStrings, scores, origin);
+    cleanUpMessages();
+    const int self = getProcessID();
+    for (int rank = 0; rank < getNumProcesses(); rank++) {
+        if (rank == self)
+            continue;
+        MPI_Request *req = new MPI_Request;
+        ObjectStream *payload = new ObjectStream(outgoing);
+        MPI_Isend(payload->getObjectData(), payload->getDataLength(), MPI_CHAR, rank, tag, MPI_COMM_WORLD, req);
+        sentMessages.push_back(make_pair(req, payload));
+        // poll the request once right after posting it
+        int done = 0;
+        MPI_Status st;
+        MPI_Test(req, &done, &st);
+    }
+    //numTreeSent += treeStrings.size();
+#endif
+}
+
+/**
+ * Single-tree convenience wrapper around distributeTrees().
+ * Increments numTreeSent by one and, at verbosity VB_MED or higher,
+ * prints the elapsed wall-clock time.
+ */
+void MPIHelper::distributeTree(string treeString, double score, int tag) {
+    if (getNumProcesses() == 1)
+        return;
+#ifdef _IQTREE_MPI
+    const double startTime = getRealTime();
+    vector<string> oneTree(1, treeString);
+    vector<double> oneScore(1, score);
+    distributeTrees(oneTree, oneScore, tag);
+    if (verbose_mode >= VB_MED)
+        cout << "Sent tree to other processes in " << getRealTime() - startTime << " seconds" << endl;
+    numTreeSent++;
+#endif
+}
+
+/**
+ * Post a non-blocking send of the given trees and scores to process `dest`.
+ * Does nothing when running as a single process or when dest is this process.
+ */
+void MPIHelper::sendTrees(int dest, vector<string> &treeStrings, vector<double> &scores, int tag) {
+    if (getNumProcesses() == 1 || dest == getProcessID())
+        return;
+#ifdef _IQTREE_MPI
+    // label every tree with the rank of this (sending) process
+    vector<int> origin(scores.size(), getProcessID());
+    TreeCollection outgoing(treeStrings, scores, origin);
+    cleanUpMessages();
+
+    MPI_Request *req = new MPI_Request;
+    ObjectStream *payload = new ObjectStream(outgoing);
+    MPI_Isend(payload->getObjectData(), payload->getDataLength(), MPI_CHAR, dest, tag, MPI_COMM_WORLD, req);
+    // keep request and buffer until cleanUpMessages() sees the send complete
+    sentMessages.push_back(make_pair(req, payload));
+    numTreeSent += treeStrings.size();
+
+    int done = 0;
+    MPI_Status st;
+    MPI_Test(req, &done, &st);
+#endif
+}
+
+/**
+ * Single-tree convenience wrapper around sendTrees().
+ */
+void MPIHelper::sendTree(int dest, string treeString, double score, int tag) {
+    if (getNumProcesses() == 1 || dest == getProcessID())
+        return;
+#ifdef _IQTREE_MPI
+    StrVector oneTree;
+    DoubleVector oneScore;
+    oneTree.push_back(treeString);
+    oneScore.push_back(score);
+    sendTrees(dest, oneTree, oneScore, tag);
+#endif
+}
+
+/**
+ * Blocking exchange with process `dest`: send the given trees, then wait for
+ * and receive one message (any tag) from `dest`.
+ *
+ * On return treeStrings/scores hold the received trees; they are left empty
+ * when the reply carries STOP_TAG.
+ *
+ * @return the tag of the received message, or `tag` unchanged when nothing is
+ *         exchanged (single process, dest is this process, or non-MPI build)
+ */
+int MPIHelper::sendRecvTrees(int dest, vector<string> &treeStrings, vector<double> &scores, int tag) {
+    if (getNumProcesses() == 1 || dest == getProcessID())
+        return tag;
+#ifdef _IQTREE_MPI
+    double beginTime = getRealTime();
+    // prepare message
+    vector<int> sourceProcID;
+    sourceProcID.insert(sourceProcID.end(), scores.size(), getProcessID());
+    TreeCollection outTrees(treeStrings, scores, sourceProcID);
+    {
+        // stack object: the send buffer is released as soon as MPI_Send returns
+        ObjectStream sendStream(outTrees);
+        MPI_Send(sendStream.getObjectData(), sendStream.getDataLength(), MPI_CHAR, dest, tag, MPI_COMM_WORLD);
+    }
+    numTreeSent += treeStrings.size();
+
+    // blocking probe to learn the size of the reply
+    MPI_Status status;
+    MPI_Probe(dest, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
+    int msgCount;
+    MPI_Get_count(&status, MPI_CHAR, &msgCount);
+
+    // receive the message
+    char *recvBuffer = new char[msgCount];
+    MPI_Recv(recvBuffer, msgCount, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
+    treeStrings.clear();
+    scores.clear();
+
+    if (status.MPI_TAG != STOP_TAG) {
+        // stack object instead of `new`: the heap-allocated stream used here
+        // previously was never deleted
+        ObjectStream recvStream(recvBuffer, msgCount);
+        TreeCollection curTrees = recvStream.getTreeCollection();
+        treeStrings = curTrees.getTreeStrings();
+        scores = curTrees.getScores();
+        numTreeReceived += treeStrings.size();
+    }
+    delete [] recvBuffer;
+
+    double endTime = getRealTime();
+    cout << "INFO: " << endTime - beginTime << " seconds for " << __func__ << endl;
+
+    return status.MPI_TAG;
+#else
+    return tag;
+#endif
+}
+
+/**
+ * Blocking counterpart of sendRecvTrees(): wait for a message from any
+ * process, reply to its sender with the given trees, then hand the received
+ * trees back through treeStrings/scores.
+ *
+ * When should_send[sender] is false the outgoing vectors are resized to one
+ * element before sending; should_send[sender] is always cleared afterwards.
+ *
+ * @return rank of the process the message came from, or 0 when nothing is
+ *         exchanged (single process or non-MPI build)
+ */
+int MPIHelper::recvSendTrees(vector<string> &treeStrings, vector<double> &scores, vector<bool> &should_send, int tag) {
+    if (getNumProcesses() == 1)
+        return 0;
+#ifdef _IQTREE_MPI
+    double beginTime = getRealTime();
+    // blocking probe
+    MPI_Status status;
+    MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
+    int msgCount;
+    MPI_Get_count(&status, MPI_CHAR, &msgCount);
+    int dest = status.MPI_SOURCE;
+
+    // receive the message
+    char *recvBuffer = new char[msgCount];
+    MPI_Recv(recvBuffer, msgCount, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
+
+    // now send message
+    if (!should_send[dest]) {
+        // NOTE(review): resize() only inserts the "notree"/-DBL_MAX fillers
+        // when the vectors were empty; otherwise it keeps the first element.
+        // Confirm this is the intended placeholder behaviour.
+        treeStrings.resize(1, "notree");
+        scores.resize(1, -DBL_MAX);
+    }
+    IntVector sourceProcID;
+    sourceProcID.insert(sourceProcID.end(), scores.size(), getProcessID());
+    TreeCollection outTrees(treeStrings, scores, sourceProcID);
+    {
+        // stack object: the send buffer is released as soon as MPI_Send returns
+        ObjectStream sendStream(outTrees);
+        MPI_Send(sendStream.getObjectData(), sendStream.getDataLength(), MPI_CHAR, dest, tag, MPI_COMM_WORLD);
+    }
+    numTreeSent += treeStrings.size();
+
+    // now extract trees from received buffer
+    treeStrings.clear();
+    scores.clear();
+    {
+        // stack object instead of `new`: the heap-allocated stream used here
+        // previously was never deleted
+        ObjectStream recvStream(recvBuffer, msgCount);
+        TreeCollection curTrees = recvStream.getTreeCollection();
+        treeStrings = curTrees.getTreeStrings();
+        scores = curTrees.getScores();
+    }
+    delete [] recvBuffer;
+    numTreeReceived += treeStrings.size();
+
+    should_send[dest] = false;
+
+    double endTime = getRealTime();
+    if (endTime - beginTime > 1)
+        cout << "WARNING: " << endTime - beginTime << " seconds for " << __func__ << endl;
+
+    return dest;
+#else
+    return 0;
+#endif
+}
+
+/**
+ * Collect trees on the master.
+ *
+ * Master: clears `trees`, then receives one message (any source, any tag) per
+ * worker and appends the decoded trees.
+ * Worker: sends `trees` to the master with TREE_TAG using a blocking send.
+ * Both sides print the elapsed wall-clock time.
+ */
+void MPIHelper::gatherTrees(TreeCollection &trees) {
+    if (getNumProcesses() == 1)
+        return;
+#ifdef _IQTREE_MPI
+    double beginTime = getRealTime();
+
+    if (isMaster()) {
+        trees.clear();
+        // Master: receive from all Workers
+        for (int w = 1; w < getNumProcesses(); w++) {
+            // blocking probe
+            MPI_Status status;
+            MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
+            int msgCount;
+            MPI_Get_count(&status, MPI_CHAR, &msgCount);
+            // receive the message
+            char *recvBuffer = new char[msgCount];
+            MPI_Recv(recvBuffer, msgCount, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
+            // stack object instead of `new`: one heap-allocated stream per
+            // worker was previously never deleted
+            ObjectStream recvStream(recvBuffer, msgCount);
+            delete [] recvBuffer; // the stream holds its own copy of the bytes
+            TreeCollection curTrees = recvStream.getTreeCollection();
+            trees.addTrees(curTrees);
+            numTreeReceived += curTrees.getNumTrees();
+        }
+        cout << trees.getNumTrees() << " trees gathered from workers in ";
+    } else {
+        // Worker: send trees to Master with a blocking send
+        ObjectStream sendStream(trees);
+        MPI_Send(sendStream.getObjectData(), sendStream.getDataLength(), MPI_CHAR, PROC_MASTER, TREE_TAG, MPI_COMM_WORLD);
+        numTreeSent += trees.getNumTrees();
+        cout << trees.getNumTrees() << " trees sent to master in ";
+    }
+
+    double endTime = getRealTime();
+    cout << endTime - beginTime << " seconds" << endl;
+#endif
+}
+
+/**
+ * Broadcast the master's tree collection to all workers.
+ * The byte count is broadcast first so that every process can allocate a
+ * buffer of the right size; on workers `trees` is replaced by the decoded data.
+ */
+void MPIHelper::broadcastTrees(TreeCollection &trees) {
+    if (getNumProcesses() == 1)
+        return;
+#ifdef _IQTREE_MPI
+    const double startTime = getRealTime();
+
+    // master serialises its trees; workers start with a zero length
+    ObjectStream *stream = NULL;
+    int numBytes = 0;
+    if (isMaster()) {
+        stream = new ObjectStream(trees);
+        numBytes = stream->getDataLength();
+    }
+
+    // step 1: everybody learns the payload size
+    MPI_Bcast(&numBytes, 1, MPI_INT, PROC_MASTER, MPI_COMM_WORLD);
+
+    char *buffer = new char[numBytes];
+    if (isMaster())
+        memcpy(buffer, stream->getObjectData(), numBytes);
+
+    // step 2: everybody receives the payload itself
+    MPI_Bcast(buffer, numBytes, MPI_CHAR, PROC_MASTER, MPI_COMM_WORLD);
+
+    if (isWorker()) {
+        stream = new ObjectStream(buffer, numBytes);
+        trees = stream->getTreeCollection();
+    }
+    delete stream;
+    delete [] buffer;
+
+    const double stopTime = getRealTime();
+    cout << trees.getNumTrees() << " trees broadcasted to workers in " << stopTime - startTime << " seconds" << endl;
+
+#endif
+}
+
+
+/**
+ * Non-blocking check for a pending incoming message from any source with any tag.
+ * Always false for a single process or a non-MPI build.
+ */
+bool MPIHelper::gotMessage() {
+    if (getNumProcesses() == 1)
+        return false;
+#ifdef _IQTREE_MPI
+    int pending = 0;
+    MPI_Status st;
+    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &pending, &st);
+    return pending != 0;
+#else
+    return false;
+#endif
+}
+
+/**
+ * Post a non-blocking send of a text message (including its terminating NUL)
+ * to every other process. Finished earlier sends are cleaned up first when
+ * the tag is STOP_TAG.
+ */
+void MPIHelper::sendMsg(int tag, string msg) {
+    if (getNumProcesses() == 1)
+        return;
+#ifdef _IQTREE_MPI
+    if (tag == STOP_TAG)
+        cleanUpMessages();
+    const int self = getProcessID();
+    for (int rank = 0; rank < getNumProcesses(); rank++) {
+        if (rank == self)
+            continue;
+        MPI_Request *req = new MPI_Request;
+        // +1 so that the receiver gets a NUL-terminated C string
+        ObjectStream *payload = new ObjectStream(msg.c_str(), msg.size()+1);
+        MPI_Isend(payload->getObjectData(), payload->getDataLength(), MPI_CHAR, rank, tag, MPI_COMM_WORLD, req);
+        sentMessages.push_back(make_pair(req, payload));
+        int done = 0;
+        MPI_Status st;
+        MPI_Test(req, &done, &st);
+    }
+#endif
+}
+
+/**
+ * Non-blocking check for a message with the given tag from the master.
+ * If one is pending it is received and its text is stored in `msg`.
+ *
+ * @param tag      MPI tag to look for
+ * @param[out] msg text of the received message (up to the first NUL byte)
+ * @return true if a message was received. Also returns true, without touching
+ *         `msg`, when running as a single process.
+ */
+bool MPIHelper::checkMsg(int tag, string &msg) {
+    // NOTE(review): the one-argument overload returns false for a single
+    // process while this one returns true; confirm which is intended.
+    if (getNumProcesses() == 1)
+        return true;
+#ifdef _IQTREE_MPI
+    int flag=0;
+    MPI_Status status;
+    // Check for incoming messages
+    MPI_Iprobe(PROC_MASTER, tag, MPI_COMM_WORLD, &flag, &status);
+    // flag == true if there is a message
+    if (flag) {
+        int numBytes;
+        MPI_Get_count(&status, MPI_CHAR, &numBytes);
+        char *recvBuffer = new char[numBytes];
+        MPI_Recv(recvBuffer, numBytes, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
+        // Bound the copy by the received size: an empty payload or one without
+        // a NUL terminator must not be read past the end of the buffer.
+        const void *nul = memchr(recvBuffer, '\0', numBytes);
+        size_t textLen = nul ? (size_t)((const char*)nul - recvBuffer) : (size_t)numBytes;
+        msg.assign(recvBuffer, textLen);
+        delete[] recvBuffer;
+        return true;
+    }
+#endif
+    return false;
+}
+
+/**
+ * Non-blocking check for a message with the given tag; the message text is
+ * printed to stdout when one is received.
+ * @return true if a message was received, false otherwise (always false for a
+ *         single process or a non-MPI build)
+ */
+bool MPIHelper::checkMsg(int tag) {
+    if (getNumProcesses() == 1) {
+        return false;
+    }
+#ifdef _IQTREE_MPI
+    string text;
+    const bool received = checkMsg(tag, text);
+    if (received)
+        cout << "Worker " << getProcessID() << " gets message " << text << endl;
+    return received;
+#else
+    return false;
+#endif
+}
+
+
+/**
+ * Receive pending tree messages carrying `tag` and append them to `trees`.
+ *
+ * The loop keeps polling while a message was just received or, when `fromAll`
+ * is set, until every other process has delivered at least one message. It
+ * stops early once `trees` holds at least `maxNumTrees` trees.
+ * If a received message carries STOP_TAG its text is printed, MPI is
+ * finalized and the program exits.
+ *
+ * @param fromAll     wait for at least one message from each other process
+ * @param maxNumTrees stop receiving once this many trees are in `trees`
+ * @param[out] trees  collection the received trees are appended to
+ * @param tag         MPI tag to receive
+ */
+void MPIHelper::receiveTrees(bool fromAll, int maxNumTrees, TreeCollection &trees, int tag) {
+    if (getNumProcesses() == 1) {
+        return;
+    }
+#ifdef _IQTREE_MPI
+    int flag = 0;
+    // number of distinct senders still awaited (only non-zero with fromAll)
+    int minNumTrees = fromAll ? getNumProcesses() - 1 : 0;
+    // heard[r] becomes true once rank r has delivered a message; this process
+    // counts as done. A vector replaces the former variable-length array,
+    // which is not standard C++.
+    vector<bool> heard(getNumProcesses(), false);
+    heard[getProcessID()] = true;
+    // Process all pending messages
+    MPI_Status status;
+    size_t totalMsgSize = 0;
+    do {
+        flag = 0;
+        // Check for incoming messages
+        MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &flag, &status);
+        if (!flag)
+            continue; // nothing pending: re-evaluate the loop condition
+        int numBytes;
+        MPI_Get_count(&status, MPI_CHAR, &numBytes);
+        totalMsgSize += numBytes;
+        char *recvBuffer = new char[numBytes];
+        MPI_Recv(recvBuffer, numBytes, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
+        ObjectStream os(recvBuffer, numBytes);
+        // The stream owns a copy of the bytes, so release the receive buffer
+        // right away; it used to leak when the maxNumTrees break was taken.
+        delete [] recvBuffer;
+        if (status.MPI_TAG == STOP_TAG) {
+            cout << os.getObjectData() << endl;
+            MPI_Finalize();
+            exit(0);
+        }
+        TreeCollection curTrees = os.getTreeCollection();
+        trees.addTrees(curTrees);
+        if (trees.getNumTrees() >= maxNumTrees) {
+            break;
+        }
+        if (fromAll && !heard[status.MPI_SOURCE]) {
+            heard[status.MPI_SOURCE] = true;
+            minNumTrees--;
+        }
+    } while (minNumTrees > 0 || flag);
+    numTreeReceived += trees.getNumTrees();
+    if (trees.getNumTrees() > 0) {
+        cout << "Proc " << getProcessID() << ": " << trees.getNumTrees() << " trees received from other processes (" << totalMsgSize << " bytes)" << endl;
+    }
+#endif
+}
+
+/**
+ * Receive at most one pending tree message carrying `tag` (non-blocking) and
+ * append its trees to `trees`.
+ * @return rank of the sender, or -1 if no message was pending (also for a
+ *         single process or a non-MPI build)
+ */
+int MPIHelper::receiveTrees(TreeCollection &trees, int tag) {
+    if (getNumProcesses() == 1) {
+        return -1;
+    }
+#ifdef _IQTREE_MPI
+    int pending = 0;
+    MPI_Status st;
+    MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &pending, &st);
+    if (pending) {
+        // size the buffer from the probed message, then fetch it
+        int byteCount;
+        MPI_Get_count(&st, MPI_CHAR, &byteCount);
+        char *buffer = new char[byteCount];
+        MPI_Recv(buffer, byteCount, MPI_CHAR, st.MPI_SOURCE, st.MPI_TAG, MPI_COMM_WORLD, &st);
+        ObjectStream decoder(buffer, byteCount);
+        TreeCollection incoming = decoder.getTreeCollection();
+        trees.addTrees(incoming);
+        delete [] buffer;
+        return st.MPI_SOURCE;
+    }
+#endif
+    return -1;
+}
+
+/**
+ * Release the request and buffer of every tracked send that MPI_Test reports
+ * as finished, and drop it from sentMessages.
+ * @return number of entries released (0 in a non-MPI build)
+ */
+int MPIHelper::cleanUpMessages() {
+#ifdef _IQTREE_MPI
+    int released = 0;
+    // walk by index: erase() shifts later entries down, so the index is only
+    // advanced when the current entry is kept
+    int idx = 0;
+    while (idx < sentMessages.size()) {
+        int done = 0;
+        MPI_Status st;
+        MPI_Test(sentMessages[idx].first, &done, &st);
+        if (!done) {
+            idx++;
+            continue;
+        }
+        delete sentMessages[idx].first;
+        delete sentMessages[idx].second;
+        released++;
+        sentMessages.erase(sentMessages.begin()+idx);
+    }
+    if (verbose_mode >= VB_MED && released)
+        cout << released << " messages sent and cleaned up" << endl;
+    return released;
+#else
+    return 0;
+#endif
+}
+
+#ifdef _IQTREE_MPI
+/** Blocking send of a string, including its terminating NUL, to process `dest`. */
+void MPIHelper::sendString(string &str, int dest, int tag) {
+    char *payload = (char*)str.c_str();
+    MPI_Send(payload, str.length()+1, MPI_CHAR, dest, tag, MPI_COMM_WORLD);
+}
+
+/** Dump the checkpoint to text and send it to process `dest` with TREE_TAG. */
+void MPIHelper::sendCheckpoint(Checkpoint *ckp, int dest) {
+    stringstream dumped;
+    ckp->dump(dumped);
+    string text = dumped.str();
+    sendString(text, dest, TREE_TAG);
+}
+
+
+/**
+ * Blocking receive of a string sent with sendString().
+ *
+ * @param[out] str text of the received message (up to the first NUL byte)
+ * @param src      source rank to receive from
+ * @param tag      MPI tag to receive
+ * @return rank of the process that sent the message
+ */
+int MPIHelper::recvString(string &str, int src, int tag) {
+    MPI_Status status;
+    MPI_Probe(src, tag, MPI_COMM_WORLD, &status);
+    int msgCount;
+    MPI_Get_count(&status, MPI_CHAR, &msgCount);
+    // receive the message
+    char *recvBuffer = new char[msgCount];
+    MPI_Recv(recvBuffer, msgCount, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
+    // Bound the copy by the received size: an empty payload or one without a
+    // NUL terminator must not be read past the end of the buffer.
+    const void *nul = memchr(recvBuffer, '\0', msgCount);
+    size_t textLen = nul ? (size_t)((const char*)nul - recvBuffer) : (size_t)msgCount;
+    str.assign(recvBuffer, textLen);
+    delete [] recvBuffer;
+    return status.MPI_SOURCE;
+}
+
+/**
+ * Receive a TREE_TAG string from `src` and load it into the checkpoint.
+ * @return rank of the process that sent the message
+ */
+int MPIHelper::recvCheckpoint(Checkpoint *ckp, int src) {
+    string text;
+    const int sender = recvString(text, src, TREE_TAG);
+    stringstream in(text);
+    ckp->load(in);
+    return sender;
+}
+
+/**
+ * Broadcast the master's checkpoint to all workers.
+ * The master dumps its checkpoint to text; the byte count (including the
+ * terminating NUL) is broadcast first, then the text, which workers load.
+ */
+void MPIHelper::broadcastCheckpoint(Checkpoint *ckp) {
+    stringstream ss;
+    string text;
+    int numBytes = 0;
+    if (isMaster()) {
+        ckp->dump(ss);
+        text = ss.str();
+        numBytes = text.length()+1;
+    }
+
+    // step 1: everybody learns the payload size
+    MPI_Bcast(&numBytes, 1, MPI_INT, PROC_MASTER, MPI_COMM_WORLD);
+
+    char *buffer = new char[numBytes];
+    if (isMaster())
+        memcpy(buffer, text.c_str(), numBytes);
+
+    // step 2: everybody receives the checkpoint text
+    MPI_Bcast(buffer, numBytes, MPI_CHAR, PROC_MASTER, MPI_COMM_WORLD);
+
+    if (isWorker()) {
+        ss.clear();
+        ss.str(buffer);
+        ckp->load(ss);
+    }
+    delete [] buffer;
+}
+
+/**
+ * Gather the checkpoints of all processes on the master.
+ * Every process dumps its checkpoint to text; the lengths are gathered first,
+ * then the texts are gathered back to back into one zero-filled buffer, which
+ * the master loads into its own checkpoint.
+ */
+void MPIHelper::gatherCheckpoint(Checkpoint *ckp) {
+    stringstream ss;
+    ckp->dump(ss);
+    string text = ss.str();
+    int myCount = text.length();
+
+    const bool master = isMaster();
+    int *counts = NULL, *offsets = NULL;
+    char *gathered = NULL;
+
+    // step 1: the master learns how many bytes each process contributes
+    if (master) {
+        counts = new int[getNumProcesses()];
+        offsets = new int[getNumProcesses()];
+    }
+    MPI_Gather(&myCount, 1, MPI_INT, counts, 1, MPI_INT, PROC_MASTER, MPI_COMM_WORLD);
+
+    // step 2: the master lays the contributions out back to back
+    if (master) {
+        int total = 0;
+        for (int rank = 0; rank < getNumProcesses(); rank++) {
+            offsets[rank] = total;
+            total += counts[rank];
+        }
+        // one extra zeroed byte terminates the combined text
+        gathered = new char[total+1];
+        memset(gathered, 0, total+1);
+    }
+    char *payload = (char*)text.c_str();
+    MPI_Gatherv(payload, myCount, MPI_CHAR, gathered, counts, offsets, MPI_CHAR, PROC_MASTER, MPI_COMM_WORLD);
+
+    if (master) {
+        // decode the combined text into the master's checkpoint
+        ss.clear();
+        ss.str(gathered);
+        ckp->load(ss);
+
+        delete [] gathered;
+        delete [] offsets;
+        delete [] counts;
+    }
+}
+
+#endif
+
+/** Destructor. Does nothing; entries still held in sentMessages are not released here. */
+MPIHelper::~MPIHelper() {
+}
+
diff --git a/MPIHelper.h b/MPIHelper.h
new file mode 100644
index 0000000..615ef48
--- /dev/null
+++ b/MPIHelper.h
@@ -0,0 +1,305 @@
+/***************************************************************************
+ * Copyright (C) 2015 by *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
+#ifndef MPIHELPER_H
+#define MPIHELPER_H
+
+#include <string>
+#include <vector>
+#include "tools.h"
+#include "TreeCollection.h"
+#include "ObjectStream.h"
+
+#ifdef _IQTREE_MPI
+
+#include <mpi.h>
+
+#endif
+
+#define PROC_MASTER 0
+#define TREE_TAG 1 // Message contain trees
+#define STOP_TAG 2 // Stop message
+#define BOOT_TAG 3 // Message to please send bootstrap trees
+#define BOOT_TREE_TAG 4 // bootstrap tree tag
+#define LOGL_CUTOFF_TAG 5 // send logl_cutoff for ultrafast bootstrap
+
+using namespace std;
+
+/**
+ * Singleton wrapper around the MPI calls used to exchange trees, text
+ * messages and checkpoints between processes. It also keeps simple counters
+ * of trees sent and received and of NNI searches.
+ */
+class MPIHelper {
+public:
+    /**
+     *  Singleton method: get the one and only instance of the class
+     */
+    static MPIHelper &getInstance();
+
+    /**
+     *  destructor
+     */
+    ~MPIHelper();
+
+    int getNumProcesses() const {
+        return numProcesses;
+    }
+
+    void setNumProcesses(int numProcesses) {
+        MPIHelper::numProcesses = numProcesses;
+    }
+
+    int getProcessID() const {
+        return processID;
+    }
+
+    /** @return true if this process has rank PROC_MASTER */
+    bool isMaster() const {
+        return processID == PROC_MASTER;
+    }
+
+    /** @return true if this process has any rank other than PROC_MASTER */
+    bool isWorker() const {
+        return processID != PROC_MASTER;
+    }
+
+    void setProcessID(int processID) {
+        MPIHelper::processID = processID;
+    }
+
+    /** @return true if got any message from another process */
+    bool gotMessage();
+
+    /**
+     *  Receive trees that were sent to the current process
+     *
+     *  @param fromAll
+     *      wait until at least one tree from each remaining process has been received
+     *  @param maxNumTrees
+     *      Only receive up to maxNumTrees, to prevent the function from blocking because it can
+     *      constantly receive new trees
+     *  @param trees[OUT]
+     *      Trees received from other processes
+     *  @param tag MPI tag
+     */
+    void receiveTrees(bool fromAll, int maxNumTrees, TreeCollection &trees, int tag);
+
+
+    /**
+     *  Receive trees that were sent to the current process (at most one pending message)
+     *
+     *  @param trees[OUT]
+     *      Trees received from other processes
+     *  @param tag MPI tag
+     *  @return source process ID, or -1 if no message was received
+     */
+    int receiveTrees(TreeCollection &trees, int tag);
+
+    /**
+     *   Send trees to all other processes
+     *   @param treeStrings vector of trees
+     *   @param scores vector containing scores of the trees with same order as in treeStrings
+     *   @param tag used to classify the message
+     */
+    void distributeTrees(vector<string> &treeStrings, vector<double> &scores, int tag = TREE_TAG);
+
+    /**
+     *   Similar to distributeTrees but only 1 tree is sent
+     *   @param treeString
+     *   @param score
+     *   @param tag
+     */
+    void distributeTree(string treeString, double score, int tag);
+
+    /**
+     *   Send trees to a dest process
+     *   @param dest MPI rank of destination process
+     *   @param treeStrings vector of trees
+     *   @param scores vector containing scores of the trees with same order as in treeStrings
+     *   @param tag used to classify the message
+     */
+    void sendTrees(int dest, vector<string> &treeStrings, vector<double> &scores, int tag);
+
+    /**
+     *   Send one tree to a dest process
+     *   @param dest MPI rank of destination process
+     *   @param treeString NEWICK tree string
+     *   @param score its score
+     *   @param tag used to classify the message
+     */
+    void sendTree(int dest, string treeString, double score, int tag);
+
+    /**
+     *   Blocking send and then receive trees with a dest process
+     *   @param dest MPI rank of destination process
+     *   @param[in,out] treeStrings NEWICK tree strings: sent on input, received on output
+     *   @param[in,out] scores their scores
+     *   @param tag used to classify the message
+     *   @return the tag of the received message
+     */
+    int sendRecvTrees(int dest, vector<string> &treeStrings, vector<double> &scores, int tag);
+
+    /**
+     *   Blocking receive from any process and then send trees back to the sender
+     *   @param[in,out] treeStrings NEWICK tree strings: sent on input, received on output
+     *   @param[in,out] scores their scores
+     *   @param[in,out] should_send per-rank flags; the sender's flag is read before replying
+     *      and set to false afterwards
+     *   @param tag used to classify the reply
+     *   @return MPI rank of the process the message was received from
+     */
+    int recvSendTrees(vector<string> &treeStrings, vector<double> &scores, vector<bool> &should_send, int tag);
+
+    /**
+        gather trees from workers to master
+    */
+    void gatherTrees(TreeCollection &trees);
+
+    /**
+        broadcast trees from master to workers
+    */
+    void broadcastTrees(TreeCollection &trees);
+
+    /**
+     *  Send a message to other processes, e.g. STOP_TAG
+     */
+    void sendMsg(int tag, string msg);
+
+    /**
+     *  Check if a message is received, e.g. STOP_TAG
+     */
+    bool checkMsg(int tag);
+
+    /**
+     *  Check if a message is received, e.g. STOP_TAG
+     */
+    bool checkMsg(int tag, string &msg);
+
+#ifdef _IQTREE_MPI
+    /** wrapper for MPI_Send a string
+        @param str string to send
+        @param dest destination process
+        @param tag message tag
+    */
+    void sendString(string &str, int dest, int tag);
+
+    /** wrapper for MPI_Recv a string
+        @param[out] str string received
+        @param src source process
+        @param tag message tag
+        @return the source process that sent the message
+    */
+    int recvString(string &str, int src = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG);
+
+    /** wrapper for MPI_Send an entire Checkpoint object
+        @param ckp Checkpoint object to send
+        @param dest destination process
+    */
+    void sendCheckpoint(Checkpoint *ckp, int dest);
+
+    /** wrapper for MPI_Recv an entire Checkpoint object
+        @param[out] ckp Checkpoint object received
+        @param src source process
+        @return the source process that sent the message
+    */
+    int recvCheckpoint(Checkpoint *ckp, int src = MPI_ANY_SOURCE);
+
+    /**
+        wrapper for MPI_Bcast to broadcast checkpoint from Master to all Workers
+        @param ckp Checkpoint object
+    */
+    void broadcastCheckpoint(Checkpoint *ckp);
+
+    /**
+        wrapper for MPI_Gather to gather all checkpoints into Master
+        @param ckp Checkpoint object
+    */
+    void gatherCheckpoint(Checkpoint *ckp);
+#endif
+
+    void increaseTreeSent(int inc = 1) {
+        numTreeSent += inc;
+    }
+
+    void increaseTreeReceived(int inc = 1) {
+        numTreeReceived += inc;
+    }
+
+private:
+    /**
+     *  Remove the buffers for finished messages
+     */
+    int cleanUpMessages();
+
+private:
+    // Private constructor (singleton). All members get defined start values:
+    // previously they were left uninitialised, so counters such as numTreeSent
+    // could be incremented before resetNumbers() was ever called.
+    MPIHelper() : processID(PROC_MASTER), numProcesses(1),
+                  numTreeSent(0), numTreeReceived(0), numNNISearch(0) { };
+    MPIHelper(MPIHelper const &) { }; // Disable copy constructor
+    void operator=(MPIHelper const &) { }; // Disable assignment
+
+    int processID;
+
+    int numProcesses;
+
+public:
+    int getNumTreeReceived() const {
+        return numTreeReceived;
+    }
+
+    void setNumTreeReceived(int numTreeReceived) {
+        MPIHelper::numTreeReceived = numTreeReceived;
+    }
+
+    int getNumTreeSent() const {
+        return numTreeSent;
+    }
+
+    void setNumTreeSent(int numTreeSent) {
+        MPIHelper::numTreeSent = numTreeSent;
+    }
+
+    /** Reset the sent/received tree counters and the NNI search counter to zero. */
+    void resetNumbers() {
+        numTreeSent = 0;
+        numTreeReceived = 0;
+        numNNISearch = 0;
+    }
+
+private:
+    int numTreeSent;
+
+    int numTreeReceived;
+
+public:
+    int getNumNNISearch() const {
+        return numNNISearch;
+    }
+
+    void setNumNNISearch(int numNNISearch) {
+        MPIHelper::numNNISearch = numNNISearch;
+    }
+
+private:
+    int numNNISearch;
+
+#ifdef _IQTREE_MPI
+    // A list storing messages and the corresponding requests that have been sent from the current process.
+    // When a message has been successfully received, it will be deleted from the list
+    vector< pair<MPI_Request *, ObjectStream *> > sentMessages;
+#endif
+
+
+};
+
+#endif
diff --git a/ObjectStream.cpp b/ObjectStream.cpp
new file mode 100644
index 0000000..f06efea
--- /dev/null
+++ b/ObjectStream.cpp
@@ -0,0 +1,113 @@
+//
+// Created by tung on 6/23/15.
+//
+
+#include "ObjectStream.h"
+
+/** Build a stream holding its own copy of `length` bytes starting at `data`. */
+ObjectStream::ObjectStream(const char *data, size_t length) {
+    objectDataSize = length;
+    objectData = new char[objectDataSize];
+    memcpy(objectData, data, objectDataSize);
+}
+
+/** Build a stream containing the serialised form of `trees`. */
+ObjectStream::ObjectStream(TreeCollection &trees) {
+    // start empty so that initFromTreeCollection() has no old buffer to free
+    objectDataSize = 0;
+    objectData = NULL;
+    initFromTreeCollection(trees);
+}
+
+/**
+ * Serialise `trees` into objectData, replacing any previous content.
+ *
+ * Layout: three size_t values (byte sizes of the string, score and
+ * source-rank blocks) followed by those three blocks in that order.
+ */
+void ObjectStream::initFromTreeCollection(TreeCollection &trees) {
+    vector<string> newicks = trees.getTreeStrings();
+    vector<double> logls = trees.getScores();
+    vector<int> ranks = trees.getSourceProcID();
+
+    char* packedStrings;
+    const size_t stringBytes = serializeStrings(newicks, packedStrings);
+    const size_t scoreBytes = logls.size() * sizeof(double);
+    const size_t rankBytes = ranks.size() * sizeof(int);
+
+    objectDataSize = sizeof(size_t) * 3 + stringBytes + scoreBytes + rankBytes;
+
+    if (objectData != NULL) {
+        delete[] objectData;
+    }
+    objectData = new char[objectDataSize];
+
+    char* cursor = objectData;
+
+    // header: the three block sizes
+    memcpy(cursor, &stringBytes, sizeof(size_t));
+    cursor += sizeof(size_t);
+    memcpy(cursor, &scoreBytes, sizeof(size_t));
+    cursor += sizeof(size_t);
+    memcpy(cursor, &rankBytes, sizeof(size_t));
+    cursor += sizeof(size_t);
+
+    // payload: strings, then scores, then source ranks
+    memcpy(cursor, packedStrings, stringBytes);
+    cursor += stringBytes;
+
+    memcpy(cursor, logls.data(), scoreBytes);
+    cursor += scoreBytes;
+
+    memcpy(cursor, ranks.data(), rankBytes);
+
+    delete [] packedStrings;
+}
+
+/**
+ * Rebuild a TreeCollection from the byte stream written by
+ * initFromTreeCollection(): a header of three size_t block sizes followed by
+ * the string block, the score block and the source-rank block.
+ *
+ * The score and rank arrays are decoded straight into vectors. The previous
+ * version used stack variable-length arrays sized from the message, which is
+ * not standard C++, is zero-length for an empty collection and can overflow
+ * the stack for large messages.
+ */
+TreeCollection ObjectStream::getTreeCollection() {
+    const size_t headerSize = sizeof(size_t) * 3;
+    // the stream must at least contain the header
+    assert(objectData != NULL && objectDataSize >= headerSize);
+
+    size_t metaInfo[3];
+    memcpy(metaInfo, objectData, headerSize);
+    size_t stringDataSize = metaInfo[0];
+    size_t doubleDataSize = metaInfo[1];
+    size_t intDataSize = metaInfo[2];
+    // the three blocks announced by the header must fit into the stream
+    assert(stringDataSize + doubleDataSize + intDataSize <= objectDataSize - headerSize);
+
+    size_t numTrees = doubleDataSize / sizeof(double);
+    vector<string> treeStrings;
+    deserializeStrings(objectData + headerSize, stringDataSize, treeStrings);
+    assert(treeStrings.size() == numTrees);
+
+    // memcpy rather than pointer casts: the blocks are not necessarily aligned
+    vector<double> scores(numTrees);
+    if (numTrees > 0)
+        memcpy(scores.data(), objectData + headerSize + stringDataSize, numTrees * sizeof(double));
+
+    // size the rank vector from its own block so the copy can never overrun it
+    size_t numRanks = intDataSize / sizeof(int);
+    vector<int> sourceProcID(numRanks);
+    if (numRanks > 0)
+        memcpy(sourceProcID.data(), objectData + headerSize + stringDataSize + doubleDataSize, numRanks * sizeof(int));
+
+    TreeCollection decodedTrees(treeStrings, scores, sourceProcID);
+    return decodedTrees;
+}
+
+
+/**
+ * Pack the strings back to back, each followed by a NUL byte, into a newly
+ * allocated array returned through `data` (released by the caller with delete[]).
+ * @return number of bytes written to `data`
+ */
+size_t ObjectStream::serializeStrings(vector<string> &strings, char *&data) {
+    const size_t count = strings.size();
+
+    // first pass: bytes needed, one terminator per string
+    size_t bytesNeeded = 0;
+    for (size_t k = 0; k < count; k++)
+        bytesNeeded += strings[k].length() + 1;
+
+    // second pass: copy each string together with its terminator
+    data = new char[bytesNeeded];
+    char* cursor = data;
+    for (size_t k = 0; k < count; k++) {
+        const size_t chunk = strings[k].length() + 1;
+        strncpy(cursor, strings[k].c_str(), chunk);
+        cursor += chunk;
+    }
+    return bytesNeeded;
+}
+
+/**
+ * Split a block of NUL-terminated strings into a vector.
+ *
+ * Each string is built directly from its byte range instead of being pushed
+ * character by character through a stringstream, and the index is a size_t so
+ * that it matches `length` (the former int index was compared against size_t).
+ *
+ * @param data          start of the block
+ * @param length        number of bytes in the block
+ * @param[out] strings  receives one entry per NUL terminator found; bytes
+ *                      after the last terminator are ignored
+ */
+void ObjectStream::deserializeStrings(char *data, size_t length, vector<string> &strings) {
+    strings.clear();
+    size_t start = 0; // first byte of the string currently being scanned
+    for (size_t i = 0; i < length; i++) {
+        if (data[i] == '\0') {
+            strings.push_back(string(data + start, i - start));
+            start = i + 1;
+        }
+    }
+}
+
+
diff --git a/ObjectStream.h b/ObjectStream.h
new file mode 100644
index 0000000..af6f46d
--- /dev/null
+++ b/ObjectStream.h
@@ -0,0 +1,82 @@
+//
+// Created by tung on 6/23/15.
+//
+
+#ifndef IQTREE_OBJECTSTREAM_H
+#define IQTREE_OBJECTSTREAM_H
+#include "TreeCollection.h"
+
+/**
+ * This class is used to serialize object. It converts different object to byte stream
+ * and can also read in byte stream to reconstruct the object
+ */
+/**
+ * Owns a heap-allocated byte buffer that holds either a raw copy of caller
+ * data or the serialised form of a TreeCollection.
+ *
+ * NOTE(review): the class declares no copy constructor or assignment operator,
+ * so copying an ObjectStream would make two objects delete the same buffer.
+ * The visible callers only use pointers or uncopied locals; confirm no other
+ * code copies it.
+ */
+class ObjectStream {
+public:
+
+    /**
+     *  Constructor: copy `length` bytes from `data` into the stream
+     */
+    ObjectStream(const char* data, size_t length);
+
+    /**
+     *  Constructor: serialise `trees` into the stream
+     */
+    ObjectStream(TreeCollection& trees);
+
+    /**
+     *  Constructor: empty stream. The size is now initialised as well, so that
+     *  getDataLength() returns 0 instead of an indeterminate value.
+     */
+    ObjectStream() {
+        objectData = NULL;
+        objectDataSize = 0;
+    }
+
+    virtual ~ObjectStream() {
+        // delete[] on a NULL pointer is a no-op, so no check is needed
+        delete [] objectData;
+    }
+
+    /**
+     *  Convert a tree collection into the internal byte stream
+     *  @param[IN] trees
+     */
+    void initFromTreeCollection(TreeCollection &trees);
+
+    /**
+     *  Reconstruct TreeCollection from a byte stream
+     */
+    TreeCollection getTreeCollection();
+
+
+public:
+    /** @return number of bytes in the stream */
+    size_t getDataLength() const {
+        return objectDataSize;
+    }
+
+public:
+    /** @return pointer to the internal buffer (still owned by this object) */
+    char *getObjectData() const {
+        return objectData;
+    }
+
+private:
+    /**
+     *  Byte stream representing the object
+     */
+    char* objectData;
+
+    /**
+     *  Number of bytes in objectData
+     */
+    size_t objectDataSize;
+
+
+    /**
+     *  Convert vector of strings to array of chars
+     *  @param [IN] strings the vector strings
+     *  @param [OUT] data the char array
+     *  @return size of the char array
+     */
+    size_t serializeStrings(vector<string> &strings, char *&data);
+
+    /**
+     *  Convert array of chars to vector of strings
+     *  @param [IN] data byte stream representing vector<string>
+     *  @param [IN] length size of data
+     *  @param [OUT] strings the reconstructed vector<string>
+     */
+    void deserializeStrings(char *data, size_t length, vector<string> &strings);
+
+};
+#endif // IQTREE_OBJECTSTREAM_H
+
+
diff --git a/README.md b/README.md
index 9f60b26..76274a6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,71 @@
IQ-TREE
--------
+=======
-Efficient phylogenetic software by maximum likelihood
+Efficient and versatile phylogenomic software by maximum likelihood <http://www.iqtree.org>
-Please see our github wiki for more information: <https://github.com/Cibiv/IQ-TREE/wiki>
+Introduction
+------------
+
+The IQ-TREE software was created as the successor of IQPNNI and [TREE-PUZZLE](http://www.tree-puzzle.de) (thus the name IQ-TREE). IQ-TREE was motivated by the rapid accumulation of phylogenomic data, leading to a need for efficient phylogenomic software that can handle a large amount of data and provide more complex models of sequence evolution. To this end, IQ-TREE can utilize multicore computers and distributed parallel computing to speed up the analysis. IQ-TREE automatically performs [...]
+
+As input IQ-TREE accepts all common sequence alignment formats including PHYLIP, FASTA, Nexus, Clustal and MSF. As output IQ-TREE will write a self-readable report file (name suffix `.iqtree`), a NEWICK tree file (`.treefile`) which can be visualized by tree viewer programs such as [FigTree](http://tree.bio.ed.ac.uk/software/figtree/), [Dendroscope](http://dendroscope.org) or [iTOL](http://itol.embl.de).
+
+
+Key features of IQ-TREE
+-----------------------
+
+* __Efficient search algorithm__: Fast and effective stochastic algorithm to reconstruct phylogenetic trees by maximum likelihood. IQ-TREE compares favorably to RAxML and PhyML in terms of likelihood while requiring similar amount of computing time ([Nguyen et al., 2015]).
+* __Ultrafast bootstrap__: An ultrafast bootstrap approximation (UFBoot) to assess branch supports. UFBoot is 10 to 40 times faster than RAxML rapid bootstrap and obtains less biased support values ([Minh et al., 2013]).
+* __Ultrafast model selection__: An ultrafast and automatic model selection (ModelFinder) which is 10 to 100 times faster than jModelTest and ProtTest. ModelFinder also finds best-fit partitioning scheme like PartitionFinder.
+* __Phylogenetic testing__: Several fast branch tests like SH-aLRT and aBayes test ([Anisimova et al., 2011]) and tree topology tests like the approximately unbiased (AU) test ([Shimodaira, 2002]).
+
+
+The strength of IQ-TREE is the availability of a wide variety of phylogenetic models:
+
+* __Common models__: All [common substitution models](http://www.iqtree.org/doc/Substitution-Models) for DNA, protein, codon, binary and morphological data with [rate heterogeneity among sites](http://www.iqtree.org/doc/Substitution-Models/#rate-heterogeneity-across-sites) and [ascertainment bias correction](http://www.iqtree.org/doc/Substitution-Models/#ascertainment-bias-correction) for e.g. SNP data.
+* __[Partition models](http://www.iqtree.org/doc/Complex-Models/#partition-models)__: Allowing individual models for different genomic loci (e.g. genes or codon positions), mixed data types, mixed rate heterogeneity types, linked or unlinked branch lengths between partitions.
+* __Mixture Models__: [fully customizable mixture models](http://www.iqtree.org/doc/Complex-Models/#mixture-models) and [empirical protein mixture models](http://www.iqtree.org/doc/Substitution-Models/#protein-models).
+
+IQ-TREE web service
+-------------------
+
+For a quick start you can also try the IQ-TREE web server, which performs online computation using a dedicated computing cluster. It is very easy to use with as few as just 3 clicks! Try it out at
+
+<http://iqtree.cibiv.univie.ac.at>
+
+
+User support
+------------
+
+Please refer to the [user documentation](http://www.iqtree.org/doc/) and [frequently asked questions](http://www.iqtree.org/doc/Frequently-Asked-Questions). If you have further questions, feedback, feature requests, or bug reports, please sign up for the following Google group (if not done yet) and post a topic there:
+
+<https://groups.google.com/d/forum/iqtree>
+
+_The average response time is one working day._
+
+Citations
+---------
+
+To cite IQ-TREE please use:
+
+* L.-T. Nguyen, H.A. Schmidt, A. von Haeseler, and B.Q. Minh (2015) IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies. *Mol. Biol. Evol.*, 32, 268-274. [DOI: 10.1093/molbev/msu300](http://dx.doi.org/10.1093/molbev/msu300)
+
+For the ultrafast bootstrap (UFBoot) please cite:
+
+* B.Q. Minh, M.A.T. Nguyen, and A. von Haeseler (2013) Ultrafast approximation for phylogenetic bootstrap. *Mol. Biol. Evol.*, 30:1188-1195. [DOI: 10.1093/molbev/mst024](http://dx.doi.org/10.1093/molbev/mst024)
+
+#### Credits and Acknowledgements
+
+Some parts of the code were taken from the following packages/libraries: [Phylogenetic likelihood library](http://www.libpll.org), [TREE-PUZZLE](http://www.tree-puzzle.de),
+[BIONJ](http://dx.doi.org/10.1093/oxfordjournals.molbev.a025808), [Nexus Class Library](http://dx.doi.org/10.1093/bioinformatics/btg319), [Eigen library](http://eigen.tuxfamily.org/),
+[SPRNG library](http://www.sprng.org), [Zlib library](http://www.zlib.net), gzstream library, [vectorclass library](http://www.agner.org/optimize/), [GNU scientific library](https://www.gnu.org/software/gsl/).
+
+
+IQ-TREE was partially funded by the [Austrian Science Fund - FWF](http://www.fwf.ac.at/) (grant no. I760-B17 from 2012-2015 and I 2508-B29 from 2016-2019) and the [University of Vienna](https://www.univie.ac.at/) (Initiativkolleg I059-N).
+
+
+[Anisimova et al., 2011]: http://dx.doi.org/10.1093/sysbio/syr041
+[Guindon et al., 2010]: http://dx.doi.org/10.1093/sysbio/syq010
+[Minh et al., 2013]: http://dx.doi.org/10.1093/molbev/mst024
+[Nguyen et al., 2015]: http://dx.doi.org/10.1093/molbev/msu300
+[Shimodaira, 2002]: http://dx.doi.org/10.1080/10635150290069913
diff --git a/TreeCollection.cpp b/TreeCollection.cpp
new file mode 100644
index 0000000..26d702c
--- /dev/null
+++ b/TreeCollection.cpp
@@ -0,0 +1,56 @@
+//
+// Created by Tung Nguyen on 6/23/15.
+//
+
+#include "TreeCollection.h"
+#include "MPIHelper.h"
+
+using namespace std;
+
+/**
+ * Build a collection from three vectors, each of which is copied.
+ * Only the sizes of trees and scores are asserted to be equal.
+ */
+TreeCollection::TreeCollection(vector<string>& trees, vector<double>& scores, vector<int> &sourceProcID) {
+    assert(trees.size() == scores.size());
+    // the parameters shadow the members, hence the qualified member names
+    TreeCollection::treeStrings = trees;
+    TreeCollection::scores = scores;
+    TreeCollection::sourceProcID = sourceProcID;
+}
+
+/**
+ * Get the i-th tree and its score.
+ * @param i zero-based index of the tree
+ * @return pair of (tree string, score)
+ */
+pair<string, double> TreeCollection::getTree(int i) {
+    assert(treeStrings.size() == scores.size());
+    // reject out-of-range indices instead of reading past the end of the vectors
+    assert(i >= 0 && (size_t) i < treeStrings.size());
+    return std::make_pair(treeStrings[i], scores[i]);
+}
+
+/**
+ * Empty the collection: tree strings, scores and source process IDs.
+ */
+void TreeCollection::clear() {
+    sourceProcID.clear();
+    scores.clear();
+    treeStrings.clear();
+}
+
+/**
+ * Append all trees, scores and source process IDs of another collection.
+ * @param trees the collection to append; it may be this collection itself
+ */
+void TreeCollection::addTrees(TreeCollection &trees) {
+    if (&trees == this) {
+        // Inserting a range that comes from the destination vector itself is
+        // undefined, so append from a temporary copy instead
+        TreeCollection snapshot(trees);
+        addTrees(snapshot);
+        return;
+    }
+    treeStrings.insert(treeStrings.end(), trees.treeStrings.begin(), trees.treeStrings.end());
+    scores.insert(scores.end(), trees.scores.begin(), trees.scores.end());
+    sourceProcID.insert(sourceProcID.end(), trees.sourceProcID.begin(), trees.sourceProcID.end());
+}
+
+/**
+ * Append every tree of a candidate set, visiting the set in reverse order
+ * (from rbegin() to rend()). Each appended tree is tagged with the ID of the
+ * current process as reported by MPIHelper.
+ */
+void TreeCollection::addTrees(CandidateSet &candidateTrees) {
+    CandidateSet::reverse_iterator cand = candidateTrees.rbegin();
+    while (cand != candidateTrees.rend()) {
+        treeStrings.push_back(cand->second.tree);
+        scores.push_back(cand->first);
+        sourceProcID.push_back(MPIHelper::getInstance().getProcessID());
+        cand++;
+    }
+}
+
+
+
+/**
+ * @return number of stored trees; asserts that there is one score per tree
+ */
+size_t TreeCollection::getNumTrees() {
+    assert(treeStrings.size() == scores.size());
+    return treeStrings.size();
+}
diff --git a/TreeCollection.h b/TreeCollection.h
new file mode 100644
index 0000000..aecf328
--- /dev/null
+++ b/TreeCollection.h
@@ -0,0 +1,63 @@
+//
+// Created by tung on 6/23/15.
+//
+
+#ifndef IQTREE_TREECOLLECTION_H
+#define IQTREE_TREECOLLECTION_H
+#include "candidateset.h"
+
+/**
+ * A container for a set of trees together with their scores
+ * and the ID of the process each tree is attributed to.
+ */
+class TreeCollection {
+private:
+    vector<string> treeStrings;
+    vector<double> scores;
+    vector<int> sourceProcID;
+public:
+
+    /**
+     * Constructor
+     */
+    TreeCollection() {}
+
+    /**
+     * Constructor from three vectors
+     * @param trees tree strings
+     * @param scores one score per tree
+     * @param sourceProcID process IDs attributed to the trees
+     */
+    TreeCollection(vector<string>& trees, vector<double>& scores, vector<int> &sourceProcID);
+
+    /**
+     * Append the content of another tree collection
+     */
+    void addTrees(TreeCollection &trees);
+
+    /**
+     * Append the trees of a candidate set
+     */
+    void addTrees(CandidateSet& candidateTrees);
+
+
+    /*
+     * Get i-th tree and its score
+     */
+    pair<string, double> getTree(int i);
+
+    /**
+     * Remove all trees, scores and source process IDs
+     */
+    void clear();
+
+    /**
+     * Replace the tree strings. Scores and source process IDs are not touched.
+     * Taken by const reference to avoid an extra copy of the vector.
+     */
+    void setTreeStrings(const vector<string> &treeStrings) {
+        TreeCollection::treeStrings = treeStrings;
+    }
+
+    /**
+     * Replace the scores. Tree strings and source process IDs are not touched.
+     * Taken by const reference to avoid an extra copy of the vector.
+     */
+    void setScores(const vector<double> &scores) {
+        TreeCollection::scores = scores;
+    }
+
+    /**
+     * @return number of trees in the collection
+     */
+    size_t getNumTrees();
+
+    const vector<string> &getTreeStrings() const {
+        return treeStrings;
+    }
+
+    const vector<double> &getScores() const {
+        return scores;
+    }
+
+    const vector<int> &getSourceProcID() const {
+        return sourceProcID;
+    }
+
+};
+
+
+#endif //IQTREE_TREECOLLECTION_H
diff --git a/alignment.cpp b/alignment.cpp
index dcab0ff..f935d2b 100644
--- a/alignment.cpp
+++ b/alignment.cpp
@@ -790,6 +790,7 @@ void Alignment::orderPatternByNumChars() {
}
delete [] ptn_order;
delete [] num_chars;
+// cout << ordered_pattern.size() << " ordered_pattern" << endl;
}
void Alignment::ungroupSitePattern()
@@ -1882,7 +1883,7 @@ int Alignment::buildRetainingSites(const char *aln_site_list, IntVector &kept_si
}
if (exclude_const_sites) {
for (j = 0; j < kept_sites.size(); j++)
- if (at(site_pattern[j]).isConst())
+ if (at(site_pattern[j]).isInvariant())
kept_sites[j] = 0;
}
@@ -1990,10 +1991,12 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t
site_pattern.resize(aln->getNSite(), -1);
clear();
pattern_index.clear();
- int site = 0;
+ int site = 0, removed_sites = 0;
VerboseMode save_mode = verbose_mode;
verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern
- for (iterator pit = aln->begin(); pit != aln->end(); pit++) {
+// for (iterator pit = aln->begin(); pit != aln->end(); pit++) {
+ for (site = 0; site < aln->getNSite(); site++) {
+ iterator pit = aln->begin() + (aln->getPatternID(site));
Pattern pat;
int true_char = 0;
for (it = seq_id.begin(); it != seq_id.end(); it++) {
@@ -2001,12 +2004,14 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t
if (ch != STATE_UNKNOWN) true_char++;
pat.push_back(ch);
}
- if (true_char < min_true_char) continue;
- addPattern(pat, site, (*pit).frequency);
- for (int i = 0; i < (*pit).frequency; i++)
- site_pattern[site++] = size()-1;
+ if (true_char < min_true_char)
+ removed_sites++;
+ else
+ addPattern(pat, site-removed_sites);
+// for (int i = 0; i < (*pit).frequency; i++)
+// site_pattern[site++] = size()-1;
}
- site_pattern.resize(site);
+ site_pattern.resize(aln->getNSite() - removed_sites);
verbose_mode = save_mode;
countConstSite();
buildSeqStates();
diff --git a/alignment.h b/alignment.h
index 781a378..eee6169 100644
--- a/alignment.h
+++ b/alignment.h
@@ -163,7 +163,7 @@ public:
/** order pattern by number of character states and return in ptn_order
*/
- void orderPatternByNumChars();
+ virtual void orderPatternByNumChars();
/**
* un-group site-patterns, i.e., making #sites = #patterns and pattern frequency = 1 for all patterns
@@ -579,6 +579,11 @@ public:
*/
virtual double computeUnconstrainedLogL();
+ /**
+ * @return number of states, if it is a partition model, return max num_states across all partitions
+ */
+ virtual int getMaxNumStates() { return num_states; }
+
/** either SEQ_BINARY, SEQ_DNA, SEQ_PROTEIN, SEQ_MORPH, or SEQ_CODON */
SeqType seq_type;
diff --git a/candidateset.cpp b/candidateset.cpp
index fa5ea81..f6a5fc8 100644
--- a/candidateset.cpp
+++ b/candidateset.cpp
@@ -2,30 +2,40 @@
* candidateset.cpp
*
* Created on: Jun 1, 2014
- * Author: Tung Nguyen
+ * Author: Tung Nguyen
+ * Email: nltung at gmail.com
*/
-#include "phylotree.h"
+#include "iqtree.h"
#include "candidateset.h"
+#include "MPIHelper.h"
-void CandidateSet::init(Alignment* aln, Params *params) {
+void CandidateSet::init(Alignment *aln, int maxSize) {
this->aln = aln;
- this->params = params;
+ this->maxSize = maxSize;
}
CandidateSet::~CandidateSet() {
}
CandidateSet::CandidateSet() : CheckpointFactory() {
- aln = NULL;
- params = NULL;
+ aln = NULL;
+ numStableSplits = 0;
+ this->maxSize = Params::getInstance().maxCandidates;
+}
+
+/**
+ * Overwrite this set with a copy of candSet, then call setMaxSize() with the
+ * maximum size this set had before the copy.
+ */
+void CandidateSet::initTrees(CandidateSet& candSet) {
+    const int savedMaxSize = maxSize;
+    *this = candSet;
+    setMaxSize(savedMaxSize);
}
+
void CandidateSet::saveCheckpoint() {
checkpoint->startStruct("CandidateSet");
- int ntrees = min(params->numNNITrees, (int)size());
- checkpoint->startList(params->numNNITrees);
+ int ntrees = min(Params::getInstance().numNNITrees, (int) size());
+ checkpoint->startList(Params::getInstance().numNNITrees);
for (reverse_iterator it = rbegin(); it != rend() && ntrees > 0; it++, ntrees--) {
checkpoint->addListElement();
stringstream ss;
@@ -46,8 +56,8 @@ void CandidateSet::restoreCheckpoint() {
checkpoint->startStruct("CandidateSet");
double score;
string tree;
- checkpoint->startList(params->numNNITrees);
- for (int i = 0; i < params->numNNITrees; i++) {
+ checkpoint->startList(Params::getInstance().numNNITrees);
+ for (int i = 0; i < Params::getInstance().numNNITrees; i++) {
checkpoint->addListElement();
string str;
if (!checkpoint->getString("", str)) {
@@ -57,68 +67,84 @@ void CandidateSet::restoreCheckpoint() {
ss >> score >> tree;
// CKP_RESTORE(tree);
update(tree, score);
-
+
}
checkpoint->endList();
checkpoint->endStruct();
}
-vector<string> CandidateSet::getBestTrees() {
- vector<string> res;
- double bestScore = rbegin()->first;
- for (reverse_iterator rit = rbegin(); rit != rend() && rit->second.score == bestScore; rit++) {
- res.push_back(rit->second.tree);
- }
- return res;
-}
-
-string CandidateSet::getRandCandTree() {
- assert(!empty());
- if (empty())
- return "";
- int id = random_int(min(params->popSize, (int)size()) );
- for (reverse_iterator i = rbegin(); i != rend(); i++, id--)
- if (id == 0)
- return i->second.tree;
- assert(0);
- return "";
-}
-
-vector<string> CandidateSet::getTopTrees(int numTree) {
- assert(numTree <= params->maxCandidates);
- if (numTree == 0) {
- numTree = params->maxCandidates;
- }
- vector<string> res;
- int cnt = numTree;
- for (reverse_iterator rit = rbegin(); rit != rend() && cnt > 0; rit++, cnt--) {
- res.push_back(rit->second.tree);
- }
- return res;
-}
-
-vector<string> CandidateSet::getBestLocalOptimalTrees(int numTree) {
- assert(numTree <= params->maxCandidates);
- if (numTree == 0) {
- numTree = params->maxCandidates;
- }
- vector<string> res;
- int cnt = numTree;
- for (reverse_iterator rit = rbegin(); rit != rend() && cnt > 0; rit++) {
- if (rit->second.localOpt) {
- res.push_back(rit->second.tree);
- cnt--;
- }
- }
- return res;
+/**
+ * Pick one tree among the first numTopTrees trees in reverse iteration order
+ * (starting at rbegin()), using random_int() to choose the position.
+ * @param numTopTrees upper bound on how many leading trees are eligible
+ * @return the tree string, or "" if the set is empty (asserted in debug builds)
+ */
+string CandidateSet::getRandTopTree(int numTopTrees) {
+    assert(!empty());
+    if (empty())
+        return "";
+    int stepsLeft = random_int(min(numTopTrees, (int) size()));
+    reverse_iterator pick = rbegin();
+    while (pick != rend()) {
+        if (stepsLeft == 0)
+            return pick->second.tree;
+        stepsLeft--;
+        pick++;
+    }
+    // only reached if the drawn position lies outside the set
+    assert(0);
+    return "";
+}
+
+/**
+ * Collect up to numTree tree strings, starting at rbegin().
+ * @param numTree number of trees wanted; 0 or anything above maxSize means maxSize
+ * @return the collected tree strings in reverse iteration order
+ */
+vector<string> CandidateSet::getBestTreeStrings(int numTree) {
+    if (numTree == 0 || numTree > maxSize)
+        numTree = maxSize;
+    vector<string> best;
+    reverse_iterator rit = rbegin();
+    for (int left = numTree; rit != rend() && left > 0; left--, rit++)
+        best.push_back(rit->second.tree);
+    return best;
}
+
+/**
+ * Select the share of the best trees that belongs to the calling process.
+ * With a single process the whole list from getBestTreeStrings() is returned.
+ * Otherwise the process receives the trees at indices
+ * procID, procID + numProc, procID + 2*numProc, ...
+ * @param numTree number of best trees to distribute; raised to the number of
+ *        processes if smaller
+ */
+vector<string> CandidateSet::getBestTreeStringsForProcess(int numTree) {
+    int numProc = MPIHelper::getInstance().getNumProcesses();
+    int procID = MPIHelper::getInstance().getProcessID();
+
+    // BUG FIX: make sure that each process gets at least 1 tree
+    if (numTree < numProc)
+        numTree = numProc;
+
+    vector<string> alltrees = getBestTreeStrings(numTree);
+    if (numProc == 1)
+        return alltrees;
+
+    // never index past the trees that are actually available
+    if (numTree == 0 || (size_t) numTree > alltrees.size())
+        numTree = alltrees.size();
+
+    vector<string> mine;
+    int idx = procID;
+    while (idx < numTree) {
+        mine.push_back(alltrees[idx]);
+        idx += numProc;
+    }
+    return mine;
+}
+
+
+//vector<string> CandidateSet::getBestLocalOptimalTrees(int numTree) {
+// assert(numTree <= params->maxPopSize);
+// if (numTree == 0) {
+// numTree = params->maxPopSize;
+// }
+// vector<string> res;
+// int cnt = numTree;
+// for (reverse_iterator rit = rbegin(); rit != rend() && cnt > 0; rit++) {
+// if (rit->second.localOpt) {
+// res.push_back(rit->second.tree);
+// cnt--;
+// }
+// }
+// return res;
+//}
+
/*
bool CandidateSet::replaceTree(string tree, double score) {
CandidateTree candidate;
candidate.tree = tree;
candidate.score = score;
- candidate.topology = getTopology(tree);
+ candidate.topology = getTopologyString(tree);
if (treeTopologyExist(candidate.topology)) {
topologies[candidate.topology] = score;
for (reverse_iterator i = rbegin(); i != rend(); i++) {
@@ -133,6 +159,53 @@ bool CandidateSet::replaceTree(string tree, double score) {
}
return true;
}
+*/
+
+
+/**
+ * Add the splits of one tree to candSplits.
+ * A split already present gets its weight and stored value raised by one;
+ * a new split is copied and stored with weight and value 1. Finally the tree
+ * counter of candSplits is incremented.
+ * @param treeString the tree whose splits are added
+ */
+void CandidateSet::addCandidateSplits(string treeString) {
+    vector<string> taxaNames = aln->getSeqNames();
+    MTree tree(treeString, taxaNames, Params::getInstance().is_rooted);
+    SplitGraph treeSplits;
+    tree.convertSplits(treeSplits);
+    SplitGraph::iterator splitIt = treeSplits.begin();
+    while (splitIt != treeSplits.end()) {
+        int count;
+        Split *known = candSplits.findSplit(*splitIt, count);
+        if (known == NULL) {
+            // first occurrence: candSplits receives its own copy of the split
+            Split *fresh = new Split(*(*splitIt));
+            fresh->setWeight(1);
+            candSplits.insertSplit(fresh, 1);
+        } else {
+            known->setWeight(count + 1);
+            candSplits.setValue(known, count + 1);
+        }
+        splitIt++;
+    }
+    candSplits.setNumTree(candSplits.getNumTree() + 1);
+}
+
+/**
+ * Remove the splits of one tree from candSplits (counterpart of
+ * addCandidateSplits()). A split seen in several trees has its weight and its
+ * stored value lowered by one; a split with weight 1 is erased. Finally the
+ * tree counter of candSplits is decremented.
+ * The program is terminated with exit(1) if a split of the tree is not found.
+ * @param treeString the tree whose splits are removed
+ */
+void CandidateSet::removeCandidateSplits(string treeString) {
+    vector<string> taxaNames = aln->getSeqNames();
+    MTree tree(treeString, taxaNames, Params::getInstance().is_rooted);
+    SplitGraph allSplits;
+    tree.convertSplits(allSplits);
+    for (SplitGraph::iterator splitIt = allSplits.begin(); splitIt != allSplits.end(); splitIt++) {
+        int value = 0;
+        Split *sp = candSplits.findSplit(*splitIt, value);
+        // treat a missing pointer as "not found" too, so sp is never dereferenced when NULL
+        if (sp == NULL || value == 0) {
+            cout << "Cannot find split: ";
+            (*splitIt)->report(cout);
+            exit(1);
+        } else {
+            assert(sp->getWeight() >= 1);
+            if (sp->getWeight() > 1) {
+                sp->setWeight(value - 1);
+                // keep the stored value in step with the weight, as addCandidateSplits() does
+                candSplits.setValue(sp, value - 1);
+            } else {
+                candSplits.eraseSplit(*splitIt);
+            }
+        }
+    }
+    candSplits.setNumTree(candSplits.getNumTree() - 1);
+}
string CandidateSet::getNextCandTree() {
string tree;
@@ -147,78 +220,84 @@ string CandidateSet::getNextCandTree() {
void CandidateSet::initParentTrees() {
if (parentTrees.empty()) {
- int count = params->popSize;
- for (reverse_iterator i = rbegin(); i != rend() && count >0 ; i++, count--) {
+ int count = Params::getInstance().popSize;
+ for (reverse_iterator i = rbegin(); i != rend() && count > 0; i++, count--) {
parentTrees.push(i->second.tree);
//cout << i->first << endl;
}
}
}
-*/
-bool CandidateSet::update(string tree, double score, bool localOpt) {
- bool newTree = true;
- CandidateTree candidate;
- candidate.score = score;
- candidate.topology = getTopology(tree);
- candidate.localOpt = localOpt;
-// cout << "Updating candidate tree " << tree << endl;
- candidate.tree = tree;
-
- if (treeTopologyExist(candidate.topology)) {
- newTree = false;
- /* If tree topology already exist but the score is better, we replace the old one
- by the new one (with new branch lengths) and update the score */
- if (topologies[candidate.topology] < score) {
- removeCandidateTree(candidate.topology);
- topologies[candidate.topology] = score;
- // insert tree into candidate set
- insert(CandidateSet::value_type(score, candidate));
- } else if (candidate.localOpt) {
- CandidateSet::iterator treePtr = getCandidateTree(candidate.topology);
- treePtr->second.localOpt = candidate.localOpt;
- }
- } else {
- if (getWorstScore() < score && size() >= params->maxCandidates) {
- // remove the worst-scoring tree
- topologies.erase(begin()->second.topology);
- erase(begin());
- }
- CandidateSet::iterator it = insert(CandidateSet::value_type(score, candidate));
- topologies[candidate.topology] = score;
- if (params->fix_stable_splits && getNumLocalOptTrees() >= params->numSupportTrees) {
- int it_pos = distance(it, end());
- // The new tree is one of the numSupportTrees best trees.
- // Thus recompute supported splits
- if (it_pos <= params->numSupportTrees) {
- int nSupportedSplits = computeSplitSupport(params->numSupportTrees);
- cout << ((double) nSupportedSplits / (aln->getNSeq() - 3)) * 100
- << " % of the splits have 100% support and can be fixed." << endl;
- }
- }
- }
- assert(topologies.size() == size());
- return newTree;
+
+
+/**
+ * Offer a tree to the candidate set.
+ * @param newTree the tree string
+ * @param newScore score of the tree
+ * @return -2 if the set is full and the tree scores below the worst stored tree
+ *            (nothing is changed);
+ *         -1 if the topology is already stored (its entry is replaced only if
+ *            the new score is higher);
+ *         otherwise the distance from the inserted tree to end(), i.e. 1 when
+ *            the new tree is the last element of the set
+ */
+int CandidateSet::update(string newTree, double newScore) {
+    // Do not update candidate set if the new tree has worse score than the
+    // worst tree in the candidate set. Emptiness is tested first so that
+    // begin() is never dereferenced on an empty set.
+    if (!empty() && size() >= (size_t) maxSize && newScore < begin()->first) {
+        return -2;
+    }
+    CandidateTree candidate;
+    candidate.score = newScore;
+    candidate.topology = convertTreeString(newTree);
+    candidate.tree = newTree;
+
+    if (treeTopologyExist(candidate.topology)) {
+        // update new score if it is better than the old score
+        double oldScore = topologies[candidate.topology];
+        if (oldScore < newScore) {
+            removeCandidateTree(candidate.topology);
+            insert(CandidateSet::value_type(newScore, candidate));
+            topologies[candidate.topology] = newScore;
+        }
+        assert(topologies.size() == size());
+        return -1;
+    }
+
+    CandidateSet::iterator candidateTreeIt = insert(CandidateSet::value_type(newScore, candidate));
+    topologies[candidate.topology] = newScore;
+
+    // Measure the position before trimming: removing the first element does not
+    // change the distance to end(), and the iterator is not touched afterwards
+    // in case the trimmed element is the one just inserted.
+    int treePos = distance(candidateTreeIt, end());
+
+    if (size() > (size_t) maxSize) {
+        removeWorstTree();
+    }
+    assert(topologies.size() == size());
+
+    return treePos;
}
vector<double> CandidateSet::getBestScores(int numBestScore) {
- if (numBestScore == 0)
- numBestScore = size();
- vector<double> res;
- for (reverse_iterator rit = rbegin(); rit != rend() && numBestScore > 0; rit++, numBestScore--) {
- res.push_back(rit->first);
- }
- return res;
+ if (numBestScore == 0)
+ numBestScore = size();
+ vector<double> res;
+ for (reverse_iterator rit = rbegin(); rit != rend() && numBestScore > 0; rit++, numBestScore--) {
+ res.push_back(rit->first);
+ }
+ return res;
}
double CandidateSet::getBestScore() {
- if (size() == 0)
- return -DBL_MAX;
- else
- return rbegin()->first;
+ if (size() == 0)
+ return -DBL_MAX;
+ else
+ return rbegin()->first;
}
-double CandidateSet::getWorstScore() {
- return begin()->first;
+/**
+ * Parse a tree string, assign leaf IDs, set the root to the leaf named "0"
+ * and print the tree again in the requested format.
+ * @param treeString the tree to convert
+ * @param format output flags handed to MTree::printTree()
+ * @return the re-printed tree string
+ */
+string CandidateSet::convertTreeString(string treeString, int format) {
+    MTree parsed;
+    stringstream input;
+    input << treeString;
+    input.seekg(0, ios::beg);
+    parsed.readTree(input, Params::getInstance().is_rooted);
+    parsed.assignLeafID();
+
+    string rootLeaf = "0";
+    parsed.root = parsed.findLeafName(rootLeaf);
+
+    ostringstream output;
+    parsed.printTree(output, format);
+    return output.str();
}
string CandidateSet::getTopology(string tree) {
@@ -227,119 +306,220 @@ string CandidateSet::getTopology(string tree) {
// mtree.aln = this->aln;
// mtree.setParams(params);
MTree mtree;
-
- stringstream str;
- str << tree;
- str.seekg(0, ios::beg);
+
+ stringstream str;
+ str << tree;
+ str.seekg(0, ios::beg);
// freeNode();
- mtree.readTree(str, params->is_rooted);
+ mtree.readTree(str, Params::getInstance().is_rooted);
// mtree.setAlignment(aln);
// mtree.setRootNode(params->root);
mtree.assignLeafID();
string x = "0";
mtree.root = mtree.findLeafName(x);
-// mtree.readTreeString(tree);
-// mtree.setRootNode(params->root);
-
- ostringstream ostr;
- mtree.printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
- return ostr.str();
+ ostringstream ostr;
+ mtree.printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
+ return ostr.str();
}
double CandidateSet::getTopologyScore(string topology) {
- assert(topologies.find(topology) != topologies.end());
- return topologies[topology];
+ assert(topologies.find(topology) != topologies.end());
+ return topologies[topology];
}
void CandidateSet::clear() {
- multimap<double, CandidateTree>::clear();
- clearTopologies();
+ multimap<double, CandidateTree>::clear();
+ clearTopologies();
}
void CandidateSet::clearTopologies() {
- topologies.clear();
+ topologies.clear();
}
CandidateSet CandidateSet::getBestCandidateTrees(int numTrees) {
- CandidateSet res;
- if (numTrees >= size())
- numTrees = size();
- for (reverse_iterator rit = rbegin(); rit != rend() && numTrees > 0; rit++, numTrees--) {
- res.insert(*rit);
- }
- return res;
+ CandidateSet res;
+ if (numTrees >= size() || numTrees == 0)
+ numTrees = (int) size();
+
+ for (reverse_iterator rit = rbegin(); rit != rend() && numTrees > 0; rit++, numTrees--) {
+ res.insert(*rit);
+ }
+ return res;
+}
+
+/**
+ * Export every tree and its score, visiting the set from rbegin() to rend().
+ * @param trees [OUT] tree strings; previous content is discarded
+ * @param scores [OUT] scores, parallel to trees; previous content is discarded
+ * @param format -1 keeps the stored tree strings, any other value converts
+ *        each tree with convertTreeString()
+ */
+void CandidateSet::getAllTrees(vector<string> &trees, vector<double> &scores, int format) {
+    trees.clear();
+    scores.clear();
+
+    const bool keepStored = (format == -1);
+    reverse_iterator rit = rbegin();
+    while (rit != rend()) {
+        if (keepStored)
+            trees.push_back(rit->second.tree);
+        else
+            trees.push_back(convertTreeString(rit->second.tree, format));
+        scores.push_back(rit->first);
+        rit++;
+    }
}
bool CandidateSet::treeTopologyExist(string topo) {
- return (topologies.find(topo) != topologies.end());
+ return (topologies.find(topo) != topologies.end());
}
bool CandidateSet::treeExist(string tree) {
- return treeTopologyExist(getTopology(tree));
+ return treeTopologyExist(convertTreeString(tree));
}
CandidateSet::iterator CandidateSet::getCandidateTree(string topology) {
- for (CandidateSet::reverse_iterator rit = rbegin(); rit != rend(); rit++) {
- if (rit->second.topology == topology)
- return --(rit.base());
- }
- return end();
+ for (CandidateSet::reverse_iterator rit = rbegin(); rit != rend(); rit++) {
+ if (rit->second.topology == topology)
+ return --(rit.base());
+ }
+ return end();
}
void CandidateSet::removeCandidateTree(string topology) {
- bool removed = false;
- for (CandidateSet::reverse_iterator rit = rbegin(); rit != rend(); rit++) {
- if (rit->second.topology == topology) {
- erase( --(rit.base()) );
- topologies.erase(topology);
- removed = true;
- break;
- }
- }
- assert(removed);
-}
-
-bool CandidateSet::isStableSplit(Split& sp) {
- return stableSplit.containSplit(sp);
-}
-
-int CandidateSet::computeSplitSupport(int numTree) {
- stableSplit.clear();
- if (numTree == 0)
- numTree = getNumLocalOptTrees();
- SplitIntMap hash_ss;
- SplitGraph sg;
- MTreeSet boot_trees;
- int numMaxSupport = 0;
- vector<string> trees = getBestLocalOptimalTrees(numTree);
- assert(trees.size() > 1);
- int maxSupport = trees.size();
- boot_trees.init(trees, aln->getSeqNames(), params->is_rooted);
- boot_trees.convertSplits(aln->getSeqNames(), sg, hash_ss, SW_COUNT, -1, NULL, false);
-
- for (SplitIntMap::iterator it = hash_ss.begin(); it != hash_ss.end(); it++) {
- if (it->second == maxSupport && it->first->countTaxa() > 1) {
- numMaxSupport++;
- Split* supportedSplit = new Split(*(it->first));
- stableSplit.push_back(supportedSplit);
- }
- }
- //cout << "Number of supported splits = " << numMaxSupport << endl;
- return numMaxSupport;
-}
-
-void CandidateSet::setAln(Alignment* aln) {
- this->aln = aln;
-}
-
-int CandidateSet::getNumLocalOptTrees() {
- int numLocalOptima = 0;
- for (reverse_iterator rit = rbegin(); rit != rend(); rit++) {
- if (rit->second.localOpt) {
- numLocalOptima++;
- }
- }
- return numLocalOptima;
+ bool removed = false;
+ double treeScore;
+ // Find the score of the topology
+ treeScore = topologies[topology];
+ // Remove the topology
+ topologies.erase(topology);
+ pair<CandidateSet::iterator, CandidateSet::iterator> treeItPair;
+ // Find all trees with that score
+ treeItPair = equal_range(treeScore);
+ CandidateSet::iterator it;
+ for (it = treeItPair.first; it != treeItPair.second; ++it) {
+ if (it->second.topology == topology) {
+ erase(it);
+ removed = true;
+ break;
+ }
+ }
+ assert(removed);
+}
+
+
+/**
+ * Remove the first element of the set (the lowest key) together with its
+ * entry in topologies. Does nothing when the set is empty.
+ */
+void CandidateSet::removeWorstTree() {
+    // begin() must not be dereferenced on an empty set
+    if (empty())
+        return;
+    topologies.erase(begin()->second.topology);
+    erase(begin());
+}
+
+/**
+ * Rebuild candSplits from all trees currently stored in the set. Each split's
+ * weight is set to (number of trees containing it) / (number of trees) and
+ * its stored value to the number of trees containing it.
+ * @param supportThreshold threshold handed to countStableSplits()
+ * @return the number of stable splits reported by countStableSplits()
+ */
+int CandidateSet::computeSplitOccurences(double supportThreshold) {
+    candSplits.clear();
+    candSplits.setNumTree(size());
+
+    /* Store all splits in the best trees in candSplits.
+     * The variable numTree in SplitIntMap is the number of trees, from which the splits are converted.
+     */
+    CandidateSet::iterator treeIt;
+    for (treeIt = begin(); treeIt != end(); treeIt++) {
+        MTree tree(treeIt->second.tree, Params::getInstance().is_rooted);
+        SplitGraph splits;
+        tree.convertSplits(splits);
+        SplitGraph::iterator itg;
+        for (itg = splits.begin(); itg != splits.end(); itg++) {
+            int value;
+            Split *sp = candSplits.findSplit(*itg, value);
+            if (sp != NULL) {
+                int newHashWeight = value + 1;
+                double newSupport = (double) newHashWeight / (double) candSplits.getNumTree();
+                sp->setWeight(newSupport);
+                candSplits.setValue(sp, newHashWeight);
+            }
+            else {
+                sp = new Split(*(*itg));
+                sp->setWeight(1.0 / (double) candSplits.getNumTree());
+                candSplits.insertSplit(sp, 1);
+            }
+        }
+    }
+    int newNumStableSplits = countStableSplits(supportThreshold);
+    if (verbose_mode >= VB_MED) {
+        cout << ((double) newNumStableSplits / (aln->getNSeq() - 3)) * 100;
+        cout << " % of the splits are stable (support threshold " << supportThreshold;
+        cout << " from " << candSplits.getNumTree() << " trees)" << endl;
+    }
+
+    // Return the count computed above. The member numStableSplits is not
+    // updated by this function, so returning it would report a stale value.
+    return newNumStableSplits;
+}
+
+/**
+ * Count the splits in candSplits whose weight reaches the threshold and that
+ * contain more than one taxon.
+ * @param thresHold minimum weight; values of 1.0 or more are replaced by 0.99
+ * @return number of such splits, 0 if candSplits is empty
+ */
+int CandidateSet::countStableSplits(double thresHold) {
+    if (thresHold >= 1.0)
+        thresHold = 0.99;
+    if (candSplits.empty())
+        return 0;
+    int stableCount = 0;
+    SplitIntMap::iterator it = candSplits.begin();
+    while (it != candSplits.end()) {
+        if (it->first->getWeight() >= thresHold && it->first->countTaxa() > 1)
+            stableCount++;
+        it++;
+    }
+    return stableCount;
+}
+
+/**
+ * Print every split of candSplits whose stored value equals the number of
+ * trees and that contains more than one taxon.
+ * NOTE(review): the assert expects the split weight to equal the tree count,
+ * i.e. weights kept as counts as in addCandidateSplits(); confirm this is not
+ * called after computeSplitOccurences(), which stores fractional weights.
+ */
+void CandidateSet::reportStableSplits() {
+    if (candSplits.empty()) {
+        cout << "The set of stable splits is empty! " << endl;
+        return;
+    }
+
+    SplitIntMap::iterator it = candSplits.begin();
+    while (it != candSplits.end()) {
+        if (it->second == candSplits.getNumTree() && it->first->countTaxa() > 1) {
+            cout << it->first->getWeight() << " / " << candSplits.getNumTree() << endl;
+            assert(it->first->getWeight() == candSplits.getNumTree());
+            it->first->report(cout);
+        }
+        it++;
+    }
+}
+
+/**
+ * Set the alignment pointer used by this candidate set.
+ */
+void CandidateSet::setAln(Alignment *aln) {
+    // the parameter shadows the member, hence the qualified name
+    CandidateSet::aln = aln;
}
+
+/**
+ * Collect all trees whose score differs from the given score by less than 0.1.
+ * @param score the reference score
+ * @return a candidate set holding copies of the matching entries
+ */
+CandidateSet CandidateSet::getCandidateTrees(double score) {
+    CandidateSet res;
+    for (CandidateSet::iterator it = begin(); it != end(); it++) {
+        // Take the absolute difference in floating point: an unqualified abs()
+        // may bind to the integer overload and truncate the difference to 0
+        double diff = it->first - score;
+        if (diff < 0)
+            diff = -diff;
+        if (diff < 0.1) {
+            res.insert(*it);
+        }
+    }
+    return res;
+}
+
+/**
+ * Write the set to two files, visiting it from rbegin() to rend():
+ * "<out_prefix>.<suffix>" receives one topology string per line and
+ * "<out_prefix>.<suffix>_lh" the matching scores with 15 digits of precision.
+ * @param suffix file name suffix appended to the output prefix
+ */
+void CandidateSet::printTrees(string suffix) {
+    const string treesPath = string(Params::getInstance().out_prefix) + "." + suffix;
+    const string scoresPath = string(Params::getInstance().out_prefix) + "." + suffix + "_lh";
+
+    ofstream outTrees;
+    ofstream outLHs;
+    outTrees.open(treesPath.c_str());
+    outLHs.open(scoresPath.c_str());
+    outLHs.precision(15);
+
+    reverse_iterator rit = rbegin();
+    while (rit != rend()) {
+        outLHs << rit->first << endl;
+        outTrees << rit->second.topology << endl;
+        rit++;
+    }
+
+    outTrees.close();
+    outLHs.close();
+}
+
+/**
+ * For each tree string returned by getBestTreeStrings(): load it into
+ * treeObject, call optimizeAllBranches(1) and pass the resulting tree string
+ * and score to update().
+ * @param treeObject the tree object used for the re-evaluation
+ */
+void CandidateSet::recomputeLoglOfAllTrees(IQTree &treeObject) {
+    // work on a snapshot because update() modifies the set during the loop
+    vector<string> snapshot = getBestTreeStrings();
+    for (size_t idx = 0; idx < snapshot.size(); idx++) {
+        treeObject.readTreeString(snapshot[idx]);
+        double score = treeObject.optimizeAllBranches(1);
+        update(treeObject.getTreeString(), score);
+    }
+}
+
+
+
+
+
+
+
+
+
diff --git a/candidateset.h b/candidateset.h
index 48e9784..a7f3279 100644
--- a/candidateset.h
+++ b/candidateset.h
@@ -1,18 +1,37 @@
-/*
- * candidateset.h
- *
- * Created on: Jun 1, 2014
- * Author: Tung Nguyen
- */
+/***************************************************************************
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
#ifndef CANDIDATESET_H_
#define CANDIDATESET_H_
+//#include "phylotree.h"
#include "tools.h"
#include "alignment.h"
#include "mtreeset.h"
#include <stack>
#include "checkpoint.h"
+
+class IQTree;
+
struct CandidateTree {
/**
@@ -21,7 +40,6 @@ struct CandidateTree {
*/
string tree;
-
/**
* tree topology WITHOUT branch lengths
* and WITH TAXON ID (instead of taxon names)
@@ -33,15 +51,6 @@ struct CandidateTree {
* log-likelihood or parsimony score
*/
double score;
-
- /**
- * Indicate whether the tree is NNI locally optimal.
- * The reason to have this variable is that if the -reduction is
- * enabled, we will also store non-locally optimal trees in the set.
- * This is done to identify trees that belong to the same basin of attraction
- */
- bool localOpt;
-
};
@@ -51,12 +60,21 @@ struct CandidateTree {
class CandidateSet : public multimap<double, CandidateTree>, public CheckpointFactory {
public:
+
/**
* Initialization
*/
- void init(Alignment* aln, Params *params);
+ void init(Alignment* aln, int maxSize);
+
+ CandidateSet();
- CandidateSet();
+ CandidateSet(int maxSize);
+
+ /**
+ * Replace the current candidate trees by those in another candidate set
+ * @param candSet the candidate set whose trees will be copied over
+ */
+ void initTrees(CandidateSet& candSet);
/**
save object into the checkpoint
@@ -69,9 +87,10 @@ public:
virtual void restoreCheckpoint();
/**
- * return randomly one candidate tree from max_candidate
+ * return randomly one of the current best trees
+ * @param numTopTrees [IN] Number of current best trees, from which a random tree is chosen.
*/
- string getRandCandTree();
+ string getRandTopTree(int numTopTrees);
/**
* return the next parent tree for reproduction.
@@ -79,7 +98,7 @@ public:
* been used for reproduction. If all candidate trees have been used, we select the
* current best trees as the new parent trees
*/
-// string getNextCandTree();
+ string getNextCandTree();
/**
* Replace an existing tree in the candidate set
@@ -92,21 +111,21 @@ public:
/**
* create the parent tree set containing top trees
*/
-// void initParentTrees();
+ void initParentTrees();
/**
- * update/insert \a tree into the candidate set if its score is higher than the worst tree
- *
- * @param tree
- * The new tree string (with branch lengths)
- * @param score
- * The score (ML or parsimony) of \a tree
- * @param localOpt
- * Tells whether \a tree is a locally optimal (DEFAULT: true)
- * @return false if tree topology already exists
+ * update/insert \a tree into the candidate set if its score is higher than the worst tree
*
+ * @param newTree
+ * The new tree string (with branch lengths)
+ * @param newScore
+ * The score (ML or parsimony) of \a newTree
+ * @return
+ * Relative position of the new tree to the current best tree.
+ * Return -1 if the tree topology already existed
+ * Return -2 if the candidate set is not updated
*/
- bool update(string tree, double score, bool localOpt = true);
+ int update(string newTree, double newScore);
/**
* Get the \a numBestScores best scores in the candidate set
@@ -119,13 +138,6 @@ public:
vector<double> getBestScores(int numBestScores = 0);
/**
- * Get the worst score
- *
- * @return the worst score
- */
- double getWorstScore();
-
- /**
* Get best score
*
* @return the best score
@@ -140,24 +152,26 @@ public:
* @return
* Vector of current best trees
*/
- vector<string> getTopTrees(int numTree = 0);
+ vector<string> getBestTreeStrings(int numTree = 0);
/**
- * Get \a numTree best locally optimal trees
- * @param numTree
- * Number of locally optimal trees
- * @return
- * Vector of current best locally optimal trees
+ * Get \a numTree top scoring trees for this MPI process. Also work for sequential version.
+ *
+ * @param numTree
+ * Number of top scoring trees
+ * @return
+ * Vector of current best trees
*/
- vector<string> getBestLocalOptimalTrees(int numTree = 0);
+ vector<string> getBestTreeStringsForProcess(int totalNumTree);
/**
- * Get tree(s) with the best score. There could be more than one
- * tree that share the best score (this happens frequently with parsimony)
- * @return
- * A vector containing trees with the best score
+ * Return a set of trees and a set of scores
+ *
+ * @param trees vector of trees
+ * @param scores vector of tree scores
+ * @param treeFormat the NEWICK format used for tree string (WT_TAXON_ID, WT_BR_LEN, ..)
*/
- vector<string> getBestTrees();
+ void getAllTrees(vector<string> &trees, vector<double> &scores, int treeFormat = -1);
/**
* destructor
@@ -186,12 +200,25 @@ public:
*
* @param tree
* The newick tree string, from which the topology string will be generated
+ * @param format
+ * Use the same options as printTree() (WT_ID, WT_BR_LEN, ...)
* @return
* Newick string of the tree topology
*/
- string getTopology(string tree);
+ string convertTreeString(const string tree, int format = WT_TAXON_ID | WT_SORT_TAXA);
/**
+ * Return a unique topology (sorted by taxon names, rooted at taxon with alphabetically smallest name)
+ * without branch lengths
+ *
+ * @param tree
+ * The newick tree string, from which the topology string will be generated
+ * @return
+ * Newick string of the tree topology
+ */
+ string getTopology(string tree);
+
+ /**
* return the score of \a topology
*
* @param topology
@@ -212,18 +239,36 @@ public:
void clearTopologies();
/**
- * Compute the split support from the \a numTree best local optimal trees in the candidate sets
- * @param numTree the number of best trees used to calculate support values
- * @return number of splits with 100% support value
+ * Collect all splits from the set of current best trees and compute for each of them the number of occurrences.
+ *
+ * @param supportThres
+ * a number in (0,1] representing the support value threshold for stable splits
+ * @return number of splits with 100% support value
*/
- int computeSplitSupport(int numTree = 0);
+ int computeSplitOccurences(double supportThres);
+
+ /**
+ * Get number of stable splits
+ * @param thresHold A number in (0,1.0]; all splits with support values above this threshold
+ * are considered stable
+ */
+ int countStableSplits(double thresHold);
+
+ void reportStableSplits();
/**
- * Check whether the
- * @param sp the split to check, must have the same taxon set as the trees in CandidateSet.
- * @return true if the \a supportedSplits contain \a sp, false otherwise.
+ * Update the set of stable split when a new tree is inserted
+ * to the set of best trees used for computing stable splits.
+ *
+ * This function will remove all splits that belong to oldTree and add all
+ * splits of newTree
+ *
+ * @param
+ * oldTree tree that will be replaced by \a newTree
+ * @param
+ * newTree the new tree
*/
- bool isStableSplit(Split& sp);
+ void updateStableSplit(string oldTree, string newTree);
/**
* Return a pointer to the \a CandidateTree that has topology equal to \a topology
@@ -233,50 +278,104 @@ public:
iterator getCandidateTree(string topology);
/**
- * Remove the \a CandidateTree with topology equal to \a topology
+ * Remove candidate trees with topology equal to the specified topology
* @param topology
*/
void removeCandidateTree(string topology);
+ /**
+ * Remove the worst tree in the candidate set
+ */
+ void removeWorstTree();
+
/* Getter and Setter function */
void setAln(Alignment* aln);
- int getMaxCandidates() const;
- void setMaxCandidates(int maxCandidates);
- int getPopSize() const;
- void setPopSize(int popSize);
- void setIsRooted(bool isRooted);
+
const StringDoubleHashMap& getTopologies() const {
return topologies;
}
- /**
- * get number of locally optimal trees in the set
- * @return
- */
- int getNumLocalOptTrees();
-
/**
- * Return a CandidateSet containing \a numTrees of current best candidate trees
+ * Return a CandidateSet containing \a numTrees candidate trees
* @param numTrees
* @return
*/
- CandidateSet getBestCandidateTrees(int numTrees);
+ CandidateSet getBestCandidateTrees(int numTrees = 0);
- SplitGraph& getStableSplits() {
- return stableSplit;
+ /**
+ * Return the set of trees whose scores are within 0.1 of \a score
+ */
+ CandidateSet getCandidateTrees(double score);
+
+
+ SplitIntMap& getCandSplits() {
+ return candSplits;
}
+ /**
+ * @brief Get a random subset containing \a numSplit from the
+ * set of stable splits.
+ * @param
+ * numSplit size of the subset
+ * @param
+ * splits (OUT) a random subset of the stable splits
+ */
+ //void getRandomStableSplits(int numSplit, SplitGraph& splits);
+
+ /**
+ * Add splits from \a treeString to the current candidate splits
+ *
+ * @param tree collect splits from this tree
+ */
+ void addCandidateSplits(string treeString);
+
+ /**
+ * Remove splits that appear from \a treeString.
+ * If an existing split has weight > 1, their weight will be
+ * reduced by 1.
+ */
+ void removeCandidateSplits(string treeString);
+
+ int getNumStableSplits() const {
+ return numStableSplits;
+ }
+
+ /**
+ * Print candidate trees and their likelihood
+ */
+ void printTrees(string suffix);
+
+ /**
+ * Recompute the log-likelihood of all trees
+ * @param treeObject the tree object which store other model parameters used
+ * to compute the log-likelihood.
+ */
+ void recomputeLoglOfAllTrees(IQTree &treeObject);
+
+ int getMaxSize() const {
+ return maxSize;
+ }
+
+ void setMaxSize(int maxSize) {
+ this->maxSize = maxSize;
+ }
+
private:
+ /**
+ * Maximum number of candidate trees
+ */
+ int maxSize;
/**
- * Set of supported splits by the best trees
+ * Number of stable splits identified
*/
- SplitGraph stableSplit;
+ int numStableSplits;
/**
- * Shared params pointing to the global params
+ * Set of splits and the number of their occurrences in the current best trees.
+ * The number of current best trees is parameterized.
*/
- Params* params;
+ SplitIntMap candSplits;
/**
* Map data structure storing <topology_string, score>
@@ -292,7 +391,6 @@ private:
* pointer to alignment, just to assign correct IDs for taxa
*/
Alignment *aln;
-
};
#endif /* CANDIDATESET_H_ */
diff --git a/checkpoint.cpp b/checkpoint.cpp
index 8517ef2..1b26991 100644
--- a/checkpoint.cpp
+++ b/checkpoint.cpp
@@ -30,6 +30,47 @@ Checkpoint::~Checkpoint() {
void Checkpoint::setFileName(string filename) {
this->filename = filename;
}
+
+
+/**
+ * Parse checkpoint content from an input stream into this map.
+ *
+ * Each line is processed as follows:
+ *  - everything from the first '#' on is discarded, then trailing "\n\r\t" is stripped;
+ *  - a line that does not start with a space resets the current struct prefix;
+ *  - "key: value"  stores value under <struct prefix> + key;
+ *  - "name:"       starts a new struct: the prefix becomes "name." and the list counter restarts;
+ *  - anything else is stored as a list element under <struct prefix> + running counter.
+ *
+ * Reading stops as soon as getline() fails, so the stream is left with failbit
+ * set (in addition to eofbit at end of input).
+ *
+ * @param in input stream to read from
+ */
+void Checkpoint::load(istream &in) {
+    string line;
+    string struct_name;
+    size_t pos;
+    int listid = 0;
+    // Loop on the result of getline() instead of testing eof(): a stream that is
+    // in a failed/bad state without eofbit would otherwise spin forever.
+    while (getline(in, line)) {
+        // drop trailing comment
+        pos = line.find('#');
+        if (pos != string::npos)
+            line.erase(pos);
+        line.erase(line.find_last_not_of("\n\r\t")+1);
+        if (line.empty()) continue;
+        // a non-indented line leaves the current struct
+        if (line[0] != ' ') {
+            struct_name = "";
+        }
+        line.erase(0, line.find_first_not_of(" \n\r\t"));
+        if (line.empty()) continue;
+        pos = line.find(": ");
+        if (pos != string::npos) {
+            // mapping
+            (*this)[struct_name + line.substr(0, pos)] = line.substr(pos+2);
+        } else if (line[line.length()-1] == ':') {
+            // start a new struct
+            line.erase(line.length()-1);
+            trimString(line);
+            struct_name = line + '.';
+            listid = 0;
+            continue;
+        } else {
+            // collection element, keyed by its position in the list
+            (*this)[struct_name + convertIntToString(listid)] = line;
+            listid++;
+        }
+    }
+}
+
+
void Checkpoint::load() {
assert(filename != "");
if (!fileExists(filename)) return;
@@ -47,41 +88,8 @@ void Checkpoint::load() {
}
if (line != header)
throw ("Invalid checkpoint file " + filename);
- string struct_name;
- size_t pos;
- int listid = 0;
- while (!in.eof()) {
- getline(in, line);
- pos = line.find('#');
- if (pos != string::npos)
- line.erase(pos);
- line.erase(line.find_last_not_of("\n\r\t")+1);
-// trimString(line);
- if (line.empty()) continue;
- if (line[0] != ' ') {
- struct_name = "";
- }
-// trimString(line);
- line.erase(0, line.find_first_not_of(" \n\r\t"));
- if (line.empty()) continue;
- pos = line.find(": ");
- if (pos != string::npos) {
- // mapping
- (*this)[struct_name + line.substr(0, pos)] = line.substr(pos+2);
- } else if (line[line.length()-1] == ':') {
- // start a new struct
- line.erase(line.length()-1);
- trimString(line);
- struct_name = line + '.';
- listid = 0;
- continue;
- } else {
- // collection
- (*this)[struct_name + convertIntToString(listid)] = line;
- listid++;
-// throw "':' is expected between key and value";
- }
- }
+ // call load from the stream
+ load(in);
in.clear();
// set the failbit again
in.exceptions(ios::failbit | ios::badbit);
@@ -111,6 +119,23 @@ void Checkpoint::setDumpInterval(double interval) {
dump_interval = interval;
}
+/**
+ * Write all key/value pairs of this checkpoint to an output stream.
+ *
+ * A key without '.' is written as "key: value". A key containing '.' is split
+ * at the first '.': whenever the part before it differs from the previously
+ * written struct name, a "name:" header line is emitted, and the entry itself
+ * is written indented by one space as " rest: value".
+ *
+ * @param out output stream to write to
+ */
+void Checkpoint::dump(ostream &out) {
+    string struct_name;
+    for (iterator i = begin(); i != end(); ++i) {
+        size_t pos = i->first.find('.');
+        if (pos == string::npos) {
+            // plain top-level entry
+            out << i->first << ": " << i->second << endl;
+            continue;
+        }
+        string prefix = i->first.substr(0, pos);
+        if (struct_name != prefix) {
+            // first entry of a different struct: emit its header line
+            struct_name = prefix;
+            out << struct_name << ':' << endl;
+        }
+        out << ' ' << i->first.substr(pos+1) << ": " << i->second << endl;
+    }
+}
void Checkpoint::dump(bool force) {
if (filename == "")
@@ -128,25 +153,13 @@ void Checkpoint::dump(bool force) {
out = new ofstream(filename.c_str());
out->exceptions(ios::failbit | ios::badbit);
*out << header << endl;
- string struct_name;
- size_t pos;
- int listid = 0;
- for (iterator i = begin(); i != end(); i++) {
- if ((pos = i->first.find('.')) != string::npos) {
- if (struct_name != i->first.substr(0, pos)) {
- struct_name = i->first.substr(0, pos);
- *out << struct_name << ":" << endl;
- listid = 0;
- }
- // check if key is a collection
- *out << " " << i->first.substr(pos+1) << ": " << i->second << endl;
- } else
- *out << i->first << ": " << i->second << endl;
- }
+ // call dump stream
+ dump(*out);
if (compression)
((ogzstream*)out)->close();
else
((ofstream*)out)->close();
+ delete out;
// cout << "Checkpoint dumped" << endl;
} catch (ios::failure &) {
outError(ERR_WRITE_OUTPUT, filename.c_str());
@@ -218,6 +231,13 @@ void Checkpoint::startList(int nelem) {
list_element_precision.push_back(0);
}
+/**
+ * Set the ID of the current (innermost) list element and append it to struct_name.
+ * The ID is zero-padded to the width given by list_element_precision.back()
+ * and followed by a '.'.
+ *
+ * @param id element ID to assign to the current list element
+ */
+void Checkpoint::setListElement(int id) {
+    list_element.back() = id;
+    stringstream padded;
+    padded << setfill('0') << setw(list_element_precision.back()) << list_element.back();
+    struct_name += padded.str();
+    struct_name += ".";
+}
+
void Checkpoint::addListElement() {
list_element.back()++;
if (list_element.back() > 0) {
diff --git a/checkpoint.h b/checkpoint.h
index a708d36..d7c478a 100644
--- a/checkpoint.h
+++ b/checkpoint.h
@@ -77,11 +77,23 @@ public:
void setHeader(string header);
/**
+ * load checkpoint information from an input stream
+ * @param in input stream
+ */
+ void load(istream &in);
+
+ /**
* load checkpoint information from file
*/
void load();
/**
+ * dump checkpoint information into an output stream
+ * @param out output stream
+ */
+ void dump(ostream &out);
+
+ /**
* dump checkpoint information into file
* @param force TRUE to dump no matter if time interval exceeded or not
*/
@@ -308,7 +320,13 @@ public:
@param nelem number of elements
*/
void startList(int nelem);
-
+
+ /**
+ set the starting list element, should only be called right after startList
+ @param id element ID
+ */
+ void setListElement(int id);
+
/**
add an element to the current list
*/
diff --git a/constrainttree.cpp b/constrainttree.cpp
new file mode 100644
index 0000000..a0f8076
--- /dev/null
+++ b/constrainttree.cpp
@@ -0,0 +1,211 @@
+//
+// C++ Implementation: constrainttree.cpp
+//
+// Description: ConstraintTree class used to guide tree search
+//
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+
+#include "phylotree.h"
+#include "constrainttree.h"
+#include "splitgraph.h"
+
+/** Default constructor: default-constructs both base classes (MTree and SplitIntMap). */
+ConstraintTree::ConstraintTree() : MTree(), SplitIntMap() {
+}
+
+/**
+ * Read the constraint tree from a file and prepare the data used by the
+ * isCompatible() checks: the taxon-name index and the split map.
+ *
+ * Steps, in order:
+ *  1. load the tree; report an error if it has 3 or fewer leaves or is rooted;
+ *  2. collapse internal nodes of degree 2 and re-initialize the tree if any were removed;
+ *  3. rebuild taxname_index (taxon name -> position in the taxa name list);
+ *  4. convert the tree to splits, drop trivial ones, and insert a copy of each split
+ *     (oriented to contain taxon 0) into this split map with value 1;
+ *  5. report an error if any taxon of the constraint tree is missing from \a fulltaxname.
+ *
+ * @param constraint_file name of the constraint tree file
+ * @param fulltaxname names of all taxa in the full tree
+ */
+void ConstraintTree::initConstraint(const char *constraint_file, StrVector &fulltaxname) {
+ bool is_rooted = false;
+ // NOTE(review): is_rooted is tested below, so MTree::init presumably takes it by reference and updates it - confirm
+ MTree::init(constraint_file, is_rooted);
+ if (leafNum <= 3)
+ outError("Constraint tree must contain at least 4 taxa");
+ if (is_rooted)
+ outError("Rooted constraint tree not accepted");
+
+ // collapse any internal node of degree 2
+ NodeVector nodes;
+ getInternalNodes(nodes);
+ int num_collapsed = 0;
+ for (NodeVector::iterator it = nodes.begin(); it != nodes.end(); it++)
+ if ((*it)->degree() == 2) {
+ Node *left = (*it)->neighbors[0]->node;
+ Node *right = (*it)->neighbors[1]->node;
+ // the merged branch gets the sum of the two branch lengths
+ double len = (*it)->neighbors[0]->length+(*it)->neighbors[1]->length;
+ // link the two neighbors directly to each other, bypassing the collapsed node
+ left->updateNeighbor((*it), right, len);
+ right->updateNeighbor((*it), left, len);
+ delete (*it);
+ num_collapsed++;
+ if (verbose_mode >= VB_MED)
+ cout << "Node of degree 2 collapsed" << endl;
+ }
+ // nodes were deleted, so the tree is re-initialized before further use
+ if (num_collapsed)
+ initializeTree();
+
+ // build taxon name to ID index
+ StrVector taxname;
+ StrVector::iterator it;
+ getTaxaName(taxname);
+ taxname_index.clear();
+ for (it = taxname.begin(); it != taxname.end(); it++)
+ taxname_index[(*it)] = it - taxname.begin();
+
+ // convert into split system
+ SplitGraph sg;
+ convertSplits(taxname, sg);
+ sg.removeTrivialSplits();
+ for (SplitGraph::iterator sit = sg.begin(); sit != sg.end(); sit++) {
+ // orient every split so that it contains taxon 0 before storing it
+ if (!(*sit)->containTaxon(0))
+ (*sit)->invert();
+ // a heap-allocated copy is inserted; sg keeps its own split objects
+ insertSplit(new Split(**sit), 1);
+ }
+
+ // check that constraint tree has a subset of taxa
+ StringIntMap fulltax_index;
+ for (it = fulltaxname.begin(); it != fulltaxname.end(); it++)
+ fulltax_index[(*it)] = it - fulltaxname.begin();
+
+ bool err = false;
+
+ // list every offending taxon on cerr before raising a single error
+ for(it = taxname.begin(); it != taxname.end(); it++)
+ if (fulltax_index.find(*it) == fulltax_index.end()) {
+ cerr << "ERROR: Taxon " << (*it) << " in constraint tree does not appear in full tree" << endl;
+ err = true;
+ }
+ if (err) {
+ outError("Bad constraint tree (see above)");
+ }
+
+}
+
+
+/**
+ * Check whether the (possibly partial) split tax1 | tax2 is compatible with
+ * every split stored for the constraint tree.
+ *
+ * Taxa names that are not in taxname_index are ignored. The result is true
+ * whenever either input, or either side after this filtering, has at most one taxon.
+ *
+ * @param tax1 names of taxa on one side of the split
+ * @param tax2 names of taxa on the other side of the split
+ * @return true if compatible with all stored splits, false otherwise
+ */
+bool ConstraintTree::isCompatible(StrVector &tax1, StrVector &tax2) {
+
+ assert(!empty());
+
+ if (tax1.size() <= 1 || tax2.size() <= 1)
+ return true;
+
+ Split sp1(leafNum);
+ Split sp2(leafNum);
+
+ StrVector::iterator it;
+ StringIntMap::iterator mit;
+
+ // side 1: keep only taxa known to the constraint tree
+ int tax_count1 = 0;
+
+ for (it = tax1.begin(); it != tax1.end(); it++)
+ if ((mit = taxname_index.find(*it)) != taxname_index.end()) {
+ // taxon found
+ tax_count1++;
+ sp1.addTaxon(mit->second);
+ }
+ if (tax_count1 <= 1)
+ return true;
+
+ // side 2: keep only taxa known to the constraint tree
+ int tax_count2 = 0;
+ for (it = tax2.begin(); it != tax2.end(); it++)
+ if ((mit = taxname_index.find(*it)) != taxname_index.end()) {
+ // taxon found
+ tax_count2++;
+ sp2.addTaxon(mit->second);
+ }
+
+ if (tax_count2 <= 1)
+ return true;
+
+ if (tax_count1 + tax_count2 == leafNum) {
+ // tax1 and tax2 form all taxa in the constraint tree
+
+ // quick check if this split is contained in the tree
+ // (look up whichever side contains taxon 0)
+ Split *res = NULL;
+ if (sp1.containTaxon(0))
+ res = findSplit(&sp1);
+ else
+ res = findSplit(&sp2);
+ if (res) return true;
+
+ // otherwise, check for compatibility with all splits
+ for (iterator sit = begin(); sit != end(); sit++)
+ if (!sit->first->compatible(sp1))
+ return false;
+ return true;
+ } else {
+ // partial split
+ assert(tax_count1 + tax_count2 < leafNum);
+ // taxa_mask = union of both sides; all splits are restricted to it before comparing
+ Split taxa_mask(sp1);
+ taxa_mask += sp2;
+ // extractSubSplit results are deleted below, i.e. treated as owned by this function
+ Split* subsp = sp1.extractSubSplit(taxa_mask);
+ bool res = true;
+ for (iterator sit = begin(); sit != end(); sit++) {
+ Split *subit = sit->first->extractSubSplit(taxa_mask);
+ if (!subit->compatible(*subsp)) {
+ res = false;
+ delete subit;
+ break;
+ }
+ delete subit;
+ }
+ delete subsp;
+ return res;
+ }
+}
+
+/**
+ * Check whether the branch (node1, node2) is compatible with the constraint tree.
+ * Returns true immediately when no constraint splits are stored; otherwise the
+ * taxa names gathered for (node1, node2) and for (node2, node1) are passed to
+ * the taxa-name overload.
+ *
+ * @param node1 one end node of the branch
+ * @param node2 the other end node of the same branch
+ * @return true if the branch is compatible, false otherwise
+ */
+bool ConstraintTree::isCompatible(Node *node1, Node *node2) {
+    if (!empty()) {
+        StrVector side1, side2;
+        getUnorderedTaxaName(side1, node1, node2);
+        getUnorderedTaxaName(side2, node2, node1);
+        return isCompatible(side1, side2);
+    }
+    return true;
+}
+
+/**
+ * Check whether a whole tree is compatible with the constraint tree.
+ * Returns true immediately when no constraint splits are stored; otherwise
+ * every branch returned by tree->generateNNIBraches() is tested with the
+ * taxa-name overload, stopping at the first incompatible one.
+ *
+ * @param tree input tree
+ * @return true if all tested branches are compatible, false otherwise
+ */
+bool ConstraintTree::isCompatible (MTree *tree) {
+    if (empty())
+        return true;
+    NodeVector nodes1, nodes2;
+    tree->generateNNIBraches(nodes1, nodes2);
+    StrVector taxset1, taxset2;
+
+    // check that all internal branches are compatible with constraint
+    // (unsigned index: matches the type returned by size())
+    for (size_t i = 0; i < nodes1.size(); i++) {
+        taxset1.clear();
+        taxset2.clear();
+        getUnorderedTaxaName(taxset1, nodes1[i], nodes2[i]);
+        getUnorderedTaxaName(taxset2, nodes2[i], nodes1[i]);
+        if (!isCompatible(taxset1, taxset2))
+            return false;
+    }
+    return true;
+}
+
+
+
+/**
+ * Check whether the tree obtained by applying an NNI move is compatible with
+ * the constraint tree. Returns true immediately when no constraint splits are stored.
+ *
+ * The two taxa sets are built as they would be after the swap: each side keeps
+ * its neighbors except the one being exchanged, and receives the neighbor
+ * coming from the other side.
+ *
+ * @param nni the NNI move to test
+ * @return true if the NNI is compatible, false otherwise
+ */
+bool ConstraintTree::isCompatible(NNIMove &nni) {
+ if (empty())
+ return true;
+ // check for consistency with constraint tree
+ StrVector taxset1, taxset2;
+
+ // get taxa set 1 (below node1), leaving out the neighbor that is swapped away
+ FOR_NEIGHBOR_DECLARE(nni.node1, nni.node2, it)
+ if (it != nni.node1Nei_it) {
+ getUnorderedTaxaName(taxset1, (*it)->node, nni.node1);
+ }
+ //taxset1 also includes taxa below node2Nei_it if doing NNI
+ getUnorderedTaxaName(taxset1, (*nni.node2Nei_it)->node, nni.node2);
+
+ // get taxa set 2 (below node2), leaving out the neighbor that is swapped away
+ FOR_NEIGHBOR(nni.node2, nni.node1, it)
+ if (it != nni.node2Nei_it) {
+ getUnorderedTaxaName(taxset2, (*it)->node, nni.node2);
+ }
+ //taxset2 also includes taxa below node1Nei_it if doing NNI
+ getUnorderedTaxaName(taxset2, (*nni.node1Nei_it)->node, nni.node1);
+
+// getUnorderedTaxaName(taxset1, node1, node2);
+// getUnorderedTaxaName(taxset2, node2, node1);
+
+ return isCompatible(taxset1, taxset2);
+}
diff --git a/constrainttree.h b/constrainttree.h
new file mode 100644
index 0000000..6749fc6
--- /dev/null
+++ b/constrainttree.h
@@ -0,0 +1,81 @@
+//
+// C++ Interface: constrainttree.h
+//
+// Description:
+//
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+
+#ifndef CONSTRAINTTREE_H
+#define CONSTRAINTTREE_H
+
+#include "mtree.h"
+#include "alignment.h"
+
+struct NNIMove;
+
+/**
+ ConstraintTree used to guide tree search.
+ Note that constraint tree may contain only a subset of taxa from a full tree.
+*/
+class ConstraintTree : public MTree, public SplitIntMap {
+public:
+
+ /**
+ default constructor
+ */
+ ConstraintTree();
+
+ /**
+ initialize constraint tree
+ @param constraint_file the name of the constraint tree file
+ @param fulltaxname the full list of all taxa names
+ */
+ void initConstraint(const char *constraint_file, StrVector &fulltaxname);
+
+ /**
+ check if a "partial" split defined by two sets of taxa names is compatible with the constraint tree.
+ The union of the two taxa sets does not need to comprise all taxa in the constraint tree.
+ @param[in] tax1 names of taxa on one side of the split
+ @param[in] tax2 names of taxa on the other side of the split
+ @return true if the split is compatible with all splits in the constraint tree, false otherwise.
+ */
+ bool isCompatible(StrVector &tax1, StrVector &tax2);
+
+ /**
+ check if a branch defined by two nodes in any tree is compatible or not
+ @param node1 one end node of the branch
+ @param node2 the other end node of the same branch
+ @return TRUE if the branch is compatible, FALSE otherwise
+ */
+ bool isCompatible(Node *node1, Node *node2);
+
+ /**
+ check if a whole tree is compatible with the constraint tree or not
+ @param tree input tree
+ @return TRUE if input tree is compatible with constraint, FALSE otherwise
+ */
+ bool isCompatible (MTree *tree);
+
+
+ /**
+ check if an NNI is compatible with the constraint tree or not
+ @param nni an NNIMove
+ @return TRUE if the NNI is compatible, FALSE otherwise
+ */
+ bool isCompatible(NNIMove &nni);
+
+ /**
+ look up a taxon name in the name index
+ @param taxname taxon name to search for
+ @return TRUE if the constraint tree has this taxon, FALSE otherwise
+ */
+ bool hasTaxon(string &taxname) {
+ return taxname_index.find(taxname) != taxname_index.end();
+ }
+
+protected:
+
+ /* map from taxon name to its index, used for quick taxon name search */
+ StringIntMap taxname_index;
+
+};
+
+#endif
\ No newline at end of file
diff --git a/example/example.nex b/example/example.nex
index 620068c..e3d323e 100644
--- a/example/example.nex
+++ b/example/example.nex
@@ -1,9 +1,9 @@
#nexus
begin sets;
- charset part1 = 1-99\3 2-99\3;
- charset part2 = 3-99\3;
- charset part3 = 100-384;
+ charset part1 = 1-999\3 2-999\3;
+ charset part2 = 3-999\3;
+ charset part3 = 1000-1998;
charpartition mine = HKY:part1, GTR+G:part2, GTR+G: part3;
end;
diff --git a/example/example.phy b/example/example.phy
index 8637b06..cbf0eb3 100644
--- a/example/example.phy
+++ b/example/example.phy
@@ -1,46 +1,18 @@
- 44 384
-FL-1-103 atgcgcatcacccaaggc---------------------accttctccttcctgcccgacctcacggcggcccaggtcaaggcccagatccagtatgcgctggaccagaactgggcggtctcggtggagtacacggacgatccc------------------------------------------------------catccccggaacacctattgggagatgtggggcctgcccatgttcgacctgcgcgatgccgccggcgtctatggcgaggtcgaggcctgccgcaccgcccatcccggcaagtatgtgcgggtgaacgccttcgactccaatcgcgggtgggagacggtgcgcctctccttcatcgtccagcgtccg
-OSH-1-103 atgcgcatcacccaaggc---------------------tgcttctcgttcctgcccgacctgaccgacgagcagatctcggcgcaggtggactattgcctcggccgcggctgggccgtgagcctcgaacataccgacgacccg------------------------------------------------------catccccggaacacctactgggaaatgtggggcatgccgatgttcgacctgcgcgaccccaagggcgtgatgatcgagctggacgagtgccgcaaggcctggcccggccgctacatccgcatcaatgccttcgattccacccgcggcttcgagacggtcacgatgagcttcatcgtcaaccgcccc
-CEU-1-103 atgcgcatcactcaaggc---------------------actttttccttcctgcccgaactgaccgacgagcagatcaccaaacagctcgaatactgcctgaaccagggctgggcggtcggcctcgaatacaccgacgacccg------------------------------------------------------cacccgcgcaacacgtactgggagatgttcgggctgccgatgttcgacctgcgcgatgccgccggcatcctgatggaaatcaacaacgcgcggaacaccttccccaaccactacatccgcgtcacggccttcgattcgacgcatacggtggagtcggtggtgatgtcgttcatcgtcaatcgtccc
-TH-1-103 atgagacttacacaaggc---------------------gcattttcgttcttacctgacttaacagatgagcaaatcgtaaaacaaattcaatatgctatcagcaaaaactgggctttaaacgttgaatggacagatgatccg------------------------------------------------------caccctcgcaacgcatactgggatttatggggattaccattatttggtattaaagatccagcggctgtaatgtttgaaatcaatgcttgccgtaaagctaaaccagcttgttacgtaaaagtaaatgcgtttgataactcacgtggtgtagaaagctgctgcttatcttttatcgttcaacgtcct
-CAa1-103 atgaaactaacacaagga---------------------gctttctcatttcttcctgacttaactgatgcgcaagtaactaagcaaatccagtacgctttaaataagagttgggctatttcgattgaatatactgatgatccg------------------------------------------------------cacccacgtaacagttactgggagatgtggggccttcctctattcgatgttaaggatccagctgcgattcttttcgaaatcaacatggctcgtaaggctaagcctaactactaccttaaaatagcttgttttgataacacacgtggtatcgaaagttgtgtactttctttcattgtacaacgtcct
-CAb1-103 gtgagagttacacaagga---------------------acattttcttttctaccagacctgacaaatgatcaaatcagaaaacaaattcaatatgccataaataaaggatgggcattgagtgtagaatatacagatgaccct------------------------------------------------------cacccacggaattcttactgggaaatgtggggactgcctttatttgatgtcaaagaccctgcggcaattatgtttgaagttgaagcttgtcgaaaagagaaaagcaactattatattaagctattagcttttgattcaaccaaaggagttgaaagtacagcaatgtcctttatggtcaataggcct
-SI-1-103 atgagagttacacaagga---------------------tgtttttcgtttttaccagatttaagtgatgatcaaattaaacaacaagtttcttacgctatgagcaaaggttgggcggttagtgtagaatggacagatgatcca------------------------------------------------------catccacgtaactcatattgggaattatggggtcttcctttatttgatgttaaagatccagctgcagttatgtatgaacttgctgaatgtagaaaagttaacccagaaggttatattaaaattaatgctttcgatgctagtattggtacagaaagttgtgtaatgtcttttattgtacaacgtcct
-LU-1-103 gtgagacttacacaagga---------------------gctttttcttatttaccagatttaactgatgcacaaatcatcaaacaaattgactactgcttaagcagaggttggtctgttggtgttgaatggactgatgatcca------------------------------------------------------cacccacgtaacgcttactgggaactatggggtcttccattatttgacgtaaaagattcttcagcaattttatacgaagttaatgaatgtcgtcgtttaaaccctgaaggttacattaaattagttgctttcaacgcagcacgtggtactgaaagtagtgcatctgcttttattgtacaacgtcca
-SU-1-103 gtgagaataactcaaggt---------------------accttttcttttttgccggacttgactgatgaacaaatcaaaaaacaaattgattatatgatatctaaaaaattagctataggtattgaatatactaacgacata------------------------------------------------------catcctagaaattcattttgggaaatgtggggattacctctatttgaggtcacagatccagctccagtattatttgaaattaatgcttgtcgtaaagcaaaaagtaatttctatatcaaggtagtaggattttcttctgaaagaggtatagaaagtacaataatttcatttattgtaaatagacca
-RP-56-175 atgcaggtgtggccaccagttggcaagaagaagtttgagaccctttcataccttccacccctcactgatgagcaattgcttaaggaagtagagtatcttctaaggaagggatgggttccatgtgttgaatttgagttggagaaa------------------ggatttgtccaccgtcagtacaacagttcaccaggatactatgatggacgttactggacaatgtggaggttgccattgtttggaaccactgatgctgctcaggtgttgaaggaagttgctgaatgtaaagcagaatacccagaagctttcatccgtatcatcggatttgacaacgttcgt------caagtgcaatgcattagtttcattgcaagcacaccc
-A-14-133 atgcaggtgtggcctccaattggaaagaagaagtttgagactctttcctatttgccaccattgacgagagatcaattgttgaaagaagttgaataccttctgaggaagggatgggttccatgcttggaatttgagttgctcaaa------------------ggatttgtgtacggtgagcacaacaagtcaccaagatactatgatggaagatactggacaatgtggaagcttcctatgtttggcaccactgatcctgctcaagtcgtgaaggaggttgatgaagttgttgccgcttaccccgaagctttcgttcgtgtcatcggtttcaacaacgttcgt------caagttcaatgcatcagtttcattgcacacacacca
-PR-57-176 atgcaggtgtggccaccacgtaatttgaagaagtttgagaccctatcataccttccaactctttccgaggagtcattgttgaaggagatcaactaccttctaatcaagggatgggttccttgccttgagttcgaagttggaccg------------------gcacatgtataccgtgagaacaacaagtcaccaggatactatgacggaaggtactggacaatgtggaagctacccatgttcggatgcactgacgcatcccaagttgcagctgaggtggtcgagtgcaagaacgcttaccctgatgcccacgtcagaatcattggattcgacaacaagcgt------caagtccagtgcatcagtttcattgcctacaaacct
-PY-61-180 atgcaggtgtggcctccactcggactgaagaagttcgagaccctctcttaccttcctcccctttcttccgagtccttggccaaggaagttgactacctcctccgcaagaactgggttccctgcttggaatttgagttggagact------------------ggattcgtgtaccgtgagaaccacaggtccccaggatactatgatggaaggtactggacaatgtggaagctgcccatgttcggatgcaccgactcttcccaggtgttgaaggagctggaagaggccaagaaggcttacccccagtccttcatccgtatcatcggattcgacaatgtccgt------caagtgcagtgcatcagtttcatcgcttacaagcct
-MGI-58-176 atgcaggtgtggccgccggagggcctgaagaagttcgagaccctctcctacctcccccctctctccgtcgaggacctcgccaaggaggtggactacctcctccgcaacgactgggttccctgcatcgagttctccaaggaa---------------------gggttcgtgtaccgcgagaaccacgcgtcgcccgggtactacgacgggcggtactggacgatgtggaagctgcccatgttcggctgcaccgacgccagccaggtgatcgccgaggtggaggaggccaagaaggcctaccccgagtacttcgtcagaatcatcggcttcgacaacaagcgc------caagtccagtgcatcagcttcatcgcctacaagccc
-SCR-58-177 tgcatggtgtggccaccactaggaatgaagaagtttgagactctgtcttacctgccccctctatccgaagagtcattgttgaaggaggtccaataccttctcaacaatggatgggttccctgcttggaattcgagcccactcac------------------ggatttgtgtaccgtgagcacggaaacacaccaggatactacgatggacgttactggacaatgtggaagttgcccatgttcggttgcactgacccatcccaggttgttgctgagctcgaggaggccaagaaggcttaccctgaggccttcatccgtatcataggattcgacaacgtgcgt------caagtccagtgtgtcagtttcatcgcctacaagccc
-SA-60-179 atgaaggtgtggccaccacttggattgaggaagttcgagactctttcttacctgcctgatatgagtaacgaacaattgtcaaaggaatgtgactaccttctcaggaatggatgggttccctgcgttgaattcgacatcggaagc------------------ggattcgtgtaccgtgagaaccacaggtcaccaggattctacgatggacgttactggaccatgtggaagctccctatgtttggctgcaccgactcatctcaggtgattcaggagattgaggaggctaagaaggaataccccgacgcattcatcagggttattggctttgacaacgtccgt------caagtccagtgcatcagtttcatcgcctacaagccc
-BR-60-179 atgcaggtatggccaccacgtgggaagaagttctacgagactctctcataccttccaccccttacaagggagcaattggccaaggaagttgaataccttcttcgcaagggatgggttccttgcttggaattcgagttggagcat------------------ggaaccgtgtaccgtgagtaccacagatcaccagggtactatgatggtcgttactggaccatgtggaagctgcccatgtttggttgcacagatgcagtgcaggtgttgcaggagcttgatgagatgattaaagcttacccagattgctatggtaggatcattggtttcgacaatgttcgc------caagtccagtgcattagtttccttgcctacaagcct
-CPL-58-177 atgcaggtgtggccaccaattaacaagaagaagtacgagactctctcatacctccctgatttgagccaagagcaattgcttagcgaaattgagtaccttttgaaaagtggatgggttccttgcttggaattcgaaactgagcgc------------------ggatttgtctaccgtgaacaccaccattcaccaggatactatgacggcaggtactggaccatgtggaagctacctatgttcggatgcactgatgccacccaagtgttggctgaggtggaagaggcgaagaaggcatacccacaggcctgggtccgtattattggattcgacaacgtgcgt------caagtgcagtgcatcagtttcattgcctacaagcca
-LTU-59-178 atgcaggtgtggccaccaattaacatgaagaaatacgagacattgtcataccttcctgacttgtccgatgagcaattgctcaaggaagttgagtaccttttgaaaaatggatgggttccttgcttggaattcgagactgagcac------------------ggatttgtgtaccgtgagcacaacagctcaccaggatactacgatggtagatactggaccatgtggaagttgcctatgtttgggtgcactgacggaacccaggtgttggctgaggttcaagaggccaagaatgcgtacccacaggcctggatccgtattatcggattcgacaacgttcgt------caagtgcagtgcatcagtttcattgcctacaagcca
-TSP-58-177 atgcaggtgtggcccccatatggcaagaagaagtacgagactctctcataccttcctgatttaaccgacgagcaattgctcaaggagattgagtaccttttgaacaagggatgggttccttgcttggaatttgagactgagcac------------------ggatttgtctaccgtgaataccacgcctcacctagatactatgatggaaggtactggaccatgtggaagttgcccatgtttgggtgcactgatgcaactcaggtgttgggtgagctccaagaggccaagaaggcttaccctaatgcatggatcagaatcatcggattcgacaacgtccgt------caagtgcaatgcatcagtttcattgcctacaagcca
-YBN-56-175 atgcaggtgtggccaccagttggcaagaagaagtttgagactctttcctacctgccagaccttgatgatgcacaattggcaaaggaagtagaataccttcttaggaagggatggattccttgcttggaattcgagttggagcac------------------ggtttcgtgtaccgtgagcacaacaggtcactaggatactacgatggacgctactggaccatgtggaagctgcctatgtttggttgcactgatgcttctcaggtgttgaaggagcttcaagaggctaagactgcataccccaacggcttcatccgtatcatcggattcgacaacgttcgc------caagtgcagtgcatcagcttcatcgcctacaagccc
-AN-56-175 atgaaggtgtggccaccacttggattgaagaagtacgagactctctcatacttaccaccactaactgaaactcagttggctaaggaagtcgactacttgctccgcaaaaaatgggttccttgtttggaattcgagttggagcac------------------ggttttgtctaccgtgagaacgccagatcccccggatactatgacggaagatactggacaatgtggaaattgcctatgttcggttgcaccgactcagcccaagtgatgaaggagcttgctgaatgcaagaaggagtacccccaggcctggatccgtatcatcggatttgacaatgttcgt------caagttcaatgtatcatgttcattgcttccaggcca
-HI-60-179 atgcaggtgtggcctcctcttgggaagaagaagttcgagacactctcatacctccccgatcttacacccgtacagttggctaaggaagtagattaccttcttcgctctaaatggattccttgcttggaattcgaattagaggag------------------ggattcgtgcaccgtaagtactcgagcttacccacgtactacgatggacgctactggaccatgtggaaactgcccatgtttgggtgcactgactcggctcaggtgttggaggagcttgagaattgcaagaaggaataccccaatgcattcattagaatcattgggttcgacaacgttcgt------caagtgcagtgcattagtttcattgcctacaagcct
-ANA-56-175 atgaaggtgtggccaccagttggaaagaagaagtttgagaccctctcttaccttcctgaccttaccgaagttgaattgggtaaggaagtcgactaccttctccgcaacaagtggattccttgtgttgaattcgagttggagcac------------------gggtttgtttaccgtgagcacggaagcacccccggatactacgatggccgttactggacaatgtggaagcttcccttgttcggatgcactgactctgctcaagtgttgaaggaagtccaagaatgcaaaacggagtaccctaacgctttcatcaggatcatcggattcgacaacaaccgt------caggtccagtgcatcagtttcatcgcctacaagcca
-ZE-48-166 atgcaggtgtggccggcctacggcaacaagaagttcgagacgctgtcgtacctgccgccgctgtcgacggacgacctgctgaagcaggtggactacctgctgcgcaacggctggataccctgcctcgagttcagcaaggtc---------------------ggcttcgtgtaccgcgagaactccacctccccgtgctactacgacggccgctactggaccatgtggaagctgcccatgttcggctgcaacgacgccacccaggtgtacaaggagctgcaggaggccatcaaatcctacccggacgccttccaccgcgtcatcggcttcgacaacatcaag------cagacgcagtgcgtcagcttcatcgcctacaagccc
-EAT-48-166 atgcaggtgtggccaattgagggcatcaagaagttcgagaccctgtcttacttgccacccctctccacggaggccctcttgaagcaggtcgactacttgatccgctccaagtgggtgccctgcctcgagttcagcaaggtt---------------------ggcttcgtcttccgtgagcacaacagctcccccgggtactacgacggtcgatactggacaatgtggaagctgcctatgttcgggtgcaccgacgccacacaggtgctcaacgaggtggaggaggttaagaaggagtaccctgatgcgtatgtccgcgtcatcggtttcgacaacatgcgc------caggtgcaatgcgtcagcttcattgccttcaggcca
-YSA-46-164 atgcaggtgtggccgattgagggcatcaagaagttcgagaccctctcctacctgccaccgctcaccgtggaggacctcctgaagcagatcgagtacctagctccgttccaagtggtgccctgcctcgagttcagcaaggtc---------------------ggatttgtctaccgtgagaaccacaagtcccctggatactacgacggcaggtactggaccatgtggaagctgcccatgttcgggtgcaccgacgccacccaggtcgtcaaggagctcgaggaggccaagaaggcgtaccctgatgcattcgtccgtatcatcggcttcgacaacgttagg------caggtgcagctcatcagcttcatcgcctacaacccg
-TH-52-170 atgcaggtgtggcctccattcggaaaccccaagtttgagactctgtcctacctccctacgctaaccgaggagcagctggtgaaggaggttgagtacttgttgaggaacaagtgggtgccttgtctagagtttgatctggaa---------------------ggatccatctcgaggaagtataataggagcccggggtactacgatgggagatactgggtgatgtggaagttgccgatgtttgggtgcacagaggcatctcaggtgataaacgaggtgagagagtgtgccaaggcataccccaaagccttcatccgtgtcattggctttgacaacgtccgc------caagtgcagtgcatctccttcatcgtccacaagccc
-LA-68-186 atgcaggtgtggcctccttacgcgaataaaaagtttgagactctgtcgtatctccctcgcttgaccccggagcaactggtgaaggaggtggagtacctgctgaagaacaagtgggtgccctgcctggaattcgaggaggat---------------------ggtgaaataaagagagtgtatgggaatagcccagggtactacgacgggagatactgggtgatgtggaagctgcctatgttcggatgcacagaggcatcgcaggtgttgaacgaggtgaacgagtgtgcgaaggcataccccaacgccttcatccgcgtcatcggattcgacaacgtccgc------caagtgcagtgcatctccttcatcgtccacaagcct
-GR-854-978 atgaaggtgtggaaccccgtcaacaacaagaagttcgagaccttctcctacctgccccccctgtctgacgcccagatcgccaagcaggtggacatgatcattgccaaggggctctccccctgcctggagttcgccgccccggagaacagcttcatcgccaatgacaacactgtgcgcttcagcggcaccgctgcaggctactatgacaaccggtactggaccatgtggaagctgcccatgttcggctgcacggacgccagccaggtgctgcgtgagatctccgagtgccgcagggcctacccccagtgctacgtccgc---ctggccttcgactccgtcaag------caggtgcaggtgatctcgttcgtggtgcagcgcccc
-MO-29-154 ttcaaggtctggcagcccgtgaacaacaagcagtacgagaccttctcctacctgccccccctgaccaaccagaagatcggccgtcaggtcgactacatcatcaacaacggctggaccccctgcttggagttcgctgacccctccacctccttcgtcagcaacgcgaacgccgtgcgcctccagggtgtctccgctggctactacgacaacaggtactggaccatgtggaagctgcccatgttcggctgcactgaccccagccaggtgctgcgcgaggtgtccgcctgccaggtggccttccccaacgtgtacatccgcctggttgccttcgacaacgtcaag------caggtgcagtgcatgggcttcctagtgcagcgcccc
-OE-36-161 atgatggtatggtagccctttaacaataagttctttgagaccttctcgtacttgccccctctcactgacgaccaaatcaccaagcaagtggactacatcttgagaaacaattggactccttgtctggagtttgcgggatccgaccaagcgtatgtgacccacgacaacacggtaagaatgggagattgtgcatccacttatcaggacaacagatattggaccatgtggaaattgcctatgttcggttgcattgatggatcgcaagtgttgaccgaaatttcagcttgcactaaggcctttcctgatgcctacatccgtttggtgtgttttgatgcaaatagg------caagtccaaatttccggctttttggtacataggccc
-EME-43-168 atgatggtttggtagcccttcaacaacaaaatgtttgaaactttttccttcttgcctcccttgactgatgaacaaattagcaaacaagtggactacatcttggccaactcctggaccccctgtcttgaatttgcagcttctgatcaagcttatgctggcaatgaaaattgcatcagaatgggacctgtggcttctacctaccaagacaatagatattggacaatgtggaagctacctatgtttggatgcacagacggctctcaagtgttgagcgagatccaagcatgcacaaatgctttccccgatgcttacatcagattggtttgttttgacgcaaacaga------taggtgtaaatttctggatttttggtgcacagacct
-LRE-46-171 atgatggtctggaccccggtcaacaacaagatgttcgagaccttctcctacctgccccccctgagcgacgagcagatcgccgcccaggtcgactacattgtcgccaacggctggatcccctgcctggagttcgctgagtcggacaaggcctacgtgtccaacgagtcggccatccgcttcggcagcgtgtcttgcctgtactacgacaaccgctactggaccatgtggaagctgcccatgttcggctgccgcgaccccatgcaggtgctgcgcgagatcgtcgcctgcaccaaggccttccccgatgcctacgtgcgcctggtggccttcgacaaccagaag------caggtgcagatcatgggcttcctggtccagcgcccc
-P6-2-107 atgaaaactctgcccaaa------gagcgtcgtttcgagactttctcgtacctgcctcccctcagcgatcgccaaatcgctgcacaaatcgagtacatgatcgagcaaggcttccaccccttgatcgagttcaacgagcac------------------------------------------------------tcgaatccggaagagttctactggacgatgtggaagctccccctgtttgactgcaagagccctcagcaagtcctcgatgaagtgcgtgagtgccgcagcgaatacggtgattgctacatccgtgtcgctggcttcgacaacatcaag------cagtgccaaaccgtgagcttcatcgttcatcgtccc
-HO-1-106 atgaaaactctgcccaaa------gagcgtcgctacgaaaccctttcctacctgccccccctgagcgatcagcaaattgctcgccagattgagtacatggtgcgcgaaggctatattcccgccgtggaattcaacgaagat------------------------------------------------------tccgacgcgaccacctgctactggaccatgtggaagttgcccctgttccacgccacttctacccaagaagtgttgggcgaagtgcgcgagtgccgcaccgaataccccaactgctacatccgcgtagttggtttcgacaacatcaag------cagtgtcagtccgtgagcttcatcgttcacaagccc
-SP-1-106 atgcaaaccttaccaaaa------gagcgtcgttacgaaaccctttcttacttaccccccctcaccgacgttcaaatcgaaaagcaagtccagtacattctgagccaaggctacattccagccgttgagttcaacgaagtt------------------------------------------------------tctgaacctaccgaactttattggacactgtggaagctacctttgtttggtgctaaaacatcccgtgaagtattggcagaagttcaatcttgccgttctcaatatcctggtcactacatccgtgttgtaggatttgacaatattaag------cagtgccaaatcctgagcttcatcgttcacaaaccc
-PA-1-105 ---atgcaacttagagta------gaacgtaagttcgaaactttttcttatttaccaccattaaacgaccaacagattgcgcgtcaattacaatacgcactttccaatggttatagcccagcaatcgaattcagttttaca------------------------------------------------------ggtaaagctgaagacttagtatggactttatggaaattacctttatttggtgcacaatctcctgaagaagtacttagcgaaattcaagcttgtaaacaacagttccctaatgcttacattcgtgttgtagcatttgactctatcaga------caagttcaaactttaatgttcttagtttacaaacca
-NE-2-109 gctgaaatgcaggattacaagcaaagcctcaaatatgagactttctcttatcttccacccatgaacgcggaacgcatccgcgctcaaatcaagtacgcaattgctcaaggctggagccccggcattgagcacgtagaagtgaaa------------------------------------------------------aactccatgaaccaatattggtacatgtggaaacttcccttcttcggcgaacaaaatgtcgacaacgtgttggctgaaattgaagcgtgtcgtagtgcgtatccaacacaccaggtcaaactggtggcttatgacaactatgcg------caaagcttaggtctggccttcgtggtctaccgcggc
-IFE-2-109 gctgacattcaggactacaactcaacacccaagtacgaaaccttctcttatttgccggcaatgggaccggaaaaaatgcgccgtcagatcgcctatctcatcaatcagggctggaaccccggcatcgagcatgtggaacctgaa------------------------------------------------------cgcgcatcaacatactactggtacatgtggaagttacccatgttcggcgaacagtcggtggacaccgtgatcatggagttggaagcatgccatcgcgctcaccccggccatcacgtgcgcttggtcgggtatgacaattactcg------cagagccagggcagcgcttttgtggtgtttcgcggg
-HS-9-115 ---tcgagcgtcagcgatccgtcgagccgcaagttcgagaccttctcctacctgcccgaactcggcgtggaaaagatccgcaagcaggtcgagtacatcgtcagcaagggctggaacccggccgtcgagcacaccgagccggag------------------------------------------------------aacgccttcgaccactactggtacatgtggaagctgccgatgttcggcgaaaccgacgtggacgccatcctggccgaggccgaggcatgccacaaggcgcatccctcgcatcacgtgcgcctgatcggctacgacaactatgcc------cagtcgcaaggcactgccatggtgatcttccgcggc
-RVI-7-114 agttccagcctcgaagacgtcaacagccgcaagttcgagaccttctcctacctgccgcgcatggatgccgaccgcatccgcaagcaggtcgagtacatcgtctccaagggctggaacccggccatcgagcacaccgagccggaa------------------------------------------------------aacgccttcgatcactactggtacatgtggaagctgccgatgttcggcgagaccgacatcgacaccatcctcaaggaggccgaagcctgccacaaggcgcaccccaacaatcacgtgcgtctgatcggcttcgacaactatgcc------cagtccaagggcgccgagatggtggtctatcgcggc
-IFE-8-115 aaatcccgtctctccgacccggcgagcgcgaagttcgagacactgtcttacctgcccgccctgaccgcggacgagatccgtcaacaggttgcgtatattgtttccaagggctggaatccggcggtagaacataccgaaccggaa------------------------------------------------------aacgccttcggcaactactggtatatgtggaagttgcccatgttcggcgaaacggacgtggacaccattctgaaagaagcggaacgctgccataagcggaatccccataaccacgtccgtatcgtcggctatgataacttcaag------cagtcccagggtacttccctggtagtctatcggggc
-RVI-5-112 agcagcatgggcgatcacgccaccatcggccgctacgagaccttttcctatctgccgccgctcaaccgcgaggagatcctggagcagatcctctacatcctcgacaacggctggaacgcctcgctggagcacgagcatccggat------------------------------------------------------cgcgccttcgagtattactggccgatgtggaagatgcccttcttcggcgaacaggatccgaacgtgatcctgaccgagatcgagtcctgccggcgcagctatccggaccatcacgtccggctggtcggctacgacacctacgcc------cagagcaagggacattccttcctggcgcaccgcccg
-
+17 1998
+LngfishAu CTCCCACACCCCAGGAACCAGCAGTGATTAACATTGAGCATAAGCGAAGCTTGACTCAGCCACCTCGGGCCGGTAAACCTCGTGCAGCCACCGCGGTTATACGAAGGACCCGATTGATGTCTAGGCGTAAAGGTGATTAATAGACTAATTAGAGTTAAAACCCCATCCAGCCGCGCATCCATAAAATCTAGACTACAACTACTTTCACGAAAGCTAAGATACAAACTGGGATTAGAT-CC-ACTTGCTCAGCCATAAACTTTGACTACTAAAAGGTCCGCCAGTACTACGAGGGCCAGCTTAAAACCCAAAGGACTTGCGGTGCCTACCCACCTAGAGGAGCCTGTTCTAGAACCGATAATCCACGTTAAACCTCACCCTTCTTGCCCCGTCTATATACCACCGTCGCCAGCTTACCCCGGGGTGAAATAAGCACAATTGTCAACCAAAAACGTCAGGTCGAGGTGTAGCGAATGAAGCGG [...]
+LngfishSA CAACCACACCCCAGGAAACAGCAGTAATTAATCTTAGGCATAAGTGAAACTTGACCTAGTTATTAAAAATCGGCCAATCTCGTGCAGCCGCCGCGGTTATACGAGAGATTTTATTGATAAATTGGCGTATAGGTGATTAGAATACTTATTAAAATTTAACTTTAGCCAAGCTGCGCTTCCGCAAAATCATTATTAAATTATTCCTCACGAAAGCCAAGAAACAAACTAGGATTAGATCCCTACTTGCTTGGCTATAAACTATAGTTTTTAATCAACTCGCCAGGACTACTAGCACAAGCTAAAAACTCAAAGGACTTGCGGTGCCTACCCACCTAGAGGAGCCTGTCCTAAAACCGATAATCCACGTTTTACCTAACCCTTTTTGCCCAGCCTATATACCGCCGTCGCCAGCCAACCCCGGAGAAATATAGGCAAAATTACTAGTTAAATACGTCAGGTCGAGGTGTAGCATATGAAGTGG [...]
+LngfishAf CAACCACACCCCAGGACACAGCAGTAATTAAAATTGGACATAAGTGTAACTTGATCCAGCCATTAAAAGTTGGCCAACCTCGTGCAGCCGCCGCGGTTACACGAGGAACTTAATTGATGCCTCGGCGTATAGATGATTAGAGAACTTTCTAAAATCAAATATTGGCCCTGCTGCGCGCTCGCAAACTCAAAATTAAATACATCCTCATGAAAGTCAGGAAACAAACTAGGATTAGATCCCTACTTGCCTGACCCTAAACTATGACTCTTAATAAGCCCGCCAGAACTACAAGCCCAAGCTTAAAACCCAAAGGACTTGCGGTGCCTACCCACCTAGAGGAGCCTGTTCTAGAACCGATAATCCACGTTTTACCCAACCTTCCCTAGCCAGCCTATATACCGCCGTCGCCAGCCAACCCCCGGAGACTATTGGCAGAATAGTACATCTAGCACGTCAGGTCGAGGTGTAGCACATGAGAAGG [...]
+Frog AAATTTGGTCCTGTGATTCAGCAGTGATAAACATTGAACATGAGCGAAGCTCGATTCAGTTATAAAGAGTTGGTCAATCTCGTGCAGCCGCCGCGGTTATACGAGAAACTCAATTGATATTTTGGCGTAAACGTGATTAAGAACCCAACTAGAGTCAAACTCCAACCAAGCTGCGCTTTCGTAAGAACACGAAACAATACACTCTCACGACCGCTAGGAAACAAACTGGGATTAGATCCCCACTTGCCTAGCCATAAACTTTGACTTACGAAAAATCCGCCAGAACTACGAGCCTAAGCTTAAAACCCAAAGGACTTGCGGTGCTCACCCACCTAGAGGAGCCTGTTCTGTAATCGATACCCCTCGCTAAACCTCACCCTTCTTGCCCCGCCTATATACCACCGTCGCCAGCCCACCTCGGAGATTCTTAGGCTTAATGATTTCATCAACACGTCAGGTCAAGGTGTAGCATATGAAGTGG [...]
+Turtle CTTCCACACCCCAGGACTCAGCAGTGATAAAAATTAAGCATAAGCGAAGCTTGACTTAGTCACAATGAGCTGGTAAATCTCGTGCAGCCACCGCGGTTACACAAGAAGCCCAACTAACGACAAGGCGTAAAAGTGATTAAATACCCATTTAAGGTGAACTACTTACTTCGCTGCGCAAAAGTACACAGAAAATAAAGACTATTCCCACGATCGCTAAAACACAAACTGGGATTAGATCCCCACTTGCTTAGCCCTAAACCTAGATTTTTACAAAATCCGCCAGAATTACGAGCAAAAGCTTAAAACTCTAAGGACTTGCGGTGCTCACCCACCTAGAGGAGCCTGTTCTATAATCGATAACCCACGATCTACCTCACCTCTCTTGCCCAGCCTATATACCACCGTCGCCAGCTTACCCCGGGATACAATAAGCAAGATAAAACCATTAACAAGTCAGGTCAAGGTGTAGCTAACTGAGATG [...]
+Sphenodon CTCCCACACCCCAGGACACAGCAGTGATTAATATTAAGCATAAGTGAAACTTGACTTAGTTAAGAACGGCCGGTCAATTTCGTGCAGCCACCGCGGTTAAACGATAGGCCGAAGTAAGGCCAAGGCGTAAAAGTAACTAAACCCCCTTCTAAACCCAAGAAAAAACTAAGATGAGTTGTAAAACCTCTGAAGATAAGTAAAATCTTACTAAAATTAAGGGTCAAACTGGGATTAGATCCCCACTTGCTTAATCCTAAACATCGACTAATACAAGGTTCGCCCGAACTACCAGCAAAAGCTAGAAACCCTAAAGACTTGCGGTGCCCAACCCCCTAGAGGAGCCTGTTCTATAATTGATGATCCGCAATAAACCTCACCTTTTTCGCCCAGCCTATATACCGCCGTCGCCAGTCTACCTTGAAGAACTATAGGTCAAACAGAACCGCTAATACGTCAGGTCAAGGTGCAGCCAATAAAATGG [...]
+Lizard CTTCCACACCCAAGGCATCAGCAGTGATAAACATTAAGCATAAGCGAAGCTTGACTTAGTTATTAAGGGCCGGTAAACCTCGTGCAGCCACCGCGGTTATACGAGGGGCCCAAGCAGCGACTCGGCGTAAATGTGGCCAAAACCAACATAAAAACTAAACAATAGCCTAACCGAAACACAGGAAATGCAAACGTAACGTAATTCCCACGAAAACTAAGAAACAAACTAGGATTAGATCCCTACTTGCTTAGTCGTTAATACGATATTACACTAAATCCGCCAGAACTACGAGCGAAAGCTTAAAACTCAAAGGACTTGCGGTGCTCACCGACTTAGAGGAGCCTGTCCTATAATCGATACTCCACGCTAAACCTCACCATCTTTGCCCAGCCTATATACCGCCGTCGTCAACCTACCTCAGAGAAAAATAAGTACAAAAGTAAAACTAAAACGTCAGGTCAAGGTGTAGCTAATAGAATGG [...]
+Crocodile CTCCCACACCCCAGGCCACAGCAGTAGTTAATATTAGGCATAAGCGAAGCCTGACCTAGTAAGGAAGGGCCGGTTAATCTCGTGCAGCGACCGCGGTTATACGACAGACCCAAATAATGATACGGCGTAAAGACGACTATATTCCCTACCTAGGGAAGAATAACCCCAGGCTGAGCCATAGGAATAACATATTCAAAACAACTCTCGTGAAAGCTAGGACATAAACTAAGATTAGATCCTTACTTACCTAGCTGTAACACAATAATCAAACCTAATTCGCCAAAACTACGAGCAATAGCTTAAAACTCAAAGGACTTGCGGCACTTACCCCCCTAGAGGAGCCTGTCCTATAATCGACAGTACACGTTACACCCGACCCCTTTAGCCCAGTCTGTATACCGCCGTCGCAAGCCCGTCCCAGGGAAACACGCGCGCAACAGTCGAGCTAACACGTCAGGTCAAGGTGCAGCCAACAAGGTGG [...]
+Bird CTACCACACCCCAGGACTCAGCAGTAATTAACCTTAAGCATAAGTGTAACTTGACTTAGCCACAAAGGGTTGGTAAATCTTGTGCAGCCACCGCGGTCATACAAGAAACCCAATCAATGCTACGGCGTAAAGGTGGCCAATTTGCACCCTAAGATTAAAATGCAACCAAGCTGAGCCTAAGAAACCCCAAATCCAAGTTAATTCCCACGAAAGCTAGGACCCAAACTGGGATTAGATCCCCACTTGCCTAGCCCTAAATCTAGATTCCCACACAATCCGCCTGAACTACGAGCACAAGCTTAAAACTCTAAGGACTTGCGGTGCCCACCCACCTAGAGGAGCCTGTTCTATAATCGATAATCCACGATTCACCCAACCCCCCTTGCCCAGCCTACATACCGCCGTCGCCAGCCCACCTCTAAGAACAATGAGCTCAATAGCCTCGCTAATAAGACAGGTCAAGGTATAGCCTATGGGGTGG [...]
+Human CTACCACACCCCAGGAAACAGCAGTGATTAACCTTTAGCATAAACGAAGTTTAACTAAGCTATAAAGGGTTGGTCAATTTCGTGCAGCCACCGCGGTCACACGATTAACCCAATCAATGAAGCGGCGTAAAGGTGTTTTGACCCTCCCATAAAGCTAAAACTCACCTGAGTTGAACTCCAGTAAAATTACGAAAAAAATCTGACACACAATAGCTAAGACCCAAACTGGGATTAGATCCCCACTTGCTTAGCCCTAAACCTCAACTAACACAAAGCTCGCCAGCACTACGAGCCACAGCTTAAAACTCAAAGGACCTGCGGTGCTTATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCCCTCTTGCTCAGCCTATATACCGCCATCTTCAGCAAACCCTGAGGCTACATAAGCGCAAGTACCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGGTGG [...]
+Seal CCACCACACCCCAGGATACAGCAGTAATAAAAATTAAGCATGAACGAAGTTTGACTAAGCTATAAAGGGTTGGTAAATTTCGTGCAGCCACCGCGGTCATACGATTAACCCAACTAATGGCCCGGCGTAAAGGTGTTAAGACAACCCACTAAAGCTAAAACCTAACCAAGCCGAGCTACCGTAAAATCACGAAAACAATTCTGTGCACGATAGCTAAGATCCAAACTGGGATTAGATCCCCACTTGCTTAGCCCTAAACATAAATTCATACAAAATTCGCCAGAACTACTAGCAACAGCTTAAAACTCAAAGGACTTGCGGTGCTTACCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATAAACCTCACCTTCCTTGCTCAGTCTATATACCGCCATCTTCAGCAAACCCTTAGGAACAATAAGCACAATAACTACATAAAAAAGTTAGGTCAAGGTGTAACCTATGGAATGG [...]
+Cow CTACCACACCCCAGGAAACAGCAGTGACAAAAATTAAGCATAAACGAAGTTTGACTAAGTTATAAAGGGTTGGTAAATCTCGTGCAGCCACCGCGGTCATACGATTAACCCAACTAACGGAGTGGCGTAAAAGTGTTAAGCCCATACCATAGGGTTAAATTCTAACTAAGCTGAGCCATGATAAAATGACGAAAACAAGCCGACGCACTATAGCTAAGACCCAAACTGGGATTAGATCCCCACTTGCTTAGCCCTAAACACAGATTACAACAAAATTCGCCAGTACTACTAGCAACAGCTTAAAACTCAAAGGACTTGCGGTGCTTATCCTTCTAGAGGAGCCTGTTCTATAATCGATAAACCCCGATAAACCTCACCATTCTTGCTCAGTCTATATACCGCCATCTTCAGCAAACCCTAAGGAAAAATAAGCGTAATTAGTACATAAAAACGTTAGGTCAAGGTGTAACCTATGAAATGG [...]
+Whale CTACCACGCCCCAGGACACAGCAGTGATAAAAATTAAGCATAAACGAAGTTCGACTAAGTCATAAAGGGTTGGTAAACTTCGTGCAGCCACCGCGGTCATACGATCGACCCAATTAATGAAGCGGCGTAAAGGTGTTAAGACCACATGATAAAGTCAAACCTTAATTAAGCTGAGCCCTAATAAGCCTACGAAAAAAAATCTGCACACGACAGCTAAGATCCAAACTGGGATTAGATCCCCACTTGCTTAGTCGTAAACCCCAATCACAACAAGATTCGCCAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTTGCGGTGCCTACCCATCTAGAGGAGCCTGTTCTGTAACCGATAAACCCCGATCAACCTCACCACCCTTGCTCAGTCTATATACCGCCATCTTCAGCAAACCCTAAGGGAGAATAAGCATAACCACTACATAAAAACGTTAGGTCAAGGTGTAACCCATGGGTTGG [...]
+Mouse CTACCACACCCCAGGACTCAGCAGTGATAAATATTAAGCATAAACGAAGTTTGACTAAGTTATACAGGGTTGGTAAATTTCGTGCAGCCACCGCGGTCATACGATTAACCCAACTAATATCTTGGCGTAAAAGTGTCAATAAATAAATATAGAATTAAAATCCAACTTATATGATTCATTGTAAACTAACGAAAAGCTTTATATACACGACAGCTAAGACCCAAACTGGGATTAGATCCCCACTTGCTTAGCCATAAACCTAAATTAATACAAAATTTGCCAGAACTACTAGCCATAGCTTAAAACTCAAAGGACTTGCGGTACTTATCCATCTAGAGGAGCCTGTTCTATAATCGATAAACCCCGCTCTACCTCACCTCTCTTGCTCAGCCTATATACCGCCATCTTCAGCAAACCCTAAGGTATTATAAGCAAAAGAACAACATAAAAACGTTAGGTCAAGGTGTAGCCAATGAAATGG [...]
+Rat CTACCACACCCCAGGACTCAGCAGTGATAAATATTAAGCATGAACGAAGTTTGACTAAGCTATACAGGGTTGGTAAATTTCGTGCAGCCACCGCGGTCATACGATTAACCCAACTAATATTTTGGCGTAAAAGTGCCAATAAATCTCAATAGAATTAAAATCCAACTTATATGATTCATTGTAAGCCAACGAAAAACTTTATATGCACGATAGCTAAGACCCAAACTGGGATTAGATCCCCACTTGCTTAGCCCTAAACCTTAATTAATACAAAATTTGCCAGAACTACTAGCTACAGCTTAAAACTCAAAGGACTTGCGGTACTTATCCATCTAGAGGAGCCTGTTCTATAATCGATAAACCCCGTTCTACCTTACCCTTCTCGCTCAGCCTATATACCGCCATCTTCAGCAAACCCTAAGGCACTATAAGCACAAGAACAACATAAAAACGTTAGGTCAAGGTGTAGCCAATGAAGCGG [...]
+Platypus CCTCCACACCCCAGGACACAGCAGTAATAGAAATTAGTCATAAACGCAGTTTGAACAAGTCATCAAGAGTCGGTAAATTTCGTGCAGCCACCGCGGTCATACGATTGACTCAACTAACAATAAGGTGTAAAAGTGTTTAAAATTAAACATAAGATTAAAGTAGAACTAAACTGAGTCATAGTAAAGCTACGAAAAGCAATTGATACACGATAGCTAAGGTACAAACTGGGATTAGATCCCCACTTGCTTAGCCCTAAACTCAAGTTTAAACAAAACTCACCAGAACTACTAGCAACAGCTTAAAACTCAAAGGACTTGCGGTGCTTACCCCTCTAGAGGAGCCTGTTCTATAATCGATAAACCCCGATACACCTCACCTCTTTTGCCCTGTCTATATACCGCCATCGTCAGCCAACCCTAAGGAACAATAGGCGTAATCATTTCATAAAAACGTTAGGTCAAGGTGTAGCCTATAAGATGG [...]
+Opossum CTTCCACACCCCAGGAGACAGCAGTGATTAAAATTAAGCATAAACGAAGTTTGACTAAGTCATTTAGGGTTGGTCAATTTCGTGCAGCCACCGCGGTCATACGATTAACCCAATTAATAATAAGGCGTAAAGGTGTTTAGTAATACAAATAAAGTTAATAATTAACTAAACTGCGTTCTAGTAAAATAATAAAAAAACACCGATACACGAAAACTAAGACACAAACTGGGATTAGATCCCCACTTGCTTAGTAATAAACTAAAATTTAAACAAAATTCGCCAGAACTACTAGCAATTGCTTAAAACTCAAAGGACTTGCGGTGCCCACCCACCTAGAGGAGCCTGTTCTATAATCGATAAACCCCGATAAACCAGACCTATCTTGCCCAGCCTATATACCGCCATCGTCAGCTAACCTTTAAGAATTATAAGCAAAATCAAAACATAAAAACGTTAGGTCAAGGTGTAGCATATGATAAGG [...]
diff --git a/gsl/gauss.cpp b/gsl/gauss.cpp
index b6c2d0e..089c418 100644
--- a/gsl/gauss.cpp
+++ b/gsl/gauss.cpp
@@ -47,6 +47,18 @@
//#include <gsl/gsl_cdf.h>
#define GSL_DBL_EPSILON 2.2204460492503131e-16
+#ifndef M_2_SQRTPI
+#define M_2_SQRTPI 1.12837916709551257389615890312154517 /* 2/sqrt(pi) */
+#endif
+
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237309504880168872420969808 /* sqrt(2) */
+#endif
+
+#ifndef M_SQRT1_2
+#define M_SQRT1_2 0.707106781186547524400844362104849039 /* 1/sqrt(2) */
+#endif
+
#ifndef M_1_SQRT2PI
#define M_1_SQRT2PI (M_2_SQRTPI * M_SQRT1_2 / 2.0)
#endif
diff --git a/hashsplitset.h b/hashsplitset.h
index f86fdd0..ea06192 100644
--- a/hashsplitset.h
+++ b/hashsplitset.h
@@ -122,7 +122,24 @@ public:
* @param use_index TRUE to map to index of splits in sg, FALSE to map to split weights
*/
void buildMap(SplitGraph &sg, bool use_index = true);
-
+
+ int getNumTree() { // @return the current value of the private member numTree
+ return numTree;
+ }
+
+ void setNumTree(int maxValue) { // @param maxValue value to store in the private member numTree
+ this->numTree = maxValue;
+ }
+
+private:
+
+ /**
+ * The maximum weight value. If the splits are generated from n trees and the splits of every tree
+ * all have weight = 1, then numTree = n.
+ * This variable is used to determine whether a split appears on all input trees.
+ */
+ int numTree;
+
};
#endif
diff --git a/iqtree.cpp b/iqtree.cpp
index 0a9f34c..979bc4a 100644
--- a/iqtree.cpp
+++ b/iqtree.cpp
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2009 by BUI Quang Minh *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -25,17 +27,18 @@
#include "model/modelgtr.h"
#include "model/rategamma.h"
#include <numeric>
-#include "pll/pllInternal.h"
+#include "tools.h"
+#include "MPIHelper.h"
#include "pllnni.h"
-#include "vectorclass/vectorclass.h"
-#include "vectorclass/vectormath_common.h"
+#ifdef _IQTREE_MPI
+#include <mpi.h>
+#endif
-Params *globalParam;
+Params *globalParams;
Alignment *globalAlignment;
extern StringIntMap pllTreeCounter;
-
IQTree::IQTree() : PhyloTree() {
IQTree::init();
}
@@ -46,8 +49,6 @@ void IQTree::init() {
k_delete = k_delete_min = k_delete_max = k_delete_stay = 0;
dist_matrix = NULL;
var_matrix = NULL;
- nni_count_est = 0.0;
- nni_delta_est = 0;
// curScore = 0.0; // Current score of the tree
cur_pars_score = -1;
// enable_parsimony = false;
@@ -55,8 +56,8 @@ void IQTree::init() {
nni_cutoff = -1e6;
nni_sort = false;
testNNI = false;
- print_tree_lh = false;
- write_intermediate_trees = 0;
+// print_tree_lh = false;
+// write_intermediate_trees = 0;
// max_candidate_trees = 0;
logl_cutoff = 0.0;
len_scale = 10000;
@@ -64,7 +65,22 @@ void IQTree::init() {
duplication_counter = 0;
//boot_splits = new SplitGraph;
pll2iqtree_pattern_index = NULL;
- fastNNI = true;
+
+ treels_name = Params::getInstance().out_prefix;
+ treels_name += ".treels";
+ out_lh_file = Params::getInstance().out_prefix;
+ out_lh_file += ".treelh";
+ site_lh_file = Params::getInstance().out_prefix;
+ site_lh_file += ".sitelh";
+
+ if (Params::getInstance().print_tree_lh) {
+ out_treelh.open(out_lh_file.c_str());
+ out_sitelh.open(site_lh_file.c_str());
+ }
+
+ if (Params::getInstance().write_intermediate_trees)
+ out_treels.open(treels_name.c_str());
+
}
IQTree::IQTree(Alignment *aln) : PhyloTree(aln) {
@@ -77,41 +93,49 @@ void IQTree::setCheckpoint(Checkpoint *checkpoint) {
candidateTrees.setCheckpoint(checkpoint);
}
-void IQTree::saveCheckpoint() {
- stop_rule.saveCheckpoint();
- candidateTrees.saveCheckpoint();
-
- if (boot_samples.size() > 0 && !boot_trees.front().empty()) {
- checkpoint->startStruct("UFBoot");
-// CKP_SAVE(max_candidate_trees);
- CKP_SAVE(logl_cutoff);
- // save boot_samples and boot_trees
- int id = 0;
+void IQTree::saveUFBoot(Checkpoint *checkpoint) {
+ checkpoint->startStruct("UFBoot");
+ if (MPIHelper::getInstance().isWorker()) {
+ CKP_SAVE(sample_start);
+ CKP_SAVE(sample_end);
checkpoint->startList(boot_samples.size());
+ checkpoint->setListElement(sample_start-1);
// TODO: save boot_trees_brlen
- for (vector<BootValType* >::iterator it = boot_samples.begin(); it != boot_samples.end(); it++, id++) {
+ for (int id = sample_start; id != sample_end; id++) {
checkpoint->addListElement();
stringstream ss;
ss.precision(10);
ss << boot_counts[id] << " " << boot_logl[id] << " " << boot_orig_logl[id] << " " << boot_trees[id];
checkpoint->put("", ss.str());
-// string &bt = boot_trees[id];
-// CKP_SAVE(bt);
-// double bl = boot_logl[id];
-// CKP_SAVE(bl);
-// double bol=boot_orig_logl[id];
-// CKP_SAVE(bol);
-// int bc = boot_counts[id];
-// CKP_SAVE(bc);
}
checkpoint->endList();
+ } else {
+ CKP_SAVE(logl_cutoff);
CKP_SAVE(boot_consense_logl);
int boot_splits_size = boot_splits.size();
CKP_SAVE(boot_splits_size);
- checkpoint->endStruct();
+ checkpoint->startList(boot_samples.size());
+ // TODO: save boot_trees_brlen
+ for (int id = 0; id != boot_samples.size(); id++) {
+ checkpoint->addListElement();
+ stringstream ss;
+ ss.precision(10);
+ ss << boot_counts[id] << " " << boot_logl[id] << " " << boot_orig_logl[id] << " " << boot_trees[id];
+ checkpoint->put("", ss.str());
+ }
+ checkpoint->endList();
+ }
+ checkpoint->endStruct();
+}
+void IQTree::saveCheckpoint() {
+ stop_rule.saveCheckpoint();
+ candidateTrees.saveCheckpoint();
+
+ if (boot_samples.size() > 0 && !boot_trees.front().empty()) {
+ saveUFBoot(checkpoint);
// boot_splits
- id = 0;
+ int id = 0;
for (vector<SplitGraph*>::iterator sit = boot_splits.begin(); sit != boot_splits.end(); sit++, id++) {
checkpoint->startStruct("UFBootSplit" + convertIntToString(id));
(*sit)->saveCheckpoint();
@@ -122,6 +146,27 @@ void IQTree::saveCheckpoint() {
PhyloTree::saveCheckpoint();
}
+void IQTree::restoreUFBoot(Checkpoint *checkpoint) {
+ checkpoint->startStruct("UFBoot");
+ // restore the per-replicate bootstrap records (counts, log-likelihoods, tree strings) written by saveUFBoot
+ int id;
+ checkpoint->startList(params->gbo_replicates);
+ int sample_start, sample_end;
+ CKP_RESTORE(sample_start);
+ CKP_RESTORE(sample_end);
+ checkpoint->setListElement(sample_start-1);
+ for (id = sample_start; id != sample_end; id++) {
+ checkpoint->addListElement();
+ string str;
+ checkpoint->getString("", str);
+ assert(!str.empty());
+ stringstream ss(str);
+ ss >> boot_counts[id] >> boot_logl[id] >> boot_orig_logl[id] >> boot_trees[id];
+ }
+ checkpoint->endList();
+ checkpoint->endStruct();
+}
+
void IQTree::restoreCheckpoint() {
PhyloTree::restoreCheckpoint();
stop_rule.restoreCheckpoint();
@@ -144,18 +189,6 @@ void IQTree::restoreCheckpoint() {
checkpoint->getString("", str);
stringstream ss(str);
ss >> boot_counts[id] >> boot_logl[id] >> boot_orig_logl[id] >> boot_trees[id];
-// string bt;
-// CKP_RESTORE(bt);
-// boot_trees[id] = bt;
-// double bl;
-// CKP_RESTORE(bl);
-// boot_logl[id] = bl;
-// double bol;
-// CKP_RESTORE(bol);
-// boot_orig_logl[id] = bol;
-// int bc;
-// CKP_RESTORE(bc);
-// boot_counts[id] = bc;
}
checkpoint->endList();
CKP_RESTORE(boot_consense_logl);
@@ -178,14 +211,16 @@ void IQTree::restoreCheckpoint() {
}
void IQTree::initSettings(Params ¶ms) {
- searchinfo.speednni = params.speednni;
+
searchinfo.nni_type = params.nni_type;
optimize_by_newton = params.optimize_by_newton;
- setLikelihoodKernel(params.SSE);
- candidateTrees.init(this->aln, ¶ms);
-// if (params.maxtime != 1000000) {
-// params.autostop = false;
-// }
+ if (num_threads > 0)
+ setLikelihoodKernel(params.SSE, num_threads);
+ else
+ setLikelihoodKernel(params.SSE, params.num_threads);
+ candidateTrees.init(this->aln, 200);
+ intermediateTrees.init(this->aln, 200000);
+
if (params.min_iterations == -1) {
if (!params.gbo_replicates) {
if (params.stop_condition == SC_UNSUCCESS_ITERATION) {
@@ -253,12 +288,12 @@ void IQTree::initSettings(Params ¶ms) {
nni_sort = params.nni_sort;
testNNI = params.testNNI;
- globalParam = ¶ms;
+ globalParams = ¶ms;
globalAlignment = aln;
- write_intermediate_trees = params.write_intermediate_trees;
+ //write_intermediate_trees = params.write_intermediate_trees;
- if (write_intermediate_trees > 2 || params.gbo_replicates > 0) {
+ if (Params::getInstance().write_intermediate_trees > 2 || params.gbo_replicates > 0) {
save_all_trees = 1;
}
if (params.gbo_replicates > 0) {
@@ -268,7 +303,7 @@ void IQTree::initSettings(Params ¶ms) {
}
// if (params.gbo_replicates > 0 && params.do_compression)
// save_all_br_lens = true;
- print_tree_lh = params.print_tree_lh;
+// print_tree_lh = params.print_tree_lh;
// max_candidate_trees = params.max_candidate_trees;
// if (max_candidate_trees == 0)
// max_candidate_trees = aln->getNSeq() * params.step_iterations;
@@ -294,6 +329,20 @@ void IQTree::initSettings(Params ¶ms) {
cout << "Generating " << params.gbo_replicates << " samples for ultrafast bootstrap (seed: " << params.ran_seed << ")..." << endl;
// allocate memory for boot_samples
boot_samples.resize(params.gbo_replicates);
+ sample_start = 0;
+ sample_end = boot_samples.size();
+
+ // split the replicates into contiguous blocks of ceil(n / numProcesses); this process handles [sample_start, sample_end)
+ if (MPIHelper::getInstance().getNumProcesses() > 1) {
+ int num_samples = boot_samples.size() / MPIHelper::getInstance().getNumProcesses();
+ if (boot_samples.size() % MPIHelper::getInstance().getNumProcesses() != 0)
+ num_samples++;
+ sample_start = MPIHelper::getInstance().getProcessID() * num_samples;
+ sample_end = sample_start + num_samples;
+ if (sample_end > boot_samples.size())
+ sample_end = boot_samples.size();
+ }
+
size_t orig_nptn = getAlnNPattern();
#ifdef BOOT_VAL_FLOAT
size_t nptn = get_safe_upper_limit_float(orig_nptn);
@@ -488,17 +537,7 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
params->numNNITrees = params->min_iterations;
int fixed_number = 0;
setParsimonyKernel(kernel);
-
- candidateTrees.init(aln, params);
-// restoreCheckpoint();
-// if (leafNum != 0) {
-// if (!candidateTrees.empty()) {
-// readTreeString(candidateTrees.getTopTrees(1)[0]);
-// cout << endl << "CHECKPOINT: Current best tree restored, LogL: " << candidateTrees.getBestScore() << endl;
-// } else
-// cout << endl << "CHECKPOINT: Initial tree restored" << endl;
-// return;
-// } else
+
if (params->user_file) {
// start the search with user-defined tree
cout << "Reading input tree file " << params->user_file << " ..." << endl;
@@ -511,18 +550,17 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
fixed_number = wrapperFixNegativeBranch(false);
params->numInitTrees = 1;
params->numNNITrees = 1;
- // change to old kernel if tree is multifurcating
-// if ((params->SSE == LK_EIGEN || params->SSE == LK_EIGEN_SSE) && !isBifurcating()) {
-// cout << "NOTE: Changing to old kernel as input tree is multifurcating" << endl;
-// params->SSE = LK_SSE;
-// }
if (params->pll)
pllReadNewick(getTreeString());
} else if (CKP_RESTORE(initTree)) {
readTreeString(initTree);
cout << endl << "CHECKPOINT: Initial tree restored" << endl;
} else {
- switch (params->start_tree) {
+ START_TREE_TYPE start_tree = params->start_tree;
+ // only IQ-TREE's own parsimony kernel supports a constraint tree, so force it when one is given
+ if (!constraintTree.empty())
+ start_tree = STT_PARSIMONY;
+ switch (start_tree) {
case STT_PARSIMONY:
// Create parsimony tree using IQ-Tree kernel
if (kernel == LK_EIGEN_SSE)
@@ -543,7 +581,7 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
case STT_PLL_PARSIMONY:
cout << endl;
cout << "Create initial parsimony tree by phylogenetic likelihood library (PLL)... ";
- pllInst->randomNumberSeed = params->ran_seed;
+ pllInst->randomNumberSeed = params->ran_seed + MPIHelper::getInstance().getProcessID();
pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst, pllPartitions, params->sprDist);
resetBranches(pllInst);
pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back,
@@ -569,6 +607,9 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
checkpoint->dump(true);
}
+ if (!constraintTree.isCompatible(this))
+ outError("Initial tree is not compatible with constraint tree");
+
if (fixed_number) {
cout << "WARNING: " << fixed_number << " undefined/negative branch lengths are initialized with parsimony" << endl;
}
@@ -587,7 +628,31 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
}
}
-void IQTree::createInitTrees(int nParTrees) {
+int IQTree::addTreeToCandidateSet(string treeString, double score, bool updateStopRule, int sourceProcID) {
+ double curBestScore = candidateTrees.getBestScore();
+ int pos = candidateTrees.update(treeString, score);
+ if (updateStopRule) {
+ stop_rule.setCurIt(stop_rule.getCurIt() + 1);
+ if (score > curBestScore) {
+ if (pos != -1) {
+ stop_rule.addImprovedIteration(stop_rule.getCurIt());
+ cout << "BETTER TREE FOUND at iteration " << stop_rule.getCurIt() << ": " << score << endl;
+ } else {
+ cout << "UPDATE BEST LOG-LIKELIHOOD: " << score << endl;
+ }
+ bestcandidate_changed = true;
+ // COMMENTED OUT: not safe with the MPI version
+// printResultTree();
+ }
+
+ curScore = score;
+ printIterationInfo(sourceProcID);
+ }
+ return pos;
+}
+
+void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
+
if (nParTrees > 0) {
if (params->start_tree == STT_RANDOM_TREE)
cout << "Generating " << nParTrees << " random trees... ";
@@ -596,17 +661,22 @@ void IQTree::createInitTrees(int nParTrees) {
cout.flush();
}
double startTime = getRealTime();
- int numDupPars = 0;
+
#ifdef _OPENMP
StrVector pars_trees;
if (params->start_tree == STT_PARSIMONY && nParTrees >= 1) {
pars_trees.resize(nParTrees);
+ if (aln->ordered_pattern.empty())
+ aln->orderPatternByNumChars();
#pragma omp parallel
{
PhyloTree tree;
+ if (params->constraint_tree_file) {
+ tree.constraintTree.initConstraint(params->constraint_tree_file, aln->getSeqNames());
+ }
tree.setParams(params);
tree.setParsimonyKernel(params->SSE);
- #pragma omp for
+ #pragma omp for schedule(dynamic)
for (int i = 0; i < nParTrees; i++) {
tree.computeParsimonyTree(NULL, aln);
pars_trees[i] = tree.getTreeString();
@@ -614,12 +684,18 @@ void IQTree::createInitTrees(int nParTrees) {
}
}
#endif
+
+ int init_size = candidateTrees.size();
+
+ int processID = MPIHelper::getInstance().getProcessID();
+// unsigned long curNumTrees = candidateTrees.size();
for (int treeNr = 1; treeNr <= nParTrees; treeNr++) {
+ int parRandSeed = Params::getInstance().ran_seed + processID * nParTrees + treeNr;
string curParsTree;
/********* Create parsimony tree using PLL *********/
if (params->start_tree == STT_PLL_PARSIMONY) {
- pllInst->randomNumberSeed = params->ran_seed + treeNr * 12345;
+ pllInst->randomNumberSeed = parRandSeed;
pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst, pllPartitions, params->sprDist);
resetBranches(pllInst);
pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions,
@@ -636,153 +712,163 @@ void IQTree::createInitTrees(int nParTrees) {
} else if (params->start_tree == STT_PARSIMONY) {
/********* Create parsimony tree using IQ-TREE *********/
#ifdef _OPENMP
- curParsTree = pars_trees[treeNr-1];
+ if (params->start_tree == STT_PARSIMONY)
+ curParsTree = pars_trees[treeNr-1];
+ else
+ curParsTree = generateParsimonyTree(parRandSeed);
#else
- computeParsimonyTree(NULL, aln);
- curParsTree = getTreeString();
+ curParsTree = generateParsimonyTree(parRandSeed);
#endif
- } else {
- assert(0);
}
-
- if (candidateTrees.treeExist(curParsTree)) {
- numDupPars++;
- continue;
- } else {
- if (params->count_trees) {
- string tree = getTopology();
- if (pllTreeCounter.find(tree) == pllTreeCounter.end()) {
- // not found in hash_map
- pllTreeCounter[curParsTree] = 1;
- } else {
- // found in hash_map
- pllTreeCounter[curParsTree]++;
- }
- }
- candidateTrees.update(curParsTree, -DBL_MAX, false);
+
+ int pos = addTreeToCandidateSet(curParsTree, -DBL_MAX, false, MPIHelper::getInstance().getProcessID());
+ // pos == -1: a duplicated tree was generated, so perturb it with random NNIs and add that instead
+ if (pos == -1) {
+ readTreeString(curParsTree);
+ string randTree = doRandomNNIs();
+ addTreeToCandidateSet(randTree, -DBL_MAX, false, MPIHelper::getInstance().getProcessID());
}
}
-
- double parsTime = getRealTime() - startTime;
- if (nParTrees > 0) {
- cout << parsTime << " seconds ";
- cout << candidateTrees.size() << " distinct starting trees" << endl;
- }
+ if (nParTrees > 0)
+ cout << getRealTime() - startTime << " second" << endl;
/****************************************************************************************
- Compute logl of all parsimony trees
+ Compute logl of all initial trees
*****************************************************************************************/
- cout << "Computing log-likelihood of " << candidateTrees.size() << " initial trees ... ";
+ vector<string> initTreeStrings = candidateTrees.getBestTreeStrings();
+ candidateTrees.clear();
+
+ if (init_size < initTreeStrings.size())
+ cout << "Computing log-likelihood of " << initTreeStrings.size() - init_size << " initial trees ... ";
startTime = getRealTime();
-// CandidateSet candTrees = candidateTrees.getBestCandidateTrees(candidateTrees.size());
- CandidateSet candTrees = candidateTrees;
- for (CandidateSet::iterator it = candTrees.begin(); it != candTrees.end(); ++it) {
+ for (vector<string>::iterator it = initTreeStrings.begin(); it != initTreeStrings.end(); ++it) {
string treeString;
double score;
- if (it->first == -DBL_MAX) {
- readTreeString(it->second.tree);
+ readTreeString(*it);
+ if (it-initTreeStrings.begin() >= init_size)
treeString = optimizeBranches(2);
- score = getCurScore();
- } else {
- treeString = it->second.tree;
- score = it->first;
+ else {
+ computeLogL();
+ treeString = getTreeString();
}
- candidateTrees.update(treeString, score);
+ score = getCurScore();
+ candidateTrees.update(treeString,score);
}
-
- if (verbose_mode >= VB_MED) {
- vector<double> bestScores = candidateTrees.getBestScores(candidateTrees.size());
- for (vector<double>::iterator it = bestScores.begin(); it != bestScores.end(); it++)
- cout << (*it) << " ";
- cout << endl;
+
+ if (Params::getInstance().writeDistImdTrees)
+ intermediateTrees.initTrees(candidateTrees);
+
+ if (init_size < initTreeStrings.size())
+ cout << getRealTime() - startTime << " seconds" << endl;
+
+ if (nParTrees > 0) {
+ cout << "Current best score: " << candidateTrees.getBestScore() << endl;
}
+/*
+ //---- NON-BLOCKING COMMUNICATION
+#ifdef _IQTREE_MPI
+ vector<string> trees;
+ vector<double> scores;
+ // FIX BUG: send candidateTrees instead of intermediateTrees
+ candidateTrees.getAllTrees(trees, scores, WT_TAXON_ID + WT_BR_LEN + WT_BR_LEN_SHORT);
+ // Send all trees to other processes
+ MPIHelper::getInstance().distributeTrees(trees, scores, TREE_TAG);
+
+ // Get trees from other nodes
+ cout << "Getting initial trees from other processes ... " << endl;
+ int maxNumTrees = (nParTrees + 2) * (MPIHelper::getInstance().getNumProcesses() - 1);
+ MPI_CollectTrees(true, maxNumTrees, false);
+
+ MPI_Barrier(MPI_COMM_WORLD);
+#endif
+*/
- double loglTime = getRealTime() - startTime;
- cout << loglTime << " seconds" << endl;
-}
+ //---- BLOCKING COMMUNICATION
+ syncCandidateTrees(nNNITrees, false);
-void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
- bool finishedInitTree = checkpoint->getBool("finishedInitTree");
+ vector<string> bestInitTrees; // Set of best initial trees for doing NNIs
- if (finishedInitTree) {
- cout << "CHECKPOINT: " << min(nParTrees, (int)candidateTrees.size()) << " initial trees restored" << endl;
- } else {
- createInitTrees(nParTrees);
- checkpoint->putBool("finishedInitTree", true);
- saveCheckpoint();
- checkpoint->dump();
- }
+ bestInitTrees = candidateTrees.getBestTreeStringsForProcess(nNNITrees);
- // Only select the best nNNITrees for doing NNI search
- CandidateSet initParsimonyTrees = candidateTrees.getBestCandidateTrees(nNNITrees);
+ cout << endl;
+ cout << "Do NNI search on " << bestInitTrees.size() << " best initial trees" << endl;
+ stop_rule.setCurIt(0);
candidateTrees.clear();
+ candidateTrees.setMaxSize(Params::getInstance().numSupportTrees);
- cout << "Optimizing top " << initParsimonyTrees.size() << " initial trees with NNI..." << endl;
- double startTime = getCPUTime();
- /*********** START: Do NNI on the best parsimony trees ************************************/
- CandidateSet::reverse_iterator rit = initParsimonyTrees.rbegin();
-
-// stop_rule.setCurIt(0);
- if (stop_rule.getCurIt() > 0) {
- int step = stop_rule.getCurIt();
- for (; rit != initParsimonyTrees.rend() && step > 0; ++rit, step--) {
- // increase iterator accordingly
- candidateTrees.update(rit->second.tree, rit->first);
- }
- cout << "CHECKPOINT: " << stop_rule.getCurIt() << " initial iterations restored" << endl;
+ for (vector<string>::iterator it = bestInitTrees.begin(); it != bestInitTrees.end(); it++) {
+ readTreeString(*it);
+ doNNISearch();
+ string treeString = getTreeString();
+ addTreeToCandidateSet(treeString, curScore, true, MPIHelper::getInstance().getProcessID());
+ if (Params::getInstance().writeDistImdTrees)
+ intermediateTrees.update(treeString, curScore);
+//#ifdef _IQTREE_MPI
+// MPIHelper::getInstance().distributeTree(getTreeString(), curScore, TREE_TAG);
+// MPI_CollectTrees(false, maxNumTrees, true);
+//#endif
}
- for (; rit != initParsimonyTrees.rend(); ++rit) {
- stop_rule.setCurIt(stop_rule.getCurIt() + 1);
- int nniCount, nniStep;
- double initLogl, nniLogl;
- string tree;
- readTreeString(rit->second.tree);
- computeLogL();
-// THIS HAPPEN WHENEVER USING FULL PARTITION MODEL
-// if (isSuperTree() && params->partition_type == 0) {
-// if (verbose_mode >= VB_MED)
-// cout << "curScore: " << getCurScore() << " expected score: " << rit->first << endl;
-// optimizeBranches(2);
-// }
- initLogl = getCurScore();
- tree = doNNISearch(nniCount, nniStep);
- nniLogl = getCurScore();
- cout << "Iteration " << stop_rule.getCurIt() << " / LogL: " << getCurScore();
- if (verbose_mode >= VB_MED) {
- cout << " / NNI count, steps: " << nniCount << "," << nniStep;
- cout << " / Parsimony logl " << initLogl << " / NNI logl: " << nniLogl;
- }
- cout << " / Time: " << convert_time(getRealTime() - params->start_real_time) << endl;
-
- bool betterScore = false;
- // Better tree or score is found
- if (getCurScore() > candidateTrees.getBestScore() + params->modeps) {
- // Re-optimize model parameters (the sNNI algorithm)
- tree = optimizeModelParameters(false, params->modeps * 10);
- getModelFactory()->saveCheckpoint();
- betterScore = true;
- }
- bool newTree = candidateTrees.update(tree, getCurScore());
- if (betterScore) {
- if (newTree && nniCount != 0)
- cout << "BETTER TREE FOUND at iteration " << stop_rule.getCurIt() << ": "
- << getCurScore() << endl;
- else
- cout << "BETTER SCORE FOUND at iteration " << stop_rule.getCurIt() << ": "
- << getCurScore() << endl;
- }
- saveCheckpoint();
- checkpoint->dump();
-// if (params.partition_type)
-// ((PhyloSuperTreePlen*)&iqtree)->printNNIcasesNUM();
+
+ //---- BLOCKING COMMUNICATION
+ syncCandidateTrees(Params::getInstance().numSupportTrees, true);
+
+/*
+#ifdef _IQTREE_MPI
+ //------ NON-BLOCKING COMMUNICATION
+ // FIX BUG: send candidateTrees instead of intermediateTrees
+ candidateTrees.getAllTrees(trees, scores, WT_TAXON_ID + WT_BR_LEN + WT_BR_LEN_SHORT);
+ // Send all trees to other processes
+ MPIHelper::getInstance().distributeTrees(trees, scores, TREE_TAG);
+
+ // Get trees from other nodes
+ cout << "Getting top candidate trees from other processes ... " << endl;
+ MPI_CollectTrees(true, maxNumTrees, true);
+ MPI_Barrier(MPI_COMM_WORLD);
+#endif
+*/
+
+// #ifdef _IQTREE_MPI
+// // Send trees
+// MPIHelper::getInstance().distributeTrees(nniTrees, nniScores, TREE_TAG);
+// MPI_Barrier(MPI_COMM_WORLD);
+// // Receive trees
+// maxNumTrees = treesPerProc * (MPIHelper::getInstance().getNumProcesses() - 1);
+// MPI_CollectTrees(true,maxNumTrees,true);
+// #endif
+// if (params->fixStableSplits && candidateTrees.size() > 1) {
+// candidateTrees.computeSplitOccurences(Params::getInstance().stableSplitThreshold, Params::getInstance().numSupportTrees);
+// }
+}
+
+string IQTree::generateParsimonyTree(int randomSeed) {
+ string parsimonyTreeString;
+ if (params->start_tree == STT_PLL_PARSIMONY) {
+ pllInst->randomNumberSeed = randomSeed;
+ pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst,
+ pllPartitions, params->sprDist);
+ resetBranches(pllInst);
+ pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions,
+ pllInst->start->back, PLL_FALSE, PLL_TRUE, PLL_FALSE,
+ PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
+ parsimonyTreeString = string(pllInst->tree_string);
+ PhyloTree::readTreeString(parsimonyTreeString);
+ wrapperFixNegativeBranch(true);
+ parsimonyTreeString = getTreeString();
+ } else if (params->start_tree == STT_RANDOM_TREE) {
+ generateRandomTree(YULE_HARDING);
+ wrapperFixNegativeBranch(true);
+ parsimonyTreeString = getTreeString();
+ } else {
+ computeParsimonyTree(NULL, aln);
+ parsimonyTreeString = getTreeString();
}
- double nniTime = getCPUTime() - startTime;
- cout << "Average CPU time for 1 NNI search: " << nniTime / initParsimonyTrees.size() << endl;
+ return parsimonyTreeString;
+
}
void IQTree::initializePLL(Params ¶ms) {
@@ -791,11 +877,11 @@ void IQTree::initializePLL(Params ¶ms) {
pllAttr.saveMemory = PLL_FALSE;
pllAttr.useRecom = PLL_FALSE;
pllAttr.randomNumberSeed = params.ran_seed;
- pllAttr.numberOfThreads = params.num_threads; /* This only affects the pthreads version */
+ pllAttr.numberOfThreads = max(params.num_threads, 1); /* This only affects the pthreads version */
if (pllInst != NULL) {
pllDestroyInstance(pllInst);
}
- /* Create a PLL instance */
+ /* Create a PLL instance */
pllInst = pllCreateInstance(&pllAttr);
/* Read in the alignment file */
@@ -1291,68 +1377,249 @@ void IQTree::doParsimonyReinsertion() {
fixNegativeBranch(false);
}
-
-int IQTree::removeBranches(NodeVector& nodes1, NodeVector& nodes2, SplitGraph& splits) {
- if (splits.size() == 0)
- return 0;
- NodeVector _nodes1, _nodes2;
- NodeVector::iterator it1, it2;
- _nodes1 = nodes1;
- _nodes2 = nodes2;
- nodes1.clear();
- nodes2.clear();
- for (it1 = _nodes1.begin(), it2 = _nodes2.begin(); it1 != _nodes1.end() && it2 != _nodes2.end(); it1++, it2++) {
- Split* sp = getSplit(*it1, *it2);
- if (!splits.containSplit(*sp)) {
- nodes1.push_back(*it1);
- nodes2.push_back(*it2);
+void IQTree::getNonTabuBranches(Branches& allBranches, SplitGraph& tabuSplits, Branches& nonTabuBranches, Branches* tabuBranches) {
+ if (tabuSplits.size() == 0) {
+ return;
+ }
+ for (Branches::iterator it = allBranches.begin(); it != allBranches.end(); it++) {
+ if (isInnerBranch(it->second.first, it->second.second)) {
+ int nodeID1 = it->second.first->id;
+ int nodeID2 = it->second.second->id;
+ Branch curBranch = it->second;
+ Split* sp = getSplit(it->second.first, it->second.second);
+ if (!tabuSplits.containSplit(*sp)) {
+ nonTabuBranches.insert(pair<int,Branch>(pairInteger(nodeID1, nodeID2), curBranch));
+ } else {
+ if (tabuBranches != NULL) {
+ tabuBranches->insert(pair<int,Branch>(pairInteger(nodeID1, nodeID2), curBranch));
+ }
+ }
+ delete sp;
}
- delete sp;
+
}
- return (_nodes1.size() - nodes1.size());
}
-void IQTree::doRandomNNIs(int numNNI) {
- NodeVector nodes1, nodes2;
- //SplitGraph usedSplits;
- NodeVector::iterator it1, it2;
- int cntNNI = 0;
- while (cntNNI < numNNI) {
- nodes1.clear();
- nodes2.clear();
- getAllInnerBranches(nodes1, nodes2, &candidateTrees.getStableSplits());
- // remove all used splits
- //removeBranches(nodes1, nodes2, usedSplits);
- if (nodes1.size() == 0) {
- assert(nodes2.size() == 0);
- break;
- }
- // randomly take an inner branch and do a random NNI
- int index = random_int(nodes1.size());
- doOneRandomNNI(nodes1[index], nodes2[index]);
-// if (params->fix_stable_splits) {
-// Split* newSp = getSplit(nodes1[index], nodes2[index]);
-// usedSplits.push_back(newSp);
-// }
- cntNNI++;
+void IQTree::getSplitBranches(Branches &branches, SplitIntMap &splits, Node *node, Node *dad) {
+ if (!node) {
+ node = root;
+ }
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ if (isInnerBranch((*it)->node, node)) {
+ Branch curBranch;
+ curBranch.first = (*it)->node;
+ curBranch.second = node;
+ Split* curSplit;
+ Split *sp = (*it)->split;
+ assert(sp != NULL);
+ curSplit = new Split(*sp);
+ if (curSplit->shouldInvert())
+ curSplit->invert();
+ if (splits.findSplit(curSplit) != NULL) {
+ //curSplit->report(cout);
+ branches.insert(pair<int,Branch>(pairInteger(curBranch.first->id, curBranch.second->id), curBranch));
+ }
+ delete curSplit;
+ }
+ getSplitBranches(branches, splits, (*it)->node, node);
+ }
+}
+
+bool IQTree::shouldEvaluate(Split *curSplit, SplitIntMap &tabuSplits, SplitIntMap &candSplits) {
+ bool answer = true;
+ /******************** CHECK TABU SPLIT **************************/
+ if (tabuSplits.findSplit(curSplit) != NULL) {
+ answer = false;
+ } else if (!candSplits.empty()) {
+ Split *_curSplit;
+ /******************** CHECK STABLE SPLIT **************************/
+ int value;
+ _curSplit = candSplits.findSplit(curSplit, value);
+ if (_curSplit == NULL || _curSplit->getWeight() <= params->stableSplitThreshold) {
+ answer = true;
+ } else { // add a stable branch with a certain probability
+ double rndDbl = random_double();
+ if (rndDbl > params->stableSplitThreshold) {
+ answer = true;
+ } else {
+ answer = false;
+ }
+ }
+ } else {
+ answer = true;
+ }
+ return answer;
+}
+
+
+void IQTree::getNNIBranches(SplitIntMap &tabuSplits, SplitIntMap &candSplits,Branches &nonNNIBranches, Branches &nniBranches, Node *node, Node *dad) {
+ if (!node) {
+ node = root;
+ }
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ if (isInnerBranch((*it)->node, node)) {
+ Branch curBranch;
+ curBranch.first = (*it)->node;
+ curBranch.second = node;
+ int branchID = pairInteger(curBranch.first->id, curBranch.second->id);
+
+ if (params->fixStableSplits) {
+ Split *curSplit;
+ Split *sp = (*it)->split;
+ assert(sp != NULL);
+ curSplit = new Split(*sp);
+ if (curSplit->shouldInvert())
+ curSplit->invert();
+ if (shouldEvaluate(curSplit, tabuSplits, candSplits)) {
+ nniBranches.insert(pair<int, Branch>(branchID, curBranch));
+ } else {
+ nonNNIBranches.insert(pair<int, Branch>(branchID, curBranch));
+ }
+ delete curSplit;
+ } else {
+ nniBranches.insert(pair<int, Branch>(branchID, curBranch));
+ }
+ }
+ getNNIBranches(tabuSplits, candSplits, nonNNIBranches, nniBranches, (*it)->node, node);
+ }
+}
+
+void IQTree::getStableBranches(SplitIntMap &candSplits, double supportValue, Branches &stableBranches, Node *node, Node *dad) {
+ if (!node) {
+ node = root;
+ }
+
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ if (isInnerBranch((*it)->node, node)) {
+ Branch curBranch;
+ curBranch.first = (*it)->node;
+ curBranch.second = node;
+ Split *curSplit;
+ Split *sp = (*it)->split;
+ assert(sp != NULL);
+ curSplit = new Split(*sp);
+ if (curSplit->shouldInvert())
+ curSplit->invert();
+ int occurences;
+ sp = candSplits.findSplit(curSplit, occurences);
+ if (sp != NULL) {
+ if ( sp->getWeight() >= supportValue) {
+ stableBranches.insert(
+ pair<int, Branch>(pairInteger(curBranch.first->id, curBranch.second->id), curBranch));
+ }
+ }
+ delete curSplit;
+ }
+ getStableBranches(candSplits, supportValue, stableBranches, (*it)->node, node);
+ }
+}
+
+string IQTree::perturbStableSplits(double suppValue) {
+ int numRandNNI = 0;
+ Branches stableBranches;
+// initTabuSplits.clear();
+// stableBranches = getStableBranches(candidateTrees.getCandSplits(), suppValue);
+// int maxRandNNI = stableBranches.size() / 2;
+ do {
+ getStableBranches(candidateTrees.getCandSplits(), suppValue, stableBranches);
+ vector<NNIMove> randomNNIs;
+ vector<NNIMove> compatibleNNIs;
+ for (map<int, Branch>::iterator it = stableBranches.begin(); it != stableBranches.end(); it++) {
+ NNIMove randNNI = getRandomNNI(it->second);
+ if (constraintTree.isCompatible(randNNI))
+ randomNNIs.push_back(randNNI);
+ }
+ getCompatibleNNIs(randomNNIs, compatibleNNIs);
+ for (vector<NNIMove>::iterator it = compatibleNNIs.begin(); it != compatibleNNIs.end(); it++) {
+ doNNI(*it);
+ numRandNNI++;
+// Split *sp = getSplit(it->node1, it->node2);
+// Split *tabuSplit = new Split(*sp);
+// if (tabuSplit->shouldInvert()) {
+// tabuSplit->invert();
+// }
+// initTabuSplits.insertSplit(tabuSplit, 1);
+ }
+ } while (stableBranches.size() > 0);
+
+ if (verbose_mode >= VB_MAX) {
+ cout << "Tree perturbation: number of random NNI performed = " << numRandNNI << endl;
}
- //cout << "Number of random NNI performed: " << cntNNI << endl;
setAlignment(aln);
setRootNode(params->root);
+ clearAllPartialLH();
+
if (isSuperTree()) {
((PhyloSuperTree*) this)->mapTrees();
}
+ if (params->pll) {
+ pllReadNewick(getTreeString());
+ }
+
+ resetCurScore();
+ return getTreeString();
+}
+
+string IQTree::doRandomNNIs(bool storeTabu) {
+ int cntNNI = 0;
+ int numRandomNNI;
+ Branches nniBranches;
+ Branches nonNNIBranches;
+ if (storeTabu) {
+ Branches stableBranches;
+ getStableBranches(candidateTrees.getCandSplits(), Params::getInstance().stableSplitThreshold, stableBranches);
+ int numNonStableBranches = leafNum - 3 - stableBranches.size();
+ numRandomNNI = numNonStableBranches;
+ } else {
+ numRandomNNI = floor((leafNum - 3) * Params::getInstance().initPS);
+ }
+
+ initTabuSplits.clear();
+ while (cntNNI < numRandomNNI) {
+ nniBranches.clear();
+ nonNNIBranches.clear();
+ getNNIBranches(initTabuSplits, candidateTrees.getCandSplits(), nonNNIBranches, nniBranches);
+ if (nniBranches.size() == 0) break;
+ // Convert the map data structure Branches to vector of Branch
+ vector<Branch> vectorNNIBranches;
+ for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); ++it) {
+ vectorNNIBranches.push_back(it->second);
+ }
+ int randInt = random_int((int) vectorNNIBranches.size());
+ NNIMove randNNI = getRandomNNI(vectorNNIBranches[randInt]);
+ if (constraintTree.isCompatible(randNNI)) {
+ // only if random NNI satisfies constraintTree
+ doNNI(randNNI);
+ if (storeTabu) {
+ Split *sp = getSplit(randNNI.node1, randNNI.node2);
+ Split *tabuSplit = new Split(*sp);
+ if (tabuSplit->shouldInvert()) {
+ tabuSplit->invert();
+ }
+ initTabuSplits.insertSplit(tabuSplit, 1);
+ }
+ }
+ cntNNI++;
+ }
+ if (verbose_mode >= VB_MAX)
+ cout << "Tree perturbation: number of random NNI performed = " << cntNNI << endl;
+ setAlignment(aln);
+ setRootNode(params->root);
+ if (isSuperTree()) {
+ ((PhyloSuperTree*) this)->mapTrees();
+ }
if (params->pll) {
pllReadNewick(getTreeString());
}
+ clearAllPartialLH();
resetCurScore();
+ return getTreeString();
}
-
void IQTree::doIQP() {
if (verbose_mode >= VB_DEBUG)
drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
@@ -1732,9 +1999,7 @@ extern pllUFBootData * pllUFBootDataPtr;
string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) {
if (logl_epsilon == -1)
- logl_epsilon = params->modeps;
-// if (params->opt_gammai)
-// logl_epsilon = 0.1;
+ logl_epsilon = params->modelEps;
cout << "Estimate model parameters (epsilon = " << logl_epsilon << ")" << endl;
double stime = getRealTime();
string newTree;
@@ -1791,14 +2056,14 @@ string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) {
return newTree;
}
-void IQTree::printBestScores(int numBestScore) {
+void IQTree::printBestScores() {
vector<double> bestScores = candidateTrees.getBestScores(params->popSize);
for (vector<double>::iterator it = bestScores.begin(); it != bestScores.end(); it++)
cout << (*it) << " ";
cout << endl;
}
-void IQTree::computeLogL() {
+double IQTree::computeLogL() {
if (params->pll) {
if (curScore == -DBL_MAX) {
pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
@@ -1813,7 +2078,7 @@ void IQTree::computeLogL() {
// }
curScore = computeLikelihood();
}
-// lhComputed = true;
+ return curScore;
}
string IQTree::optimizeBranches(int maxTraversal) {
@@ -1834,83 +2099,167 @@ string IQTree::optimizeBranches(int maxTraversal) {
// clearAllPartialLH();
// lhComputed = true;
// }
- curScore = optimizeAllBranches(maxTraversal);
+ curScore = optimizeAllBranches(maxTraversal, params->loglh_epsilon, PLL_NEWZPERCYCLE);
tree = getTreeString();
}
return tree;
}
+void IQTree::collectBootTrees() {
+#ifdef _IQTREE_MPI
+ if (boot_trees.size() == 0)
+ return;
+ // gather UFBoot trees from the workers on the master, keeping the better-scoring tree per replicate
+ if (MPIHelper::getInstance().isMaster()) {
+ MPIHelper::getInstance().sendMsg(BOOT_TAG, "BOOT TREES PLEASE!");
+ TreeCollection trees;
+ int count = 0;
+ do {
+ int source = MPIHelper::getInstance().receiveTrees(trees, BOOT_TREE_TAG);
+ if (source > 0) {
+ count++;
+ assert(trees.getNumTrees() == boot_trees.size());
+ int better_trees = 0;
+ for (int id = 0; id < trees.getNumTrees(); id++)
+ if (trees.getScores()[id] > boot_logl[id]) {
+ boot_trees[id] = trees.getTreeStrings()[id];
+ boot_logl[id] = trees.getScores()[id];
+ better_trees++;
+ }
+ trees.clear();
+ cout << better_trees << " better bootstrap trees from process " << source << endl;
+ }
+ } while (count < MPIHelper::getInstance().getNumProcesses()-1);
+ } else {
+ // worker
+ if (MPIHelper::getInstance().checkMsg(BOOT_TAG))
+ MPIHelper::getInstance().sendTrees(PROC_MASTER, boot_trees, boot_logl, BOOT_TREE_TAG);
+ string msg;
+ if (MPIHelper::getInstance().checkMsg(LOGL_CUTOFF_TAG, msg)) {
+ logl_cutoff = convert_double(msg.c_str());
+ cout << "Log-likelihood cutoff on original alignment: " << logl_cutoff << endl;
+ }
+ }
+#endif
+}
+
double IQTree::doTreeSearch() {
cout << "--------------------------------------------------------------------" << endl;
- cout << "| OPTIMIZING CANDIDATE TREE SET |" << endl;
+ cout << "| INITIALIZING CANDIDATE TREE SET |" << endl;
cout << "--------------------------------------------------------------------" << endl;
- // PLEASE PRINT TREE HERE!
- printResultTree();
- string treels_name = params->out_prefix;
- treels_name += ".treels";
- string out_lh_file = params->out_prefix;
- out_lh_file += ".treelh";
- string site_lh_file = params->out_prefix;
- site_lh_file += ".sitelh";
- if (params->print_tree_lh) {
- out_treelh.open(out_lh_file.c_str());
- out_sitelh.open(site_lh_file.c_str());
+ double initCPUTime = getRealTime();
+ int treesPerProc = (params->numInitTrees) / MPIHelper::getInstance().getNumProcesses() - candidateTrees.size();
+ if (params->numInitTrees % MPIHelper::getInstance().getNumProcesses() != 0) {
+ treesPerProc++;
}
+ if (treesPerProc < 0)
+ treesPerProc = 0;
+ // Master node does one tree less because it already created the BIONJ tree
+// if (MPIHelper::getInstance().isMaster()) {
+// treesPerProc--;
+// }
- if (params->write_intermediate_trees)
- out_treels.open(treels_name.c_str());
+ // Make sure to get at least 1 tree
+ if (treesPerProc < 1 && params->numInitTrees > candidateTrees.size())
+ treesPerProc = 1;
- if (params->write_intermediate_trees && save_all_trees != 2) {
- printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
- }
+ /* Initialize candidate tree set */
+ if (!getCheckpoint()->getBool("finishedCandidateSet")) {
+ initCandidateTreeSet(treesPerProc, params->numNNITrees);
+ // write best tree to disk
+ printBestCandidateTree();
+ saveCheckpoint();
+ getCheckpoint()->putBool("finishedCandidateSet", true);
+ getCheckpoint()->dump(true);
+ } else {
+ cout << "CHECKPOINT: Candidate tree set restored, best LogL: " << candidateTrees.getBestScore() << endl;
+ }
+ assert(candidateTrees.size() != 0);
+ cout << "Finish initializing candidate tree set (" << candidateTrees.size() << ")" << endl;
+
+
+ cout << "Current best tree score: " << candidateTrees.getBestScore() << " / CPU time: " <<
+ getRealTime() - initCPUTime << endl;
+ cout << "Number of iterations: " << stop_rule.getCurIt() << endl;
+
+// string treels_name = params->out_prefix;
+// treels_name += ".treels";
+// string out_lh_file = params->out_prefix;
+// out_lh_file += ".treelh";
+// string site_lh_file = params->out_prefix;
+// site_lh_file += ".sitelh";
+//
+// if (params->print_tree_lh) {
+// out_treelh.open(out_lh_file.c_str());
+// out_sitelh.open(site_lh_file.c_str());
+// }
+
+// if (params->write_intermediate_trees)
+// out_treels.open(treels_name.c_str());
+
+// if (params->write_intermediate_trees && save_all_trees != 2) {
+// printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
+// }
setRootNode(params->root);
- // keep the best tree into a string
- //stringstream bestTreeStream;
- //stringstream bestTopoStream;
-// string perturb_tree_string;
- string imd_tree;
- //printTree(bestTreeStream, WT_TAXON_ID + WT_BR_LEN);
- //printTree(bestTopoStream, WT_TAXON_ID + WT_SORT_TAXA);
- //string best_tree_topo = bestTopoStream.str();
-
- // if not zero, it means already recovered from checkpoint
- if (stop_rule.getLastImprovedIteration() == 0)
- stop_rule.addImprovedIteration(1);
- else
- cout << "CHECKPOINT: " << stop_rule.getCurIt() << " search iterations restored" << endl;
+
+ if (!getCheckpoint()->getBool("finishedCandidateSet"))
+ cout << "CHECKPOINT: " << stop_rule.getCurIt() << " search iterations restored" << endl;
+
searchinfo.curPerStrength = params->initPS;
+ double cur_correlation = 0.0;
- double cur_correlation = 0.0;
- /*====================================================
- * MAIN LOOP OF THE IQ-TREE ALGORITHM
- *====================================================*/
- while(!stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation)) {
- stop_rule.setCurIt(stop_rule.getCurIt() + 1);
+ if ((Params::getInstance().fixStableSplits || Params::getInstance().adaptPertubation) && candidateTrees.size() > 1) {
+ candidateTrees.computeSplitOccurences(Params::getInstance().stableSplitThreshold);
+ }
+
+ // tracking of worker candidate set is changed from master candidate set
+ candidateset_changed.resize(MPIHelper::getInstance().getNumProcesses(), false);
+ bestcandidate_changed = false;
+
+ /*==============================================================================================================
+ MAIN LOOP OF THE IQ-TREE ALGORITHM
+ *=============================================================================================================*/
+
+ bool optimization_looped = false;
+ if (!stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation)) {
+ cout << "--------------------------------------------------------------------" << endl;
+ cout << "| OPTIMIZING CANDIDATE TREE SET |" << endl;
+ cout << "--------------------------------------------------------------------" << endl;
+ optimization_looped = true;
+ }
+
+ // count threshold for computing bootstrap correlation
+ int ufboot_count, ufboot_count_check;
+ stop_rule.getUFBootCountCheck(ufboot_count, ufboot_count_check);
+
+ while (!stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation)) {
+
+/*
+#ifdef _IQTREE_MPI
+ // check stopping rule
+ if (MPIHelper::getInstance().isMaster()) {
+ if (stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation)) {
+ MPIHelper::getInstance().sendMsg(STOP_TAG, "STOP!");
+ break;
+ }
+ } else {
+ if(MPIHelper::getInstance().checkMsg(STOP_TAG)) {
+ break;
+ }
+ }
+#else
+ if (stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation))
+ break;
+#endif
+*/
searchinfo.curIter = stop_rule.getCurIt();
// estimate logl_cutoff for bootstrap
if (!boot_orig_logl.empty())
logl_cutoff = *min_element(boot_orig_logl.begin(), boot_orig_logl.end());
-// if (/*params->avoid_duplicated_trees && max_candidate_trees > 0 &&*/ stop_rule.getCurIt() > 2 /* && treels_logl.size() > 1000*/) {
-// int predicted_iteration = ((stop_rule.getCurIt()+params->step_iterations-1)/params->step_iterations)*params->step_iterations;
-// int num_entries = floor(max_candidate_trees * ((double) stop_rule.getCurIt() / predicted_iteration));
-// if (num_entries < treels_logl.size() * 0.9) {
-// DoubleVector logl = treels_logl;
-// nth_element(logl.begin(), logl.begin() + (treels_logl.size() - num_entries), logl.end());
-// logl_cutoff = logl[treels_logl.size() - num_entries] - 1.0;
-// } else
-// logl_cutoff = 0.0;
-// if (verbose_mode >= VB_MED) {
-// if (stop_rule.getCurIt() % 10 == 0) {
-// cout << treels_logl.size() << " logls, logl_cutoff= " << logl_cutoff;
-// cout << endl;
-// }
-// }
-// }
-
if (estimate_nni_cutoff && nni_info.size() >= 500) {
estimate_nni_cutoff = false;
estimateNNICutoff(params);
@@ -1918,73 +2267,51 @@ double IQTree::doTreeSearch() {
Alignment *saved_aln = aln;
- /*----------------------------------------
- * Perturb the tree
- *---------------------------------------*/
- double perturbScore = 0.0;
- int numStableBranches = aln->getNSeq() - 3 - candidateTrees.getStableSplits().size();
- // Change from floor to ceil to make sure perturbing at least 1 branch
- int numPerturb = ceil(searchinfo.curPerStrength * numStableBranches);
- bool treechanged = false;
- if (iqp_assess_quartet == IQP_BOOTSTRAP) {
- // create bootstrap sample
- Alignment* bootstrap_alignment;
- if (aln->isSuperAlignment())
- bootstrap_alignment = new SuperAlignment;
- else
- bootstrap_alignment = new Alignment;
- bootstrap_alignment->createBootstrapAlignment(aln, NULL, params->bootstrap_spec);
- setAlignment(bootstrap_alignment);
- initializeAllPartialLh();
- clearAllPartialLH();
- curScore = optimizeAllBranches();
- } else {
- if (params->snni) {
-// string candidateTree = candidateTrees.getRandCandTree();
-// readTreeString(candidateTree);
- readTreeString(candidateTrees.getRandCandTree());
-// if (params->fix_stable_splits)
-// assert(containsSplits(candidateTrees.getStableSplits()));
- if (params->iqp) {
- doIQP();
- } else {
- doRandomNNIs(numPerturb);
- }
- } else {
- readTreeString(candidateTrees.getBestTrees()[0]);
- doIQP();
- }
-// perturb_tree_string = getTreeString();
- if (params->count_trees) {
- string perturb_tree_topo = getTopology();
- if (pllTreeCounter.find(perturb_tree_topo) == pllTreeCounter.end()) {
- // not found in hash_map
- pllTreeCounter[perturb_tree_topo] = 1;
- } else {
- // found in hash_map
- pllTreeCounter[perturb_tree_topo]++;
- }
- }
+ string curTree;
+ /*----------------------------------------
+ * Perturb the tree
+ *---------------------------------------*/
+ doTreePerturbation();
+
+ /*----------------------------------------
+ * Optimize tree with NNI
+ *----------------------------------------*/
+ pair<int, int> nniInfos; // <num_NNIs, num_steps>
+ nniInfos = doNNISearch();
+ curTree = getTreeString();
+ int pos = addTreeToCandidateSet(curTree, curScore, true, MPIHelper::getInstance().getProcessID());
+ if (pos != -2 && pos != -1 && (Params::getInstance().fixStableSplits || Params::getInstance().adaptPertubation))
+ candidateTrees.computeSplitOccurences(Params::getInstance().stableSplitThreshold);
+
+ if (MPIHelper::getInstance().isWorker() || MPIHelper::getInstance().gotMessage())
+ syncCurrentTree();
- double oldScore = curScore;
- computeLogL();
- perturbScore = curScore;
- if (perturbScore < oldScore - 0.01)
- treechanged = true;
+/*
+#ifdef _IQTREE_MPI
+ //----------- NON-BLOCKING COMMUNICATION ---------//
+ int maxNumTrees = (MPIHelper::getInstance().getNumProcesses() - 1) * 2;
+ if (MPIHelper::getInstance().isMaster()) {
+ // master: receive tree from WORKERS
+ bool candidateset_changed = MPI_CollectTrees(false, maxNumTrees, true);
+ if (candidateset_changed) {
+ vector<string> bestTrees = candidateTrees.getBestTreeStrings(Params::getInstance().popSize);
+ vector<double> bestScores = candidateTrees.getBestScores(Params::getInstance().popSize);
+ MPIHelper::getInstance().distributeTrees(bestTrees, bestScores, TREE_TAG);
+ }
+ } else {
+ // worker: always send tree to MASTER
+ MPIHelper::getInstance().sendTree(PROC_MASTER, getTreeString(), curScore, TREE_TAG);
+ MPI_CollectTrees(false, maxNumTrees, true);
}
+#endif
+*/
- /*----------------------------------------
- * Optimize tree with NNI
- *---------------------------------------*/
- int nni_count = 0;
- int nni_steps = 0;
-
- imd_tree = doNNISearch(nni_count, nni_steps);
-
- if (nni_count == 0 && params->snni && numPerturb > 0 && treechanged) {
- assert(0 && "BUG: NNI could not improved perturbed tree");
- }
+ // TODO: cannot check yet, need to somehow return treechanged
+// if (nni_count == 0 && params->snni && numPerturb > 0 && treechanged) {
+// assert(0 && "BUG: NNI could not improved perturbed tree");
+// }
+//
if (iqp_assess_quartet == IQP_BOOTSTRAP) {
// restore alignment
delete aln;
@@ -1994,107 +2321,83 @@ double IQTree::doTreeSearch() {
}
if (isSuperTree()) {
- ((PhyloSuperTree*) this)->computeBranchLengths();
+ ((PhyloSuperTree *) this)->computeBranchLengths();
}
- /*----------------------------------------
+ /*----------------------------------------
* Print information
*---------------------------------------*/
- double realtime_remaining = stop_rule.getRemainingTime(stop_rule.getCurIt(), cur_correlation);
- cout.setf(ios::fixed, ios::floatfield);
-
- // only print every 10th iteration or high verbose mode
- if (stop_rule.getCurIt() % 10 == 0 || verbose_mode >= VB_MED) {
- cout << ((iqp_assess_quartet == IQP_BOOTSTRAP) ? "Bootstrap " : "Iteration ") << stop_rule.getCurIt() << " / LogL: ";
- if (verbose_mode >= VB_MED)
- cout << perturbScore << " -> ";
- cout << curScore;
- if (verbose_mode >= VB_MED)
- cout << " / (NNIs, Steps): (" << nni_count << "," << nni_steps << ")";
- cout << " / Time: " << convert_time(getRealTime() - params->start_real_time);
-
- if (stop_rule.getCurIt() > 10) {
- cout << " (" << convert_time(realtime_remaining) << " left)";
- }
- cout << endl;
- }
+ //printInterationInfo();
- if (params->write_intermediate_trees && save_all_trees != 2) {
- printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
- }
+// if (params->write_intermediate_trees && save_all_trees != 2) {
+// printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
+// }
- /*----------------------------------------
- * Update if better tree is found
- *---------------------------------------*/
- if (curScore > candidateTrees.getBestScore() + params->modeps) {
- if (params->snni) {
- imd_tree = optimizeModelParameters();
- getModelFactory()->saveCheckpoint();
- }
- if (!candidateTrees.treeExist(imd_tree)) {
- stop_rule.addImprovedIteration(stop_rule.getCurIt());
- cout << "BETTER TREE FOUND at iteration " << stop_rule.getCurIt() << ": " << curScore << endl;
- } else {
- cout << "UPDATE BEST LOG-LIKELIHOOD: " << curScore << endl;
- }
- printResultTree();
+ if (params->snni && verbose_mode >= VB_DEBUG) {
+ printBestScores();
}
- candidateTrees.update(imd_tree, curScore);
- if (params->snni && verbose_mode >= VB_MED) {
- printBestScores(params->popSize);
- }
-
// DTH: make pllUFBootData usable in summarizeBootstrap
- if(params->pll && params->online_bootstrap && (params->gbo_replicates > 0))
+ if (params->pll && params->online_bootstrap && (params->gbo_replicates > 0))
pllConvertUFBootData2IQTree();
// DTH: Carefully watch the -pll case here
-
- /*----------------------------------------
- * convergence criterion for ultrafast bootstrap
- *---------------------------------------*/
- if ((stop_rule.getCurIt()) % (params->step_iterations / 2) == 0 && params->stop_condition == SC_BOOTSTRAP_CORRELATION) {
- // compute split support every half step
+ /*----------------------------------------
+ * convergence criterion for ultrafast bootstrap
+ *---------------------------------------*/
+
+ // workers send bootstrap trees, TODO: blocking communication
+// if (params->stop_condition == SC_BOOTSTRAP_CORRELATION && MPIHelper::getInstance().isWorker())
+// collectBootTrees();
+
+ // MASTER receives bootstrap trees and perform stop convergence test
+ if ((stop_rule.getCurIt()) >= ufboot_count &&
+ params->stop_condition == SC_BOOTSTRAP_CORRELATION && MPIHelper::getInstance().isMaster()) {
+// collectBootTrees();
+ ufboot_count += params->step_iterations/2;
+ // compute split support every half step
SplitGraph *sg = new SplitGraph;
summarizeBootstrap(*sg);
sg->removeTrivialSplits();
sg->setCheckpoint(checkpoint);
boot_splits.push_back(sg);
-// if (params->max_candidate_trees == 0)
-// max_candidate_trees = treels_logl.size() * (stop_rule.getCurIt() + (params->step_iterations / 2)) /
-// stop_rule.getCurIt();
-// cout << "NOTE: " << treels_logl.size() << " bootstrap candidate trees evaluated (logl-cutoff: " << logl_cutoff << ")" << endl;
- cout << "Log-likelihood cutoff on original alignment: " << logl_cutoff << endl;
-
- // check convergence every full step
- if (stop_rule.getCurIt() % params->step_iterations == 0) {
- cur_correlation = computeBootstrapCorrelation();
- cout << "NOTE: Bootstrap correlation coefficient of split occurrence frequencies: " << cur_correlation << endl;
- if (!stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation)) {
-// if (params->max_candidate_trees == 0) {
-// max_candidate_trees = treels_logl.size() * (stop_rule.getCurIt() + params->step_iterations) /
-// stop_rule.getCurIt();
-// }
-// cout << "INFO: UFBoot does not converge, continue " << params->step_iterations << " more iterations" << endl;
- }
- }
+ cout << "Log-likelihood cutoff on original alignment: " << logl_cutoff << endl;
+// MPIHelper::getInstance().sendMsg(LOGL_CUTOFF_TAG, convertDoubleToString(logl_cutoff));
+
+ // check convergence every full step
+ if (stop_rule.getCurIt() >= ufboot_count_check) {
+ ufboot_count_check += params->step_iterations;
+ cur_correlation = computeBootstrapCorrelation();
+ cout << "NOTE: Bootstrap correlation coefficient of split occurrence frequencies: " <<
+ cur_correlation << endl;
+ if (!stop_rule.meetCorrelation(cur_correlation)) {
+ cout << "NOTE: UFBoot does not converge, continue at least " << params->step_iterations << " more iterations" << endl;
+ }
+ }
+ if (params->gbo_replicates && params->online_bootstrap && params->print_ufboot_trees)
+ writeUFBootTrees(*params);
+
} // end of bootstrap convergence test
// print UFBoot trees every 10 iterations
- if (params->gbo_replicates && params->online_bootstrap && params->print_ufboot_trees &&
- stop_rule.getCurIt() % 10 == 0)
- writeUFBootTrees(*params);
saveCheckpoint();
checkpoint->dump();
-
- //if (params->partition_type)
- // ((PhyloSuperTreePlen*)this)->printNNIcasesNUM();
-
+
+ if (bestcandidate_changed) {
+ printBestCandidateTree();
+ bestcandidate_changed = false;
+ }
+
+ //if (params->partition_type)
+ // ((PhyloSuperTreePlen*)this)->printNNIcasesNUM();
+
}
- readTreeString(candidateTrees.getTopTrees()[0]);
+ if (optimization_looped)
+ sendStopMessage();
+
+ readTreeString(candidateTrees.getBestTreeStrings()[0]);
if (testNNI)
outNNI.close();
@@ -2106,220 +2409,370 @@ double IQTree::doTreeSearch() {
}
// DTH: pllUFBoot deallocation
- if(params->pll) {
+ if (params->pll) {
pllDestroyUFBootData();
}
+#ifdef _IQTREE_MPI
+ cout << "Total number of trees received: " << MPIHelper::getInstance().getNumTreeReceived() << endl;
+ cout << "Total number of trees sent: " << MPIHelper::getInstance().getNumTreeSent() << endl;
+ cout << "Total number of NNI searches done by myself: " << MPIHelper::getInstance().getNumNNISearch() << endl;
+ MPIHelper::getInstance().resetNumbers();
+// MPI_Finalize();
+// if (MPIHelper::getInstance().getProcessID() != MASTER) {
+// exit(0);
+// }
+#endif
+
+
return candidateTrees.getBestScore();
+
+}
+
+void IQTree::printIterationInfo(int sourceProcID) { // Print one progress line (iteration number, curScore, elapsed time) to cout
+ double realtime_remaining = stop_rule.getRemainingTime(stop_rule.getCurIt());
+ cout.setf(ios_base::fixed, ios_base::floatfield); // switches cout to fixed notation; the flag is not restored here
+
+ // only print every 10th iteration or high verbose mode
+ if (stop_rule.getCurIt() % 10 == 0 || verbose_mode >= VB_MED) {
+ cout << ((iqp_assess_quartet == IQP_BOOTSTRAP) ? "Bootstrap " : "Iteration ") << stop_rule.getCurIt() <<
+ " / LogL: ";
+ cout << curScore;
+ cout << " / Time: " << convert_time(getRealTime() - params->start_real_time); // time elapsed since params->start_real_time
+
+ if (stop_rule.getCurIt() > 20) { // the remaining-time estimate is shown only after iteration 20
+ cout << " (" << convert_time(realtime_remaining) << " left)";
+ }
+ if (MPIHelper::getInstance().getNumProcesses() > 1) // sourceProcID is printed only in multi-process runs
+ cout << " / Process: " << sourceProcID;
+ cout << endl;
+ }
+}
+
+//void IQTree::estimateLoglCutoffBS() {
+// if (params->avoid_duplicated_trees && max_candidate_trees > 0 && treels_logl.size() > 1000) {
+// int predicted_iteration;
+// predicted_iteration = ((stop_rule.getCurIt() + params->step_iterations - 1) / params->step_iterations)
+// * params->step_iterations;
+// int num_entries = (int) floor(max_candidate_trees * ((double) stop_rule.getCurIt() / predicted_iteration));
+// if (num_entries < treels_logl.size() * 0.9) {
+// DoubleVector logl = treels_logl;
+// nth_element(logl.begin(), logl.begin() + (treels_logl.size() - num_entries), logl.end());
+// logl_cutoff = logl[treels_logl.size() - num_entries] - 1.0;
+// } else
+// logl_cutoff = 0.0;
+// if (verbose_mode >= VB_MED) {
+// if (stop_rule.getCurIt() % 10 == 0) {
+// cout << treels.size() << " trees, " << treels_logl.size() << " logls, logl_cutoff= " << logl_cutoff;
+// if (params->store_candidate_trees)
+// cout << " duplicates= " << duplication_counter << " ("
+// << (int) round(100 * ((double) duplication_counter / treels_logl.size())) << "%)" << endl;
+// else
+// cout << endl;
+// }
+// }
+// }
+//}
+
+#ifdef _IQTREE_MPI
+bool IQTree::MPI_CollectTrees(bool allTrees, int maxNumTrees, bool updateStopRule) { // Receive trees sent under TREE_TAG and add them to the candidate set; returns true if any landed at a position in [0, popSize)
+ if (MPIHelper::getInstance().getNumProcesses() == 1) // single process: nothing to receive
+ return false;
+ TreeCollection outTrees;
+ double start = getRealTime();
+ MPIHelper::getInstance().receiveTrees(allTrees, maxNumTrees, outTrees, TREE_TAG);
+ double commTime = getRealTime() - start; // time spent inside receiveTrees, used for the messages below
+ if (verbose_mode >= VB_MED && outTrees.getNumTrees()> 0) {
+ cout << outTrees.getNumTrees() << " trees received from other processes in ";
+ cout << commTime << " seconds" << endl;
+ }
+ if (commTime > 1.0) { // warn when the receive took longer than 1.0 (printed as seconds)
+ cout << "WARNING: Communication time (" << commTime << " sec) is too slow. Please increase MP_BUFFER_MEM and MP_EAGER_LIMIT" << endl;
+ }
+
+// PhyloTree phyloTree;
+// phyloTree.aln = this->aln;
+// phyloTree.setParams(&(Params::getInstance()));
+
+ bool candidateset_changed = false; // local result flag; NOTE(review): doTreeSearch also uses a vector named candidateset_changed - confirm the name clash is harmless
+
+ for (int i = 0; i < outTrees.getNumTrees(); i++) {
+ pair<string, double> tree = outTrees.getTree(i); // first = tree string, second = score
+ if (tree.first == "notree") { // "notree" entries carry only a score and are never added to the candidate set
+ if (updateStopRule) {
+ stop_rule.setCurIt(stop_rule.getCurIt() + 1); // the skipped tree still advances the iteration counter
+ curScore = tree.second;
+ cout << "Bad tree with score: " << tree.second << " skipped" << endl;
+ printIterationInfo(outTrees.getSourceProcID()[i]);
+ }
+ } else {
+// phyloTree.readTreeString(tree.first, true);
+// string treeString = phyloTree.getTreeString();
+ int pos = addTreeToCandidateSet(tree.first, tree.second, updateStopRule, outTrees.getSourceProcID()[i]);
+ if (pos >= 0 && pos < params->popSize) // only positions inside [0, popSize) count as a change
+ candidateset_changed = true;
+ }
+ }
+ return candidateset_changed;
+}
+#endif
+
+double IQTree::doTreePerturbation() { // Set up the starting tree for one search iteration; returns its log-likelihood, which is also stored in curScore
+ if (iqp_assess_quartet == IQP_BOOTSTRAP) {
+ // create bootstrap sample
+ Alignment *bootstrap_alignment;
+ if (aln->isSuperAlignment())
+ bootstrap_alignment = new SuperAlignment;
+ else
+ bootstrap_alignment = new Alignment;
+ bootstrap_alignment->createBootstrapAlignment(aln, NULL, params->bootstrap_spec);
+ setAlignment(bootstrap_alignment); // NOTE(review): not freed in this function; presumably the caller deletes it and restores the original alignment - confirm
+ initializeAllPartialLh();
+ clearAllPartialLH();
+ curScore = optimizeAllBranches();
+ } else {
+ if (params->snni) { // snni mode: reload a tree from candidateTrees, then perturb it
+ if (Params::getInstance().five_plus_five) {
+ readTreeString(candidateTrees.getNextCandTree());
+ } else {
+ readTreeString(candidateTrees.getRandTopTree(Params::getInstance().popSize));
+ }
+ if (Params::getInstance().iqp) { // exactly one of three perturbation routines runs
+ doIQP();
+ } else if (Params::getInstance().adaptPertubation) {
+ perturbStableSplits(Params::getInstance().stableSplitThreshold);
+ } else {
+ doRandomNNIs(Params::getInstance().tabu);
+ }
+ } else {
+ // Using the IQPNNI algorithm (best tree is selected)
+ readTreeString(getBestTrees()[0]);
+ doIQP();
+ }
+ if (params->count_trees) { // count how many times each perturbed topology string has been seen
+ string perturb_tree_topo = getTopologyString(false);
+ if (pllTreeCounter.find(perturb_tree_topo) == pllTreeCounter.end()) {
+ // not found in hash_map
+ pllTreeCounter[perturb_tree_topo] = 1;
+ } else {
+ // found in hash_map
+ pllTreeCounter[perturb_tree_topo]++;
+ }
+ }
+ //optimizeBranches(1);
+ curScore = computeLogL(); // score of the perturbed tree
+ }
+ return curScore;
}
/****************************************************************************
Fast Nearest Neighbor Interchange by maximum likelihood
****************************************************************************/
-string IQTree::doNNISearch(int& nniCount, int& nniSteps) {
- string treeString;
+pair<int, int> IQTree::doNNISearch() { // Run one NNI search on the current tree; returns the pair filled in by pllOptimizeNNI or optimizeNNI
+
+ computeLogL();
+ double curBestScore = getBestScore(); // best score before the search, compared against after it
+
+ if (Params::getInstance().write_intermediate_trees && save_all_trees != 2) {
+ printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
+ }
+
+ pair<int, int> nniInfos; // Number of NNIs and number of steps; NOTE(review): optimizeNNI in this patch returns make_pair(numSteps, totalNNIApplied), the opposite order - confirm
if (params->pll) {
if (params->partition_file)
outError("Unsupported -pll -sp combination!");
- curScore = pllOptimizeNNI(nniCount, nniSteps, searchinfo);
+ curScore = pllOptimizeNNI(nniInfos.first, nniInfos.second, searchinfo);
pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back, PLL_TRUE,
PLL_TRUE, 0, 0, 0, PLL_SUMMARIZE_LH, 0, 0);
- treeString = string(pllInst->tree_string);
-// readTreeString(treeString);
+ readTreeString(string(pllInst->tree_string)); // load the tree string written by pllTreeToNewick into this tree
} else {
- curScore = optimizeNNI(nniCount, nniSteps);
+ nniInfos = optimizeNNI(Params::getInstance().speednni); // optimizeNNI assigns curScore itself on this path
if (isSuperTree()) {
((PhyloSuperTree*) this)->computeBranchLengths();
}
- treeString = getTreeString();
if (params->print_trees_site_posterior)
computePatternCategories();
}
- return treeString;
+ // Better tree or score is found
+ if (getCurScore() > curBestScore + params->modelEps) {
+ // Re-optimize model parameters (the sNNI algorithm)
+ optimizeModelParameters(false, params->modelEps * 10);
+ getModelFactory()->saveCheckpoint();
+ }
+ MPIHelper::getInstance().setNumNNISearch(MPIHelper::getInstance().getNumNNISearch() + 1); // bookkeeping: one more NNI search done by this process
+
+ return nniInfos;
}
-double IQTree::optimizeNNI(int &nni_count, int &nni_steps) {
- bool rollBack = false;
- nni_count = 0;
- int numNNIs = 0; // number of NNI to be applied in each step
- const int MAXSTEPS = aln->getNSeq(); // maximum number of NNI steps
- NodeVector nodes1, nodes2;
- DoubleVector lenvec;
- for (nni_steps = 1; nni_steps <= MAXSTEPS; nni_steps++) {
+pair<int, int> IQTree::optimizeNNI(bool speedNNI) { // NNI hill-climbing on the current tree; updates curScore and returns (numSteps, totalNNIApplied)
+ unsigned int totalNNIApplied = 0;
+ unsigned int numSteps = 0;
+ const int MAXSTEPS = leafNum; // upper bound on the number of rounds
+ unsigned int numInnerBranches = leafNum - 3;
+ double curBestScore = candidateTrees.getBestScore();
+
+ Branches nniBranches; // branches whose NNIs are evaluated in the current round
+ Branches nonNNIBranches; // filled by getNNIBranches; evaluated only as a fallback below
+ vector<NNIMove> positiveNNIs;
+ vector<NNIMove> appliedNNIs;
+ SplitIntMap tabuSplits;
+ if (!initTabuSplits.empty()) {
+ tabuSplits = initTabuSplits;
+ }
+
+ for (numSteps = 1; numSteps <= MAXSTEPS; numSteps++) {
+
+// cout << "numSteps = " << numSteps << endl;
double oldScore = curScore;
- if (!rollBack) { // tree get improved and was not rollbacked
- if (save_all_trees == 2) {
- saveCurrentTree(curScore); // BQM: for new bootstrap
- }
- if (verbose_mode >= VB_DEBUG) {
- cout << "Doing NNI round " << nni_steps << endl;
- if (isSuperTree()) {
- ((PhyloSuperTree*) this)->printMapInfo();
- }
+ if (save_all_trees == 2) {
+ saveCurrentTree(curScore); // BQM: for new bootstrap
+ }
+ if (verbose_mode >= VB_DEBUG) {
+ cout << "Doing NNI round " << numSteps << endl;
+ if (isSuperTree()) {
+ ((PhyloSuperTree*) this)->printMapInfo();
}
+ }
- nonConfNNIs.clear(); // Vector containing non-conflicting positive NNIs
-// optBrans.clear(); // Vector containing branch length of the positive NNIs
-// orgBrans.clear(); // Vector containing all current branch of the tree
- plusNNIs.clear(); // Vector containing all positive NNIs
-// saveBranches(); // save all current branch lengths
- saveBranchLengths(lenvec);
- initPartitionInfo(); // for super tree
- int numRemoved;
- if (nodes1.size() == 0) {
- assert (nodes2.size() == 0);
- getAllInnerBranches(nodes1, nodes2, &candidateTrees.getStableSplits());
- assert(nodes1.size() == (aln->getNSeq() - 3 - candidateTrees.getStableSplits().size()));
- } else {
- // exclude stable splits from NNI evaluation
- numRemoved = removeBranches(nodes1, nodes2, candidateTrees.getStableSplits());
- }
-// cout << "Number of splits removed: " << numRemoved << endl;
- assert(nodes1.size() == nodes2.size());
-// for (int i = 0; i < nodes1.size(); i++) {
-// cout << "(" << nodes1[i]->id << "," << nodes2[i]->id << ") ; ";
-// }
-// cout << endl;
-// printTree(cout, WT_TAXON_ID + WT_INT_NODE + WT_NEWLINE);
- evalNNIs(nodes1, nodes2);
-
-// if (!nni_sort) {
-// evalNNIs(); // generate all positive NNI moves
-// } else {
-// evalNNIsSort(params->approximate_nni);
-// }
+ // save all current branch lengths
+ DoubleVector lenvec;
+ saveBranchLengths(lenvec);
- /* sort all positive NNI moves (descending) */
- sort(plusNNIs.begin(), plusNNIs.end());
- if (verbose_mode >= VB_DEBUG) {
- cout << "curScore: " << curScore << endl;
- for (int i = 0; i < plusNNIs.size(); i++) {
- cout << "Logl of positive NNI " << i << " : " << plusNNIs[i].newloglh << endl;
- }
- }
+ // for super tree
+ initPartitionInfo();
- if (plusNNIs.size() == 0) {
- break;
- }
+ nniBranches.clear();
+ nonNNIBranches.clear();
+
+ bool startSpeedNNI;
+ // When tabu and speednni are combined, speednni is only start from third steps
+ if (!initTabuSplits.empty() && numSteps < 3) {
+ startSpeedNNI = false;
+ } else if (speedNNI && !appliedNNIs.empty()) { // needs moves from the previous round, so never true in round 1
+ startSpeedNNI = true;
+ } else {
+ startSpeedNNI = false;
+ }
+
+ if (startSpeedNNI) {
+ // speedNNI option: only evaluate NNIs that are 2 branches away from the previously applied NNI
+ Branches filteredNNIBranches;
+ filterNNIBranches(appliedNNIs, filteredNNIBranches);
+ for (Branches::iterator it = filteredNNIBranches.begin(); it != filteredNNIBranches.end(); it++) {
+ Branch curBranch = it->second;
+ PhyloNeighbor* nei = (PhyloNeighbor*) curBranch.first->findNeighbor(curBranch.second);
+ Split* curSplit = nei->split;
+ bool tabu = false;
+ bool stable = false;
+ if (!tabuSplits.empty()) {
+ int value;
+ if (tabuSplits.findSplit(curSplit, value) != NULL)
+ tabu = true;
+ }
+ if (!candidateTrees.getCandSplits().empty()) {
+ int value;
+ if (candidateTrees.getCandSplits().findSplit(curSplit, value) != NULL)
+ stable = true;
- /* remove conflicting NNIs */
- genNonconfNNIs();
- numNNIs = nonConfNNIs.size();
- if (verbose_mode >= VB_DEBUG) {
- for (int i = 0; i < nonConfNNIs.size(); i++) {
- cout << "Log-likelihood of non-conflicting NNI " << i << " : " << nonConfNNIs[i].newloglh << endl;
+ }
+ if (!tabu && !stable) { // keep only branches whose split is in neither tabuSplits nor the candidate splits
+ int branchID = pairInteger(curBranch.first->id, curBranch.second->id);
+ nniBranches.insert(pair<int, Branch>(branchID, curBranch));
}
}
+ } else {
+ getNNIBranches(tabuSplits, candidateTrees.getCandSplits(), nonNNIBranches, nniBranches);
}
- // Apply all non-conflicting positive NNIs
- doNNIs(numNNIs);
- if (verbose_mode >= VB_DEBUG) {
- cout << "NNI step: " << nni_steps << " / Number of NNIs applied: " << numNNIs << endl;
+ if (!tabuSplits.empty()) { // tabu splits are cleared after their first use, so they affect round 1 only
+ tabuSplits.clear();
}
- nodes1.clear();
- nodes2.clear();
- if (searchinfo.speednni) {
- getBranchesForNNI(nodes1, nodes2, appliedNNIs);
- appliedNNIs.clear();
+ positiveNNIs.clear();
+ evaluateNNIs(nniBranches, positiveNNIs);
+
+ if (positiveNNIs.size() == 0) {
+ if (!nonNNIBranches.empty() && totalNNIApplied == 0) { // fallback: try the excluded branches if nothing has been applied yet
+ evaluateNNIs(nonNNIBranches, positiveNNIs);
+ if (positiveNNIs.size() == 0) {
+ break;
+ }
+ } else {
+ break;
+ }
}
- // FOR TUNG: If you want to introduce this heuristic, please confirm with reevaluation again.
-// if (numNNIs > 1) {
- // Re-estimate branch lengths of the new tree
- curScore = optimizeAllBranches(1, params->loglh_epsilon, PLL_NEWZPERCYCLE);
-// } else {
-// curScore = computeLikelihood();
-// }
+ /* sort all positive NNI moves (ASCENDING) */
+ sort(positiveNNIs.begin(), positiveNNIs.end());
+ /* remove conflicting NNIs */
+ appliedNNIs.clear();
+ getCompatibleNNIs(positiveNNIs, appliedNNIs);
- // curScore should be larger than score of the best NNI
- if (curScore >= nonConfNNIs.at(0).newloglh - params->loglh_epsilon) {
- nni_count += numNNIs;
- rollBack = false;
- if (params->reduction) {
- string newickToplogy = getTopology();
- string newickString = getTreeString();
- if (candidateTrees.treeTopologyExist(newickToplogy)) {
- double oldScore = candidateTrees.getTopologyScore(newickToplogy);
- if (curScore > oldScore)
- candidateTrees.update(newickString, curScore, false);
- break;
- } else {
- candidateTrees.update(newickString, curScore, false);
- }
- }
- } else {
- /* tree cannot be worse if only 1 NNI is applied */
- if (numNNIs == 1 && curScore < nonConfNNIs.at(0).newloglh - 1.0) {
- cout.precision(15);
- cout << "BUG: current logl=" << curScore << " < " << nonConfNNIs.at(0).newloglh
- << "(best NNI)" << endl;
- assert(0);
- }
- if (verbose_mode >= VB_MED) {
- cout << "New score = " << curScore << " after applying " << numNNIs <<
- " is worse than score = " << nonConfNNIs.at(0).newloglh
- << " of the best NNI. Roll back tree ..." << endl;
- }
+ // do non-conflicting positive NNIs
+ doNNIs(appliedNNIs);
+ curScore = optimizeAllBranches(1, params->loglh_epsilon, PLL_NEWZPERCYCLE);
- // restore the tree by reverting all NNIs
- for (int i = 0; i < numNNIs; i++)
- doNNI(nonConfNNIs.at(i));
- // restore the branch lengths
-// restoreAllBrans();
+ if (curScore < appliedNNIs.at(0).newloglh - params->loglh_epsilon) { // combined moves scored below the first move alone
+ //cout << "Tree getting worse: curScore = " << curScore << " / best score = " << appliedNNIs.at(0).newloglh << endl;
+ // tree cannot be worse if only 1 NNI is applied
+ assert(appliedNNIs.size() != 1);
+ doNNIs(appliedNNIs); // second application of the same moves; NOTE(review): presumably undoes them, as the removed code reverted via doNNI - confirm
restoreBranchLengths(lenvec);
- // This is important because after restoring the branch lengths, all partial
- // likelihood need to be cleared.
-// if (params->lh_mem_save == LM_PER_NODE) {
-// initializeAllPartialLh();
-// } else
clearAllPartialLH();
-
- // UPDATE: the following is not needed as clearAllPartialLH() is now also defined for SuperTree
- // BQM: This was missing: one should also clear all subtrees of a supertree
-// if (isSuperTree()) {
-// PhyloSuperTree *stree = (PhyloSuperTree*)this;
-// for (PhyloSuperTree::iterator it = stree->begin(); it != stree->end(); it++) {
-// (*it)->clearAllPartialLH();
-// }
-// }
- rollBack = true;
- // only apply the best NNI
- numNNIs = 1;
- curScore = oldScore;
+ // only do the best NNI
+ appliedNNIs.resize(1);
+ doNNIs(appliedNNIs);
+// doNNI(appliedNNIs[0]);
+ totalNNIApplied++;
+ curScore = optimizeAllBranches(1, params->loglh_epsilon, PLL_NEWZPERCYCLE);
+ assert(curScore > appliedNNIs.at(0).newloglh - params->loglh_epsilon);
+ } else {
+ totalNNIApplied += appliedNNIs.size();
}
- // BUG in following line, causing premature break by rollBack! that's why commented out
-// if (curScore - oldScore < 0.1)
-// break;
- }
- if (nni_count == 0 && verbose_mode >= VB_MED) {
- cout << "NOTE: Tree is already NNI-optimized" << endl;
+
+ if (curScore - oldScore < params->loglh_epsilon) // stop once a round gains less than loglh_epsilon
+ break;
+
+ if (params->snni && (curScore > curBestScore + 0.1)) {
+ curBestScore = curScore;
+ }
+
+ if (Params::getInstance().write_intermediate_trees && save_all_trees != 2) {
+ printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
+ }
+
+ if (Params::getInstance().writeDistImdTrees) {
+ intermediateTrees.update(getTreeString(), curScore);
+ }
}
- if (nni_steps == MAXSTEPS) {
- cout << "WARNING: NNI search needs unusual large number of steps (" << MAXSTEPS << ") to converge!" << endl;
+
+ if (totalNNIApplied == 0 && verbose_mode >= VB_MED) {
+ cout << "NOTE: Input tree is already NNI-optimal" << endl;
}
- return curScore;
-}
-void IQTree::getBranchesForNNI(NodeVector& nodes1, NodeVector& nodes2, vector<NNIMove>& nnis) {
- assert(nodes1.size() == nodes2.size());
- for (vector<NNIMove>::iterator it = nnis.begin(); it != nnis.end(); it++) {
- if (!branchExist((*it).node1, (*it).node2, nodes1, nodes2)) {
- assert(isInnerBranch((*it).node1, (*it).node2));
- nodes1.push_back((*it).node1);
- nodes2.push_back((*it).node2);
+ if (numSteps == MAXSTEPS) { // NOTE(review): a loop that runs to completion leaves numSteps at MAXSTEPS + 1, so this matches only a break in the last round - confirm
+ cout << "WARNING: NNI search needs unusual large number of steps (" << numInnerBranches << ") to converge!" << endl; // NOTE(review): prints numInnerBranches (leafNum - 3), not MAXSTEPS - confirm
}
- getInnerBranches(nodes1, nodes2, 2, (*it).node1, (*it).node2);
- getInnerBranches(nodes1, nodes2, 2, (*it).node2, (*it).node1);
+ return make_pair(numSteps, totalNNIApplied); // NOTE(review): order is (steps, NNIs), while doNNISearch documents the pair as NNIs first - confirm
}
+void IQTree::filterNNIBranches(vector<NNIMove> &appliedNNIs, Branches &nniBranches) { // Add to nniBranches the branch of every applied NNI plus the inner branches collected around it
+ for (vector<NNIMove>::iterator it = appliedNNIs.begin(); it != appliedNNIs.end(); it++) {
+ Branch curBranch; // the branch (node1, node2) the move was applied to
+ curBranch.first = it->node1;
+ curBranch.second = it->node2;
+ int branchID = pairInteger(it->node1->id, it->node2->id); // map key computed from the two node ids
+ if (nniBranches.find(branchID) == nniBranches.end()) // insert the branch itself only when its key is not present yet
+ nniBranches.insert(pair<int,Branch>(branchID, curBranch));
+ getSurroundingInnerBranches(it->node1, it->node2, 2, nniBranches); // called once per direction with depth argument 2; presumably collects inner branches up to 2 steps away - confirm
+ getSurroundingInnerBranches(it->node2, it->node1, 2, nniBranches);
+ }
}
double IQTree::pllOptimizeNNI(int &totalNNICount, int &nniSteps, SearchInfo &searchinfo) {
- if((globalParam->online_bootstrap == PLL_TRUE) && (globalParam->gbo_replicates > 0)) {
+ if((globalParams->online_bootstrap == PLL_TRUE) && (globalParams->gbo_replicates > 0)) {
pllInitUFBootData();
}
searchinfo.numAppliedNNIs = 0;
@@ -2467,34 +2920,35 @@ void IQTree::pllDestroyUFBootData(){
}
-void IQTree::doNNIs(int nni2apply, bool changeBran) {
- for (int i = 0; i < nni2apply; i++) {
- doNNI(nonConfNNIs.at(i));
- appliedNNIs.push_back(nonConfNNIs.at(i));
+void IQTree::doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran) {
+ for (vector<NNIMove>::iterator it = compatibleNNIs.begin(); it != compatibleNNIs.end(); it++) {
+ doNNI(*it);
if (!params->leastSquareNNI && changeBran) {
// apply new branch lengths
- changeNNIBrans(nonConfNNIs.at(i));
+ changeNNIBrans(*it);
}
}
// 2015-10-14: has to reset this pointer when read in
current_it = current_it_back = NULL;
-
+
}
-void IQTree::genNonconfNNIs() {
- for (vector<NNIMove>::iterator iterMove = plusNNIs.begin(); iterMove != plusNNIs.end(); iterMove++) {
- bool choosen = true;
- for (vector<NNIMove>::iterator iterNextMove = nonConfNNIs.begin(); iterNextMove != nonConfNNIs.end();
- iterNextMove++) {
- if ((*iterMove).node1 == (*(iterNextMove)).node1 || (*iterMove).node2 == (*(iterNextMove)).node1
- || (*iterMove).node1 == (*(iterNextMove)).node2 || (*iterMove).node2 == (*(iterNextMove)).node2) {
- choosen = false;
+void IQTree::getCompatibleNNIs(vector<NNIMove> &nniMoves, vector<NNIMove> &compatibleNNIs) {
+ compatibleNNIs.clear();
+ for (vector<NNIMove>::iterator it1 = nniMoves.begin(); it1 != nniMoves.end(); it1++) {
+ bool select = true;
+ for (vector<NNIMove>::iterator it2 = compatibleNNIs.begin(); it2 != compatibleNNIs.end(); it2++) {
+ if ((*it1).node1 == (*(it2)).node1
+ || (*it1).node2 == (*(it2)).node1
+ || (*it1).node1 == (*(it2)).node2
+ || (*it1).node2 == (*(it2)).node2) {
+ select = false;
break;
}
}
- if (choosen) {
- nonConfNNIs.push_back(*iterMove);
+ if (select) {
+ compatibleNNIs.push_back(*it1);
}
}
}
@@ -2559,39 +3013,35 @@ void IQTree::setDelete(int _delete) {
k_delete = _delete;
}
-void IQTree::evalNNIs(PhyloNode *node, PhyloNode *dad) {
- if (!node) {
- node = (PhyloNode*) root;
- }
- // internal branch
- if (!node->isLeaf() && dad && !dad->isLeaf()) {
- NNIMove myMove = getBestNNIForBran(node, dad, NULL);
- if (myMove.newloglh > curScore + params->loglh_epsilon) {
- addPositiveNNIMove(myMove);
+void IQTree::evaluateNNIs(Branches &nniBranches, vector<NNIMove> &positiveNNIs) {
+ for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); it++) {
+ NNIMove nni = getBestNNIForBran((PhyloNode*) it->second.first, (PhyloNode*) it->second.second, NULL);
+ if (nni.newloglh > curScore) {
+ positiveNNIs.push_back(nni);
}
- }
- FOR_NEIGHBOR_IT(node, dad, it){
- evalNNIs((PhyloNode*) (*it)->node, node);
+ // synchronize tree during optimization step
+ if (MPIHelper::getInstance().isMaster() && candidateset_changed.size() > 0
+ && MPIHelper::getInstance().gotMessage()) {
+ syncCurrentTree();
+ }
}
}
-void IQTree::evalNNIs(NodeVector& nodes1, NodeVector& nodes2) {
- if (!nodes1.empty()) {
- assert(!nodes2.empty());
- assert(nodes1.size() == nodes2.size());
- NodeVector::iterator it1;
- NodeVector::iterator it2;
- for (it1 = nodes1.begin(), it2 = nodes2.begin(); it1 != nodes1.end() && it2 != nodes2.end(); it1++, it2++) {
- assert(isInnerBranch(*it1, *it2));
- NNIMove myMove = getBestNNIForBran((PhyloNode*) *it1, (PhyloNode*) *it2, NULL);
- if (myMove.newloglh > curScore + params->loglh_epsilon) {
- addPositiveNNIMove(myMove);
- }
- }
- } else {
- evalNNIs();
+//Branches IQTree::getReducedListOfNNIBranches(Branches &previousNNIBranches) {
+// Branches resBranches;
+// for (Branches::iterator it = previousNNIBranches.begin(); it != previousNNIBranches.end(); it++) {
+// getSurroundingInnerBranches(it->second.first, it->second.second, 2, resBranches);
+// getSurroundingInnerBranches(it->second.second, it->second.first, 2, resBranches);
+// }
+//}
+
+double IQTree::optimizeNNIBranches(Branches &nniBranches) {
+ for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); it++) {
+ optimizeOneBranch((PhyloNode*) it->second.first, (PhyloNode*) it->second.second, true, PLL_NEWZPERCYCLE);
}
+ curScore = computeLikelihoodFromBuffer();
+ return curScore;
}
/**
@@ -2697,7 +3147,7 @@ void IQTree::saveCurrentTree(double cur_logl) {
// treels_logl.push_back(cur_logl);
// num_trees_for_rell++;
- if (write_intermediate_trees)
+ if (Params::getInstance().write_intermediate_trees)
printTree(out_treels, WT_NEWLINE | WT_BR_LEN);
int nptn = getAlnNPattern();
@@ -2724,7 +3174,7 @@ void IQTree::saveCurrentTree(double cur_logl) {
// online bootstrap
// int ptn;
// int updated = 0;
- int nsamples = boot_samples.size();
+// int nsamples = boot_samples.size();
ostringstream ostr;
string tree_str, tree_str_brlen;
setRootNode(params->root);
@@ -2740,7 +3190,7 @@ void IQTree::saveCurrentTree(double cur_logl) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
- for (int sample = 0; sample < nsamples; sample++) {
+ for (int sample = sample_start; sample < sample_end; sample++) {
double rell = 0.0;
{
@@ -2771,7 +3221,7 @@ void IQTree::saveCurrentTree(double cur_logl) {
}
}
}
- if (print_tree_lh) {
+ if (Params::getInstance().print_tree_lh) {
out_treelh << cur_logl;
double prob;
#ifdef BOOT_VAL_FLOAT
@@ -2887,7 +3337,7 @@ void IQTree::summarizeBootstrap(Params &params, MTreeSet &trees) {
freeNode();
// RARE BUG FIX: to avoid cases that identical seqs were removed and leaf name happens to be IDs
MTree::readTree(tree_stream, rooted);
-
+
assignLeafNames();
if (isSuperTree()) {
((PhyloSuperTree*) this)->mapTrees();
@@ -3073,9 +3523,9 @@ double IQTree::computeBootstrapCorrelation() {
return corr;
}
-void IQTree::addPositiveNNIMove(NNIMove myMove) {
- plusNNIs.push_back(myMove);
-}
+//void IQTree::addPositiveNNIMove(NNIMove myMove) {
+// plusNNIs.push_back(myMove);
+//}
void IQTree::printResultTree(string suffix) {
setRootNode(params->root);
@@ -3083,13 +3533,18 @@ void IQTree::printResultTree(string suffix) {
return;
string tree_file_name = params->out_prefix;
tree_file_name += ".treefile";
+ if (MPIHelper::getInstance().isWorker()) {
+ return;
+ stringstream processTreeFile;
+ processTreeFile << tree_file_name << "." << MPIHelper::getInstance().getProcessID();
+ tree_file_name = processTreeFile.str();
+ }
if (suffix.compare("") != 0) {
- string iter_tree_name = tree_file_name + "." + suffix;
- printTree(iter_tree_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
- } else {
- printTree(tree_file_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
+ tree_file_name += "." + suffix;
}
- //printTree(tree_file_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH);
+ printTree(tree_file_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
+ if (verbose_mode >= VB_MED)
+ cout << "Best tree printed to " << tree_file_name << endl;
}
void IQTree::printResultTree(ostream &out) {
@@ -3097,6 +3552,18 @@ void IQTree::printResultTree(ostream &out) {
printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
}
+void IQTree::printBestCandidateTree() {
+ if (MPIHelper::getInstance().isWorker())
+ return;
+ string tree_file_name = params->out_prefix;
+ tree_file_name += ".treefile";
+ readTreeString(candidateTrees.getBestTreeStrings(1)[0]);
+ setRootNode(params->root);
+ printTree(tree_file_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
+ if (verbose_mode >= VB_MED)
+ cout << "Best tree printed to " << tree_file_name << endl;
+}
+
void IQTree::printPhylolibTree(const char* suffix) {
pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back, PLL_TRUE, 1, 0, 0, 0,
@@ -3119,8 +3586,9 @@ void IQTree::printIntermediateTree(int brtype) {
computePatternLikelihood(pattern_lh, &logl);
}
- if (write_intermediate_trees)
+ if (Params::getInstance().write_intermediate_trees)
printTree(out_treels, brtype);
+
if (params->print_tree_lh) {
out_treelh.precision(10);
out_treelh << logl;
@@ -3140,8 +3608,286 @@ void IQTree::printIntermediateTree(int brtype) {
}
int x = save_all_trees;
save_all_trees = 2;
- evalNNIs();
+ // TODO: Why is evalNNIs() called in this function?
+ //evalNNIs();
+ Branches innerBranches;
+ vector<NNIMove> positiveNNIs;
+ getInnerBranches(innerBranches);
+ evaluateNNIs(innerBranches, positiveNNIs);
save_all_trees = x;
}
+void IQTree::convertNNI2Splits(SplitIntMap &nniSplits, int numNNIs, vector<NNIMove> &compatibleNNIs) {
+ for (int i = 0; i < numNNIs; i++) {
+ Split *sp = new Split(*getSplit(compatibleNNIs[i].node1, compatibleNNIs[i].node2));
+ if (sp->shouldInvert()) {
+ sp->invert();
+ }
+ nniSplits.insertSplit(sp, 1);
+ }
+}
+
+double IQTree::getBestScore() {
+ return candidateTrees.getBestScore();
+}
+
+vector<string> IQTree::getBestTrees(int numTrees) {
+ return candidateTrees.getBestTreeStrings(numTrees);
+}
+
+
+/*******************************************
+ MPI stuffs
+*******************************************/
+
+void IQTree::syncCandidateTrees(int nTrees, bool updateStopRule) {
+ if (MPIHelper::getInstance().getNumProcesses() == 1)
+ return;
+
+#ifdef _IQTREE_MPI
+ // gather trees to Master
+
+ Checkpoint *ckp = new Checkpoint;
+
+ if (MPIHelper::getInstance().isMaster()) {
+ // update candidate set at master
+ int trees = 0;
+ for (int w = 1; w < MPIHelper::getInstance().getNumProcesses(); w++) {
+ int worker = MPIHelper::getInstance().recvCheckpoint(ckp);
+ CandidateSet cset;
+ cset.setCheckpoint(ckp);
+ cset.restoreCheckpoint();
+ for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
+ addTreeToCandidateSet(it->second.tree, it->second.score, updateStopRule, worker);
+ trees += ckp->size();
+ ckp->clear();
+ }
+ cout << trees << " candidate trees gathered from workers" << endl;
+ // get the best candidate trees
+ int numTrees = max(nTrees, MPIHelper::getInstance().getNumProcesses());
+ CandidateSet bestCandidates = candidateTrees.getBestCandidateTrees(numTrees);
+ int saved_numNNITrees = params->numNNITrees;
+ params->numNNITrees = numTrees;
+ bestCandidates.setCheckpoint(ckp);
+ bestCandidates.saveCheckpoint();
+ params->numNNITrees = saved_numNNITrees;
+ } else {
+ // send candidate set to master
+ CandidateSet cset = candidateTrees.getBestCandidateTrees();
+ cset.setCheckpoint(ckp);
+ cset.saveCheckpoint();
+ MPIHelper::getInstance().sendCheckpoint(ckp, PROC_MASTER);
+ cout << ckp->size() << " candidate trees sent to master" << endl;
+ ckp->clear();
+ }
+
+ // broadcast candidate trees from master to worker
+ MPIHelper::getInstance().broadcastCheckpoint(ckp);
+ cout << ckp->size() << " trees broadcasted to workers" << endl;
+
+ if (MPIHelper::getInstance().isWorker()) {
+ // update candidate set at worker
+ CandidateSet cset;
+ cset.setCheckpoint(ckp);
+ cset.restoreCheckpoint();
+ for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
+ addTreeToCandidateSet(it->second.tree, it->second.score, false, PROC_MASTER);
+ }
+
+ delete ckp;
+#endif
+}
+
+void IQTree::syncCurrentTree() {
+ if (MPIHelper::getInstance().getNumProcesses() == 1)
+ return;
+#ifdef _IQTREE_MPI
+ //------ BLOCKING COMMUNICATION ------//
+ Checkpoint *checkpoint = new Checkpoint;
+ string tree;
+ double score;
+
+ if (MPIHelper::getInstance().isMaster()) {
+ // master: receive tree from WORKERS
+ int worker = MPIHelper::getInstance().recvCheckpoint(checkpoint);
+ MPIHelper::getInstance().increaseTreeReceived();
+ CKP_RESTORE(tree);
+ CKP_RESTORE(score);
+ int pos = addTreeToCandidateSet(tree, score, true, worker);
+ if (pos >= 0 && pos < params->popSize) {
+ // candidate set is changed, update for other workers
+ for (int w = 0; w < candidateset_changed.size(); w++)
+ if (w != worker)
+ candidateset_changed[w] = true;
+ }
+
+ if (boot_samples.size() > 0) {
+ restoreUFBoot(checkpoint);
+ }
+
+ // send candidate trees to worker
+ checkpoint->clear();
+ if (boot_samples.size() > 0)
+ CKP_SAVE(logl_cutoff);
+ if (candidateset_changed[worker]) {
+ CandidateSet cset = candidateTrees.getBestCandidateTrees(Params::getInstance().popSize);
+ cset.setCheckpoint(checkpoint);
+ cset.saveCheckpoint();
+ candidateset_changed[worker] = false;
+ MPIHelper::getInstance().increaseTreeSent(Params::getInstance().popSize);
+ }
+ MPIHelper::getInstance().sendCheckpoint(checkpoint, worker);
+ } else {
+ // worker: always send tree to MASTER
+ tree = getTreeString();
+ score = curScore;
+ CKP_SAVE(tree);
+ CKP_SAVE(score);
+ if (boot_samples.size() > 0) {
+ saveUFBoot(checkpoint);
+ }
+ MPIHelper::getInstance().sendCheckpoint(checkpoint, PROC_MASTER);
+ MPIHelper::getInstance().increaseTreeSent();
+
+ // now receive the candidate set
+ MPIHelper::getInstance().recvCheckpoint(checkpoint, PROC_MASTER);
+ if (checkpoint->getBool("stop")) {
+ cout << "Worker gets STOP message!" << endl;
+ stop_rule.shouldStop();
+ } else {
+ CandidateSet cset;
+ cset.setCheckpoint(checkpoint);
+ cset.restoreCheckpoint();
+ for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
+ addTreeToCandidateSet(it->second.tree, it->second.score, false, MPIHelper::getInstance().getProcessID());
+ MPIHelper::getInstance().increaseTreeReceived(cset.size());
+ if (boot_samples.size() > 0)
+ CKP_RESTORE(logl_cutoff);
+ }
+ }
+
+ delete checkpoint;
+
+#endif
+}
+
+void IQTree::sendStopMessage() {
+ if (MPIHelper::getInstance().getNumProcesses() == 1)
+ return;
+#ifdef _IQTREE_MPI
+
+ Checkpoint *checkpoint = new Checkpoint;
+ checkpoint->putBool("stop", true);
+ stringstream ss;
+ checkpoint->dump(ss);
+ string str = ss.str();
+ string tree;
+ double score;
+
+ cout << "Sending STOP message to workers" << endl;
+
+ // send STOP message to all processes
+ if (MPIHelper::getInstance().isMaster()) {
+ // repeatedly send stop message to all workers
+ for (int w = 1; w < MPIHelper::getInstance().getNumProcesses(); w++) {
+// string buf;
+// int worker = MPIHelper::getInstance().recvString(buf);
+ checkpoint->clear();
+ int worker = MPIHelper::getInstance().recvCheckpoint(checkpoint);
+ MPIHelper::getInstance().increaseTreeReceived();
+ CKP_RESTORE(tree);
+ CKP_RESTORE(score);
+ addTreeToCandidateSet(tree, score, true, worker);
+ MPIHelper::getInstance().sendString(str, worker, TREE_TAG);
+ }
+ }
+
+ delete checkpoint;
+
+ MPI_Barrier(MPI_COMM_WORLD);
+#endif
+}
+
+
+int PhyloTree::testNumThreads() {
+#ifndef _OPENMP
+ return 1;
+#else
+ int max_procs = countPhysicalCPUCores();
+ cout << "Measuring multi-threading efficiency up to " << max_procs << " CPU cores" << endl;
+ DoubleVector runTimes;
+ int bestProc = 0;
+ double saved_curScore = curScore;
+ int num_iter = 1;
+
+ // generate different trees
+ int tree;
+ double min_time = max_procs; // minimum time in seconds
+ StrVector trees;
+ trees.push_back(getTreeString());
+
+ for (int proc = 1; proc <= max_procs; proc++) {
+
+ omp_set_num_threads(proc);
+ setLikelihoodKernel(sse, proc);
+ initializeAllPartialLh();
+
+ double beginTime = getRealTime();
+ double runTime, logl;
+
+ for (tree = 0; tree < trees.size(); tree++) {
+ readTreeString(trees[tree]);
+ logl = optimizeAllBranches(num_iter);
+ runTime = getRealTime() - beginTime;
+
+ // too fast, increase number of iterations
+ if (runTime*10 < min_time && proc == 1 && tree == 0) {
+ int new_num_iter = 10;
+ cout << "Increase to " << new_num_iter << " rounds for branch lengths" << endl;
+ logl = optimizeAllBranches(new_num_iter - num_iter);
+ num_iter = new_num_iter;
+ runTime = getRealTime() - beginTime;
+ }
+
+ // considering at least 2 trees
+ if ((runTime < min_time && proc == 1) || trees.size() == 1) {
+ // time not reached, add more tree
+// readTreeString(trees[0]);
+// doRandomNNIs();
+ generateRandomTree(YULE_HARDING);
+ wrapperFixNegativeBranch(true);
+ trees.push_back(getTreeString());
+ }
+ curScore = saved_curScore;
+ }
+
+ if (proc == 1)
+ cout << trees.size() << " trees examined" << endl;
+
+ deleteAllPartialLh();
+
+ runTimes.push_back(runTime);
+ double speedup = runTimes[0] / runTime;
+
+ cout << "Threads: " << proc << " / Time: " << runTime << " sec / Speedup: " << speedup
+ << " / Efficiency: " << (int)round(speedup*100/proc) << "% / LogL: " << (int)logl << endl;
+
+ // break if efficiency is too bad (<= 50%) or run time is more than 10% worse than the best run time
+ if (speedup*2 <= proc || (runTime > runTimes[bestProc]*1.1 && proc>1))
+ break;
+
+ // update best threads if sufficient
+ if (runTime <= runTimes[bestProc]*0.95)
+ bestProc = proc-1;
+
+ }
+
+ readTreeString(trees[0]);
+
+ cout << "BEST NUMBER OF THREADS: " << bestProc+1 << endl << endl;
+ setLikelihoodKernel(sse, bestProc+1);
+
+ return bestProc+1;
+#endif
+}
diff --git a/iqtree.h b/iqtree.h
index 1a34ff0..b359971 100644
--- a/iqtree.h
+++ b/iqtree.h
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2009 by BUI Quang Minh *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -74,9 +76,7 @@ inline int int_branch_cmp(const IntBranchInfo a, const IntBranchInfo b) {
typedef multiset<RepLeaf*, nodeheightcmp> RepresentLeafSet;
/**
-Important Quartet Puzzling
-
- @author BUI Quang Minh <minh.bui at univie.ac.at>
+ Main class for tree search
*/
class IQTree : public PhyloTree {
public:
@@ -112,6 +112,19 @@ public:
*/
virtual void restoreCheckpoint();
+ /**
+ save UFBoot_trees.
+ For MPI workers only save from sample_start to sample_end
+ @param checkpoint Checkpoint object
+ */
+ void saveUFBoot(Checkpoint *checkpoint);
+
+ /**
+
+ restore UFBoot_trees from sample_start to sample_end (MPI)
+ @param checkpoint Checkpoint object
+ */
+ void restoreUFBoot(Checkpoint *checkpoint);
/**
* setup all necessary parameters (declared as virtual needed for phylosupertree)
@@ -136,6 +149,8 @@ public:
*/
void printResultTree(ostream &out);
+ void printBestCandidateTree();
+
/**
* print phylolib tree to a file.
* @param suffix suffix string for the tree file
@@ -207,18 +222,40 @@ public:
void doIQP();
/**
- * @brief remove all branches mapped to splits in \a split
- * @param nodes1 node vector containing one end of the branches
- * @param nodes2 node vector containing the other end of the branches
- * @return number of branches removed
+ * @brief get non-tabu branches from a set of branches
+ *
+ * @param
+ * allBranches[IN] the initial branches
+ * @param
+ * initTabuSplits[IN] the tabu splits
+ * @param
+ * nonTabuBranches[OUT] non-tabu branches from \a allBranches
+ * @param[OUT]
+ * tabuBranches branches that are tabu
+ */
+ void getNonTabuBranches(Branches& allBranches, SplitGraph& tabuSplits, Branches& nonTabuBranches, Branches* tabuBranches = NULL);
+
+ /**
+ * @brief remove all branches corresponding to nnis
+ * @param nodes1 node vector containing one end of the branches
+ * @param nodes2 node vector containing the other end of the branches
+ * @param nnis
+ * @return
*/
- int removeBranches(NodeVector& nodes1, NodeVector& nodes2, SplitGraph& splits);
+ int removeNNIBranches(NodeVector& nodes1, NodeVector& nodes2, unordered_map<string, NNIMove> nnis);
/**
* Perform a series of random NNI moves
- * @param numNNI number of random NNIs
+ * @return the perturbed newick string
+ */
+ string doRandomNNIs(bool storeTabu = false);
+
+ /**
+ * Do a random NNI on splits that are shared among all the candidate trees.
+ * @return the perturbed newick string
*/
- void doRandomNNIs(int numNNI);
+ string perturbStableSplits(double supportValue);
+
/**
* input model parameters from IQ-TREE to PLL
@@ -271,6 +308,9 @@ public:
*/
double swapTaxa(PhyloNode *node1, PhyloNode *node2);
+ /** collect bootstrap trees from workers to master */
+ void collectBootTrees();
+
/**
perform tree search
@return best likelihood found
@@ -288,13 +328,14 @@ public:
/**
* Wrapper function to compute tree log-likelihood.
* This function with call either PLL or IQ-TREE to compute tree log-likelihood
+ * @return current score of tree
*/
- void computeLogL();
+ double computeLogL();
/**
- * Print numBestScore found so far, starting from the highest
+ * Print scores of the trees used for generating offspring
*/
- void printBestScores(int numBestScore);
+ void printBestScores();
/****************************************************************************
Fast Nearest Neighbor Interchange by maximum likelihood
@@ -302,13 +343,79 @@ public:
/**
- This implement the fastNNI algorithm proposed in PHYML paper
- TUNG: this is a virtual function, so it will be called automatically by optimizeNNIBranches()
- @return best likelihood found
- @param skipped (OUT) 1 if current iteration is skipped, otherwise 0
- @param nni_count (OUT) the number of single NNI moves proceeded so far
+ * Optimize current tree using NNI
+ *
+ * @return
+ * <number of NNI steps, number of NNIs> done
+ */
+ pair<int, int> optimizeNNI(bool speedNNI = true);
+
+ /**
+ * Return the current best score found
+ */
+ double getBestScore();
+
+ /**
+ * @brief Generate a list of internal branches on which NNI moves will be evaluated
+ * @param
+ * nonNNIBranches [OUT] Branches on which NNI evaluation will be skipped
+ * @param
+ * tabuSplits [IN] A list of splits that are considered tabu
+ * @param
+ * candidateSplitHash [IN] Lists that appear on the best 20 candidate trees
+ * @param
+ * dad [IN] for navigation
+ * @param
+ * node[IN] for navigation
+ * @return A list of branches for evaluating NNIs
+ */
+ void getNNIBranches(SplitIntMap &tabuSplits, SplitIntMap &candidateSplitHash, Branches &nonNNIBranches, Branches &outBranches, Node *dad = NULL, Node *node = NULL);
+
+ /**
+ * Return internal branches that appear in \a candidateSplitHash
+ * and has support value >= \a supportValue.
+ * @param
+ * candidateSplitHash [IN] A set of splits with the number of occurrences.
+ * @param
+ * supportValue [IN] Only consider split whose support value is higher than this number
+ * @param
+ * dad [IN] for navigation
+ * @param
+ * node[IN] for navigation
+ * @return
+ * A list of branches fulfilling the aforementioned conditions.
*/
- double optimizeNNI(int &nni_count, int &nni_steps);
+ void getStableBranches(SplitIntMap &candSplits, double supportValue, Branches &outBranches, Node *dad = NULL, Node *node = NULL);
+
+
+ /**
+ *
+ * Determine whether to evaluate NNI moves on the branch corresponding to the current split
+ *
+ * @param curSplit [IN] the split that correspond to the current branch
+ * @param tabuSplits [IN] tabu splits
+ * @param candSplits [IN] splits contained in all candidate trees
+ * @param nonNNIBranches [OUT] branches that are not inserted to nniBranches are stored here
+ * @param nniBranches [OUT] if the split is neither stable nor tabu it is inserted in this list
+ */
+ bool shouldEvaluate(Split* curSplit, SplitIntMap &tabuSplits, SplitIntMap &candSplits);
+
+
+ /**
+ * @brief Only select NNI branches that are 2 branches away from the previously
+ * applied NNIs
+ * @param
+ * appliedNNIs List of previously applied NNIs
+ * @return
+ * List of branches to be evaluated
+ */
+ void filterNNIBranches(vector<NNIMove> &appliedNNIs, Branches &outBranches);
+
+
+ /**
+ * @brief get branches that correspond to the splits in \a nniSplits
+ */
+ void getSplitBranches(Branches &branches, SplitIntMap &splits, Node *dad = NULL, Node *node = NULL);
/**
* Do fastNNI using PLL
@@ -320,30 +427,29 @@ public:
/**
* @brief Perform NNI search on the current tree topology
+ * @return <number_of_NNIs, number_of_NNI_steps>
* This function will automatically use the selected kernel (either PLL or IQ-TREE)
- *
- * @param nniCount (OUT) number of NNIs applied
- * @param nniSteps (OUT) number of NNI steps done
- * @return the new NEWICK string
*/
- string doNNISearch(int &nniCount, int &nniSteps);
+ pair<int, int> doNNISearch();
/**
- @brief evaluate all NNIs and store them in possilbleNNIMoves list
+ @brief evaluate all NNIs
@param node evaluate all NNIs of the subtree rooted at node
@param dad a neighbor of \p node which does not belong to the subtree
being considered (used for traverse direction)
*/
- void evalNNIs(PhyloNode *node = NULL, PhyloNode *dad = NULL);
+ //void evalNNIs(PhyloNode *node = NULL, PhyloNode *dad = NULL);
/**
- * @brief Evaluate all NNIs on branch defined by \a nodes1 and \a nodes2
+ * @brief Evaluate all NNIs on branch defined by \a branches
*
- * @param[in] nodes1 contains one ends of the branches for NNI evaluation
- * @param[in] nodes2 contains the other ends of the branches for NNI evaluation
+ * @param nniBranches [IN] the branches on which NNIs will be evaluated
+ * @param outNNIMoves [OUT] list of positive NNIs
*/
- void evalNNIs(NodeVector &nodes1, NodeVector &nodes2);
+ void evaluateNNIs(Branches &nniBranches, vector<NNIMove> &outNNIMoves);
+
+ double optimizeNNIBranches(Branches &nniBranches);
/**
search all positive NNI move on the current tree and save them
@@ -352,11 +458,11 @@ public:
void evalNNIsSort(bool approx_nni);
/**
- apply nni2apply NNIs from the non-conflicting NNI list
- @param nni2apply number of NNIs to apply from the list
+ apply NNIs from the non-conflicting NNI list
+ @param compatibleNNIs vector of all compatible NNIs
@param changeBran whether or not the computed branch lengths should be applied
*/
- virtual void doNNIs(int nni2apply, bool changeBran = true);
+ virtual void doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran = true);
/**
* Restore the old 5 branch lengths stored in the NNI move.
@@ -365,43 +471,18 @@ public:
*/
//void restoreNNIBranches(NNIMove nnimove);
- /**
- generate non conflicting NNI moves.
- moves are saved in vec_nonconf_nni
- */
- void genNonconfNNIs();
-
- /**
- add a NNI move to the list of possible NNI moves;
- */
- void addPositiveNNIMove(NNIMove myMove);
-
- /**
- * Save all the current branch lengths
- */
-// void saveBranches(PhyloNode *node = NULL, PhyloNode *dad = NULL);
-
- /**
- * Restore the branch lengths from the saved values
- */
-// virtual void restoreAllBrans(PhyloNode *node = NULL, PhyloNode *dad = NULL);
/**
- * Get the branch length of the branch node1-node2
- * @param node1
- * @param node2
- * @return the branch length
+ * @brief get a list of compatible NNIs from a list of NNIs
+ * @param nniMoves [IN] list of NNIs
+ * @return list of compatible NNIs
*/
-// double getBranLen(PhyloNode *node1, PhyloNode *node2);
-
+ void getCompatibleNNIs(vector<NNIMove> &nniMoves, vector<NNIMove> &compatibleNNIs);
/**
- Described in PhyML paper: apply change to branch that does not
- correspond to a swap with the following formula l = l + lamda(la - l)
- @param node1 the first node of the branch
- @param node2 the second node of the branch
+ add a NNI move to the list of possible NNI moves;
*/
-// void changeBranLen(PhyloNode *node1, PhyloNode *node2, double branLen);
+ void addPositiveNNIMove(NNIMove &myMove);
/**
* Estimate the 95% quantile of the distribution of N (see paper for more d
@@ -482,6 +563,7 @@ public:
*/
vector<int> vecNumNNI;
+
/**
* Do memory allocation and initialize parameter for UFBoot to run with PLL
*/
@@ -530,6 +612,11 @@ public:
protected:
/**
+ * Splits corresponding to random NNIs
+ */
+ SplitIntMap initTabuSplits;
+
+ /**
criterion to assess important quartet
*/
IQP_ASSESS_QUARTET iqp_assess_quartet;
@@ -541,37 +628,11 @@ protected:
NodeVector taxaSet;
/**
- * confidence value for number of NNIs found in one iteration
- */
- int nni_count_est;
-
- /**
- * confidence value for likelihood improvement made by one NNI
- */
- double nni_delta_est;
-
-
- /**
* Vector contains approximated improvement pro NNI at each iterations
*/
vector<double> vecImpProNNI;
/**
- List of positive NNI for the current tree;
- */
- vector<NNIMove> plusNNIs;
-
- /**
- List of non-conflicting NNIs for the current tree;
- */
- vector<NNIMove> nonConfNNIs;
-
- /**
- * NNIs that have been applied in the previous step
- */
- vector<NNIMove> appliedNNIs;
-
- /**
Optimal branch lengths
*/
// mapString2Double optBrans;
@@ -582,26 +643,66 @@ protected:
* @param[out] nodes2 the other ends of the branches
* @param[in] nnis NNIs that have been previously applied
*/
- void getBranchesForNNI(NodeVector& nodes1, NodeVector& nodes2, vector<NNIMove>& nnis);
+ void generateNNIBranches(NodeVector& nodes1, NodeVector& nodes2, unordered_map<string, NNIMove>& nnis);
+
+ int k_delete, k_delete_min, k_delete_max, k_delete_stay;
+
+ /**
+ number of representative leaves for IQP step
+ */
+ int k_represent;
+
+public:
/**
- * Use fastNNI heuristic
+ * Candidate tree set (the current best N (default N = 5)
+ * NNI-optimal trees)
*/
- bool fastNNI;
+ CandidateSet candidateTrees;
/**
- Original branch lengths
+ * Set of all intermediate trees (initial trees, tree generated by NNI steps,
+ * NNI-optimal trees)
*/
-// mapString2Double orgBrans;
+ CandidateSet intermediateTrees;
- int k_delete, k_delete_min, k_delete_max, k_delete_stay;
/**
- number of representative leaves for IQP step
- */
- int k_represent;
+ * Update the candidate set with a new NNI-optimal tree. The maximum size of the candidate set
+ * is fixed to the initial setting. Thus, if the size exceeds the maximum number of trees, the worst
+ * tree will be removed.
+ *
+ * @param treeString
+ * the new tree
+ * @param score
+ * the score of the new tree
+ * @param updateStopRule
+ * Whether or not to update the stop rule
+ * @return relative position of the new tree to the current best.
+ * -1 if duplicated
+ * -2 if the candidate set is not updated
+ */
+ int addTreeToCandidateSet(string treeString, double score, bool updateStopRule, int sourceProcID);
+
+ /**
+ MPI: synchronize candidate trees between all processes
+ @param nTrees number of trees to broadcast
+ @param updateStopRule true to update stopping rule, false otherwise
+ */
+ void syncCandidateTrees(int nTrees, bool updateStopRule);
-public:
+ /**
+ MPI: synchronize tree of current iteration with master
+ will update candidateset_changed
+ @param curTree current tree
+
+ */
+ void syncCurrentTree();
+
+ /**
+ MPI: Master sends stop message to all workers
+ */
+ void sendStopMessage();
/**
* Generate the initial parsimony/random trees, called by initCandidateTreeSet
@@ -616,7 +717,6 @@ public:
*/
void initCandidateTreeSet(int nParTrees, int nNNITrees);
-
/**
* Generate the initial tree (usually used for model parameter estimation)
* @param dist_file only needed for BIONJ tree
@@ -637,9 +737,6 @@ public:
*/
topol* pllBestTree;
- CandidateSet candidateTrees;
-
-
/****** following variables are for ultra-fast bootstrap *******/
/** TRUE to save also branch lengths into treels_newick */
@@ -669,6 +766,12 @@ public:
/** vector of bootstrap alignments generated */
vector<BootValType* > boot_samples;
+ /** starting sample for UFBoot, used for MPI */
+ int sample_start;
+
+ /** end sample for UFBoot, used for MPI */
+ int sample_end;
+
/** newick string of corresponding bootstrap trees */
StrVector boot_trees;
@@ -715,7 +818,6 @@ protected:
*/
vector<NNIInfo> nni_info;
-
bool estimate_nni_cutoff;
double nni_cutoff;
@@ -727,11 +829,12 @@ protected:
ofstream outNNI;
protected:
- bool print_tree_lh;
+ //bool print_tree_lh;
- int write_intermediate_trees;
+ //int write_intermediate_trees;
ofstream out_treels, out_treelh, out_sitelh, out_treebetter;
+ string treels_name, out_lh_file, site_lh_file;
void estimateNNICutoff(Params* params);
@@ -741,6 +844,12 @@ protected:
int duplication_counter;
+ // MPI: vector of size = num processes, true if master should send candidate set to worker
+ BoolVector candidateset_changed;
+
+ // true if best candidate tree is changed
+ bool bestcandidate_changed;
+
/**
number of IQPNNI iterations
*/
@@ -843,9 +952,56 @@ protected:
void estDeltaMin();
-};
+ void convertNNI2Splits(SplitIntMap &nniSplits, int numNNIs, vector<NNIMove> &compatibleNNIs);
+
+ string generateParsimonyTree(int randomSeed);
-void estimateNNICutoff(Params &params);
+#ifdef _IQTREE_MPI
+ /**
+ * Receive trees from other processes and add them to the candidate set
+ *
+ * @param allTrees
+ * If true, wait for tree from every node
+ * If false, only collect trees that have been sent
+ * @param maxNumTrees
+ * Only received up to maxNumTrees to prevent the function to block because it can constantly receive
+ * new trees
+ * @param updateStopRule
+ * To update the stop rule or not
+ */
+ bool MPI_CollectTrees(bool allTrees, int maxNumTrees, bool updateStopRule);
+#endif
+
+ double doTreePerturbation();
+
+ void estimateLoglCutoffBS();
+
+ //void estimateNNICutoff(Params &params);
+
+public:
+ /**
+ * Return best tree string from the candidate set
+ *
+ * @param numTrees
+ * Number of best trees to return
+ * @return
+ * A string vector of trees
+ */
+ vector<string> getBestTrees(int numTrees = 0);
+ /**
+ * Print the iteration number and the tree score
+ */
+ void printIterationInfo(int sourceProcID);
+ /**
+ * Return branches that are 2 branches away from the branches, on which NNIs were applied
+ * in the previous NNI steps.
+ * @param
+ * previousNNIBranches[IN] a set of branches on which NNIs were performed in the previous NNI step.
+ * @return
+ * a set of branches, on which NNIs should be evaluated for the current NNI steps
+ */
+ Branches getReducedListOfNNIBranches(Branches &previousNNIBranches);
+};
#endif
diff --git a/iqtree_config.h.in b/iqtree_config.h.in
index 9d3e17c..bf2233e 100644
--- a/iqtree_config.h.in
+++ b/iqtree_config.h.in
@@ -12,3 +12,6 @@
/*#cmakedefine HAVE_PCLOSE*/
/* does the platform provide GlobalMemoryStatusEx functions? */
#cmakedefine HAVE_GLOBALMEMORYSTATUSEX
+
+/* does the platform provide backtrace functions? */
+#cmakedefine Backtrace_FOUND
diff --git a/lpwrapper.c b/lpwrapper.c
index 2b9ad4e..263d6f9 100644
--- a/lpwrapper.c
+++ b/lpwrapper.c
@@ -43,7 +43,7 @@ int lp_solve(char *filename, int ntaxa, double *score, double *variables, int ve
//write_lp(lp, name2);
if (lp == NULL) {
- printf("Could not create an LP_SOLVE instance!\n");
+ printf("Could not create an LP_SOLVE getInstance!\n");
return 1;
}
diff --git a/memslot.cpp b/memslot.cpp
new file mode 100644
index 0000000..e56b807
--- /dev/null
+++ b/memslot.cpp
@@ -0,0 +1,254 @@
+/***************************************************************************
+ * Copyright (C) 2009-2016 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
+#include "phylotree.h"
+#include "memslot.h"
+
+const int MEM_LOCKED = 1;
+const int MEM_SPECIAL = 2;
+
+/**
+ * Create num_slot slots and point each one at its own chunk of the tree's
+ * central partial_lh / scale_num buffers.
+ * Does nothing unless Params::lh_mem_save is LM_MEM_SAVE.
+ * @param tree tree providing the central buffers and the per-slot chunk sizes
+ * @param num_slot number of slots to create
+ */
+void MemSlotVector::init(PhyloTree *tree, int num_slot) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return;
+    // capacity for two entries beyond num_slot
+    reserve(num_slot+2);
+    resize(num_slot);
+    size_t lh_block = tree->getPartialLhSize();
+    size_t scale_block = tree->getScaleNumSize();
+    reset();
+    // slot k owns the k-th chunk of each central buffer
+    size_t nslots = size();
+    for (size_t k = 0; k < nslots; k++) {
+        MemSlot &slot = at(k);
+        slot.partial_lh = tree->central_partial_lh + lh_block*k;
+        slot.scale_num = tree->central_scale_num + scale_block*k;
+    }
+}
+
+/**
+ * Mark every slot as unassigned and unlocked, forget all neighbor-to-slot
+ * mappings and restart the free-slot counter.
+ * Does nothing unless Params::lh_mem_save is LM_MEM_SAVE.
+ */
+void MemSlotVector::reset() {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return;
+    for (size_t k = 0; k < size(); k++) {
+        MemSlot &slot = at(k);
+        slot.status = 0;
+        slot.nei = NULL;
+    }
+    nei_id_map.clear();
+    free_count = 0;
+}
+
+
+/**
+ * Look up the slot recorded for nei in nei_id_map.
+ * The neighbor must have an entry; this is only checked by an assertion.
+ * The slot found is not guaranteed to still be assigned to nei.
+ * @return iterator to the recorded slot
+ */
+MemSlotVector::iterator MemSlotVector::findNei(PhyloNeighbor *nei) {
+    auto entry = nei_id_map.find(nei);
+    assert(entry != nei_id_map.end());
+    int slot_id = entry->second;
+    return begin() + slot_id;
+}
+
+/**
+ * Assign the slot at `it` to nei: nei takes over the slot's buffers and the
+ * lookup map records the slot index for nei.
+ */
+void MemSlotVector::addNei(PhyloNeighbor *nei, iterator it) {
+    int slot_id = it - begin();
+    it->nei = nei;
+    nei->partial_lh = it->partial_lh;
+    nei->scale_num = it->scale_num;
+    nei_id_map[nei] = slot_id;
+}
+
+
+/**
+ * Append a slot that wraps the buffers nei already owns. The slot is flagged
+ * MEM_SPECIAL and MEM_LOCKED, so allocate() never hands it out.
+ * Does nothing unless Params::lh_mem_save is LM_MEM_SAVE.
+ * NOTE(review): push_back can reallocate once the capacity reserved in init()
+ * is used up, which would invalidate iterators held by callers - confirm.
+ */
+void MemSlotVector::addSpecialNei(PhyloNeighbor *nei) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return;
+    MemSlot ms;
+    ms.status = MEM_SPECIAL + MEM_LOCKED;
+    ms.nei = nei;
+    ms.partial_lh = nei->partial_lh;
+    ms.scale_num = nei->scale_num;
+    // initialize every field: the slot is copied into the vector below
+    ms.saved_nei = NULL;
+    push_back(ms);
+    nei_id_map[nei] = size()-1;
+}
+
+/**
+ * Pop every trailing slot flagged MEM_SPECIAL and drop its neighbor from the
+ * lookup map.
+ * Does nothing unless Params::lh_mem_save is LM_MEM_SAVE.
+ */
+void MemSlotVector::eraseSpecialNei() {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return;
+    // stop once the vector is empty: back() on an empty vector is undefined
+    while (!empty() && (back().status & MEM_SPECIAL)) {
+        nei_id_map.erase(back().nei);
+        pop_back();
+    }
+}
+
+
+/**
+ * Lock the slot recorded for nei so that allocate() will not reuse it.
+ * @return true if the slot was locked by this call; false when memory saving
+ *         is off, nei points to a leaf, or the slot is a special one
+ */
+bool MemSlotVector::lock(PhyloNeighbor *nei) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE || nei->node->isLeaf())
+        return false;
+    MemSlot &slot = *findNei(nei);
+    if (slot.status & MEM_SPECIAL)
+        return false;
+    // locking twice is a caller error
+    assert((slot.status & MEM_LOCKED) == 0);
+    slot.status |= MEM_LOCKED;
+    return true;
+}
+
+/**
+ * Clear the lock on the slot recorded for nei.
+ * No-op when memory saving is off, nei points to a leaf, or the slot is a
+ * special one.
+ */
+void MemSlotVector::unlock(PhyloNeighbor *nei) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE || nei->node->isLeaf())
+        return;
+    MemSlot &slot = *findNei(nei);
+    if ((slot.status & MEM_SPECIAL) == 0) {
+        // unlocking a slot that is not locked is a caller error
+        assert((slot.status & MEM_LOCKED) != 0);
+        slot.status &= ~MEM_LOCKED;
+    }
+}
+
+/**
+ * Tell whether the slot recorded for nei is currently locked.
+ * @return false when memory saving is off, nei points to a leaf, or the slot
+ *         is a special one; otherwise the state of the MEM_LOCKED flag
+ */
+bool MemSlotVector::locked(PhyloNeighbor *nei) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE || nei->node->isLeaf())
+        return false;
+    const MemSlot &slot = *findNei(nei);
+    if (slot.status & MEM_SPECIAL)
+        return false;
+    return (slot.status & MEM_LOCKED) != 0;
+}
+
+/**
+ * Give nei a slot: the next never-used slot if one is left, otherwise the
+ * unlocked, non-special slot whose current neighbor has the smallest size.
+ * The evicted neighbor gets clearPartialLh() called on it.
+ * @return index of the slot given to nei, or -1 when memory saving is off or
+ *         every slot is locked or special
+ */
+int MemSlotVector::allocate(PhyloNeighbor *nei) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return -1;
+
+    // fast path: slots [free_count, ...) have never been handed out
+    if (free_count < size() && (at(free_count).status & MEM_SPECIAL) == 0) {
+        iterator fresh = begin() + free_count;
+        assert(fresh->nei == NULL);
+        addNei(nei, fresh);
+        return free_count++;
+    }
+
+    // slow path: evict the cheapest unlocked, non-special slot
+    iterator victim = end();
+    int smallest = INT_MAX;
+    for (iterator cand = begin(); cand != end(); ++cand) {
+        if (cand->status & (MEM_LOCKED | MEM_SPECIAL))
+            continue;
+        if (!(smallest > cand->nei->size))
+            continue;
+        victim = cand;
+        smallest = cand->nei->size;
+        // 2 is the minimum size, nothing cheaper can follow
+        if (smallest == 2)
+            break;
+    }
+
+    if (victim == end())
+        return -1;
+
+    // the previous owner loses its partial likelihoods
+    victim->nei->clearPartialLh();
+
+    addNei(nei, victim);
+    return victim-begin();
+}
+
+/**
+ * Make sure the slot recorded for nei is really assigned to nei. If another
+ * neighbor holds it, that neighbor gets clearPartialLh() called on it and
+ * the slot is handed back to nei.
+ * Does nothing unless Params::lh_mem_save is LM_MEM_SAVE.
+ */
+void MemSlotVector::update(PhyloNeighbor *nei) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return;
+
+    iterator slot = findNei(nei);
+    if (slot->nei == nei)
+        return;
+
+    // evict the current holder, then re-assign the slot
+    slot->nei->clearPartialLh();
+    addNei(nei, slot);
+}
+
+/*
+void MemSlotVector::cleanup() {
+ if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+ return;
+ unordered_map<PhyloNeighbor*, iterator> new_map;
+ for (auto it = nei_id_map.begin(); it != nei_id_map.end(); it++)
+ if (it->first != it->second->nei) {
+ it->first->partial_lh_computed &= ~1; // clear bit
+ it->first->partial_lh = NULL;
+ it->first->scale_num = NULL;
+ } else {
+ new_map[it->first] = it->second;
+ }
+ nei_id_map = new_map;
+ assert(nei_id_map.size() == size());
+}
+*/
+
+/**
+ * Move the likelihood buffers of taken_nei over to nei and mark taken_nei as
+ * not computed. The buffer hand-over happens regardless of the memory-saving
+ * mode; the slot bookkeeping only when lh_mem_save is LM_MEM_SAVE.
+ * @param nei neighbor receiving the buffers
+ * @param taken_nei neighbor giving them up; must have a partial_lh
+ */
+void MemSlotVector::takeover(PhyloNeighbor *nei, PhyloNeighbor *taken_nei) {
+    assert(taken_nei->partial_lh);
+    nei->partial_lh = taken_nei->partial_lh;
+    nei->scale_num = taken_nei->scale_num;
+    taken_nei->partial_lh = NULL;
+    taken_nei->scale_num = NULL;
+    taken_nei->partial_lh_computed &= ~1; // clear bit
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return;
+
+    // re-key the lookup map from taken_nei to nei
+    iterator slot = findNei(taken_nei);
+    int slot_id = slot - begin();
+    nei_id_map.erase(nei_id_map.find(taken_nei));
+    nei_id_map[nei] = slot_id;
+    // the slot itself changes owner only if taken_nei actually held it
+    if (slot->nei == taken_nei)
+        slot->nei = nei;
+}
+
+/**
+ * Temporarily point the slot recorded for old_nei at new_nei and its buffers.
+ * The previous owner is remembered in saved_nei and the slot becomes locked
+ * and special until restore() is called. old_nei keeps its map entry.
+ * Prints a "slot N replaced" line to stdout.
+ * Does nothing unless Params::lh_mem_save is LM_MEM_SAVE.
+ */
+void MemSlotVector::replace(PhyloNeighbor *new_nei, PhyloNeighbor *old_nei) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return;
+    iterator slot = findNei(old_nei);
+    assert(slot->partial_lh == old_nei->partial_lh);
+    int slot_id = slot - begin();
+    slot->saved_nei = slot->nei;
+    slot->nei = new_nei;
+    slot->partial_lh = new_nei->partial_lh;
+    slot->scale_num = new_nei->scale_num;
+    slot->status = MEM_LOCKED + MEM_SPECIAL;
+    nei_id_map[new_nei] = slot_id;
+    cout << "slot " << slot_id << " replaced" << endl;
+}
+
+/**
+ * Undo replace(): give the slot back to the neighbor stored in saved_nei,
+ * point it at old_nei's buffers again, clear its status flags and drop
+ * new_nei from the lookup map.
+ * Prints a "slot N restored" line to stdout.
+ * Does nothing unless Params::lh_mem_save is LM_MEM_SAVE.
+ */
+void MemSlotVector::restore(PhyloNeighbor *new_nei, PhyloNeighbor *old_nei) {
+    if (Params::getInstance().lh_mem_save != LM_MEM_SAVE)
+        return;
+    iterator it = findNei(new_nei);
+    assert(it->nei == new_nei);
+    // check old_nei's entry without operator[], which would insert a bogus
+    // mapping in debug builds when the key is missing
+    assert(nei_id_map.count(old_nei) != 0 && nei_id_map.find(old_nei)->second == it-begin());
+    it->nei = it->saved_nei;
+    it->saved_nei = NULL;
+    it->partial_lh = old_nei->partial_lh;
+    it->scale_num = old_nei->scale_num;
+    it->status = 0;
+    nei_id_map.erase(new_nei);
+    cout << "slot " << distance(begin(), it) << " restored" << endl;
+}
diff --git a/memslot.h b/memslot.h
new file mode 100644
index 0000000..dfb3f3f
--- /dev/null
+++ b/memslot.h
@@ -0,0 +1,111 @@
+/***************************************************************************
+ * Copyright (C) 2009-2016 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
+#ifndef MEMSLOT_H
+#define MEMSLOT_H
+
+#ifndef PHYLOTREE_H
+#error "Please #include phylotree.h before including this header file"
+#endif
+
+/**
+ one memory slot, used for memory saving technique
+*/
+struct MemSlot {
+ int status; // status of this slot: bit flags (MEM_LOCKED, MEM_SPECIAL in memslot.cpp)
+ PhyloNeighbor *nei; // neighbor assigned to this slot
+ double *partial_lh; // partial_lh assigned to this slot
+ UBYTE *scale_num; // scale_num assigned to this slot
+
+ PhyloNeighbor *saved_nei; // owner stashed by MemSlotVector::replace(), put back by restore()
+};
+
+/**
+ all memory slots, used for memory saving technique
+*/
+class MemSlotVector : public vector<MemSlot> {
+public:
+
+ /** initialize with a specified number of slots */
+ void init(PhyloTree *tree, int num_slot);
+
+ /**
+ lock the memory assigned to nei
+ @param nei neighbor to lock
+ @return TRUE if successfully locked, FALSE otherwise
+ (memory saving off, nei is a leaf, or the slot is special)
+ */
+ bool lock(PhyloNeighbor *nei);
+
+ /** unlock the memory assigned to nei */
+ void unlock(PhyloNeighbor *nei);
+
+ /** test if the memory assigned to nei is locked or not */
+ bool locked(PhyloNeighbor *nei);
+
+ /**
+ allocate free or unlocked memory to nei
+ @return ID of the slot given to nei, or -1 if none is available
+ */
+ int allocate(PhyloNeighbor *nei);
+
+ /** make sure the slot recorded for nei is assigned to nei again */
+ void update(PhyloNeighbor *nei);
+
+ /** find the slot of a neighbor; nei must already be registered */
+ iterator findNei(PhyloNeighbor *nei);
+
+ /** assign the slot at the specified iterator to a neighbor */
+ void addNei(PhyloNeighbor *nei, iterator it);
+
+ /** reset everything */
+ void reset();
+
+ /**
+ clean up all neighbors where partial_lh_computed = 0
+ NOTE: the definition in memslot.cpp is commented out
+ */
+ void cleanup();
+
+ /** take over the memory of another neighbor */
+ void takeover(PhyloNeighbor *nei, PhyloNeighbor *taken_nei);
+
+ /** add special neighbor e.g. for NNI */
+ void addSpecialNei(PhyloNeighbor *nei);
+
+ /** erase special neighbors e.g. for NNI */
+ void eraseSpecialNei();
+
+ /** replace a neighbor, used for NNI */
+ void replace(PhyloNeighbor *new_nei, PhyloNeighbor *old_nei);
+
+ /** restore neighbor, after calling replace */
+ void restore(PhyloNeighbor *new_nei, PhyloNeighbor *old_nei);
+
+protected:
+
+
+ /**
+ map from neighbor to slot ID for fast lookup
+ IMPORTANT: mapping to ID instead of (unsafe) iterator
+ */
+ unordered_map<PhyloNeighbor*, int> nei_id_map;
+
+ /** ID of the next slot that has never been handed out by allocate() */
+ int free_count;
+
+};
+
+
+#endif // MEMSLOT_H
diff --git a/mexttree.cpp b/mexttree.cpp
index d9ee2f7..29f5789 100644
--- a/mexttree.cpp
+++ b/mexttree.cpp
@@ -59,7 +59,7 @@ void MExtTree::generateRandomTree(TreeGenType tree_type, Params &params, bool bi
void MExtTree::setZeroInternalBranches(int num_zero_len) {
NodeVector nodes, nodes2;
- getAllInnerBranches(nodes, nodes2);
+ generateNNIBraches(nodes, nodes2);
if (num_zero_len > nodes.size()) outError("The specified number of zero branches is too much");
for (int i = 0; i < num_zero_len;) {
int id = random_int(nodes.size());
@@ -367,10 +367,79 @@ void MExtTree::generateYuleHarding(Params &params, bool binary) {
}
+/**
+ * Build a random tree that contains the given constraint tree: the
+ * constraint tree is copied, its multifurcations are resolved, and every
+ * taxon of taxnames that is not yet in the tree is attached next to a
+ * randomly chosen existing leaf. All branches then get random lengths.
+ * @param params program parameters, passed on to randomLen()
+ * @param constraint_tree tree to start from
+ * @param taxnames names of all taxa; at least 3 are required
+ */
+void MExtTree::generateConstrainedYuleHarding(Params &params, MTree* constraint_tree, StrVector &taxnames) {
+    int size = taxnames.size();
+    if (size < 3)
+        outError(ERR_FEW_TAXA);
+    NodeVector myleaves;
+    NodeVector innodes;
+    StrVector names;
+    StringIntMap namemap;
+    StrVector::iterator it;
+
+    // copy constraint tree and resolve multifurcation
+    copyTree(constraint_tree);
+    resolveMultifurcation();
+
+    getTaxa(myleaves);
+    getTaxaName(names);
+    for (it = names.begin(); it != names.end(); it++)
+        namemap[*it] = 1;
+
+    // add the remaining taxa names
+    for (it = taxnames.begin(); it != taxnames.end(); it++)
+        if (namemap.find(*it) == namemap.end())
+            names.push_back(*it);
+    assert(names.size() == taxnames.size());
+    // randomize the order of the names from position leafNum onwards
+    // (presumably exactly the names not yet in the tree - confirm)
+    my_random_shuffle(names.begin()+leafNum, names.end());
+
+    // additionally add a leaf
+    for (; leafNum < size; leafNum++)
+    {
+        int index;
+        index = random_int(leafNum);
+        Node *leaf = myleaves[index];
+        Node *dad = leaf->neighbors[0]->node;
+
+        Node *newleaf = newNode(leafNum, names[leafNum].c_str());
+        Node *node = newNode();
+
+        // redirect the current leaf
+        node->addNeighbor(leaf, -1.0);
+        leaf->updateNeighbor(dad, node);
+
+        // add the new leaf
+        node->addNeighbor(newleaf, -1.0);
+        newleaf->addNeighbor(node, -1.0);
+
+        // connect dad and new node
+        dad->updateNeighbor(leaf, node);
+        node->addNeighbor(dad, -1.0);
+
+        myleaves.push_back(newleaf);
+    }
+
+    // assign random branch lengths; the two vectors are reused to hold the
+    // two end nodes of every branch
+    myleaves.clear();
+    innodes.clear();
+    getBranches(myleaves, innodes);
+    for (int i = 0; i < myleaves.size(); i++) {
+        double len = randomLen(params);
+        myleaves[i]->findNeighbor(innodes[i])->length = len;
+        innodes[i]->findNeighbor(myleaves[i])->length = len;
+    }
+
+
+    nodeNum = leafNum;
+    initializeTree();
+
+}
+
void MExtTree::generateStarTree(Params &params) {
generateYuleHarding(params);
NodeVector nodes, nodes2;
- getAllInnerBranches(nodes, nodes2);
+ generateNNIBraches(nodes, nodes2);
for (int i = 0; i < nodes.size(); i++) {
nodes[i]->findNeighbor(nodes2[i])->length = 0.0;
nodes2[i]->findNeighbor(nodes[i])->length = 0.0;
diff --git a/mexttree.h b/mexttree.h
index b7d501c..bf4f72d 100644
--- a/mexttree.h
+++ b/mexttree.h
@@ -75,6 +75,16 @@ public:
void generateYuleHarding(Params &params, bool binary = true);
/**
+ generate a random tree following Yule-Harding model satisfying a constraint tree
+ @param params program parameters
+ @param constraint_tree a (multifurcating) constraint tree
+ @param taxnames names of all taxa; those missing from the constraint tree are added as new leaves
+ */
+ void generateConstrainedYuleHarding(Params &params, MTree* constraint_tree, StrVector &taxnames);
+
+
+ /**
generate a random tree following uniform model
@param size number of taxa
@param binary TRUE if you want to generate a binary tree
diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp
index e866e63..4e8a91d 100644
--- a/model/modelcodon.cpp
+++ b/model/modelcodon.cpp
@@ -544,6 +544,14 @@ void ModelCodon::computeRateAttributes() {
}
if (verbose_mode >= VB_MAX) {
+
+ // make cost matrix fulfill triangular inequality
+ for (int k = 0; k < 20; k++)
+ for (i = 0; i < 20; i++)
+ for (j = 0; j < 20; j++)
+ if (aa_cost_change[i*20+j] > aa_cost_change[i*20+k] + aa_cost_change[k*20+j])
+ aa_cost_change[i*20+j] = aa_cost_change[i*20+k] + aa_cost_change[k*20+j];
+
cout << "cost matrix by number of nt changes for TNT use" << endl;
cout << "smatrix =1 (aa_nt_changes)";
for (i = 0; i < 19; i++)
diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp
index 8bbe92b..88cb0ab 100644
--- a/model/modelfactory.cpp
+++ b/model/modelfactory.cpp
@@ -338,6 +338,8 @@ ModelFactory::ModelFactory(Params &params, PhyloTree *tree, ModelsBlock *models_
delete [] rates;
delete [] state_freq;
+ models->joinEigenMemory();
+
// delete information of the old alignment
// tree->aln->ordered_pattern.clear();
// tree->deleteAllPartialLh();
@@ -358,15 +360,21 @@ ModelFactory::ModelFactory(Params &params, PhyloTree *tree, ModelsBlock *models_
// if (unobserved_ptns.size() <= 0)
// outError("Invalid use of +ASC because all constant patterns are observed in the alignment");
if (tree->aln->frac_invariant_sites > 0) {
- cerr << tree->aln->frac_invariant_sites*tree->aln->getNSite() << " invariant sites are observed in the alignment (see below)" << endl;
- for (Alignment::iterator pit = tree->aln->begin(); pit != tree->aln->end(); pit++)
- if (pit->isInvariant()) {
- string pat_str = "";
- for (Pattern::iterator it = pit->begin(); it != pit->end(); it++)
- pat_str += tree->aln->convertStateBackStr(*it);
- cerr << pat_str << " is invariant site pattern" << endl;
- }
- outError("Invalid use of +ASC in the presence of invariant sites");
+// cerr << tree->aln->frac_invariant_sites*tree->aln->getNSite() << " invariant sites observed in the alignment" << endl;
+// for (Alignment::iterator pit = tree->aln->begin(); pit != tree->aln->end(); pit++)
+// if (pit->isInvariant()) {
+// string pat_str = "";
+// for (Pattern::iterator it = pit->begin(); it != pit->end(); it++)
+// pat_str += tree->aln->convertStateBackStr(*it);
+// cerr << pat_str << " is invariant site pattern" << endl;
+// }
+ if (!params.partition_file) {
+ string varsites_file = ((string)params.out_prefix + ".varsites.phy");
+ tree->aln->printPhylip(varsites_file.c_str(), false, NULL, false, true);
+ cerr << "For your convenience alignment with variable sites printed to " << varsites_file << endl;
+ }
+ outError("Invalid use of +ASC because of " + convertIntToString(tree->aln->frac_invariant_sites*tree->aln->getNSite()) +
+ " invariant sites in the alignment");
}
cout << "Ascertainment bias correction: " << unobserved_ptns.size() << " unobservable constant patterns"<< endl;
rate_str = rate_str.substr(0, posasc) + rate_str.substr(posasc+4);
@@ -678,7 +686,7 @@ double ModelFactory::optimizeAllParameters(double gradient_epsilon) {
}
double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) {
- if (!site_rate->isGammai())
+ if (!site_rate->isGammai() || site_rate->isFixPInvar() || site_rate->isFixGammaShape() || site_rate->getTree()->aln->frac_const_sites == 0.0)
return optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon);
double begin_time = getRealTime();
@@ -844,7 +852,7 @@ double ModelFactory::optimizeParameters(int fixed_len, bool write_info,
assert(model);
assert(site_rate);
- double defaultEpsilon = logl_epsilon;
+// double defaultEpsilon = logl_epsilon;
double begin_time = getRealTime();
double cur_lh;
@@ -856,7 +864,7 @@ double ModelFactory::optimizeParameters(int fixed_len, bool write_info,
// no optimization of branch length in the first round
cur_lh = tree->computeLikelihood();
tree->setCurScore(cur_lh);
- if (verbose_mode >= VB_MED || write_info)
+ if (write_info)
cout << "1. Initial log-likelihood: " << cur_lh << endl;
// For UpperBounds -----------
@@ -899,7 +907,7 @@ double ModelFactory::optimizeParameters(int fixed_len, bool write_info,
}
if (new_lh > cur_lh + logl_epsilon) {
cur_lh = new_lh;
- if (verbose_mode >= VB_MED || write_info)
+ if (write_info)
cout << i << ". Current log-likelihood: " << cur_lh << endl;
} else {
site_rate->classifyRates(new_lh);
diff --git a/model/modelmixture.cpp b/model/modelmixture.cpp
index bca673d..6b6bfdf 100644
--- a/model/modelmixture.cpp
+++ b/model/modelmixture.cpp
@@ -14,7 +14,9 @@
#include "modelmorphology.h"
#include "modelset.h"
#include "modelmixture.h"
-#include "phylokernelmixture.h"
+//#include "phylokernelmixture.h"
+
+using namespace std;
const string builtin_mixmodels_definition =
"#nexus\n\
@@ -1426,6 +1428,7 @@ double ModelMixture::optimizeWeights() {
for (c = 0; c < nmix; c++) {
lk_ptn += this_lk_cat[c];
}
+ assert(lk_ptn != 0.0);
lk_ptn = phylo_tree->ptn_freq[ptn] / lk_ptn;
for (c = 0; c < nmix; c++) {
new_prop[c] += this_lk_cat[c] * lk_ptn;
@@ -1435,9 +1438,14 @@ double ModelMixture::optimizeWeights() {
double new_pinvar = 0.0;
for (c = 0; c < nmix; c++) {
new_prop[c] /= phylo_tree->getAlnNSite();
+ // Make sure that probabilities do not get zero
+ if (new_prop[c] < 1e-10) new_prop[c] = 1e-10;
// check for convergence
converged = converged && (fabs(prop[c]-new_prop[c]) < 1e-4);
ratio_prop[c] = new_prop[c] / prop[c];
+ if (std::isnan(ratio_prop[c])) {
+ cerr << "BUG: " << new_prop[c] << " " << prop[c] << " " << ratio_prop[c] << endl;
+ }
prop[c] = new_prop[c];
new_pinvar += prop[c];
}
@@ -1474,7 +1482,8 @@ double ModelMixture::optimizeWithEM(double gradient_epsilon) {
tree->copyPhyloTree(phylo_tree);
tree->optimize_by_newton = phylo_tree->optimize_by_newton;
- tree->setLikelihoodKernel(phylo_tree->sse);
+ tree->setParams(phylo_tree->params);
+ tree->setLikelihoodKernel(phylo_tree->sse, phylo_tree->num_threads);
// initialize model
ModelFactory *model_fac = new ModelFactory();
model_fac->joint_optimize = phylo_tree->params->optimize_model_rate_joint;
@@ -1506,6 +1515,7 @@ double ModelMixture::optimizeWithEM(double gradient_epsilon) {
for (c = 0; c < nmix; c++) {
lk_ptn += this_lk_cat[c];
}
+ assert(lk_ptn != 0.0);
lk_ptn = phylo_tree->ptn_freq[ptn] / lk_ptn;
// transform _pattern_lh_cat into posterior probabilities of each category
@@ -1523,6 +1533,7 @@ double ModelMixture::optimizeWithEM(double gradient_epsilon) {
double new_pinvar = 0.0;
for (c = 0; c < nmix; c++) {
new_prop[c] = new_prop[c] / phylo_tree->getAlnNSite();
+ if (new_prop[c] < 1e-10) new_prop[c] = 1e-10;
// check for convergence
converged = converged && (fabs(prop[c]-new_prop[c]) < 1e-4);
prop[c] = new_prop[c];
@@ -1608,6 +1619,17 @@ double ModelMixture::optimizeParameters(double gradient_epsilon) {
return score;
}
+/**
+ * Emit a warning when any mixture weight is below MIN_MIXTURE_PROP*0.1.
+ * @return always false; the condition is only reported through outWarning()
+ */
+bool ModelMixture::isUnstableParameters() {
+    int c, ncategory = size();
+    for (c = 0; c < ncategory; c++)
+        if (prop[c] < MIN_MIXTURE_PROP*0.1) {
+            outWarning("The mixture model might be overfitting because some mixture weights are estimated close to zero");
+            // NOTE(review): an unreachable 'return true;' followed this break and
+            // was removed; confirm whether callers should see true here instead
+            break;
+        }
+    return false;
+}
+
void ModelMixture::decomposeRateMatrix() {
for (iterator it = begin(); it != end(); it++)
(*it)->decomposeRateMatrix();
diff --git a/model/modelmixture.h b/model/modelmixture.h
index 0ace5c5..5c3caa3 100644
--- a/model/modelmixture.h
+++ b/model/modelmixture.h
@@ -86,6 +86,12 @@ public:
*/
virtual int getNMixtures() {return size(); }
+ /**
+ * @param cat mixture class
+ * @return weight of a mixture model component
+ */
+ virtual double getMixtureWeight(int cat) { return prop[cat]; }
+
/**
@return the number of dimensions
*/
@@ -124,6 +130,11 @@ public:
virtual double optimizeParameters(double gradient_epsilon);
/**
+ * @return TRUE if parameters are at the boundary that may cause numerical unstability
+ */
+ virtual bool isUnstableParameters();
+
+ /**
decompose the rate matrix into eigenvalues and eigenvectors
*/
virtual void decomposeRateMatrix();
diff --git a/model/modelpomo.cpp b/model/modelpomo.cpp
index c9c08e2..268508d 100644
--- a/model/modelpomo.cpp
+++ b/model/modelpomo.cpp
@@ -8,11 +8,10 @@
#include "modelpomo.h"
ModelPoMo::ModelPoMo() {
- // TODO Auto-generated constructor stub
}
ModelPoMo::~ModelPoMo() {
- // TODO Auto-generated destructor stub
+
}
diff --git a/model/modelset.cpp b/model/modelset.cpp
index 83c7d1a..83feabb 100644
--- a/model/modelset.cpp
+++ b/model/modelset.cpp
@@ -28,6 +28,8 @@ ModelSet::ModelSet(const char *model_name, PhyloTree *tree) : ModelGTR(tree)
void ModelSet::computeTransMatrix(double time, double* trans_matrix)
{
+ // TODO not working with vectorization
+ assert(0);
for (iterator it = begin(); it != end(); it++) {
(*it)->computeTransMatrix(time, trans_matrix);
trans_matrix += (num_states * num_states);
@@ -36,6 +38,8 @@ void ModelSet::computeTransMatrix(double time, double* trans_matrix)
void ModelSet::computeTransMatrixFreq(double time, double* trans_matrix)
{
+ // TODO not working with vectorization
+ assert(0);
for (iterator it = begin(); it != end(); it++) {
(*it)->computeTransMatrixFreq(time, trans_matrix);
trans_matrix += (num_states * num_states);
@@ -44,6 +48,8 @@ void ModelSet::computeTransMatrixFreq(double time, double* trans_matrix)
void ModelSet::computeTransDerv(double time, double* trans_matrix, double* trans_derv1, double* trans_derv2)
{
+ // TODO not working with vectorization
+ assert(0);
for (iterator it = begin(); it != end(); it++) {
(*it)->computeTransDerv(time, trans_matrix, trans_derv1, trans_derv2);
trans_matrix += (num_states * num_states);
@@ -54,6 +60,8 @@ void ModelSet::computeTransDerv(double time, double* trans_matrix, double* trans
void ModelSet::computeTransDervFreq(double time, double rate_val, double* trans_matrix, double* trans_derv1, double* trans_derv2)
{
+ // TODO not working with vectorization
+ assert(0);
for (iterator it = begin(); it != end(); it++) {
(*it)->computeTransDervFreq(time, rate_val, trans_matrix, trans_derv1, trans_derv2);
trans_matrix += (num_states * num_states);
@@ -71,12 +79,50 @@ int ModelSet::getPtnModelID(int ptn)
double ModelSet::computeTrans(double time, int model_id, int state1, int state2) {
- return at(model_id)->computeTrans(time, state1, state2);
+ if (phylo_tree->vector_size == 1)
+ return at(model_id)->computeTrans(time, state1, state2);
+ // temporary fix problem with vectorized eigenvectors
+ int i;
+ int vsize = phylo_tree->vector_size;
+ int states_vsize = num_states*vsize;
+ int model_vec_id = model_id % vsize;
+ int start_ptn = model_id - model_vec_id;
+ double *evec = &eigenvectors[start_ptn*num_states*num_states + model_vec_id + state1*num_states*vsize];
+ double *inv_evec = &inv_eigenvectors[start_ptn*num_states*num_states + model_vec_id + state2*vsize];
+ double *eval = &eigenvalues[start_ptn*num_states + model_vec_id];
+ double trans_prob = 0.0;
+ for (i = 0; i < states_vsize; i+=vsize) {
+ double val = eval[i];
+ double trans = evec[i] * inv_evec[i*num_states] * exp(time * val);
+ trans_prob += trans;
+ }
+ return trans_prob;
}
double ModelSet::computeTrans(double time, int model_id, int state1, int state2, double &derv1, double &derv2) {
- return at(model_id)->computeTrans(time, state1, state2, derv1, derv2);
-
+ if (phylo_tree->vector_size == 1)
+ return at(model_id)->computeTrans(time, state1, state2, derv1, derv2);
+
+ // temporary fix problem with vectorized eigenvectors
+ int i;
+ int vsize = phylo_tree->vector_size;
+ int states_vsize = num_states*vsize;
+ int model_vec_id = model_id % vsize;
+ int start_ptn = model_id - model_vec_id;
+ double *evec = &eigenvectors[start_ptn*num_states*num_states + model_vec_id + state1*num_states*vsize];
+ double *inv_evec = &inv_eigenvectors[start_ptn*num_states*num_states + model_vec_id + state2*vsize];
+ double *eval = &eigenvalues[start_ptn*num_states + model_vec_id];
+ double trans_prob = 0.0;
+ derv1 = derv2 = 0.0;
+ for (i = 0; i < states_vsize; i+=vsize) {
+ double val = eval[i];
+ double trans = evec[i] * inv_evec[i*num_states] * exp(time * val);
+ double trans2 = trans * val;
+ trans_prob += trans;
+ derv1 += trans2;
+ derv2 += trans2 * val;
+ }
+ return trans_prob;
}
int ModelSet::getNDim()
@@ -87,8 +133,9 @@ int ModelSet::getNDim()
void ModelSet::writeInfo(ostream& out)
{
- assert(size());
- if (verbose_mode >= VB_MED) {
+ if (empty())
+ return;
+ if (verbose_mode >= VB_DEBUG) {
int i = 1;
for (iterator it = begin(); it != end(); it++, i++) {
out << "Partition " << i << ":" << endl;
@@ -101,8 +148,40 @@ void ModelSet::writeInfo(ostream& out)
void ModelSet::decomposeRateMatrix()
{
+ // nothing to decompose without member models
+ if (empty())
+ return;
for (iterator it = begin(); it != end(); it++)
(*it)->decomposeRateMatrix();
+ if (phylo_tree->vector_size == 1)
+ return;
+ // rearrange eigen to obey vector_size: within each group of vsize models,
+ // entry x of model i moves to position x*vsize+i
+ size_t vsize = phylo_tree->vector_size;
+ size_t states2 = num_states*num_states;
+ size_t ptn, i, x;
+ // heap scratch buffers: runtime-sized stack arrays are a compiler extension
+ // and grow with num_states*num_states*vsize
+ std::vector<double> new_eval(num_states*vsize);
+ std::vector<double> new_evec(states2*vsize);
+ std::vector<double> new_inv_evec(states2*vsize);
+
+ for (ptn = 0; ptn < size(); ptn += vsize) {
+ double *eval_ptr = &eigenvalues[ptn*num_states];
+ double *evec_ptr = &eigenvectors[ptn*states2];
+ double *inv_evec_ptr = &inv_eigenvectors[ptn*states2];
+ for (i = 0; i < vsize; i++) {
+ for (x = 0; x < num_states; x++)
+ new_eval[x*vsize+i] = eval_ptr[x];
+ for (x = 0; x < states2; x++) {
+ new_evec[x*vsize+i] = evec_ptr[x];
+ new_inv_evec[x*vsize+i] = inv_evec_ptr[x];
+ }
+ eval_ptr += num_states;
+ evec_ptr += states2;
+ inv_evec_ptr += states2;
+ }
+ // copy new values
+ memcpy(&eigenvalues[ptn*num_states], &new_eval[0], sizeof(double)*num_states*vsize);
+ memcpy(&eigenvectors[ptn*states2], &new_evec[0], sizeof(double)*states2*vsize);
+ memcpy(&inv_eigenvectors[ptn*states2], &new_inv_evec[0], sizeof(double)*states2*vsize);
+ }
}
@@ -124,8 +203,49 @@ void ModelSet::setVariables(double* variables)
ModelSet::~ModelSet()
{
- for (reverse_iterator rit = rbegin(); rit != rend(); rit++)
- delete (*rit);
-
+ for (reverse_iterator rit = rbegin(); rit != rend(); rit++) {
+ (*rit)->eigenvalues = NULL;
+ (*rit)->eigenvectors = NULL;
+ (*rit)->inv_eigenvectors = NULL;
+ delete (*rit);
+ }
}
+/**
+ * Move the eigenvalues / eigenvectors / inverse eigenvectors of all member
+ * models into three contiguous arrays owned by this ModelSet and repoint
+ * each member at its section. The arrays are sized for
+ * get_safe_upper_limit(size()) models; sections beyond size() are filled
+ * with copies of the preceding section.
+ * NOTE(review): after the first call the members point into the joint
+ * arrays, so a second call would read and free memory released at the top of
+ * this function - looks like it must be called only once; confirm.
+ */
+void ModelSet::joinEigenMemory() {
+    size_t nmixtures = get_safe_upper_limit(size());
+    if (eigenvalues) aligned_free(eigenvalues);
+    if (eigenvectors) aligned_free(eigenvectors);
+    if (inv_eigenvectors) aligned_free(inv_eigenvectors);
+
+    size_t states2 = num_states*num_states;
+
+    eigenvalues = aligned_alloc<double>(num_states*nmixtures);
+    eigenvectors = aligned_alloc<double>(states2*nmixtures);
+    inv_eigenvectors = aligned_alloc<double>(states2*nmixtures);
+
+    // assigning memory for individual models
+    size_t m = 0;
+    for (iterator it = begin(); it != end(); it++, m++) {
+        // copy each member buffer into its section and release it; a member
+        // without a buffer is skipped instead of being passed to memcpy
+        if ((*it)->eigenvalues) {
+            memcpy(&eigenvalues[m*num_states], (*it)->eigenvalues, num_states*sizeof(double));
+            aligned_free((*it)->eigenvalues);
+        }
+        if ((*it)->eigenvectors) {
+            memcpy(&eigenvectors[m*states2], (*it)->eigenvectors, states2*sizeof(double));
+            aligned_free((*it)->eigenvectors);
+        }
+        if ((*it)->inv_eigenvectors) {
+            memcpy(&inv_eigenvectors[m*states2], (*it)->inv_eigenvectors, states2*sizeof(double));
+            aligned_free((*it)->inv_eigenvectors);
+        }
+
+        // and assign new memory
+        (*it)->eigenvalues = &eigenvalues[m*num_states];
+        (*it)->eigenvectors = &eigenvectors[m*states2];
+        (*it)->inv_eigenvectors = &inv_eigenvectors[m*states2];
+    }
+
+    // copy dummy values into the padding sections; m > 0 keeps m-1 in range
+    for (m = size(); m > 0 && m < nmixtures; m++) {
+        memcpy(&eigenvalues[m*num_states], &eigenvalues[(m-1)*num_states], sizeof(double)*num_states);
+        memcpy(&eigenvectors[m*states2], &eigenvectors[(m-1)*states2], sizeof(double)*states2);
+        memcpy(&inv_eigenvectors[m*states2], &inv_eigenvectors[(m-1)*states2], sizeof(double)*states2);
+    }
+}
diff --git a/model/modelset.h b/model/modelset.h
index 6c2ae9f..18c59b1 100644
--- a/model/modelset.h
+++ b/model/modelset.h
@@ -168,7 +168,12 @@ public:
/** map from pattern ID to model ID */
IntVector pattern_model_map;
-
+
+ /**
+ join memory for eigen into one chunk
+ */
+ void joinEigenMemory();
+
protected:
diff --git a/model/modelsubst.h b/model/modelsubst.h
index a2ca4ba..4e9dc4f 100644
--- a/model/modelsubst.h
+++ b/model/modelsubst.h
@@ -76,6 +76,12 @@ public:
*/
virtual int getNMixtures() { return 1; }
+ /**
+ * @param cat mixture class
+ * @return weight of a mixture model component
+ */
+ virtual double getMixtureWeight(int cat) { return 1.0; }
+
/**
@return the number of rate entries, equal to the number of elements
in the upper-diagonal of the rate matrix (since model is reversible)
diff --git a/model/partitionmodel.cpp b/model/partitionmodel.cpp
index a2942d6..8108cb8 100644
--- a/model/partitionmodel.cpp
+++ b/model/partitionmodel.cpp
@@ -137,7 +137,7 @@ double PartitionModel::optimizeLinkedAlpha(bool write_info, double gradient_epsi
double negative_lh;
double current_shape = linked_alpha;
double ferror, optx;
- optx = minimizeOneDimen(MIN_GAMMA_SHAPE, current_shape, MAX_GAMMA_SHAPE, max(gradient_epsilon, TOL_GAMMA_SHAPE), &negative_lh, &ferror);
+ optx = minimizeOneDimen(site_rate->getTree()->params->min_gamma_shape, current_shape, MAX_GAMMA_SHAPE, max(gradient_epsilon, TOL_GAMMA_SHAPE), &negative_lh, &ferror);
if (write_info)
cout << "Linked alpha across partitions: " << linked_alpha << endl;
return site_rate->getTree()->computeLikelihood();
@@ -151,7 +151,7 @@ double PartitionModel::optimizeParameters(int fixed_len, bool write_info, double
if (tree->part_order.empty()) tree->computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= tree->params->num_threads)
+ #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(tree->num_threads > 1)
#endif
for (int i = 0; i < ntrees; i++) {
int part = tree->part_order[i];
@@ -183,7 +183,7 @@ double PartitionModel::optimizeParametersGammaInvar(int fixed_len, bool write_in
if (tree->part_order.empty()) tree->computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= tree->params->num_threads)
+ #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(tree->num_threads > 1)
#endif
for (int i = 0; i < ntrees; i++) {
int part = tree->part_order[i];
diff --git a/model/ratefree.cpp b/model/ratefree.cpp
index 4e84f81..60c302f 100644
--- a/model/ratefree.cpp
+++ b/model/ratefree.cpp
@@ -7,6 +7,7 @@
#include "phylotree.h"
#include "ratefree.h"
+#include "rateinvar.h"
#include "model/modelfactory.h"
#include "model/modelmixture.h"
@@ -209,7 +210,7 @@ double RateFree::optimizeParameters(double gradient_epsilon) {
cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl;
// TODO: turn off EM algorithm for +ASC model
- if (optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty())
+ if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty()) || getPInvar() <= MIN_PINVAR)
return optimizeWithEM();
//if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false);
@@ -457,9 +458,16 @@ double RateFree::optimizeWithEM() {
// double *lk_ptn = aligned_alloc<double>(nptn);
double *new_prop = aligned_alloc<double>(nmix);
PhyloTree *tree = new PhyloTree;
+
+ // attach memory to save space
+// tree->central_partial_lh = phylo_tree->central_partial_lh;
+// tree->central_scale_num = phylo_tree->central_scale_num;
+// tree->central_partial_pars = phylo_tree->central_partial_pars;
+
tree->copyPhyloTree(phylo_tree);
tree->optimize_by_newton = phylo_tree->optimize_by_newton;
- tree->setLikelihoodKernel(phylo_tree->sse);
+ tree->setParams(phylo_tree->params);
+ tree->setLikelihoodKernel(phylo_tree->sse, phylo_tree->num_threads);
// initialize model
ModelFactory *model_fac = new ModelFactory();
model_fac->joint_optimize = phylo_tree->params->optimize_model_rate_joint;
@@ -543,7 +551,7 @@ double RateFree::optimizeWithEM() {
new_pinvar = 1.0 - new_pinvar;
- if (new_pinvar != 0.0) {
+ if (new_pinvar > 1e-4 && getPInvar() != 0.0) {
converged = converged && (fabs(getPInvar()-new_pinvar) < 1e-4);
setPInvar(new_pinvar);
// setOptimizePInvar(false);
@@ -564,8 +572,8 @@ double RateFree::optimizeWithEM() {
tree->setModel(subst_model);
subst_model->setTree(tree);
model_fac->model = subst_model;
- if (subst_model->isMixture())
- tree->setLikelihoodKernel(phylo_tree->sse);
+ if (subst_model->isMixture() || subst_model->isSiteSpecificModel())
+ tree->setLikelihoodKernel(phylo_tree->sse, phylo_tree->num_threads);
// initialize likelihood
@@ -591,6 +599,11 @@ double RateFree::optimizeWithEM() {
if (converged) break;
}
+ // deattach memory
+// tree->central_partial_lh = NULL;
+// tree->central_scale_num = NULL;
+// tree->central_partial_pars = NULL;
+
delete tree;
aligned_free(new_prop);
return phylo_tree->computeLikelihood();
diff --git a/model/rategamma.cpp b/model/rategamma.cpp
index 030abdc..0b9f5e8 100644
--- a/model/rategamma.cpp
+++ b/model/rategamma.cpp
@@ -31,14 +31,14 @@ RateGamma::RateGamma(int ncat, double shape, bool median, PhyloTree *tree) : Rat
phylo_tree = tree;
cut_median = median;
//gamma_shape = MAX_GAMMA_SHAPE-1.0;
- gamma_shape = max(MIN_GAMMA_SHAPE, fabs(shape));
+ gamma_shape = max(tree->params->min_gamma_shape, fabs(shape));
fix_gamma_shape = false;
rates = NULL;
if (shape > 0.0) {
// gamma_shape = shape;
fix_gamma_shape = true;
} else if (shape == 0.0) {
- gamma_shape = max(MIN_GAMMA_SHAPE, random_double() * 10.0);
+ gamma_shape = max(tree->params->min_gamma_shape, random_double() * 10.0);
cout << "Randomize initial gamma shape (alpha): " << gamma_shape << endl;
}
setNCategory(ncat);
@@ -190,7 +190,7 @@ double RateGamma::targetFunk(double x[]) {
void RateGamma::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {
if (getNDim() == 0) return;
- lower_bound[1] = MIN_GAMMA_SHAPE;
+ lower_bound[1] = phylo_tree->params->min_gamma_shape;
upper_bound[1] = MAX_GAMMA_SHAPE;
bound_check[1] = false;
}
@@ -233,7 +233,7 @@ double RateGamma::optimizeParameters(double gradient_epsilon) {
double negative_lh;
double current_shape = gamma_shape;
double ferror, optx;
- optx = minimizeOneDimen(MIN_GAMMA_SHAPE, current_shape, MAX_GAMMA_SHAPE, max(gradient_epsilon, TOL_GAMMA_SHAPE), &negative_lh, &ferror);
+ optx = minimizeOneDimen(phylo_tree->params->min_gamma_shape, current_shape, MAX_GAMMA_SHAPE, max(gradient_epsilon, TOL_GAMMA_SHAPE), &negative_lh, &ferror);
// gamma_shape = optx;
// computeRates();
// phylo_tree->clearAllPartialLH();
diff --git a/model/rategamma.h b/model/rategamma.h
index 19fc809..3ecbb98 100644
--- a/model/rategamma.h
+++ b/model/rategamma.h
@@ -22,12 +22,6 @@
#include "rateheterogeneity.h"
-const double MIN_GAMMA_RATE = 1e-6;
-// change from 0.01 to 0.02 as 0.01 causes numerical problems
-const double MIN_GAMMA_SHAPE = 0.02;
-const double MAX_GAMMA_SHAPE = 1000.0;
-const double TOL_GAMMA_SHAPE = 0.001;
-
const int GAMMA_CUT_MEDIAN = 1; // 2 discrete Gamma approximations (mean or median) of Yang 1994
const int GAMMA_CUT_MEAN = 2;
@@ -182,7 +176,7 @@ public:
*/
virtual void writeParameters(ostream &out);
- bool isFixGammaShape() const {
+ virtual bool isFixGammaShape() const {
return fix_gamma_shape;
}
diff --git a/model/rategammainvar.cpp b/model/rategammainvar.cpp
index 70ebaa7..55e6a8e 100644
--- a/model/rategammainvar.cpp
+++ b/model/rategammainvar.cpp
@@ -136,7 +136,7 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
if (optimize_alg.find("EM_RR") != string::npos) {
return randomRestartOptimization(gradient_epsilon);
- } else if (optimize_alg.find("Brent") != string::npos) {
+ } else if (optimize_alg.find("Brent") != string::npos || phylo_tree->aln->frac_const_sites == 0.0 || isFixPInvar() || isFixGammaShape()) {
double lh = phylo_tree->computeLikelihood();
cur_optimize = 0;
double gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
@@ -177,6 +177,7 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
} else {
string errMsg = "Unknown optimization algorithm: " + optimize_alg;
outError(errMsg.c_str());
+ return 0.0;
}
}
diff --git a/model/rateheterogeneity.h b/model/rateheterogeneity.h
index c0c4ab5..1d2dcea 100644
--- a/model/rateheterogeneity.h
+++ b/model/rateheterogeneity.h
@@ -146,7 +146,11 @@ public:
*/
virtual void setPInvar(double pinv) { }
- /**
+ virtual bool isFixPInvar() const {
+ return true;
+ }
+
+ /**
set whether to fix p_invar
*/
virtual void setFixPInvar(bool fixPInvar) {}
@@ -169,7 +173,11 @@ public:
*/
virtual void setGammaShape(double gs) {}
- /**
+ virtual bool isFixGammaShape() const {
+ return true;
+ }
+
+ /**
set whether to fix gamma shape
*/
virtual void setFixGammaShape(bool fixGammaShape) {}
diff --git a/model/rateinvar.cpp b/model/rateinvar.cpp
index 55988c5..0a68d2f 100644
--- a/model/rateinvar.cpp
+++ b/model/rateinvar.cpp
@@ -22,10 +22,13 @@
RateInvar::RateInvar(double p_invar_sites, PhyloTree *tree)
: RateHeterogeneity()
{
- if (tree)
- p_invar = max(tree->aln->frac_const_sites/2.0, MIN_PINVAR);
+ if (tree) {
+ if (tree->aln->frac_const_sites == 0.0)
+ p_invar = 0.0;
+ else
+ p_invar = max(tree->aln->frac_const_sites/2.0, MIN_PINVAR);
// p_invar = MIN_PINVAR;
- else
+ } else
p_invar = MIN_PINVAR;
fix_p_invar = false;
optimize_p_invar = true;
@@ -85,6 +88,8 @@ void RateInvar::setBounds(double *lower_bound, double *upper_bound, bool *bound_
}
double RateInvar::optimizeParameters(double gradient_epsilon) {
+ if (phylo_tree->aln->frac_const_sites == 0.0)
+ return -computeFunction(0.0);
if (fix_p_invar || !optimize_p_invar)
return -computeFunction(p_invar);
if (verbose_mode >= VB_MAX)
diff --git a/model/rateinvar.h b/model/rateinvar.h
index 6ae98ed..acf070a 100644
--- a/model/rateinvar.h
+++ b/model/rateinvar.h
@@ -121,7 +121,7 @@ public:
*/
virtual void writeParameters(ostream &out);
- bool isFixPInvar() const {
+ virtual bool isFixPInvar() const {
return fix_p_invar;
}
diff --git a/mtree.cpp b/mtree.cpp
index 9a39486..ee363d0 100644
--- a/mtree.cpp
+++ b/mtree.cpp
@@ -21,6 +21,7 @@
#include <iostream>
//#include <fstream>
#include <iterator>
+//#include <mtree.h>
#include "splitgraph.h"
using namespace std;
@@ -65,6 +66,23 @@ MTree::MTree(MTree &tree) {
init(tree);
}
+/**
+    Build the tree from a newick string and give the leaves IDs that follow
+    the order of their names in taxaNames (see assignIDs), then call
+    assignLeafID().
+*/
+MTree::MTree(string& treeString, vector<string>& taxaNames, bool isRooted) {
+    // a stream constructed from the string starts reading at its first character
+    stringstream newick(treeString);
+    readTree(newick, isRooted);
+    assignIDs(taxaNames);
+    assignLeafID();
+}
+
+/**
+    Build the tree from a newick string, then call assignLeafID() to set the
+    leaf IDs.
+*/
+MTree::MTree(string& treeString, bool isRooted) {
+    // a stream constructed from the string starts reading at its first character
+    stringstream newick(treeString);
+    readTree(newick, isRooted);
+    assignLeafID();
+}
+
void MTree::init(MTree &tree) {
root = tree.root;
leafNum = tree.leafNum;
@@ -78,6 +96,41 @@ void MTree::init(MTree &tree) {
fig_char = tree.fig_char;
}
+/**
+    Set the ID of every leaf to the index of its name in taxaNames and verify
+    that the tree and taxaNames contain exactly the same names. Each mismatch
+    is reported with a non-quitting outError(); a final outError() is raised
+    if any mismatch was seen.
+*/
+void MTree::assignIDs(vector<string>& taxaNames) {
+    bool mismatch = false;
+    int num_names = taxaNames.size();
+
+    // direction 1: every given name must be a leaf of the tree
+    for (int idx = 0; idx < num_names; idx++) {
+        string name = taxaNames[idx];
+        Node *leaf = findLeafName(name);
+        if (leaf) {
+            assert(leaf->isLeaf());
+            leaf->id = idx;
+            continue;
+        }
+        mismatch = true;
+        outError((string) "Sequence " + name + " does not appear in the tree", false);
+    }
+
+    // direction 2: every leaf of the tree must be among the given names
+    StrVector tree_names;
+    getTaxaName(tree_names);
+    for (StrVector::iterator tn = tree_names.begin(); tn != tree_names.end(); tn++) {
+        vector<string>::iterator hit = taxaNames.begin();
+        while (hit != taxaNames.end() && !(*tn == *hit))
+            hit++;
+        if (hit == taxaNames.end()) {
+            outError((string) "Tree taxon " + (*tn) + " does not appear in the input taxa names", false);
+            mismatch = true;
+        }
+    }
+
+    if (mismatch) outError("Tree taxa and input taxa names do not match (see above)");
+}
+
void MTree::copyTree(MTree *tree) {
if (root) freeNode();
stringstream ss;
@@ -153,6 +206,95 @@ Node* MTree::copyTree(MTree *tree, string &taxa_set, double &len, Node *node, No
return int_node;
}
+/**
+    Walk the tree from node (root if NULL) away from dad. At every node with
+    more than 3 neighbors keep only three of them -- the one leading to dad
+    plus two drawn with random_int() -- and free the subtrees behind all the
+    others. Nodes with at most 3 neighbors are left untouched.
+*/
+void MTree::extractBifurcatingSubTree(Node *node, Node *dad) {
+    if (!node) node = root;
+    if (node->degree() > 3) {
+        // first kept index: position of the neighbor leading back to dad
+        int keep_a = node->findNeighborIt(dad) - node->neighbors.begin();
+
+        // second kept index: random, different from the first
+        int keep_b;
+        do {
+            keep_b = random_int(node->degree());
+        } while (keep_b == keep_a);
+        if (keep_a > keep_b) {
+            int swap_tmp = keep_a;
+            keep_a = keep_b;
+            keep_b = swap_tmp;
+        }
+
+        // third kept index: random, different from both
+        int keep_c;
+        do {
+            keep_c = random_int(node->degree());
+        } while (keep_c == keep_a || keep_c == keep_b);
+
+        // sort so that keep_a < keep_b < keep_c (keep_a < keep_b already holds)
+        if (keep_c < keep_a) {
+            int old_a = keep_a;
+            keep_a = keep_c;
+            keep_c = keep_b;
+            keep_b = old_a;
+        } else if (keep_c < keep_b) {
+            int old_b = keep_b;
+            keep_b = keep_c;
+            keep_c = old_b;
+        }
+
+        // drop every neighbor that was not selected, together with its subtree
+        for (int i = 0; i != node->neighbors.size(); i++) {
+            if (i == keep_a || i == keep_b || i == keep_c)
+                continue;
+            freeNode(node->neighbors[i]->node, node);
+            delete node->neighbors[i];
+        }
+
+        // compact the survivors to the front; the ascending order guarantees
+        // no source slot is overwritten before it is read
+        node->neighbors[0] = node->neighbors[keep_a];
+        node->neighbors[1] = node->neighbors[keep_b];
+        node->neighbors[2] = node->neighbors[keep_c];
+        node->neighbors.erase(node->neighbors.begin()+3, node->neighbors.end());
+    }
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        if ((*it)->node->isLeaf())
+            continue;
+        extractBifurcatingSubTree((*it)->node, node);
+    }
+}
+
+/**
+    Randomly resolve every internal node with more than 3 neighbors: while
+    such a node remains, two of its neighbors (picked with random_int()) are
+    moved below a freshly created node, which is attached to the original node
+    by a branch of length -1.0.
+*/
+void MTree::resolveMultifurcation() {
+    NodeVector inner_nodes;
+    getInternalNodes(inner_nodes);
+    for (NodeVector::iterator it = inner_nodes.begin(); it != inner_nodes.end(); it++) {
+        Node *multi = *it;
+        while (multi->degree() > 3) {
+            Node *joint = newNode();
+
+            // draw two distinct neighbor indices, lo < hi
+            int lo = random_int(multi->degree());
+            int hi;
+            do {
+                hi = random_int(multi->degree());
+            } while (hi == lo);
+            if (lo > hi) {
+                int swap_tmp = lo;
+                lo = hi;
+                hi = swap_tmp;
+            }
+            Neighbor *nei_lo = multi->neighbors[lo];
+            Neighbor *nei_hi = multi->neighbors[hi];
+
+            // hang the first picked neighbor below the new node
+            nei_lo->node->updateNeighbor(multi, joint);
+            joint->neighbors.push_back(nei_lo);
+
+            // hang the second picked neighbor below the new node
+            nei_hi->node->updateNeighbor(multi, joint);
+            joint->neighbors.push_back(nei_hi);
+
+            // link the new node with the old one; erase the higher index first
+            // so the lower index stays valid
+            joint->addNeighbor(multi, -1.0);
+            multi->neighbors.erase(multi->neighbors.begin() + hi);
+            multi->neighbors.erase(multi->neighbors.begin() + lo);
+            multi->addNeighbor(joint, -1.0);
+        }
+    }
+}
+
Node* MTree::newNode(int node_id, const char* node_name) {
return new Node(node_id, node_name);
}
@@ -236,12 +378,6 @@ void MTree::printTree(const char *ofile, int brtype)
}
}
-//string MTree::getTreeString() {
-// stringstream tree_stream;
-// printTree(tree_stream);
-// return tree_stream.str();
-//}
-
void MTree::printTree(ostream &out, int brtype) {
if (root->isLeaf()) {
if (root->neighbors[0]->node->isLeaf()) {
@@ -301,11 +437,14 @@ int MTree::printTree(ostream &out, int brtype, Node *node, Node *dad)
out << node->name;
if (brtype & WT_BR_LEN) {
+ int prec = 10;
out.setf( std::ios::fixed, std:: ios::floatfield ); // some sofware does handle number format like '1.234e-6'
// out.precision(10); // increase precision to avoid zero branch (like in RAxML)
double len = node->neighbors[0]->length;
if (brtype & WT_BR_SCALE) len *= len_scale;
+ if (brtype & WT_BR_LEN_SHORT) prec = 6;
if (brtype & WT_BR_LEN_ROUNDING) len = round(len);
+ out.precision(prec);
if (brtype & WT_BR_LEN_FIXED_WIDTH)
out << ":" << fixed << len;
else
@@ -499,6 +638,16 @@ void MTree::readTree(const char *infile, bool &is_rooted) {
" taxa and " << nodeNum-1-is_rooted << " branches" << endl;
}
+/*
+void MTree::readTreeString(string tree_string, bool is_rooted) {
+ stringstream str;
+ str << tree_string;
+ str.seekg(0, ios::beg);
+ freeNode();
+ readTree(str, is_rooted);
+}
+*/
+
void MTree::readTree(istream &in, bool &is_rooted)
{
@@ -774,7 +923,7 @@ void MTree::getTaxa(NodeVector &taxa, Node *node, Node *dad) {
}
void MTree::getAllNodesInSubtree(Node *node, Node *dad, NodeVector &nodeList) {
- assert(node && dad);
+ assert(node);
nodeList.push_back(node);
if (node->isLeaf()) {
return;
@@ -812,13 +961,13 @@ void MTree::getInternalNodes(NodeVector &nodes, Node *node, Node *dad) {
}
}
-void MTree::getAllInnerBranches(NodeVector &nodes1, NodeVector &nodes2, SplitGraph* excludeSplits, Node *node, Node *dad) {
+void MTree::generateNNIBraches(NodeVector &nodes1, NodeVector &nodes2, SplitGraph* excludeSplits, Node *node, Node *dad) {
if (!node) node = root;
//for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
//if ((*it)->node != dad) {
FOR_NEIGHBOR_IT(node, dad, it)
if (!(*it)->node->isLeaf()) {
- getAllInnerBranches(nodes1, nodes2, excludeSplits, (*it)->node, node);
+ generateNNIBraches(nodes1, nodes2, excludeSplits, (*it)->node, node);
if (!node->isLeaf()) {
if (excludeSplits != NULL && excludeSplits->size() != 0) {
Split* sp = getSplit(node, (*it)->node);
@@ -839,73 +988,54 @@ void MTree::getAllInnerBranches(NodeVector &nodes1, NodeVector &nodes2, SplitGra
}
}
-bool MTree::branchExist(Node* node1, Node* node2, NodeVector& nodes1, NodeVector& nodes2) {
- assert(nodes1.size() == nodes2.size());
- bool existed = false;
- for (int i = 0; i < nodes1.size(); i++) {
- if (nodes1[i] == node1) {
- if (nodes2[i] == node2) {
- existed = true;
- break;
- }
- }
- if (nodes1[i] == node2) {
- if (nodes2[i] == node1) {
- existed = true;
- break;
- }
- }
- }
- return existed;
-}
+//bool MTree::branchExist(Node* node1, Node* node2, NodeVector& nodes1, NodeVector& nodes2) {
+// assert(nodes1.size() == nodes2.size());
+// bool existed = false;
+// for (int i = 0; i < nodes1.size(); i++) {
+// if (nodes1[i] == node1) {
+// if (nodes2[i] == node2) {
+// existed = true;
+// break;
+// }
+// }
+// if (nodes1[i] == node2) {
+// if (nodes2[i] == node1) {
+// existed = true;
+// break;
+// }
+// }
+// }
+// return existed;
+//}
-void MTree::getInnerBranches(NodeVector &nodes1, NodeVector &nodes2, int depth, Node *node, Node *dad) {
+void MTree::getSurroundingInnerBranches(Node *node, Node *dad, int depth, Branches &surrBranches) {
if (depth == 0)
return;
FOR_NEIGHBOR_IT(node, dad, it) {
- if (!(*it)->node->isLeaf() && !branchExist(node, (*it)->node, nodes1, nodes2)) {
- nodes1.push_back(node);
- nodes2.push_back((*it)->node);
- getInnerBranches(nodes1, nodes2, depth-1, (*it)->node, node);
+ if (!(*it)->node->isLeaf()) {
+ Branch curBranch;
+ curBranch.first = node;
+ curBranch.second = (*it)->node;
+ int branchID = pairInteger(node->id, (*it)->node->id);
+ if (surrBranches.find(branchID) == surrBranches.end())
+ surrBranches.insert(pair<int,Branch>(branchID, curBranch));
+ getSurroundingInnerBranches((*it)->node, node, depth-1, surrBranches);
}
}
}
bool MTree::isInnerBranch(Node* node1, Node* node2) {
- assert(node1->degree() == 3 && node2->degree() == 3);
- return (isABranch(node1, node2) && !node1->isLeaf() && !node2->isLeaf());
+ return(node1->degree() >= 3 && node2->degree() >= 3 && isABranch(node1, node2));
}
bool MTree::isABranch(Node* node1, Node* node2) {
- bool isBranch1 = false;
- for (NeighborVec::iterator it = node1->neighbors.begin(); it != node1->neighbors.end(); it++) {
- if ((*it)->node == node2) {
- isBranch1 = true;
- break;
- }
- }
- // Sanity check: both nodes must have each other as neighbors or not at all
- bool isBranch2 = false;
- for (NeighborVec::iterator it = node2->neighbors.begin(); it != node2->neighbors.end(); it++) {
- if ((*it)->node == node1) {
- isBranch2 = true;
- break;
- }
- }
- if (isBranch2 != isBranch1) {
- int node1ID = node1->id;
- int node2ID = node2->id;
- stringstream msg;
- msg << "Tree data structure corrupted! Node " << node1ID << " and node " << node2ID << " are not constructed properly";
- outError(msg.str());
- }
- return isBranch1;
+ return (node1->findNeighbor(node2) != NULL && node2->findNeighbor(node1) != NULL);
}
void MTree::getBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad) {
if (!node) node = root;
//for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
- //if ((*it)->node != dad) {
+ //if ((*it)->node != dad) {
FOR_NEIGHBOR_IT(node, dad, it) {
if (node->id < (*it)->node->id) {
nodes.push_back(node);
@@ -918,6 +1048,19 @@ void MTree::getBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node
}
}
+/**
+    Collect every branch below node (root if NULL) for which isInnerBranch()
+    holds. Each branch is stored as (node, child) under the key
+    pairInteger(node->id, child->id).
+*/
+void MTree::getInnerBranches(Branches& branches, Node *node, Node *dad) {
+    if (!node) node = root;
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        Node *child = (*it)->node;
+        if (isInnerBranch(child, node)) {
+            Branch inner(node, child);
+            branches.insert(pair<int, Branch>(pairInteger(node->id, child->id), inner));
+        }
+        getInnerBranches(branches, child, node);
+    }
+}
+
void MTree::getBranchLengths(DoubleVector &len, Node *node, Node *dad) {
if (!node) {
node = root;
@@ -971,6 +1114,18 @@ void MTree::getTaxaName(vector<string> &taxname, Node *node, Node *dad) {
}
+/**
+    Append the names of all leaves below node (root if NULL) to taxname, in
+    the order in which the traversal reaches them.
+*/
+void MTree::getUnorderedTaxaName(vector<string> &taxname, Node *node, Node *dad) {
+    Node *cur = node ? node : root;
+    if (cur->isLeaf())
+        taxname.push_back(cur->name);
+    FOR_NEIGHBOR_IT(cur, dad, it)
+        getUnorderedTaxaName(taxname, (*it)->node, cur);
+}
+
void MTree::getTaxaID(vector<int> &taxa, Node *node, Node *dad) {
if (!node) node = root;
if (node->isLeaf()) {
@@ -997,11 +1152,16 @@ bool MTree::containsSplits(SplitGraph& splits) {
}
Split* MTree::getSplit(Node* node1, Node* node2) {
- Split* sp = new Split(leafNum);
- getTaxa(*sp, node1, node2);
- if (sp->shouldInvert())
- sp->invert();
- return sp;
+ Neighbor* node12 = node1->findNeighbor(node2);
+ return node12->split;
+}
+
+Split* MTree::_getSplit(Node* node1, Node* node2) {
+ Split* sp = new Split(leafNum);
+ getTaxa(*sp, node1, node2);
+ if (sp->shouldInvert())
+ sp->invert();
+ return sp;
}
void MTree::convertSplits(SplitGraph &sg, Split *resp, NodeVector *nodes, Node *node, Node *dad) {
@@ -1019,8 +1179,8 @@ void MTree::convertSplits(SplitGraph &sg, Split *resp, NodeVector *nodes, Node *
sp->invert();
/* ignore nodes with degree of 2 because such split will be added before */
if (node->degree() != 2) {
- sg.push_back(sp);
- if (nodes) nodes->push_back((*it)->node);
+ sg.push_back(sp);
+ if (nodes) nodes->push_back((*it)->node);
}
has_child = true;
}
@@ -2049,6 +2209,81 @@ void MTree::removeTaxa(StrVector &taxa_names) {
initializeTree();
}
+/**
+    Append to splits a newly allocated copy of the split stored on every
+    neighbor below node (root if NULL); a copy is inverted when its
+    shouldInvert() says so. The stored split pointers are dereferenced without
+    a null check, so they must already be set (buildNodeSplit() assigns them).
+*/
+void MTree::getSplits(SplitGraph &splits, Node* node, Node* dad) {
+    if (!node)
+        node = root;
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        // children first, then the split of the branch leading to them
+        getSplits(splits, (*it)->node, node);
+        Split *copy = new Split(*((*it)->split));
+        if (copy->shouldInvert())
+            copy->invert();
+        splits.push_back(copy);
+    }
+}
+
+/**
+    Recompute the split stored on every neighbor of the tree.
+    @param resp (internal) accumulator for the taxa found below node
+    @param node current node, NULL to start from the root
+    @param dad  node we came from, used to direct the traversal
+
+    NOTE(review): the split on the neighbor pointing back to dad is overwritten
+    without deleting a previous value, and for a node without children it is
+    copied from resp before the node's own taxon is added -- confirm both are
+    intended.
+*/
+void MTree::buildNodeSplit(Split *resp, Node *node, Node *dad) {
+    if (!node) {
+        node = root;
+        // the neighbor entry that points at the root from its adjacent node
+        Neighbor* toRoot = root->neighbors[0]->node->findNeighbor(root);
+        delete toRoot->split; // deleting a NULL pointer is a no-op
+        toRoot->split = new Split(leafNum);
+        resp = toRoot->split;
+    }
+
+    bool no_child = true;
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        // start from a fresh, empty split for this child
+        delete (*it)->split;
+        (*it)->split = new Split(leafNum);
+        buildNodeSplit((*it)->split, (*it)->node, node);
+        // taxa below the child are also below this node
+        *resp += *((*it)->split);
+        no_child = false;
+    }
+
+    if (dad != NULL) {
+        // the direction back to dad gets the inverted copy of resp
+        Neighbor* toDad = node->findNeighbor(dad);
+        toDad->split = new Split(*resp);
+        toDad->split->invert();
+    }
+
+    if (no_child)
+        resp->addTaxon(node->id);
+}
+
+/**
+    Fill splitBranchMap with one entry per branch below node, mapping the
+    split induced by the branch to the branch (child, node). Branches leaving
+    a node of degree 2 are skipped.
+    @param resp (internal) accumulator for the taxa below node; when NULL a
+           temporary accumulator is created and released before returning
+    @param node the starting node, NULL to start from the root
+    @param dad  dad of the node, used to direct the search
+
+    The Split objects used as map keys are allocated here and stay owned by
+    splitBranchMap.
+*/
+void MTree::initializeSplitMap(Split *resp, Node *node, Node *dad) {
+    if (!node) node = root;
+    // remember whether the accumulator is ours, so it can be freed at the end
+    bool own_resp = false;
+    if (!resp) {
+        resp = new Split(leafNum);
+        own_resp = true;
+    }
+    bool has_child = false;
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        Split *sp = new Split(leafNum);
+        initializeSplitMap(sp, (*it)->node, node);
+        *resp += *sp;
+        if (sp->shouldInvert())
+            sp->invert();
+        /* ignore nodes with degree of 2 because such split will be added before */
+        if (node->degree() != 2) {
+            Branch curBranch((*it)->node, node);
+            splitBranchMap.insert(make_pair(sp, curBranch));
+        } else {
+            // not stored anywhere: release it instead of leaking it
+            delete sp;
+        }
+        has_child = true;
+    }
+    if (!has_child) {
+        resp->addTaxon(node->id);
+    }
+    if (own_resp)
+        delete resp;
+}
+
Node *MTree::findFarthestLeaf(Node *node, Node *dad) {
if (!node)
node = root;
@@ -2069,30 +2304,6 @@ Node *MTree::findFarthestLeaf(Node *node, Node *dad) {
return res;
}
-//void MTree::sortNeighborBySubtreeSize(Node *node, Node *dad) {
-// if (dad && node->isLeaf()) {
-// node->height = 0.0;
-// return;
-// }
-//
-// node->height = 0.0;
-// FOR_NEIGHBOR_DECLARE(node, dad, it) {
-// sortNeighborBySubtreeSize((*it)->node, node);
-// if (node->height < (*it)->node->height+1)
-// node->height = (*it)->node->height+1;
-// }
-//
-// // sort neighbors in ascending order of tree height
-// FOR_NEIGHBOR(node, dad, it)
-// for (NeighborVec::iterator it2 = it+1; it2 != node->neighbors.end(); it2++)
-// if ((*it)->node != dad && (*it)->node->height > (*it2)->node->height) {
-// Neighbor *nei;
-// nei = *it;
-// *it = *it2;
-// *it2 = nei;
-// }
-//}
-
void MTree::getPreOrderBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad) {
if (dad) {
nodes.push_back(node);
diff --git a/mtree.h b/mtree.h
index 44e7c42..701fc6e 100644
--- a/mtree.h
+++ b/mtree.h
@@ -27,6 +27,7 @@
#include <sstream>
#include "hashsplitset.h"
#include "splitset.h"
+//#include "candidateset.h"
const char ROOT_NAME[] = "_root";
@@ -57,6 +58,23 @@ public:
MTree(MTree &tree);
/**
+ * Constructor, read tree from string.
+ * Taxa IDs are assigned according to the order in taxaNames
+ */
+ MTree(string& treeString, vector<string>& taxaNames, bool isRooted);
+
+ /**
+ * Read tree from string assuming that the taxa names are numeric numbers
+ * Leaf IDs are then assigned according to the number in the taxa names
+ */
+ MTree(string& treeString, bool isRooted);
+
+ /**
+ * Assign taxa IDs according to the order in taxaNames
+ */
+ void assignIDs(vector<string>& taxaNames);
+
+ /**
constructor
*/
MTree();
@@ -77,6 +95,21 @@ public:
Node* copyTree(MTree *tree, string &taxa_set, double &len, Node *node = NULL, Node *dad = NULL);
/**
+ In case of a multifurcating tree, extract a bifurcating subtree by randomly removing surplus subtrees at each multifurcating node
+ If the tree is bifurcating, nothing changes
+ @param node the starting node, NULL to start from the root
+ @param dad dad of the node, used to direct the search
+ */
+ void extractBifurcatingSubTree(Node *node = NULL, Node *dad = NULL);
+
+
+ /**
+ In case of a multifurcating tree, randomly resolve each multifurcating node to obtain a strictly bifurcating tree
+ If the tree is bifurcating, nothing changes
+ */
+ void resolveMultifurcation();
+
+ /**
initialize the tree from a NEWICK tree file
@param userTreeFile the name of the user tree
@param is_rooted (IN/OUT) true if tree is rooted
@@ -153,9 +186,6 @@ public:
*/
void printTree(ostream & out, int brtype = WT_BR_LEN);
-
-// string getTreeString();
-
/**
print the tree to the output file in newick format
@param out the output file.
@@ -254,6 +284,13 @@ public:
virtual void readTree(istream &in, bool &is_rooted);
/**
+ read the tree from a newick string
+ @param tree_string the tree string.
+ @param is_rooted (IN/OUT) true if tree is rooted
+ */
+ //virtual void readTreeString(string tree_string, bool is_rooted);
+
+ /**
parse the tree from the input file in newick format
@param infile the input file
@param ch (IN/OUT) current char
@@ -358,11 +395,20 @@ public:
get the descending taxa names below the node
@param node the starting node, NULL to start from the root
@param dad dad of the node, used to direct the search
- @param taxname (OUT) taxa name
+ @param[out] taxname taxa name, with size equal leafNum and ordered with taxon ID
*/
void getTaxaName(vector<string> &taxname, Node *node = NULL, Node *dad = NULL);
/**
+ get the descending taxa names below the node. Unlike getTaxaName(), the names are
+ appended in traversal order and are not ordered by taxon ID.
+ @param node the starting node, NULL to start from the root
+ @param dad dad of the node, used to direct the search
+ @param[out] taxname taxa name
+ */
+ void getUnorderedTaxaName(vector<string> &taxname, Node *node = NULL, Node *dad = NULL);
+
+ /**
get the descending internal nodes below \a node
@param node the starting node, NULL to start from the root
@param dad dad of the node, used to direct the search
@@ -378,7 +424,7 @@ public:
@param nodes2 (OUT) vector of the other end node of branch
@param excludeSplits do not collect branches in here
*/
- void getAllInnerBranches(vector<Node*> &nodes, vector<Node*> &nodes2, SplitGraph* excludeSplits = NULL, Node *node = NULL, Node *dad = NULL);
+ void generateNNIBraches(vector<Node*> &nodes, vector<Node*> &nodes2, SplitGraph* excludeSplits = NULL, Node *node = NULL, Node *dad = NULL);
/**
get all descending branches below the node
@@ -390,19 +436,21 @@ public:
void getBranches(NodeVector &nodes, NodeVector &nodes2, Node *node = NULL, Node *dad = NULL);
/**
+ get all inner branches below the node
+ @param branches the branches are stored here
+ @param node the starting node, NULL to start from the root
+ @param dad dad of the node, used to direct the search
+ */
+ void getInnerBranches(Branches& branches, Node *node = NULL, Node *dad = NULL);
+
+ /**
* get all descending internal branches below \a node and \a dad up to depth \a depth
* @param[in] depth collect all internal branches up to distance \a depth from the current branch
* @param[in] node one of the 2 nodes of the current branches
* @param[in] dad one of the 2 nodes of the current branches
- * @param[out] nodes1 contains one ends of the collected branches
- * @param[out] nodes2 contains the other ends of the collected branches
+ * @param[out] surrBranches the resulting branches
*/
- void getInnerBranches(NodeVector& nodes1, NodeVector& nodes2, int depth, Node *node, Node *dad);
-
- /**
- * @brief check whether branch (node1, node2) exist in the branch vector (nodes1, node2)
- */
- bool branchExist(Node* node1, Node* node2, NodeVector& nodes1, NodeVector& nodes2);
+ void getSurroundingInnerBranches(Node *node, Node *dad, int depth, Branches &surrBranches);
/**
* @brief: check if the branch is internal
@@ -513,14 +561,44 @@ public:
void convertSplits(SplitGraph &sg, Split *resp, NodeVector *nodes = NULL, Node *node = NULL, Node *dad = NULL);
/**
+ * Initialize the hash table splitBranchMap, which contains the mapping from split to branch
+ * @param resp (internal) set of taxa below node
+ * @param node the starting node, NULL to start from the root
+ * @param dad dad of the node, used to direct the search
+ */
+ void initializeSplitMap(Split *resp = NULL, Node *node = NULL, Node *dad = NULL);
+
+ /**
+ * Generate a split for each neighbor node
+ */
+ void buildNodeSplit(Split *resp = NULL, Node *node = NULL, Node *dad = NULL);
+
+ /**
+ * Get split graph based on split stored in nodes
+ */
+ void getSplits(SplitGraph &splits, Node* node = NULL, Node* dad = NULL);
+
+ /**
+ * Update the Split-Branch map with the new split defined by a branch
+ * @param node1 one end of the branch
+ * @param node2 the other end
+ */
+ //void updateSplitMap(Node* node1, Node* node2);
+
+ /**
* Generate a split defined by branch node1-node2
* @param node1 one end of the branch
* @param node2 one end of the branch
- * @return a pointer to the split (the new split is allocated dynamically)
+ * @return pointer to the split stored on the neighbor of node1 that leads to node2 (not a new allocation)
*/
Split* getSplit(Node* node1, Node* node2);
/**
+ * Slow version of getSplit, which traverses the tree to get the splits
+ */
+ Split* _getSplit(Node* node1, Node* node2);
+
+ /**
* Check whehter the tree contains all splits in \a splits
* @param splits list of splits to check
* @return true or false
@@ -636,7 +714,7 @@ public:
/**
number of leaves
*/
- int leafNum;
+ unsigned int leafNum;
/**
total number of nodes in the tree
@@ -667,6 +745,11 @@ public:
double len_scale;
/**
+ * Pointer to the global params
+ */
+ Params* params;
+
+ /**
release the nemory.
@param node the starting node, NULL to start from the root
@param dad dad of the node, used to direct the search
@@ -675,7 +758,17 @@ public:
void setExtendedFigChar();
+ /** set pointer of params variable */
+ virtual void setParams(Params* params) {
+ this->params = params;
+ };
+
protected:
+ /**
+ * Hash table mapping a split to a branch.
+ * This data structure is generated when genSplitMap() is called.
+ */
+ unordered_map<Split*, Branch, hashfunc_Split> splitBranchMap;
/**
line number of the input file, used to output errors in input file
diff --git a/mtreeset.cpp b/mtreeset.cpp
index fd997da..f02f06c 100644
--- a/mtreeset.cpp
+++ b/mtreeset.cpp
@@ -112,6 +112,7 @@ void MTreeSet::init(StrVector &treels, bool &is_rooted) {
//for (i = 0; i < trees_id.size(); i++) ok_trees[trees_id[i]] = 1;
for (StrVector::iterator it = treels.begin(); it != treels.end(); it++)
+ if (!it->empty())
{
count++;
MTree *tree = newTree();
diff --git a/ngs.cpp b/ngs.cpp
index 6b22e1c..7fb98c9 100644
--- a/ngs.cpp
+++ b/ngs.cpp
@@ -410,7 +410,7 @@ NGSTree::NGSTree(Params ¶ms, NGSAlignment *alignment) {
model_factory = NULL;
optimize_by_newton = params.optimize_by_newton;
//tree.sse = params.SSE;
- setLikelihoodKernel(LK_EIGEN);
+ setLikelihoodKernel(LK_EIGEN, params.num_threads);
}
double NGSTree::computeLikelihood(double *pattern_lh) {
diff --git a/node.cpp b/node.cpp
index 65cd70f..8a89887 100644
--- a/node.cpp
+++ b/node.cpp
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
diff --git a/node.h b/node.h
index b4e6500..fbb6604 100644
--- a/node.h
+++ b/node.h
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -33,8 +35,8 @@
//#include <time.h>
#include <math.h>
#include "ncl/ncl.h"
-
#include "tools.h"
+#include "split.h"
using namespace std;
@@ -46,6 +48,7 @@ class Node;
Neighbor list of a node in the tree
*/
class Neighbor {
+
public:
/**
@@ -64,6 +67,11 @@ public:
int id;
/**
+ * The set of taxa underneath the neighbor
+ */
+ Split* split;
+
+ /**
construct class with a node and length
@param anode the other end of the branch
@param alength length of branch
@@ -72,6 +80,7 @@ public:
node = anode;
length = alength;
id = -1;
+ split = NULL;
}
/**
@@ -84,6 +93,7 @@ public:
node = anode;
length = alength;
id = aid;
+ split = NULL;
}
/**
@@ -94,6 +104,7 @@ public:
node = nei->node;
length = nei->length;
id = nei->id;
+ split = NULL;
}
/**
@@ -113,6 +124,9 @@ typedef vector<Neighbor*> NeighborVec;
*/
typedef vector<Node*> NodeVector;
+typedef pair<Node*, Node*> Branch;
+typedef map<int, Branch> Branches;
+
/*--------------------------------------------------------------*/
/*--------------------------------------------------------------*/
diff --git a/optimization.cpp b/optimization.cpp
index 92a8093..fc64af9 100644
--- a/optimization.cpp
+++ b/optimization.cpp
@@ -38,6 +38,7 @@ void fixBound(double x[], double lower[], double upper[], int n);
psum[n]=sum;}
+/*
#define IA 16807
#define IM 2147483647
#define AM (1.0/IM)
@@ -84,7 +85,7 @@ double ran1(long *idum) {
#undef NDIV
#undef EPS
#undef RNMX
-
+*/
long idum = 123456;
double tt;
@@ -456,7 +457,7 @@ double Optimization::minimizeNewton(double x1, double xguess, double x2, double
}
if (f < 0.0)
xl=rts;
- else
+ else if (f > 0.0)
xh=rts;
}
nrerror("Maximum number of iterations exceeded in minimizeNewton");
@@ -477,9 +478,9 @@ double Optimization::minimizeNewton(double x1, double xguess, double x2, double
#define ALF 1.0e-4
#define TOLX 1.0e-7
-static double maxarg1,maxarg2;
-#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
- (maxarg1) : (maxarg2))
+//static double maxarg1,maxarg2;
+//#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
+// (maxarg1) : (maxarg2))
void Optimization::lnsrch(int n, double xold[], double fold, double g[], double p[], double x[],
double *f, double stpmax, int *check, double lower[], double upper[]) {
@@ -496,7 +497,7 @@ void Optimization::lnsrch(int n, double xold[], double fold, double g[], double
slope += g[i]*p[i];
test=0.0;
for (i=1;i<=n;i++) {
- temp=fabs(p[i])/FMAX(fabs(xold[i]),1.0);
+ temp=fabs(p[i])/max(fabs(xold[i]),1.0);
if (temp > test) test=temp;
}
alamin=TOLX/test;
@@ -546,7 +547,7 @@ void Optimization::lnsrch(int n, double xold[], double fold, double g[], double
alam2=alam;
f2 = *f;
fold2=fold;
- alam=FMAX(tmplam,0.1*alam);
+ alam=max(tmplam,0.1*alam);
first_time = false;
}
}
@@ -608,8 +609,8 @@ double Optimization::minimizeMultiDimen(double guess[], int ndim, double lower[]
#define ITMAX 200
-static double sqrarg;
-#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
+//static double sqrarg;
+#define SQR(a) ((a)*(a))
#define EPS 3.0e-8
#define TOLX (4*EPS)
#define STPMX 100.0
@@ -641,7 +642,7 @@ void Optimization::dfpmin(double p[], int n, double lower[], double upper[], dou
//checkBound(p, xi, lower, upper, n);
//checkDirection(p, xi);
- stpmax=STPMX*FMAX(sqrt(sum),(double)n);
+ stpmax=STPMX*max(sqrt(sum),(double)n);
for (its=1;its<=ITMAX;its++) {
*iter=its;
lnsrch(n,p,fp,g,xi,pnew,fret,stpmax,&check, lower, upper);
@@ -652,7 +653,7 @@ void Optimization::dfpmin(double p[], int n, double lower[], double upper[], dou
}
test=0.0;
for (i=1;i<=n;i++) {
- temp=fabs(xi[i])/FMAX(fabs(p[i]),1.0);
+ temp=fabs(xi[i])/max(fabs(p[i]),1.0);
if (temp > test) test=temp;
}
if (test < TOLX) {
@@ -662,9 +663,9 @@ void Optimization::dfpmin(double p[], int n, double lower[], double upper[], dou
for (i=1;i<=n;i++) dg[i]=g[i];
derivativeFunk(p,g);
test=0.0;
- den=FMAX(fabs(*fret),1.0); // fix bug found by Tung, as also suggested by NR author
+ den=max(fabs(*fret),1.0); // fix bug found by Tung, as also suggested by NR author
for (i=1;i<=n;i++) {
- temp=fabs(g[i])*FMAX(fabs(p[i]),1.0)/den;
+ temp=fabs(g[i])*max(fabs(p[i]),1.0)/den;
if (temp > test) test=temp;
}
if (test < gtol) {
@@ -703,7 +704,7 @@ void Optimization::dfpmin(double p[], int n, double lower[], double upper[], dou
//checkDirection(p, xi);
//if (*iter > 200) cout << "iteration=" << *iter << endl;
}
- // BQM: TODO disable this message!
+ // BQM: disable this message!
//nrerror("too many iterations in dfpmin");
FREEALL
}
diff --git a/pda.cpp b/pda.cpp
index 13df75c..639e22f 100644
--- a/pda.cpp
+++ b/pda.cpp
@@ -65,7 +65,12 @@
#include "timeutil.h"
//#include <unistd.h>
#include <stdlib.h>
-#include "vectorclass/vectorclass.h"
+#include "vectorclass/instrset.h"
+
+#include "MPIHelper.h"
+#ifdef _IQTREE_MPI
+#include <mpi.h>
+#endif
#ifdef _OPENMP
#include <omp.h>
@@ -200,6 +205,9 @@ inline void separator(ostream &out, int type = 0) {
void printCopyright(ostream &out) {
#ifdef IQ_TREE
out << "IQ-TREE";
+ #ifdef _IQTREE_MPI
+ out << " MPI";
+ #endif
#ifdef _OPENMP
out << " multicore";
#endif
@@ -227,7 +235,7 @@ void printCopyright(ostream &out) {
#endif
#ifdef IQ_TREE
- out << endl << "Copyright (c) 2011-2015 Nguyen Lam Tung, Olga Chernomor, Arndt von Haeseler and Bui Quang Minh." << endl << endl;
+ out << endl << "Copyright (c) 2011-2016 Nguyen Lam Tung, Olga Chernomor, Arndt von Haeseler and Bui Quang Minh." << endl << endl;
#else
out << endl << "Copyright (c) 2006-2014 Olga Chernomor, Arndt von Haeseler and Bui Quang Minh." << endl << endl;
#endif
@@ -1536,7 +1544,7 @@ void branchStats(Params ¶ms){
/***** Following added by BQM to print internal branch lengths */
NodeVector nodes1, nodes2;
- mytree.getAllInnerBranches(nodes1, nodes2);
+ mytree.generateNNIBraches(nodes1, nodes2);
output = params.out_prefix;
output += ".inlen";
try {
@@ -1727,7 +1735,7 @@ protected:
};
outstreambuf* outstreambuf::open( const char* name, ios::openmode mode) {
- if (!(Params::getInstance().suppress_output_flags & OUT_LOG)) {
+ if (!(Params::getInstance().suppress_output_flags & OUT_LOG) && MPIHelper::getInstance().isMaster()) {
fout.open(name, mode);
if (!fout.is_open()) {
cout << "Could not open " << name << " for logging" << endl;
@@ -1737,7 +1745,6 @@ outstreambuf* outstreambuf::open( const char* name, ios::openmode mode) {
}
cout_buf = cout.rdbuf();
cout.rdbuf(this);
- cerr.rdbuf(this);
return this;
}
@@ -1752,18 +1759,22 @@ outstreambuf* outstreambuf::close() {
}
int outstreambuf::overflow( int c) { // used for output buffer only
- if (verbose_mode >= VB_MIN)
+ if ((verbose_mode >= VB_MIN && MPIHelper::getInstance().isMaster()) || verbose_mode >= VB_MED)
if (cout_buf->sputc(c) == EOF) return EOF;
if (Params::getInstance().suppress_output_flags & OUT_LOG)
return c;
+ if (!MPIHelper::getInstance().isMaster())
+ return c;
if (fout_buf->sputc(c) == EOF) return EOF;
return c;
}
+
+
int outstreambuf::sync() { // used for output buffer only
- if (verbose_mode >= VB_MIN)
+ if ((verbose_mode >= VB_MIN && MPIHelper::getInstance().isMaster()) || verbose_mode >= VB_MED)
cout_buf->pubsync();
- if (Params::getInstance().suppress_output_flags & OUT_LOG)
+ if ((Params::getInstance().suppress_output_flags & OUT_LOG) || !MPIHelper::getInstance().isMaster())
return 0;
return fout_buf->pubsync();
}
@@ -1773,6 +1784,7 @@ public:
void init(streambuf *fout_buf) {
this->fout_buf = fout_buf;
cerr_buf = cerr.rdbuf();
+ cerr.rdbuf(this);
}
~errstreambuf() {
@@ -1785,7 +1797,7 @@ protected:
virtual int overflow( int c = EOF) {
if (cerr_buf->sputc(c) == EOF) return EOF;
- if (Params::getInstance().suppress_output_flags & OUT_LOG)
+ if ((Params::getInstance().suppress_output_flags & OUT_LOG))
return c;
if (fout_buf->sputc(c) == EOF) return EOF;
return c;
@@ -1801,13 +1813,14 @@ protected:
-
+/*********************************************************************************
+ * GLOBAL VARIABLES
+ *********************************************************************************/
outstreambuf _out_buf;
errstreambuf _err_buf;
string _log_file;
int _exit_wait_optn = FALSE;
-
extern "C" void startLogFile(bool append_log) {
if (append_log)
_out_buf.open(_log_file.c_str(), ios::app);
@@ -1848,6 +1861,9 @@ extern "C" void funcAbort(int signal_number)
case SIGFPE: cerr << "ERRONEOUS NUMERIC"; break;
case SIGILL: cerr << "ILLEGAL INSTRUCTION"; break;
case SIGSEGV: cerr << "SEGMENTATION FAULT"; break;
+#if !defined WIN32 && !defined _WIN32 && !defined __WIN32__
+ case SIGBUS: cerr << "BUS ERROR"; break;
+#endif
}
cerr << endl;
cerr << "*** For bug report please send to developers:" << endl << "*** Log file: " << _log_file;
@@ -2179,57 +2195,69 @@ Instruction set ID reported by vectorclass::instrset_detect
*/
int instruction_set;
-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
+#ifdef _IQTREE_MPI
+ double time_initial, time_current;
+ int n_tasks, task_id;
+ if (MPI_Init(&argc, &argv) != MPI_SUCCESS) {
+ outError("MPI initialization failed!");
+ }
+ MPI_Comm_size(MPI_COMM_WORLD, &n_tasks);
+ MPI_Comm_rank(MPI_COMM_WORLD, &task_id);
+ MPIHelper::getInstance().setNumProcesses(n_tasks);
+ MPIHelper::getInstance().setProcessID(task_id);
+ MPIHelper::getInstance().setNumTreeReceived(0);
+ MPIHelper::getInstance().setNumTreeSent(0);
+ MPIHelper::getInstance().setNumNNISearch(0);
+#endif
/*************************/
{ /* local scope */
- int found=FALSE; /* "click" found in cmd name? */
+ int found = FALSE; /* "click" found in cmd name? */
int n, dummyint;
char *tmpstr;
- int intargc;
- char **intargv;
- intargc = 0;
- intargv = NULL;
-
- for (n = strlen(argv[0]) - 5;
- (n >= 0) && !found && (argv[0][n] != '/')
- && (argv[0][n] != '\\'); n--) {
+ int intargc;
+ char **intargv;
+ intargc = 0;
+ intargv = NULL;
+
+ for (n = strlen(argv[0]) - 5;
+ (n >= 0) && !found && (argv[0][n] != '/')
+ && (argv[0][n] != '\\'); n--) {
tmpstr = &(argv[0][n]);
dummyint = 0;
- (void)sscanf(tmpstr, "click%n", &dummyint);
+ (void) sscanf(tmpstr, "click%n", &dummyint);
if (dummyint == 5) found = TRUE;
else {
dummyint = 0;
- (void)sscanf(tmpstr, "CLICK%n", &dummyint);
+ (void) sscanf(tmpstr, "CLICK%n", &dummyint);
if (dummyint == 5) found = TRUE;
else {
dummyint = 0;
- (void)sscanf(tmpstr, "Click%n", &dummyint);
+ (void) sscanf(tmpstr, "Click%n", &dummyint);
if (dummyint == 5) found = TRUE;
}
}
}
- if(found) _exit_wait_optn = TRUE;
+ if (found) _exit_wait_optn = TRUE;
if (_exit_wait_optn) { // get commandline parameters from keyboard
- getintargv(&intargc, &intargv);
+ getintargv(&intargc, &intargv);
fprintf(stdout, "\n\n");
- if(intargc > 1) { // if there were option entered, use them as argc/argv
- argc = intargc;
- argv = intargv;
- }
+ if (intargc > 1) { // if any options were entered, use them as argc/argv
+ argc = intargc;
+ argv = intargv;
+ }
}
} /* local scope */
/*************************/
- //Params params;
parseArg(argc, argv, Params::getInstance());
// 2015-12-05
Checkpoint *checkpoint = new Checkpoint;
- string filename = (string)Params::getInstance().out_prefix + ".ckp.gz";
+ string filename = (string)Params::getInstance().out_prefix +".ckp.gz";
checkpoint->setFileName(filename);
bool append_log = false;
@@ -2255,27 +2283,53 @@ int main(int argc, char *argv[])
}
}
- _log_file = Params::getInstance().out_prefix;
- _log_file += ".log";
- startLogFile(append_log);
+ // after loading, workers are not allowed to write checkpoint anymore
+ if (MPIHelper::getInstance().isWorker())
+ checkpoint->setFileName("");
+
+ _log_file = Params::getInstance().out_prefix;
+ _log_file += ".log";
+ startLogFile(append_log);
+ time_t start_time;
if (append_log) {
cout << endl << "******************************************************"
<< endl << "CHECKPOINT: Resuming analysis from " << filename << endl << endl;
}
+#ifdef _IQTREE_MPI
+ cout << "************************************************" << endl;
+ cout << "* START TREE SEARCH USING MPI WITH " << MPIHelper::getInstance().getNumProcesses() << " PROCESSES *" << endl;
+ cout << "************************************************" << endl;
+ unsigned int rndSeed;
+ if (MPIHelper::getInstance().isMaster()) {
+ rndSeed = Params::getInstance().ran_seed;
+ cout << "Random seed of master = " << rndSeed << endl;
+ }
+ // Broadcast random seed
+ MPI_Bcast(&rndSeed, 1, MPI_INT, PROC_MASTER, MPI_COMM_WORLD);
+ if (MPIHelper::getInstance().isWorker()) {
+// Params::getInstance().ran_seed = rndSeed + task_id * 100000;
+ Params::getInstance().ran_seed = rndSeed;
+// printf("Process %d: random_seed = %d\n", task_id, Params::getInstance().ran_seed);
+ }
+#endif
atexit(funcExit);
signal(SIGABRT, &funcAbort);
signal(SIGFPE, &funcAbort);
signal(SIGILL, &funcAbort);
signal(SIGSEGV, &funcAbort);
+#if !defined WIN32 && !defined _WIN32 && !defined __WIN32__
+ signal(SIGBUS, &funcAbort);
+#endif
printCopyright(cout);
+
/*
- double x=1e-100;
- double y=1e-101;
- if (x > y) cout << "ok!" << endl;
- else cout << "shit!" << endl;
- */
+ double x=1e-100;
+ double y=1e-101;
+ if (x > y) cout << "ok!" << endl;
+ else cout << "shit!" << endl;
+ */
//FILE *pfile = popen("hostname","r");
char hostname[100];
#if defined WIN32 || defined _WIN32 || defined __WIN32__
@@ -2295,10 +2349,10 @@ int main(int argc, char *argv[])
#endif
if (instruction_set < 3) outError("Your CPU does not support SSE3!");
bool has_fma3 = (instruction_set >= 7) && hasFMA3();
- bool has_fma4 = (instruction_set >= 7) && hasFMA4();
+// bool has_fma4 = (instruction_set >= 7) && hasFMA4();
#ifdef __FMA__
- bool has_fma = has_fma3 || has_fma4;
+ bool has_fma = has_fma3;
if (!has_fma) {
outError("Your CPU does not support FMA instruction, quiting now...");
}
@@ -2306,16 +2360,19 @@ int main(int argc, char *argv[])
cout << "Host: " << hostname << " (";
switch (instruction_set) {
+ case 0: cout << "80386, "; break;
+ case 1: cout << "SSE, "; break;
+ case 2: cout << "SSE2, "; break;
case 3: cout << "SSE3, "; break;
case 4: cout << "SSSE3, "; break;
case 5: cout << "SSE4.1, "; break;
case 6: cout << "SSE4.2, "; break;
case 7: cout << "AVX, "; break;
case 8: cout << "AVX2, "; break;
- default: cout << "AVX512F, "; break;
+ default: cout << "AVX512, "; break;
}
if (has_fma3) cout << "FMA3, ";
- if (has_fma4) cout << "FMA4, ";
+// if (has_fma4) cout << "FMA4, ";
//#if defined __APPLE__ || defined __MACH__
cout << (int)(((getMemorySize()/1024.0)/1024)/1024) << " GB RAM)" << endl;
//#else
@@ -2330,16 +2387,20 @@ int main(int argc, char *argv[])
checkpoint->get("iqtree.seed", Params::getInstance().ran_seed);
cout << "Seed: " << Params::getInstance().ran_seed << " ";
- init_random(Params::getInstance().ran_seed, true);
+ init_random(Params::getInstance().ran_seed + MPIHelper::getInstance().getProcessID(), true);
- time_t start_time;
time(&start_time);
cout << "Time: " << ctime(&start_time);
- if (Params::getInstance().lk_no_avx)
+ if (Params::getInstance().lk_no_avx == 1)
instruction_set = min(instruction_set, 6);
cout << "Kernel: ";
+
+ if (Params::getInstance().lk_safe_scaling) {
+ cout << "Safe ";
+ }
+
if (Params::getInstance().pll) {
#ifdef __AVX__
cout << "PLL-AVX";
@@ -2347,10 +2408,13 @@ int main(int argc, char *argv[])
cout << "PLL-SSE3";
#endif
} else {
+ bool has_fma = (has_fma3) && (instruction_set >= 7) && Params::getInstance().lk_no_avx != 2;
switch (Params::getInstance().SSE) {
case LK_EIGEN: cout << "No SSE"; break;
case LK_EIGEN_SSE:
- if (instruction_set >= 7) {
+ if (has_fma) {
+ cout << "AVX+FMA";
+ } else if (instruction_set >= 7) {
cout << "AVX";
} else {
cout << "SSE3";
@@ -2363,18 +2427,23 @@ int main(int argc, char *argv[])
}
}
-
-
#ifdef _OPENMP
- if (Params::getInstance().num_threads == 0) {
+ if (Params::getInstance().num_threads < 0) {
cout << endl << endl;
- outError("Please specify the number of cores to use (-nt option)!");
+ outError("Please specify number of cores via -nt option. Use '-nt AUTO' to automatically determine the best number of cores");
}
- if (Params::getInstance().num_threads) omp_set_num_threads(Params::getInstance().num_threads);
+ if (Params::getInstance().num_threads >= 1) {
+ omp_set_num_threads(Params::getInstance().num_threads);
+ Params::getInstance().num_threads = omp_get_max_threads();
+ }
// int max_threads = omp_get_max_threads();
- Params::getInstance().num_threads = omp_get_max_threads();
int max_procs = countPhysicalCPUCores();
- cout << " - " << Params::getInstance().num_threads << " threads (" << max_procs << " CPU cores detected)";
+ cout << " - ";
+ if (Params::getInstance().num_threads > 0)
+ cout << Params::getInstance().num_threads << " threads";
+ else
+ cout << "auto-detect";
+ cout << "(" << max_procs << " CPU cores detected)";
if (Params::getInstance().num_threads > max_procs) {
cout << endl;
outError("You have specified more threads than CPU cores available");
@@ -2386,10 +2455,12 @@ int main(int argc, char *argv[])
outError("Number of threads must be 1 for sequential version.");
}
int num_procs = countPhysicalCPUCores();
+#ifndef _IQTREE_MPI
if (num_procs > 1) {
cout << endl << endl << "NOTE: Consider using the multicore version because your CPU has " << num_procs << " cores!";
}
#endif
+#endif
//cout << "sizeof(int)=" << sizeof(int) << endl;
cout << endl << endl;
@@ -2434,6 +2505,13 @@ int main(int argc, char *argv[])
CKP_SAVE(version);
checkpoint->endStruct();
+ if (MPIHelper::getInstance().getNumProcesses() > 1) {
+ if (Params::getInstance().aln_file || Params::getInstance().partition_file) {
+ runPhyloAnalysis(Params::getInstance(), checkpoint);
+ } else {
+ outError("Please use one MPI process! The feature you wanted does not need parallelization.");
+ }
+ } else
// call the main function
if (Params::getInstance().tree_gen != NONE) {
generateRandomTree(Params::getInstance());
@@ -2529,10 +2607,14 @@ int main(int argc, char *argv[])
}
}
- delete checkpoint;
time(&start_time);
cout << "Date and Time: " << ctime(&start_time);
+ delete checkpoint;
finish_random();
+
+#ifdef _IQTREE_MPI
+ MPI_Finalize();
+#endif
return EXIT_SUCCESS;
}
diff --git a/pdtree.cpp b/pdtree.cpp
index bc92ff4..bfd9d3a 100644
--- a/pdtree.cpp
+++ b/pdtree.cpp
@@ -20,7 +20,6 @@
#include "ncl/ncl.h"
#include "tools.h"
#include "pdtree.h"
-#include "msetsblock.h"
#include "myreader.h"
/*********************************************
@@ -163,7 +162,7 @@ void PDTree::readParams(Params ¶ms) {
// now convert the weights
LeafMapName lsn;
buildLeafMapName(lsn);
- tax_weight.resize(ntaxa, 0);
+ tax_weight.resize((unsigned long) ntaxa, 0);
for (int i = 0; i < tax_name.size(); i++) {
LeafMapName::iterator nameit = lsn.find(tax_name[i]);
if (nameit == lsn.end())
@@ -224,7 +223,7 @@ void PDTree::computePD(Params ¶ms, vector<PDTaxaSet> &taxa_set, PDRelatedMea
//sets->Report(cout);
- taxa_set.resize(sets->getNSets());
+ taxa_set.resize((unsigned long) sets->getNSets());
vector<PDTaxaSet>::iterator it_ts;
TaxaSetNameVector::iterator i;
diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp
index 2eaabe6..7017887 100644
--- a/phyloanalysis.cpp
+++ b/phyloanalysis.cpp
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2009 by BUI Quang Minh *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -52,6 +54,7 @@
#include "model/modelset.h"
#include "timeutil.h"
#include "upperbounds.h"
+#include "MPIHelper.h"
void reportReferences(Params ¶ms, ofstream &out, string &original_model) {
@@ -576,9 +579,9 @@ void printOutfilesInfo(Params ¶ms, string &original_model, IQTree &tree) {
else
cout << " Tree used for model selection: " << params.out_prefix << ".treefile" << endl;
}
- if (params.snni && params.write_local_optimal_trees) {
- cout << " Locally optimal trees (" << tree.candidateTrees.getNumLocalOptTrees() << "): " << params.out_prefix << ".suboptimal_trees" << endl;
- }
+// if (params.snni && params.write_local_optimal_trees) {
+// cout << " Locally optimal trees (" << tree.candidateTrees.getNumLocalOptTrees() << "): " << params.out_prefix << ".suboptimal_trees" << endl;
+// }
}
if (!params.user_file && params.start_tree == STT_BIONJ) {
cout << " BIONJ tree: " << params.out_prefix << ".bionj"
@@ -618,14 +621,29 @@ void printOutfilesInfo(Params ¶ms, string &original_model, IQTree &tree) {
cout << " Site log-likelihoods: " << params.out_prefix << ".sitelh"
<< endl;
+ if (params.print_partition_lh)
+ cout << " Partition log-likelihoods: " << params.out_prefix << ".partlh"
+ << endl;
+
if (params.print_site_prob)
cout << " Site probability per rate/mix: " << params.out_prefix << ".siteprob"
<< endl;
+ if (params.print_ancestral_sequence) {
+ cout << " Ancestral state probabilities: " << params.out_prefix << ".ancestralprob" << endl;
+ cout << " Ancestral sequences: " << params.out_prefix << ".ancestralseq" << endl;
+ }
+
if (params.write_intermediate_trees)
cout << " All intermediate trees: " << params.out_prefix << ".treels"
<< endl;
+ if (params.writeDistImdTrees) {
+ tree.intermediateTrees.printTrees(string("ditrees"));
+ cout << " Distinct intermediate trees: " << params.out_prefix << ".ditrees" << endl;
+ cout << " Logl of intermediate trees: " << params.out_prefix << ".ditrees_lh" << endl;
+ }
+
if (params.gbo_replicates) {
cout << endl << "Ultrafast bootstrap approximation results written to:" << endl
<< " Split support values: " << params.out_prefix << ".splits.nex" << endl
@@ -641,9 +659,6 @@ void printOutfilesInfo(Params ¶ms, string &original_model, IQTree &tree) {
if (params.print_tree_lh) {
cout << " Tree log-likelihoods: " << params.out_prefix << ".treelh" << endl;
}
- if (params.print_site_lh) {
- cout << " Site log-likelihoods: " << params.out_prefix << ".sitelh" << endl;
- }
}
if (params.lmap_num_quartets >= 0) {
cout << " Likelihood mapping plot (SVG): " << params.out_prefix << ".lmap.svg" << endl;
@@ -660,7 +675,9 @@ void printOutfilesInfo(Params ¶ms, string &original_model, IQTree &tree) {
void reportPhyloAnalysis(Params ¶ms, string &original_model,
IQTree &tree, vector<ModelInfo> &model_info) {
-
+ if (!MPIHelper::getInstance().isMaster()) {
+ return;
+ }
if (params.suppress_output_flags & OUT_IQTREE) {
printOutfilesInfo(params, original_model, tree);
return;
@@ -890,12 +907,12 @@ void reportPhyloAnalysis(Params ¶ms, string &original_model,
tree.setRootNode(params.root);
if (params.gbo_replicates) {
- if (tree.boot_consense_logl > tree.candidateTrees.getBestScore() + 0.1) {
+ if (tree.boot_consense_logl > tree.getBestScore() + 0.1) {
out << endl << "**NOTE**: Consensus tree has higher likelihood than ML tree found! Please use consensus tree below." << endl;
}
}
- reportTree(out, params, tree, tree.candidateTrees.getBestScore(), tree.logl_variance, true);
+ reportTree(out, params, tree, tree.getBestScore(), tree.logl_variance, true);
if (tree.isSuperTree() && verbose_mode >= VB_MED) {
PhyloSuperTree *stree = (PhyloSuperTree*) &tree;
@@ -944,9 +961,7 @@ void reportPhyloAnalysis(Params ¶ms, string &original_model,
string con_file = params.out_prefix;
con_file += ".contree";
- IntVector rfdist;
- tree.computeRFDist(con_file.c_str(), rfdist);
- out << endl << "Robinson-Foulds distance between ML tree and consensus tree: " << rfdist[0] << endl;
+ out << endl << "Robinson-Foulds distance between ML tree and consensus tree: " << params.contree_rfdist << endl;
out << endl << "Branches with bootstrap support >"
<< floor(params.split_threshold * 1000) / 10 << "% are kept";
@@ -1301,6 +1316,8 @@ void initializeParams(Params ¶ms, IQTree &iqtree, vector<ModelInfo> &model_i
bool test_only = params.model_name.find("ONLY") != string::npos;
/* initialize substitution model */
if (params.model_name.substr(0, 4) == "TEST") {
+ if (MPIHelper::getInstance().getNumProcesses() > 1)
+ outError("Please use only 1 MPI process! We are currently working on the MPI parallelization of model selection.");
// TODO: check if necessary
// if (iqtree.isSuperTree())
// ((PhyloSuperTree*) &iqtree)->mapTrees();
@@ -1341,7 +1358,7 @@ void initializeParams(Params ¶ms, IQTree &iqtree, vector<ModelInfo> &model_i
fmodel.precision(4);
fmodel << fixed;
- params.model_name = testModel(params, &iqtree, model_info, fmodel, models_block, "", true);
+ params.model_name = testModel(params, &iqtree, model_info, fmodel, models_block, params.num_threads, "", true);
fmodel.close();
params.startCPUTime = start_cpu_time;
params.start_real_time = start_real_time;
@@ -1402,8 +1419,8 @@ void pruneTaxa(Params ¶ms, IQTree &iqtree, double *pattern_lh, NodeVector &p
iqtree.clearAllPartialLH();
iqtree.setCurScore(iqtree.optimizeAllBranches());
//cout << "Log-likelihood after reoptimizing model parameters: " << tree.curScore << endl;
- int nni_count, nni_steps;
- iqtree.setCurScore(iqtree.optimizeNNI(nni_count, nni_steps));
+// pair<int, int> nniInfo = iqtree.optimizeNNI();
+ iqtree.optimizeNNI();
cout << "Log-likelihood after optimizing partial tree: "
<< iqtree.getCurScore() << endl;
}
@@ -1420,8 +1437,8 @@ void restoreTaxa(IQTree &iqtree, double *saved_dist_mat, NodeVector &pruned_taxa
iqtree.clearAllPartialLH();
iqtree.setCurScore(iqtree.optimizeAllBranches());
//cout << "Log-likelihood after reoptimizing model parameters: " << tree.curScore << endl;
- int nni_count, nni_steps;
- iqtree.setCurScore(iqtree.optimizeNNI(nni_count, nni_steps));
+ pair<int, int> nniInfo;
+ nniInfo = iqtree.optimizeNNI();
cout << "Log-likelihood after reoptimizing full tree: " << iqtree.getCurScore() << endl;
//iqtree.setBestScore(iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, true, params.model_eps));
@@ -1512,10 +1529,23 @@ void printMiscInfo(Params ¶ms, IQTree &iqtree, double *pattern_lh) {
printSiteLhCategory(site_lh_file.c_str(), &iqtree, params.print_site_lh);
}
+ if (params.print_partition_lh && !iqtree.isSuperTree()) {
+ outWarning("-wpl does not work with non-partition model");
+ params.print_partition_lh = false;
+ }
+ if (params.print_partition_lh && !params.pll) {
+ string part_lh_file = (string)params.out_prefix + ".partlh";
+ printPartitionLh(part_lh_file.c_str(), &iqtree, pattern_lh);
+ }
+
if (params.print_site_prob && !params.pll) {
printSiteProbCategory(((string)params.out_prefix + ".siteprob").c_str(), &iqtree, params.print_site_prob);
}
+ if (params.print_ancestral_sequence) {
+ printAncestralSequences(params.out_prefix, &iqtree, params.print_ancestral_sequence);
+ }
+
if (params.print_site_state_freq != WSF_NONE) {
string site_freq_file = params.out_prefix;
site_freq_file += ".sitesf";
@@ -1587,7 +1617,7 @@ void printMiscInfo(Params ¶ms, IQTree &iqtree, double *pattern_lh) {
cout << endl << "Computing site-specific rates by "
<< rate_mvh->full_name << "..." << endl;
rate_mvh->runIterativeProc(params, iqtree);
- cout << endl << "BEST SCORE FOUND : " << iqtree.candidateTrees.getBestScore()<< endl;
+ cout << endl << "BEST SCORE FOUND : " << iqtree.getBestScore()<< endl;
string mhrate_file = params.out_prefix;
mhrate_file += ".mhrate";
iqtree.getRate()->writeSiteRates(mhrate_file.c_str());
@@ -1661,8 +1691,7 @@ void printFinalSearchInfo(Params ¶ms, IQTree &iqtree, double search_cpu_time
}
-void printSuboptimalTrees(IQTree& iqtree, Params& params, string suffix) {
- vector<string> trees = iqtree.candidateTrees.getTopTrees();
+void printTrees(vector<string> trees, Params ¶ms, string suffix) {
ofstream treesOut((string(params.out_prefix) + suffix).c_str(),
ofstream::out);
for (vector<string>::iterator it = trees.begin(); it != trees.end(); it++) {
@@ -1717,12 +1746,7 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree &iqtre
/********************** Create an initial tree **********************/
iqtree.computeInitialTree(dist_file, params.SSE);
-
- //*** FOR TUNG: This is wrong! a NULL root was already treated correctly
-// if (params.root == NULL) {
-// params.root = iqtree.aln->getSeqName(0).c_str();
-// iqtree.setRootNode(params.root);
-// }
+
iqtree.setRootNode(params.root);
/*************** SET UP PARAMETERS and model testing ****************/
@@ -1744,12 +1768,14 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree &iqtre
delete models_block;
// UpperBounds analysis. Here, to analyse the initial tree without any tree search or optimization
+ /*
if (params.upper_bound) {
iqtree.setCurScore(iqtree.computeLikelihood());
cout<<iqtree.getCurScore()<<endl;
UpperBounds(¶ms, iqtree.aln, &iqtree);
exit(0);
}
+ */
// degree of freedom
cout << endl;
@@ -1760,40 +1786,66 @@ void runTreeReconstruction(Params ¶ms, string &original_model, IQTree &iqtre
}
if (!params.pll) {
- uint64_t mem_size = iqtree.getMemoryRequired();
uint64_t total_mem = getMemorySize();
- if (mem_size >= total_mem) {
- if (params.lh_mem_save == LM_DETECT) {
- // switch to memory saving technique that reduces memory requirement to 1/3
- params.lh_mem_save = LM_PER_NODE;
- mem_size = iqtree.getMemoryRequired();
+ if (params.lh_mem_save == LM_MEM_SAVE && params.max_mem_size > total_mem)
+ params.max_mem_size = total_mem;
+
+ uint64_t mem_required = iqtree.getMemoryRequired();
+
+ if (mem_required >= total_mem*0.95 && !iqtree.isSuperTree()) {
+ // switch to memory saving mode
+ if (params.lh_mem_save != LM_MEM_SAVE) {
+ params.max_mem_size = (total_mem*0.95)/mem_required;
+ params.lh_mem_save = LM_MEM_SAVE;
+ mem_required = iqtree.getMemoryRequired();
+ cout << "NOTE: Switching to memory saving mode using " << (mem_required / 1073741824.0) << " GB ("
+ << (mem_required*100/total_mem) << "% of normal mode)" << endl;
+ cout << "NOTE: Use -mem option if you want to restrict RAM usage further" << endl;
}
+ if (mem_required >= total_mem) {
+ params.lh_mem_save = LM_MEM_SAVE;
+ params.max_mem_size = 0.0;
+ mem_required = iqtree.getMemoryRequired();
+ }
+ }
+ if (mem_required >= total_mem) {
+ cerr << "ERROR: Your RAM is below minimum requirement of " << (mem_required / 1073741824.0) << " GB RAM" << endl;
+ outError("Memory saving mode cannot work, switch to another computer!!!");
}
+
//#if defined __APPLE__ || defined __MACH__
- cout << "NOTE: " << (mem_size / 1024) / 1024 << " MB RAM is required!" << endl;
+ cout << "NOTE: " << (mem_required / 1048576) << " MB RAM (" << (mem_required / 1073741824) << " GB) is required!" << endl;
//#else
// cout << "NOTE: " << ((double) mem_size / 1000.0) / 1000 << " MB RAM is required!" << endl;
//#endif
- if (mem_size >= total_mem) {
- outError("Memory required exceeds your computer RAM size!");
- }
+ if (params.memCheck)
+ exit(0);
#ifdef BINARY32
- if (mem_size >= 2000000000) {
+ if (mem_required >= 2000000000) {
outError("Memory required exceeds 2GB limit of 32-bit executable");
}
#endif
int max_procs = countPhysicalCPUCores();
- if (mem_size * max_procs > total_mem * params.num_threads) {
- outWarning("Memory required per CPU-core (" + convertDoubleToString((double)mem_size/params.num_threads/1024/1024/1024)+
+ if (mem_required * max_procs > total_mem * iqtree.num_threads && iqtree.num_threads > 0) {
+ outWarning("Memory required per CPU-core (" + convertDoubleToString((double)mem_required/iqtree.num_threads/1024/1024/1024)+
" GB) is higher than your computer RAM per CPU-core ("+convertIntToString(total_mem/max_procs/1024/1024/1024)+
" GB), thus multiple runs may exceed RAM!");
}
}
+
+#ifdef _OPENMP
+ if (iqtree.num_threads <= 0) {
+ int bestThreads = iqtree.testNumThreads();
+ omp_set_num_threads(bestThreads);
+ params.num_threads = bestThreads;
+ }
+#endif
+
+
iqtree.initializeAllPartialLh();
- double initEpsilon = params.min_iterations == 0 ? params.modeps : (params.modeps*10);
+ double initEpsilon = params.min_iterations == 0 ? params.modelEps : (params.modelEps*10);
- string initTree;
if (iqtree.getRate()->name.find("+I+G") != string::npos) {
if (params.alpha_invar_file != NULL) { // COMPUTE TREE LIKELIHOOD BASED ON THE INPUT ALPHA AND P_INVAR VALUE
@@ -1809,12 +1861,14 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
}
// Optimize model parameters and branch lengths using ML for the initial tree
+ string initTree;
iqtree.clearAllPartialLH();
+
iqtree.getModelFactory()->restoreCheckpoint();
if (iqtree.getCheckpoint()->getBool("finishedModelInit")) {
// model optimization already done: ignore this step
if (!iqtree.candidateTrees.empty())
- iqtree.readTreeString(iqtree.candidateTrees.getTopTrees(1)[0]);
+ iqtree.readTreeString(iqtree.getBestTrees()[0]);
iqtree.setCurScore(iqtree.computeLikelihood());
initTree = iqtree.getTreeString();
cout << "CHECKPOINT: Model parameters restored, LogL: " << iqtree.getCurScore() << endl;
@@ -1838,7 +1892,9 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
cout << "Likelihood mapping needed " << getRealTime()-lkmap_time << " seconds" << endl << endl;
}
- bool finishedCandidateSet = iqtree.getCheckpoint()->getBool("finishedCandidateSet");
+ // TODO: why is this variable not used?
+ // ANSWER: moved to doTreeSearch
+// bool finishedCandidateSet = iqtree.getCheckpoint()->getBool("finishedCandidateSet");
bool finishedInitTree = iqtree.getCheckpoint()->getBool("finishedInitTree");
// now overwrite with random tree
@@ -1854,92 +1910,64 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
/****************** NOW PERFORM MAXIMUM LIKELIHOOD TREE RECONSTRUCTION ******************/
// Update best tree
- if (!finishedInitTree)
- iqtree.candidateTrees.update(initTree, iqtree.getCurScore());
-
- if (params.min_iterations > 0) {
- if (!iqtree.isBifurcating())
- outError("Tree search does not work with initial multifurcating tree. Please specify `-n 0` to avoid this.");
- cout << "--------------------------------------------------------------------" << endl;
- cout << "| INITIALIZING CANDIDATE TREE SET |" << endl;
- cout << "--------------------------------------------------------------------" << endl;
+ if (!finishedInitTree) {
+ iqtree.addTreeToCandidateSet(initTree, iqtree.getCurScore(), false, MPIHelper::getInstance().getProcessID());
+ iqtree.printResultTree();
+ iqtree.intermediateTrees.update(iqtree.getTreeString(), iqtree.getCurScore());
}
+ if (params.min_iterations && !iqtree.isBifurcating())
+ outError("Tree search does not work with initial multifurcating tree. Please specify `-n 0` to avoid this.");
+
// Compute maximum likelihood distance
// ML distance is only needed for IQP
// if ( params.start_tree != STT_BIONJ && ((params.snni && !params.iqp) || params.min_iterations == 0)) {
// params.compute_ml_dist = false;
// }
- if (params.min_iterations <= 1 && params.start_tree != STT_BIONJ)
+ if ((params.min_iterations <= 1 || params.numInitTrees <= 1) && params.start_tree != STT_BIONJ)
params.compute_ml_dist = false;
if ((params.user_file || params.start_tree == STT_RANDOM_TREE) && params.snni && !params.iqp) {
params.compute_ml_dist = false;
}
-// if ( params.user_file && params.min_iterations == 0) {
-// params.compute_ml_dist = false;
-// }
- if (!finishedInitTree && ((!params.dist_file && params.compute_ml_dist) || params.leastSquareBranch)) {
- computeMLDist(params, iqtree, dist_file, getCPUTime());
- if (!params.user_file && params.start_tree != STT_RANDOM_TREE) {
- // NEW 2015-08-10: always compute BIONJ tree into the candidate set
- iqtree.resetCurScore();
- double start_bionj = getRealTime();
- iqtree.computeBioNJ(params, iqtree.aln, dist_file);
- cout << getRealTime() - start_bionj << " seconds" << endl;
- if (iqtree.isSuperTree())
- iqtree.wrapperFixNegativeBranch(true);
- else
- iqtree.wrapperFixNegativeBranch(false);
- if (params.start_tree == STT_BIONJ) {
- initTree = iqtree.optimizeModelParameters(params.min_iterations==0, initEpsilon);
- } else {
- initTree = iqtree.optimizeBranches();
+ if (params.constraint_tree_file)
+ params.compute_ml_dist = false;
+
+ //Generate BIONJ tree
+ if (MPIHelper::getInstance().isMaster() && !iqtree.getCheckpoint()->getBool("finishedCandidateSet")) {
+ if (!finishedInitTree && ((!params.dist_file && params.compute_ml_dist) || params.leastSquareBranch)) {
+ computeMLDist(params, iqtree, dist_file, getCPUTime());
+ if (!params.user_file && params.start_tree != STT_RANDOM_TREE) {
+ // NEW 2015-08-10: always compute BIONJ tree into the candidate set
+ iqtree.resetCurScore();
+ double start_bionj = getRealTime();
+ iqtree.computeBioNJ(params, iqtree.aln, dist_file);
+ cout << getRealTime() - start_bionj << " seconds" << endl;
+ if (iqtree.isSuperTree())
+ iqtree.wrapperFixNegativeBranch(true);
+ else
+ iqtree.wrapperFixNegativeBranch(false);
+ if (params.start_tree == STT_BIONJ) {
+ initTree = iqtree.optimizeModelParameters(params.min_iterations==0, initEpsilon);
+ } else {
+ initTree = iqtree.optimizeBranches();
+ }
+ cout << "Log-likelihood of BIONJ tree: " << iqtree.getCurScore() << endl;
+ iqtree.candidateTrees.update(initTree, iqtree.getCurScore());
}
- cout << "Log-likelihood of BIONJ tree: " << iqtree.getCurScore() << endl;
- iqtree.candidateTrees.update(initTree, iqtree.getCurScore());
}
}
-
+
// iqtree.saveCheckpoint();
double cputime_search_start = getCPUTime();
double realtime_search_start = getRealTime();
- if (params.min_iterations > 0 && !finishedCandidateSet) {
- double initTime = getCPUTime();
-
-// if (!params.user_file && (params.start_tree == STT_PARSIMONY || params.start_tree == STT_PLL_PARSIMONY))
-// {
- iqtree.initCandidateTreeSet(params.numInitTrees - iqtree.candidateTrees.size(), params.numNNITrees);
- assert(iqtree.candidateTrees.size() != 0);
- cout << "Finish initializing candidate tree set. ";
- cout << "Number of distinct locally optimal trees: " << iqtree.candidateTrees.size() << endl;
- if (params.write_local_optimal_trees) {
- printSuboptimalTrees(iqtree, params, ".init_suboptimal_trees");
- }
-// }
- cout << "Current best tree score: " << iqtree.candidateTrees.getBestScore() << " / CPU time: "
- << getCPUTime() - initTime << endl;
- }
-
- if (finishedCandidateSet) {
- cout << "CHECKPOINT: Candidate tree set restored, best LogL: " << iqtree.candidateTrees.getBestScore() << endl;
- } else {
- iqtree.saveCheckpoint();
- iqtree.getCheckpoint()->putBool("finishedCandidateSet", true);
- iqtree.getCheckpoint()->dump(true);
- }
-
if (params.leastSquareNNI) {
iqtree.computeSubtreeDists();
}
- /* TUNG: what happens if params.root is not set? This is usually the case.
- * I added code to ininialize the root above.
- */
- //iqtree.setRootNode(params.root); // Important for NNI below
-
+
if (original_model == "WHTEST") {
cout << endl << "Testing model homogeneity by Weiss & von Haeseler (2003)..." << endl;
WHTest(params, iqtree);
@@ -1955,8 +1983,8 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
// prune stable taxa
pruneTaxa(params, iqtree, pattern_lh, pruned_taxa, linked_name);
- if (params.min_iterations > 1) {
- iqtree.readTreeString(iqtree.candidateTrees.getTopTrees()[0]);
+ /***************************************** DO STOCHASTIC TREE SEARCH *******************************************/
+ if (params.min_iterations > 0 && !params.tree_spr) {
iqtree.doTreeSearch();
iqtree.setAlignment(iqtree.aln);
cout << "TREE SEARCH COMPLETED AFTER " << iqtree.stop_rule.getCurIt() << " ITERATIONS"
@@ -1986,14 +2014,19 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
// if (iqtree.isSuperTree())
// ((PhyloSuperTree*) &iqtree)->mapTrees();
+ if (!MPIHelper::getInstance().isMaster()) {
+ delete[] pattern_lh;
+ return;
+ }
+
if (params.snni && params.min_iterations && verbose_mode >= VB_MED) {
- cout << "Log-likelihoods of best " << params.popSize << " trees: " << endl;
- iqtree.printBestScores(params.popSize);
+ cout << "Log-likelihoods of " << params.popSize << " best candidate trees: " << endl;
+ iqtree.printBestScores();
cout << endl;
}
if (params.min_iterations) {
- iqtree.readTreeString(iqtree.candidateTrees.getBestTrees()[0]);
+ iqtree.readTreeString(iqtree.getBestTrees()[0]);
iqtree.initializeAllPartialLh();
iqtree.clearAllPartialLH();
cout << "--------------------------------------------------------------------" << endl;
@@ -2006,8 +2039,12 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
} else {
cout << "Performs final model parameters optimization" << endl;
string tree;
+ Params::getInstance().fixStableSplits = false;
+ Params::getInstance().tabu = false;
+ // why doing NNI search here?
+// iqtree.doNNISearch();
tree = iqtree.optimizeModelParameters(true);
- iqtree.candidateTrees.update(tree, iqtree.getCurScore(), true);
+ iqtree.addTreeToCandidateSet(tree, iqtree.getCurScore(), false, MPIHelper::getInstance().getProcessID());
iqtree.getCheckpoint()->putBool("finishedModelFinal", true);
iqtree.saveCheckpoint();
}
@@ -2019,8 +2056,8 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
cout << "BEST SCORE FOUND : " << iqtree.getCurScore() << endl;
- if (params.write_local_optimal_trees) {
- printSuboptimalTrees(iqtree, params, ".suboptimal_trees");
+ if (params.write_candidate_trees) {
+ printTrees(iqtree.getBestTrees(), params, ".imd_trees");
}
if (params.pll)
@@ -2077,17 +2114,17 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
// BUG FIX: readTreeString(bestTreeString) not needed before this line
iqtree.printResultTree();
- if(params.upper_bound_NNI){
- string out_file_UB = params.out_prefix;
- out_file_UB += ".UB.NNI.main";
- ofstream out_UB;
- out_UB.exceptions(ios::failbit | ios::badbit);
- out_UB.open((char*)out_file_UB.c_str(),std::ofstream::out | std::ofstream::app);
- out_UB<<iqtree.leafNum<<"\t"<<iqtree.aln->getNSite()<<"\t"<<iqtree.params->upper_bound_frac<<"\t"
- <<iqtree.skippedNNIub<<"\t"<< iqtree.totalNNIub<<"\t"<<iqtree.candidateTrees.getBestScore() <<endl;
- //iqtree.minUB << "\t" << iqtree.meanUB/iqtree.skippedNNIub << "\t" << iqtree.maxUB << endl;
- out_UB.close();
- }
+ if (params.upper_bound_NNI) {
+ string out_file_UB = params.out_prefix;
+ out_file_UB += ".UB.NNI.main";
+ ofstream out_UB;
+ out_UB.exceptions(ios::failbit | ios::badbit);
+ out_UB.open((char *) out_file_UB.c_str(), std::ofstream::out | std::ofstream::app);
+ out_UB << iqtree.leafNum << "\t" << iqtree.aln->getNSite() << "\t" << iqtree.params->upper_bound_frac << "\t"
+ << iqtree.skippedNNIub << "\t" << iqtree.totalNNIub << "\t" << iqtree.getBestScore() << endl;
+ //iqtree.minUB << "\t" << iqtree.meanUB/iqtree.skippedNNIub << "\t" << iqtree.maxUB << endl;
+ out_UB.close();
+ }
if (params.out_file)
iqtree.printTree(params.out_file);
@@ -2152,7 +2189,7 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) {
if (Params::getInstance().randomAlpha) {
while (initAlphas.size() < 10) {
double initAlpha = random_double();
- initAlphas.push_back(initAlpha + MIN_GAMMA_SHAPE*2);
+ initAlphas.push_back(initAlpha + iqtree.params->min_gamma_shape*2);
}
} else {
initAlphas.assign(values, values+10);
@@ -2326,6 +2363,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
}
double start_time = getCPUTime();
+ double start_real_time = getRealTime();
@@ -2351,7 +2389,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
finish_random();
randstream = saved_randstream;
- if (params.print_tree_lh) {
+ if (params.print_tree_lh && MPIHelper::getInstance().isMaster()) {
double prob;
bootstrap_alignment->multinomialProb(*alignment, prob);
ofstream boot_lh;
@@ -2371,7 +2409,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
}
} else
boot_tree = new IQTree(bootstrap_alignment);
- if (params.print_bootaln)
+ if (params.print_bootaln && MPIHelper::getInstance().isMaster())
bootstrap_alignment->printPhylip(bootaln_name.c_str(), true);
// set checkpoint
@@ -2392,6 +2430,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
// outError(ERR_READ_INPUT, treefile_name);
// }
// write the tree into .boottrees file
+ if (MPIHelper::getInstance().isMaster())
try {
ofstream tree_out;
tree_out.exceptions(ios::failbit | ios::badbit);
@@ -2429,7 +2468,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
}
- if (params.consensus_type == CT_CONSENSUS_TREE) {
+ if (params.consensus_type == CT_CONSENSUS_TREE && MPIHelper::getInstance().isMaster()) {
cout << endl << "===> COMPUTE CONSENSUS TREE FROM "
<< params.num_bootstrap_samples << " BOOTSTRAP TREES" << endl << endl;
@@ -2447,6 +2486,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
runTreeReconstruction(params, original_model, *tree, *model_info);
+ if (MPIHelper::getInstance().isMaster()) {
cout << endl << "===> ASSIGN BOOTSTRAP SUPPORTS TO THE TREE FROM ORIGINAL ALIGNMENT" << endl << endl;
MExtTree ext_tree;
assignBootstrapSupport(boottrees_name.c_str(), 0, 1e6,
@@ -2454,7 +2494,8 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
params.out_prefix, ext_tree, NULL, &params);
tree->copyTree(&ext_tree);
reportPhyloAnalysis(params, original_model, *tree, *model_info);
- } else if (params.consensus_type == CT_CONSENSUS_TREE) {
+ }
+ } else if (params.consensus_type == CT_CONSENSUS_TREE && MPIHelper::getInstance().isMaster()) {
int mi = params.min_iterations;
STOP_CONDITION sc = params.stop_condition;
params.min_iterations = 0;
@@ -2467,7 +2508,9 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
} else
cout << endl;
- cout << "Total CPU time for bootstrap: " << (getCPUTime() - start_time) << " seconds." << endl << endl;
+ if (MPIHelper::getInstance().isMaster()) {
+ cout << "Total CPU time for bootstrap: " << (getCPUTime() - start_time) << " seconds." << endl;
+ cout << "Total wall-clock time for bootstrap: " << (getRealTime() - start_real_time) << " seconds." << endl << endl;
cout << "Non-parametric bootstrap results written to:" << endl;
if (params.print_bootaln)
cout << " Bootstrap alignments: " << params.out_prefix << ".bootaln" << endl;
@@ -2475,7 +2518,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
if (params.consensus_type == CT_CONSENSUS_TREE)
cout << " Consensus tree: " << params.out_prefix << ".contree" << endl;
cout << endl;
-
+ }
delete model_info;
}
@@ -2535,7 +2578,7 @@ void computeSiteFrequencyModel(Params &params, Alignment *alignment) {
delete models_block;
tree->setModel(tree->getModelFactory()->model);
tree->setRate(tree->getModelFactory()->site_rate);
- tree->setLikelihoodKernel(params.SSE);
+ tree->setLikelihoodKernel(params.SSE, params.num_threads);
if (!tree->getModel()->isMixture())
outError("No mixture model was specified!");
@@ -2551,8 +2594,15 @@ void computeSiteFrequencyModel(Params &params, Alignment *alignment) {
}
#endif
+#ifdef _OPENMP
+ if (tree->num_threads <= 0) {
+ int bestThreads = tree->testNumThreads();
+ omp_set_num_threads(bestThreads);
+ }
+#endif
+
tree->initializeAllPartialLh();
- tree->getModelFactory()->optimizeParameters(params.fixed_branch_length, true, params.modeps);
+ tree->getModelFactory()->optimizeParameters(params.fixed_branch_length, true, params.modelEps);
size_t nptn = alignment->getNPattern(), nstates = alignment->num_states;
double *ptn_state_freq = new double[nptn*nstates];
@@ -2580,7 +2630,7 @@ void computeSiteFrequencyModel(Params &params, Alignment *alignment) {
void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
Alignment *alignment;
IQTree *tree;
-
+
checkpoint->putBool("finished", false);
checkpoint->setDumpInterval(params.checkpoint_dump_interval);
@@ -2647,6 +2697,16 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
alignment->concatenateAlignment(&aln);
}
+ if (params.constraint_tree_file) {
+ cout << "Reading constraint tree " << params.constraint_tree_file << "..." << endl;
+ tree->constraintTree.initConstraint(params.constraint_tree_file, alignment->getSeqNames());
+ if (params.start_tree == STT_PLL_PARSIMONY)
+ params.start_tree = STT_PARSIMONY;
+ else if (params.start_tree == STT_BIONJ)
+ outError("Constraint tree does not work with -t BIONJ");
+
+ }
+
if (params.compute_seq_identity_along_tree) {
if (!params.user_file)
outError("Please supply a user tree file!");
@@ -2679,18 +2739,31 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
// runBootLhTest(params, alignment, *tree);
outError("Obsolete feature");
} else if (params.num_bootstrap_samples == 0) {
- // the main Maximum likelihood tree reconstruction
+ /********************************************************************************
+ THE MAIN MAXIMUM LIKELIHOOD TREE RECONSTRUCTION
+ ********************************************************************************/
vector<ModelInfo> *model_info = new vector<ModelInfo>;
alignment->checkGappySeq(params.remove_empty_seq);
// remove identical sequences
if (params.ignore_identical_seqs) {
tree->removeIdenticalSeqs(params);
+ if (tree->removed_seqs.size() > 0 && MPIHelper::getInstance().isMaster() && (params.suppress_output_flags & OUT_UNIQUESEQ) == 0) {
+ string filename = (string)params.out_prefix + ".uniqueseq.phy";
+ if (tree->isSuperTree())
+ ((SuperAlignment*)tree->aln)->printCombinedAlignment(filename.c_str());
+ else
+ tree->aln->printPhylip(filename.c_str());
+ cout << endl << "For your convenience alignment with unique sequences printed to " << filename << endl;
+ }
}
alignment = NULL; // from now on use tree->aln instead
// call main tree reconstruction
runTreeReconstruction(params, original_model, *tree, *model_info);
+
+ if (MPIHelper::getInstance().isMaster()) {
+
if (params.gbo_replicates && params.online_bootstrap) {
if (params.print_ufboot_trees)
tree->writeUFBootTrees(params);
@@ -2704,15 +2777,12 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
string current_tree = tree->getTreeString();
splitsfile = params.out_prefix;
splitsfile += ".contree";
+
+ IntVector rfdist;
+ tree->computeRFDist(splitsfile.c_str(), rfdist);
+ params.contree_rfdist = rfdist[0];
+
tree->readTreeFile(splitsfile);
- // bug fix
-// if ((tree->sse == LK_EIGEN || tree->sse == LK_EIGEN_SSE) && !tree->isBifurcating()) {
-// cout << "NOTE: Changing to old kernel as consensus tree is multifurcating" << endl;
-// if (tree->sse == LK_EIGEN)
-// tree->changeLikelihoodKernel(LK_NORMAL);
-// else
-// tree->changeLikelihoodKernel(LK_SSE);
-// }
tree->initializeAllPartialLh();
tree->fixNegativeBranch(true);
@@ -2725,13 +2795,20 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
// revert the best tree
tree->readTreeString(current_tree);
}
+ if (Params::getInstance().writeDistImdTrees) {
+ cout << endl;
+ cout << "Recomputing the log-likelihood of the intermediate trees ... " << endl;
+ tree->intermediateTrees.recomputeLoglOfAllTrees(*tree);
+ }
+ reportPhyloAnalysis(params, original_model, *tree, *model_info);
+ }
+
// reinsert identical sequences
if (tree->removed_seqs.size() > 0) {
// BUG FIX: dont use reinsertIdenticalSeqs anymore
tree->insertTaxa(tree->removed_seqs, tree->twin_seqs);
tree->printResultTree();
}
- reportPhyloAnalysis(params, original_model, *tree, *model_info);
delete model_info;
} else {
// the classical non-parameter bootstrap (SBS)
diff --git a/phyloanalysis.h b/phyloanalysis.h
index afcfede..2fd64c8 100644
--- a/phyloanalysis.h
+++ b/phyloanalysis.h
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2009 by BUI Quang Minh *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
diff --git a/phylokernel.h b/phylokernel.h
index e0b9f7c..8c9ab66 100644
--- a/phylokernel.h
+++ b/phylokernel.h
@@ -9,9 +9,11 @@
#define PHYLOKERNEL_H_
#include "phylotree.h"
-#include "vectorclass/vectorclass.h"
-#include "vectorclass/vectormath_exp.h"
+//#include "vectorclass/vectorclass.h"
+//#include "vectorclass/vectormath_exp.h"
+#include "superalignment.h"
+#ifdef __SSE2__
inline Vec2d horizontal_add(Vec2d x[2]) {
#if INSTRSET >= 3 // SSE3
return _mm_hadd_pd(x[0],x[1]);
@@ -20,7 +22,7 @@ inline Vec2d horizontal_add(Vec2d x[2]) {
Vec2d help1 = _mm_shuffle_pd(x[0], x[1], _MM_SHUFFLE2(1,1));
return _mm_add_pd(help0, help1);
#else
-#error "You must compile with SSE3 enabled!"
+#error "You must compile with SSE2 enabled!"
#endif
}
@@ -29,6 +31,7 @@ inline double horizontal_max(Vec2d const &a) {
a.store(x);
return max(x[0],x[1]);
}
+#endif
#ifdef __AVX__
@@ -56,10 +59,10 @@ inline double horizontal_max(Vec4d const &a) {
#endif // __AVX__
-template <class Numeric, class VectorClass, const int VCSIZE>
+template <class Numeric, class VectorClass>
Numeric PhyloTree::dotProductSIMD(Numeric *x, Numeric *y, int size) {
VectorClass res = VectorClass().load_a(x) * VectorClass().load_a(y);
- for (int i = VCSIZE; i < size; i += VCSIZE)
+ for (int i = VectorClass::size(); i < size; i += VectorClass::size())
res = mul_add(VectorClass().load_a(&x[i]), VectorClass().load_a(&y[i]), res);
return horizontal_add(res);
}
@@ -70,16 +73,10 @@ Numeric PhyloTree::dotProductSIMD(Numeric *x, Numeric *y, int size) {
*
*************************************************************************************************/
-
+/*
template <class VectorClass, const int VCSIZE, const int nstates>
void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad) {
- if (dad_branch->node->degree() > 3) {
- // TODO: SIMD version for multifurcating node
- computePartialLikelihoodEigen(dad_branch, dad);
- return;
- }
-
// don't recompute the likelihood
assert(dad);
if (dad_branch->partial_lh_computed & 1)
@@ -101,33 +98,38 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
}
size_t ptn, c;
- size_t orig_ntn = aln->size();
+ size_t orig_nptn = aln->size();
size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
assert(nstates == aln->num_states && nstates >= VCSIZE && VCSIZE == VectorClass().size());
assert(model->isReversible()); // only works with reversible model!
const size_t nstatesqr=nstates*nstates;
size_t i, x, j;
- size_t block = nstates * ncat;
+ size_t block = nstates * ncat_mix;
+ size_t tip_block = nstates * model->getNMixtures();
+
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = m*nstatesqr;
+ }
// internal node
- assert(node->degree() == 3); // it works only for strictly bifurcating tree
+ dad_branch->lh_scale_factor = 0.0;
PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
+ int num_leaves = 0;
FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *nei = (PhyloNeighbor*)*it;
if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
+ if ((nei->partial_lh_computed & 1) == 0)
+ computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(nei, node);
+ dad_branch->lh_scale_factor += nei->lh_scale_factor;
+ if ((*it)->node->isLeaf()) num_leaves++;
}
- if (!left->node->isLeaf() && right->node->isLeaf()) {
- // swap left and right
- PhyloNeighbor *tmp = left;
- left = right;
- right = tmp;
- }
- if ((left->partial_lh_computed & 1) == 0)
- computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(left, node);
- if ((right->partial_lh_computed & 1) == 0)
- computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(right, node);
-
if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
// re-orient partial_lh
bool done = false;
@@ -149,103 +151,209 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
double *evec = model->getEigenvectors();
double *inv_evec = model->getInverseEigenvectors();
- VectorClass vc_inv_evec[nstates*nstates/VCSIZE];
assert(inv_evec && evec);
- for (i = 0; i < nstates; i++) {
- for (x = 0; x < nstates/VCSIZE; x++)
- // inv_evec is not aligned!
- vc_inv_evec[i*nstates/VCSIZE+x].load_a(&inv_evec[i*nstates+x*VCSIZE]);
- }
+// for (i = 0; i < tip_block; i++) {
+// for (x = 0; x < nstates/VCSIZE; x++)
+// // inv_evec is not aligned!
+// vc_inv_evec[i*nstates/VCSIZE+x].load_a(&inv_evec[i*nstates+x*VCSIZE]);
+// }
double *eval = model->getEigenvalues();
- dad_branch->lh_scale_factor = left->lh_scale_factor + right->lh_scale_factor;
- VectorClass *eleft = (VectorClass*)aligned_alloc<double>(block*nstates);
- VectorClass *eright = (VectorClass*)aligned_alloc<double>(block*nstates);
+ VectorClass *echildren = aligned_alloc<VectorClass>(block*nstates/VCSIZE*(node->degree()-1));
+ double *partial_lh_leaves = NULL;
+ if (num_leaves > 0)
+ partial_lh_leaves = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block*num_leaves);
+ VectorClass *echild = echildren;
+ double *partial_lh_leaf = partial_lh_leaves;
+
+
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ VectorClass expchild[nstates/VCSIZE];
+ PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ VectorClass *echild_ptr = echild;
+ // precompute information buffer
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass len_child = site_rate->getRate(c%ncat) * child->length;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ double *evec_ptr = evec + mix_addr[c];
+ for (i = 0; i < nstates/VCSIZE; i++) {
+ // eval is not aligned!
+ expchild[i] = exp(VectorClass().load_a(&eval_ptr[i*VCSIZE]) * len_child);
+ }
+ for (x = 0; x < nstates; x++) {
+ for (i = 0; i < nstates/VCSIZE; i++) {
+ // evec is not be aligned!
+ echild_ptr[i] = (VectorClass().load_a(&evec_ptr[x*nstates+i*VCSIZE]) * expchild[i]);
+ }
+ echild_ptr += nstates/VCSIZE;
+ }
+ }
- // precompute information buffer
- for (c = 0; c < ncat; c++) {
- VectorClass vc_evec;
- VectorClass expleft[nstates/VCSIZE];
- VectorClass expright[nstates/VCSIZE];
- double len_left = site_rate->getRate(c) * left->length;
- double len_right = site_rate->getRate(c) * right->length;
- for (i = 0; i < nstates/VCSIZE; i++) {
- // eval is not aligned!
- expleft[i] = exp(VectorClass().load_a(&eval[i*VCSIZE]) * VectorClass(len_left));
- expright[i] = exp(VectorClass().load_a(&eval[i*VCSIZE]) * VectorClass(len_right));
- }
- for (x = 0; x < nstates; x++)
- for (i = 0; i < nstates/VCSIZE; i++) {
- // evec is not be aligned!
- vc_evec.load_a(&evec[x*nstates+i*VCSIZE]);
- eleft[c*nstatesqr/VCSIZE+x*nstates/VCSIZE+i] = (vc_evec * expleft[i]);
- eright[c*nstatesqr/VCSIZE+x*nstates/VCSIZE+i] = (vc_evec * expright[i]);
- }
+ // pre compute information for tip
+ if (child->node->isLeaf()) {
+ vector<int>::iterator it;
+ for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
+ int state = (*it);
+ double *this_partial_lh_leaf = partial_lh_leaf + state*block;
+ VectorClass *echild_ptr = echild;
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass *this_tip_partial_lh = (VectorClass*)(tip_partial_lh + state*tip_block + mix_addr_nstates[c]);
+ for (x = 0; x < nstates; x++) {
+ VectorClass vchild = 0.0;
+ for (i = 0; i < nstates/VCSIZE; i++) {
+ vchild += echild_ptr[i] * this_tip_partial_lh[i];
+ }
+ this_partial_lh_leaf[x] = horizontal_add(vchild);
+ echild_ptr += nstates/VCSIZE;
+ }
+ this_partial_lh_leaf += nstates;
+ }
+ }
+ size_t addr = aln->STATE_UNKNOWN * block;
+ for (x = 0; x < block; x++) {
+ partial_lh_leaf[addr+x] = 1.0;
+ }
+ partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+ }
+ echild += block*nstates/VCSIZE;
+ }
+
+ VectorClass *eleft = echildren, *eright = echildren + block*nstates/VCSIZE;
+
+ if (!left->node->isLeaf() && right->node->isLeaf()) {
+ PhyloNeighbor *tmp = left;
+ left = right;
+ right = tmp;
+ VectorClass *etmp = eleft;
+ eleft = eright;
+ eright = etmp;
}
+
+
+ if (node->degree() > 3) {
- if (left->node->isLeaf() && right->node->isLeaf()) {
+ //--------------------- multifurcating node ------------------//
+ double sum_scale = 0.0;
+ // now for-loop computing partial_lh over all site-patterns
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
+#endif
+ for (ptn = 0; ptn < nptn; ptn++) {
+ double partial_lh_all[block];
+ for (i = 0; i < block; i++)
+ partial_lh_all[i] = 1.0;
+ dad_branch->scale_num[ptn] = 0;
+
+ double *partial_lh_leaf = partial_lh_leaves;
+ double *echild = (double*)echildren;
+
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ if (child->node->isLeaf()) {
+ // external node
+ int state_child = (ptn < orig_nptn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
+ double *child_lh = partial_lh_leaf + state_child*block;
+ for (c = 0; c < block; c++) {
+ // compute real partial likelihood vector
+ partial_lh_all[c] *= child_lh[c];
+ }
+ partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+ } else {
+ // internal node
+ double *partial_lh = partial_lh_all;
+ double *partial_lh_child = child->partial_lh + ptn*block;
+ dad_branch->scale_num[ptn] += child->scale_num[ptn];
+
+ double *echild_ptr = echild;
+ for (c = 0; c < ncat_mix; c++) {
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ double vchild = 0.0;
+// double *echild_ptr = echild + (c*nstatesqr+x*nstates);
+ for (i = 0; i < nstates; i++) {
+ vchild += echild_ptr[i] * partial_lh_child[i];
+ }
+ echild_ptr += nstates;
+ partial_lh[x] *= vchild;
+ }
+ partial_lh += nstates;
+ partial_lh_child += nstates;
+ }
+ } // if
+ echild += block*nstates;
+ } // FOR_NEIGHBOR
+
+
+ // compute dot-product with inv_eigenvector
+ double lh_max = 0.0;
+ double *partial_lh_tmp = partial_lh_all;
+ double *partial_lh = dad_branch->partial_lh + ptn*block;
+ for (c = 0; c < ncat_mix; c++) {
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
+ for (i = 0; i < nstates; i++) {
+ double res = 0.0;
+ for (x = 0; x < nstates; x++) {
+ res += partial_lh_tmp[x]*inv_evec_ptr[x];
+ }
+ inv_evec_ptr += nstates;
+ partial_lh[i] = res;
+ lh_max = max(lh_max, fabs(res));
+ }
+ partial_lh += nstates;
+ partial_lh_tmp += nstates;
+ }
+ // check if one should scale partial likelihoods
+ if (lh_max < SCALING_THRESHOLD) {
+ partial_lh = dad_branch->partial_lh + ptn*block;
+ if (lh_max == 0.0) {
+ // for very shitty data
+ for (c = 0; c < ncat_mix; c++)
+ memcpy(&partial_lh[c*nstates], &tip_partial_lh[aln->STATE_UNKNOWN*nstates], nstates*sizeof(double));
+ sum_scale += LOG_SCALING_THRESHOLD* 4 * ptn_freq[ptn];
+ //sum_scale += log(lh_max) * ptn_freq[ptn];
+ dad_branch->scale_num[ptn] += 4;
+ int nsite = aln->getNSite();
+ for (i = 0, x = 0; i < nsite && x < ptn_freq[ptn]; i++)
+ if (aln->getPatternID(i) == ptn) {
+ outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
+ x++;
+ }
+ } else if (ptn_invar[ptn] == 0.0) {
+ // now do the likelihood scaling
+ for (i = 0; i < block; i++) {
+ partial_lh[i] *= SCALING_THRESHOLD_INVER;
+ //partial_lh[i] /= lh_max;
+ }
+ // unobserved const pattern will never have underflow
+ sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
+ //sum_scale += log(lh_max) * ptn_freq[ptn];
+ dad_branch->scale_num[ptn] += 1;
+ }
+ }
+
+ } // for ptn
+ dad_branch->lh_scale_factor += sum_scale;
+
+ // end multifurcating treatment
+ } else if (left->node->isLeaf() && right->node->isLeaf()) {
// special treatment for TIP-TIP (cherry) case
// pre compute information for both tips
- double *partial_lh_left = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
- double *partial_lh_right = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
-
- vector<int>::iterator it;
- for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
- int state = (*it);
- VectorClass vc_partial_lh_tmp[nstates/VCSIZE];
- VectorClass vleft[VCSIZE];
- size_t addr = state*nstates;
- for (i = 0; i < nstates/VCSIZE; i++)
- vc_partial_lh_tmp[i].load_a(&tip_partial_lh[addr+i*VCSIZE]);
- for (x = 0; x < block; x+=VCSIZE) {
- addr = x*nstates/VCSIZE;
- for (j = 0; j < VCSIZE; j++)
- vleft[j] = eleft[addr+j*nstates/VCSIZE] * vc_partial_lh_tmp[0];
- for (i = 1; i < nstates/VCSIZE; i++) {
- for (j = 0; j < VCSIZE; j++)
- vleft[j] = mul_add(eleft[addr+j*nstates/VCSIZE+i], vc_partial_lh_tmp[i], vleft[j]);
- }
- horizontal_add(vleft).store_a(&partial_lh_left[state*block+x]);
- }
- }
-
- for (it = aln->seq_states[right->node->id].begin(); it != aln->seq_states[right->node->id].end(); it++) {
- int state = (*it);
- VectorClass vright[VCSIZE];
- VectorClass vc_partial_lh_tmp[nstates/VCSIZE];
-
- for (i = 0; i < nstates/VCSIZE; i++)
- vc_partial_lh_tmp[i].load_a(&tip_partial_lh[state*nstates+i*VCSIZE]);
- for (x = 0; x < block; x+=VCSIZE) {
- for (j = 0; j < VCSIZE; j++)
- vright[j] = eright[(x+j)*nstates/VCSIZE] * vc_partial_lh_tmp[0];
- for (i = 1; i < nstates/VCSIZE; i++) {
- for (j = 0; j < VCSIZE; j++)
- vright[j] = mul_add(eright[(x+j)*nstates/VCSIZE+i], vc_partial_lh_tmp[i], vright[j]);
- }
- horizontal_add(vright).store_a(&partial_lh_right[state*block+x]);
- }
- }
-
- size_t addr_unknown = aln->STATE_UNKNOWN * block;
- for (x = 0; x < block; x++) {
- partial_lh_left[addr_unknown+x] = 1.0;
- partial_lh_right[addr_unknown+x] = 1.0;
- }
+ double *partial_lh_left = partial_lh_leaves;
+ double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
// assign pointers for left and right partial_lh
- double **lh_left_ptr = aligned_alloc<double*>(nptn);
- double **lh_right_ptr = aligned_alloc<double*>(nptn);
- for (ptn = 0; ptn < orig_ntn; ptn++) {
- lh_left_ptr[ptn] = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
- lh_right_ptr[ptn] = &partial_lh_right[block * (aln->at(ptn))[right->node->id]];
- }
- for (ptn = orig_ntn; ptn < nptn; ptn++) {
- lh_left_ptr[ptn] = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
- lh_right_ptr[ptn] = &partial_lh_right[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
- }
+// double **lh_left_ptr = aligned_alloc<double*>(nptn);
+// double **lh_right_ptr = aligned_alloc<double*>(nptn);
+// for (ptn = 0; ptn < orig_ntn; ptn++) {
+// lh_left_ptr[ptn] = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
+// lh_right_ptr[ptn] = &partial_lh_right[block * (aln->at(ptn))[right->node->id]];
+// }
+// for (ptn = orig_ntn; ptn < nptn; ptn++) {
+// lh_left_ptr[ptn] = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
+// lh_right_ptr[ptn] = &partial_lh_right[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
+// }
// scale number must be ZERO
memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
@@ -258,9 +366,17 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
for (ptn = 0; ptn < nptn; ptn++) {
double *partial_lh = dad_branch->partial_lh + ptn*block;
- double *lh_left = lh_left_ptr[ptn];
- double *lh_right = lh_right_ptr[ptn];
- for (c = 0; c < ncat; c++) {
+ double *lh_left;
+ double *lh_right;
+ if (ptn < orig_nptn) {
+ lh_left = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
+ lh_right = &partial_lh_right[block * (aln->at(ptn))[right->node->id]];
+ } else {
+ lh_left = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_nptn]];
+ lh_right = &partial_lh_right[block * model_factory->unobserved_ptns[ptn-orig_nptn]];
+ }
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass *vc_inv_evec_ptr = (VectorClass*)(inv_evec + mix_addr[c]);
// compute real partial likelihood vector
for (x = 0; x < nstates/VCSIZE; x++) {
@@ -269,11 +385,11 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
// compute dot-product with inv_eigenvector
for (i = 0; i < nstates; i+=VCSIZE) {
for (j = 0; j < VCSIZE; j++) {
- res[j] = vc_partial_lh_tmp[0] * vc_inv_evec[(i+j)*nstates/VCSIZE];
+ res[j] = vc_partial_lh_tmp[0] * vc_inv_evec_ptr[(i+j)*nstates/VCSIZE];
}
for (x = 1; x < nstates/VCSIZE; x++)
for (j = 0; j < VCSIZE; j++) {
- res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec[(i+j)*nstates/VCSIZE+x], res[j]);
+ res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec_ptr[(i+j)*nstates/VCSIZE+x], res[j]);
}
horizontal_add(res).store_a(&partial_lh[i]);
}
@@ -284,52 +400,26 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
}
}
- aligned_free(lh_left_ptr);
- aligned_free(lh_right_ptr);
- aligned_free(partial_lh_right);
- aligned_free(partial_lh_left);
+ //aligned_free(lh_right_ptr);
+ //aligned_free(lh_left_ptr);
} else if (left->node->isLeaf() && !right->node->isLeaf()) {
// special treatment to TIP-INTERNAL NODE case
// only take scale_num from the right subtree
memcpy(dad_branch->scale_num, right->scale_num, nptn * sizeof(UBYTE));
// pre compute information for left tip
- double *partial_lh_left = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
+ double *partial_lh_left = partial_lh_leaves;
- vector<int>::iterator it;
- for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
- int state = (*it);
- VectorClass vc_tip_lh[nstates/VCSIZE];
- VectorClass vleft[VCSIZE];
- for (i = 0; i < nstates/VCSIZE; i++)
- vc_tip_lh[i].load_a(&tip_partial_lh[state*nstates+i*VCSIZE]);
- for (x = 0; x < block; x+=VCSIZE) {
- for (j = 0; j < VCSIZE; j++)
- vleft[j] = eleft[(x+j)*nstates/VCSIZE] * vc_tip_lh[0];
- for (i = 1; i < nstates/VCSIZE; i++) {
- for (j = 0; j < VCSIZE; j++)
- vleft[j] = mul_add(eleft[(x+j)*nstates/VCSIZE+i], vc_tip_lh[i], vleft[j]);
- }
- horizontal_add(vleft).store_a(&partial_lh_left[state*block+x]);
- }
- }
-
- size_t addr_unknown = aln->STATE_UNKNOWN * block;
- for (x = 0; x < block; x++) {
- partial_lh_left[addr_unknown+x] = 1.0;
- }
-
// assign pointers for partial_lh_left
- double **lh_left_ptr = aligned_alloc<double*>(nptn);
- for (ptn = 0; ptn < orig_ntn; ptn++) {
- lh_left_ptr[ptn] = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
- }
- for (ptn = orig_ntn; ptn < nptn; ptn++) {
- lh_left_ptr[ptn] = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
- }
-
- double sum_scale = 0.0;
+// double **lh_left_ptr = aligned_alloc<double*>(nptn);
+// for (ptn = 0; ptn < orig_ntn; ptn++) {
+// lh_left_ptr[ptn] = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
+// }
+// for (ptn = orig_ntn; ptn < nptn; ptn++) {
+// lh_left_ptr[ptn] = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
+// }
+ double sum_scale = 0.0;
VectorClass vc_lh_right[nstates/VCSIZE];
VectorClass vc_partial_lh_tmp[nstates/VCSIZE];
VectorClass res[VCSIZE];
@@ -343,9 +433,15 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
double *partial_lh = dad_branch->partial_lh + ptn*block;
double *partial_lh_right = right->partial_lh + ptn*block;
- double *lh_left = lh_left_ptr[ptn];
+ double *lh_left;
+ if (ptn < orig_nptn) {
+ lh_left = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
+ } else {
+ lh_left = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_nptn]];
+ }
vc_max = 0.0;
- for (c = 0; c < ncat; c++) {
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass *vc_inv_evec_ptr = (VectorClass*)(inv_evec + mix_addr[c]);
// compute real partial likelihood vector
for (i = 0; i < nstates/VCSIZE; i++)
vc_lh_right[i].load_a(&partial_lh_right[i*VCSIZE]);
@@ -365,11 +461,11 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
// compute dot-product with inv_eigenvector
for (i = 0; i < nstates; i+=VCSIZE) {
for (j = 0; j < VCSIZE; j++) {
- res[j] = vc_partial_lh_tmp[0] * vc_inv_evec[(i+j)*nstates/VCSIZE];
+ res[j] = vc_partial_lh_tmp[0] * vc_inv_evec_ptr[(i+j)*nstates/VCSIZE];
}
for (x = 1; x < nstates/VCSIZE; x++) {
for (j = 0; j < VCSIZE; j++) {
- res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec[(i+j)*nstates/VCSIZE+x], res[j]);
+ res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec_ptr[(i+j)*nstates/VCSIZE+x], res[j]);
}
}
VectorClass sum_res = horizontal_add(res);
@@ -398,8 +494,7 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
}
dad_branch->lh_scale_factor += sum_scale;
- aligned_free(lh_left_ptr);
- aligned_free(partial_lh_left);
+ //aligned_free(lh_left_ptr);
} else {
// both left and right are internal node
@@ -421,7 +516,8 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
dad_branch->scale_num[ptn] = left->scale_num[ptn] + right->scale_num[ptn];
vc_max = 0.0;
- for (c = 0; c < ncat; c++) {
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass *vc_inv_evec_ptr = (VectorClass*)(inv_evec + mix_addr[c]);
// compute real partial likelihood vector
for (i = 0; i < nstates/VCSIZE; i++) {
vc_lh_left[i].load_a(&partial_lh_left[i*VCSIZE]);
@@ -447,11 +543,11 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
// compute dot-product with inv_eigenvector
for (i = 0; i < nstates; i+=VCSIZE) {
for (j = 0; j < VCSIZE; j++) {
- res[j] = vc_partial_lh_tmp[0] * vc_inv_evec[(i+j)*nstates/VCSIZE];
+ res[j] = vc_partial_lh_tmp[0] * vc_inv_evec_ptr[(i+j)*nstates/VCSIZE];
}
for (x = 1; x < nstates/VCSIZE; x++)
for (j = 0; j < VCSIZE; j++)
- res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec[(i+j)*nstates/VCSIZE+x], res[j]);
+ res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec_ptr[(i+j)*nstates/VCSIZE+x], res[j]);
VectorClass sum_res = horizontal_add(res);
sum_res.store_a(&partial_lh[i]);
@@ -482,8 +578,9 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
}
- aligned_free(eright);
- aligned_free(eleft);
+ if (partial_lh_leaves)
+ aligned_free(partial_lh_leaves);
+ aligned_free(echildren);
}
template <class VectorClass, const int VCSIZE, const int nstates>
@@ -506,14 +603,19 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(node_branch, node);
df = ddf = 0.0;
size_t ncat = site_rate->getNRate();
-
- size_t block = ncat * nstates;
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t block = ncat_mix * nstates;
+ size_t tip_block = nstates * model->getNMixtures();
size_t ptn; // for big data size > 4GB memory required
size_t c, i, j;
size_t orig_nptn = aln->size();
size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
size_t maxptn = ((nptn+VCSIZE-1)/VCSIZE)*VCSIZE;
maxptn = max(maxptn, aln->size()+((model_factory->unobserved_ptns.size()+VCSIZE-1)/VCSIZE)*VCSIZE);
+
+ size_t mix_addr_nstates[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+
double *eval = model->getEigenvalues();
assert(eval);
@@ -522,11 +624,15 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
VectorClass *vc_val2 = (VectorClass*)aligned_alloc<double>(block);
VectorClass vc_len = dad_branch->length;
- for (c = 0; c < ncat; c++) {
- VectorClass vc_rate = site_rate->getRate(c);
- VectorClass vc_prop = site_rate->getProp(c);
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ size_t mycat = c%ncat;
+ double *eval_ptr = eval + m*nstates;
+ VectorClass vc_rate = site_rate->getRate(mycat);
+ VectorClass vc_prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
for (i = 0; i < nstates/VCSIZE; i++) {
- VectorClass cof = VectorClass().load_a(&eval[i*VCSIZE]) * vc_rate;
+ VectorClass cof = VectorClass().load_a(&eval_ptr[i*VCSIZE]) * vc_rate;
VectorClass val = exp(cof*vc_len) * vc_prop;
VectorClass val1_ = cof*val;
vc_val0[c*nstates/VCSIZE+i] = val;
@@ -543,24 +649,20 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
if (dad->isLeaf()) {
// special treatment for TIP-INTERNAL NODE case
#ifdef _OPENMP
-#pragma omp parallel for private(ptn, i)
+#pragma omp parallel for private(ptn, i, c)
#endif
- for (ptn = 0; ptn < orig_nptn; ptn++) {
+ for (ptn = 0; ptn < nptn; ptn++) {
double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
double *theta = theta_all + ptn*block;
- double *lh_dad = &tip_partial_lh[(aln->at(ptn))[dad->id] * nstates];
- for (i = 0; i < block; i+=VCSIZE) {
- (VectorClass().load_a(&lh_dad[i%nstates]) * VectorClass().load_a(&partial_lh_dad[i])).store_a(&theta[i]);
- }
- }
- // ascertainment bias correction
- for (ptn = orig_nptn; ptn < nptn; ptn++) {
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- double *theta = theta_all + ptn*block;
- double *lh_dad = &tip_partial_lh[model_factory->unobserved_ptns[ptn-orig_nptn] * nstates];
- for (i = 0; i < block; i+=VCSIZE) {
- (VectorClass().load_a(&lh_dad[i%nstates]) * VectorClass().load_a(&partial_lh_dad[i])).store_a(&theta[i]);
- }
+ double *this_tip_partial_lh = tip_partial_lh + tip_block*((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]);
+ for (c = 0; c < ncat_mix; c++) {
+ double *lh_dad = this_tip_partial_lh + mix_addr_nstates[c];
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ (VectorClass().load_a(&lh_dad[i]) * VectorClass().load_a(&partial_lh_dad[i])).store_a(&theta[i]);
+ }
+ partial_lh_dad += nstates;
+ theta += nstates;
+ }
}
} else {
// both dad and node are internal nodes
@@ -754,8 +856,12 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(node_branch, node);
double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ size_t mix_addr_nstates[ncat_mix];
- size_t block = ncat * nstates;
+ size_t block = ncat_mix * nstates;
+ size_t tip_block = nstates * model->getNMixtures();
size_t ptn; // for big data size > 4GB memory required
size_t c, i, j;
size_t orig_nptn = aln->size();
@@ -768,13 +874,16 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
VectorClass *vc_val = (VectorClass*)aligned_alloc<double>(block);
- for (c = 0; c < ncat; c++) {
- double len = site_rate->getRate(c)*dad_branch->length;
- VectorClass vc_len(len);
- VectorClass vc_prop(site_rate->getProp(c));
+ for (c = 0; c < ncat_mix; c++) {
+ size_t mycat = c%ncat;
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ VectorClass vc_len(site_rate->getRate(mycat)*dad_branch->length);
+ VectorClass vc_prop(site_rate->getProp(c) * model->getMixtureWeight(m));
for (i = 0; i < nstates/VCSIZE; i++) {
// eval is not aligned!
- vc_val[c*nstates/VCSIZE+i] = exp(VectorClass().load_a(&eval[i*VCSIZE]) * vc_len) * vc_prop;
+ vc_val[c*nstates/VCSIZE+i] = exp(VectorClass().load_a(&eval_ptr[i*VCSIZE]) * vc_len) * vc_prop;
}
}
@@ -782,51 +891,90 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
if (dad->isLeaf()) {
// special treatment for TIP-INTERNAL NODE case
- VectorClass vc_tip_partial_lh[nstates];
- VectorClass vc_partial_lh_dad[VCSIZE], vc_ptn[VCSIZE];
+
+ // precompute information from one tip
+ double *partial_lh_node = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
+ IntVector states_dad = aln->seq_states[dad->id];
+ states_dad.push_back(aln->STATE_UNKNOWN);
+ for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
+ double *lh_node = partial_lh_node + (*it)*block;
+ double *lh_tip = tip_partial_lh + (*it)*tip_block;
+ VectorClass *vc_val_tmp = vc_val;
+ for (c = 0; c < ncat_mix; c++) {
+ double *this_lh_tip = lh_tip + mix_addr_nstates[c];
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ (vc_val_tmp[i/VCSIZE] * VectorClass().load_a(&this_lh_tip[i])).store_a(&lh_node[i]);
+ }
+ lh_node += nstates;
+ vc_val_tmp += nstates/VCSIZE;
+ }
+ }
+
+
+ //VectorClass vc_tip_partial_lh[nstates];
+ //VectorClass vc_partial_lh_dad[VCSIZE]
+ VectorClass vc_ptn[VCSIZE];
VectorClass lh_final(0.0), vc_freq;
VectorClass lh_ptn; // store likelihoods of VCSIZE consecutive patterns
-
- double **lh_states_dad = aligned_alloc<double*>(maxptn);
- for (ptn = 0; ptn < orig_nptn; ptn++)
- lh_states_dad[ptn] = &tip_partial_lh[(aln->at(ptn))[dad->id] * nstates];
- for (ptn = orig_nptn; ptn < nptn; ptn++)
- lh_states_dad[ptn] = &tip_partial_lh[model_factory->unobserved_ptns[ptn-orig_nptn] * nstates];
- // initialize beyond #patterns for efficiency
- for (ptn = nptn; ptn < maxptn; ptn++)
- lh_states_dad[ptn] = &tip_partial_lh[aln->STATE_UNKNOWN * nstates];
+// double **lh_states_dad = aligned_alloc<double*>(maxptn);
+// for (ptn = 0; ptn < orig_nptn; ptn++)
+// lh_states_dad[ptn] = &tip_partial_lh[(aln->at(ptn))[dad->id] * tip_block];
+// for (ptn = orig_nptn; ptn < nptn; ptn++)
+// lh_states_dad[ptn] = &tip_partial_lh[model_factory->unobserved_ptns[ptn-orig_nptn] * tip_block];
+// // initialize beyond #patterns for efficiency
+// for (ptn = nptn; ptn < maxptn; ptn++)
+// lh_states_dad[ptn] = &tip_partial_lh[aln->STATE_UNKNOWN * tip_block];
+ int *ptn_states_dad = aligned_alloc<int>(maxptn);
+ for (ptn = 0; ptn < orig_nptn; ptn++)
+ ptn_states_dad[ptn] = (aln->at(ptn))[dad->id];
+ for (ptn = orig_nptn; ptn < nptn; ptn++)
+ ptn_states_dad[ptn] = model_factory->unobserved_ptns[ptn-orig_nptn];
+ // initialize beyond #patterns for efficiency
+ for (ptn = nptn; ptn < maxptn; ptn++)
+ ptn_states_dad[ptn] = aln->STATE_UNKNOWN;
// copy dummy values because VectorClass will access beyond nptn
for (ptn = nptn; ptn < maxptn; ptn++)
memcpy(&dad_branch->partial_lh[ptn*block], dad_branch->partial_lh, block*sizeof(double));
#ifdef _OPENMP
-#pragma omp parallel private(ptn, i, j, vc_tip_partial_lh, vc_partial_lh_dad, vc_ptn, vc_freq, lh_ptn)
+#pragma omp parallel private(ptn, i, j, vc_ptn, vc_freq, lh_ptn)
{
VectorClass lh_final_th = 0.0;
#pragma omp for nowait
#endif
// main loop over all patterns with a step size of VCSIZE
for (ptn = 0; ptn < orig_nptn; ptn+=VCSIZE) {
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
+ //double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- // initialize vc_tip_partial_lh
for (j = 0; j < VCSIZE; j++) {
- double *lh_dad = lh_states_dad[ptn+j];
- for (i = 0; i < nstates/VCSIZE; i++) {
- vc_tip_partial_lh[j*(nstates/VCSIZE)+i].load_a(&lh_dad[i*VCSIZE]);
+ vc_ptn[j] = 0.0;
+ double *partial_lh_dad = dad_branch->partial_lh + (ptn+j)*block;
+ int state_dad = ptn_states_dad[ptn+j];
+ double *lh_node = &partial_lh_node[state_dad*block];
+ for (i = 0; i < block; i+=VCSIZE) {
+ vc_ptn[j] = mul_add(VectorClass().load_a(&lh_node[i]),
+ VectorClass().load_a(&partial_lh_dad[i]), vc_ptn[j]);
}
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block]);
- vc_ptn[j] = vc_val[0] * vc_tip_partial_lh[j*(nstates/VCSIZE)] * vc_partial_lh_dad[j];
}
- // compute vc_ptn
- for (i = 1; i < block/VCSIZE; i++)
- for (j = 0; j < VCSIZE; j++) {
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block+i*VCSIZE]);
- vc_ptn[j] = mul_add(vc_val[i] * vc_tip_partial_lh[j*(nstates/VCSIZE)+i%(nstates/VCSIZE)],
- vc_partial_lh_dad[j], vc_ptn[j]);
- }
+ // initialize vc_tip_partial_lh
+// for (j = 0; j < VCSIZE; j++) {
+// double *lh_dad = lh_states_dad[ptn+j];
+// for (i = 0; i < nstates/VCSIZE; i++) {
+// vc_tip_partial_lh[j*(nstates/VCSIZE)+i].load_a(&lh_dad[i*VCSIZE]);
+// }
+// vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block]);
+// vc_ptn[j] = vc_val[0] * vc_tip_partial_lh[j*(nstates/VCSIZE)] * vc_partial_lh_dad[j];
+// }
+//
+// // compute vc_ptn
+// for (i = 1; i < block/VCSIZE; i++)
+// for (j = 0; j < VCSIZE; j++) {
+// vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block+i*VCSIZE]);
+// vc_ptn[j] = mul_add(vc_val[i] * vc_tip_partial_lh[j*(nstates/VCSIZE)+i%(nstates/VCSIZE)],
+// vc_partial_lh_dad[j], vc_ptn[j]);
+// }
vc_freq.load_a(&ptn_freq[ptn]);
lh_ptn = horizontal_add(vc_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
@@ -869,32 +1017,23 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
// cout << "WARNING: Tree log-likelihood is set to " << tree_lh << endl;
}
- // ascertainment bias correction
if (orig_nptn < nptn) {
lh_final = 0.0;
lh_ptn = 0.0;
for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
- double *partial_lh_dad = &dad_branch->partial_lh[ptn*block];
+// double *partial_lh_dad = &dad_branch->partial_lh[ptn*block];
lh_final += lh_ptn;
-
- // initialize vc_tip_partial_lh
for (j = 0; j < VCSIZE; j++) {
- double *lh_dad = lh_states_dad[ptn+j];
- for (i = 0; i < nstates/VCSIZE; i++) {
- vc_tip_partial_lh[j*(nstates/VCSIZE)+i].load(&lh_dad[i*VCSIZE]); // lh_dad is not aligned!
+ vc_ptn[j] = 0.0;
+ double *partial_lh_dad = dad_branch->partial_lh + (ptn+j)*block;
+ int state_dad = ptn_states_dad[ptn+j];
+ double *lh_node = &partial_lh_node[state_dad*block];
+ for (i = 0; i < block; i+=VCSIZE) {
+ vc_ptn[j] = mul_add(VectorClass().load_a(&lh_node[i]),
+ VectorClass().load_a(&partial_lh_dad[i]), vc_ptn[j]);
}
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block]);
- vc_ptn[j] = vc_val[0] * vc_tip_partial_lh[j*(nstates/VCSIZE)] * vc_partial_lh_dad[j];
}
- // compute vc_ptn
- for (i = 1; i < block/VCSIZE; i++)
- for (j = 0; j < VCSIZE; j++) {
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block+i*VCSIZE]);
- vc_ptn[j] = mul_add(vc_val[i] * vc_tip_partial_lh[j*(nstates/VCSIZE)+i%(nstates/VCSIZE)],
- vc_partial_lh_dad[j], vc_ptn[j]);
- }
-
// bugfix 2016-01-21, prob_const can be rescaled
for (j = 0; j < VCSIZE; j++)
if (dad_branch->scale_num[ptn+j] >= 1)
@@ -911,7 +1050,54 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
default: assert(0); break;
}
}
- aligned_free(lh_states_dad);
+ aligned_free(ptn_states_dad);
+ aligned_free(partial_lh_node);
+
+
+
+ // ascertainment bias correction
+// if (orig_nptn < nptn) {
+// lh_final = 0.0;
+// lh_ptn = 0.0;
+// for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
+// double *partial_lh_dad = &dad_branch->partial_lh[ptn*block];
+// lh_final += lh_ptn;
+//
+// // initialize vc_tip_partial_lh
+// for (j = 0; j < VCSIZE; j++) {
+// double *lh_dad = lh_states_dad[ptn+j];
+// for (i = 0; i < nstates/VCSIZE; i++) {
+// vc_tip_partial_lh[j*(nstates/VCSIZE)+i].load(&lh_dad[i*VCSIZE]); // lh_dad is not aligned!
+// }
+// vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block]);
+// vc_ptn[j] = vc_val[0] * vc_tip_partial_lh[j*(nstates/VCSIZE)] * vc_partial_lh_dad[j];
+// }
+//
+// // compute vc_ptn
+// for (i = 1; i < block/VCSIZE; i++)
+// for (j = 0; j < VCSIZE; j++) {
+// vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block+i*VCSIZE]);
+// vc_ptn[j] = mul_add(vc_val[i] * vc_tip_partial_lh[j*(nstates/VCSIZE)+i%(nstates/VCSIZE)],
+// vc_partial_lh_dad[j], vc_ptn[j]);
+// }
+//
+// // bugfix 2016-01-21, prob_const can be rescaled
+// for (j = 0; j < VCSIZE; j++)
+// if (dad_branch->scale_num[ptn+j] >= 1)
+// vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+//
+// // ptn_invar[ptn] is not aligned
+// lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
+// }
+// switch ((nptn-orig_nptn)%VCSIZE) {
+// case 0: prob_const = horizontal_add(lh_final+lh_ptn); break;
+// case 1: prob_const = horizontal_add(lh_final)+lh_ptn[0]; break;
+// case 2: prob_const = horizontal_add(lh_final)+lh_ptn[0]+lh_ptn[1]; break;
+// case 3: prob_const = horizontal_add(lh_final)+lh_ptn[0]+lh_ptn[1]+lh_ptn[2]; break;
+// default: assert(0); break;
+// }
+// }
+// aligned_free(lh_states_dad);
} else {
// both dad and node are internal nodes
VectorClass vc_partial_lh_node[VCSIZE];
@@ -1032,7 +1218,10 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
double tree_lh = current_it->lh_scale_factor + current_it_back->lh_scale_factor;
size_t ncat = site_rate->getNRate();
- size_t block = ncat * nstates;
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+
+ size_t block = ncat_mix * nstates;
size_t ptn; // for big data size > 4GB memory required
size_t c, i, j;
size_t orig_nptn = aln->size();
@@ -1044,11 +1233,14 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
VectorClass *vc_val0 = (VectorClass*)aligned_alloc<double>(block);
VectorClass vc_len = current_it->length;
- for (c = 0; c < ncat; c++) {
- VectorClass vc_rate = site_rate->getRate(c);
- VectorClass vc_prop = site_rate->getProp(c);
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ double *eval_ptr = eval + (m)*nstates;
+ size_t mycat = c%ncat;
+ VectorClass vc_rate = site_rate->getRate(mycat);
+ VectorClass vc_prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
for (i = 0; i < nstates/VCSIZE; i++) {
- VectorClass cof = VectorClass().load_a(&eval[i*VCSIZE]) * vc_rate;
+ VectorClass cof = VectorClass().load_a(&eval_ptr[i*VCSIZE]) * vc_rate;
VectorClass val = exp(cof*vc_len) * vc_prop;
vc_val0[c*nstates/VCSIZE+i] = val;
}
@@ -1194,7 +1386,7 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
return tree_lh;
}
-
+*/
/****************************************************************************
Highly optimized Parsimony function
****************************************************************************/
@@ -1270,8 +1462,8 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
if (dad_branch->partial_lh_computed & 2)
return;
Node *node = dad_branch->node;
- int nstates = aln->num_states;
- int site;
+ int nstates = aln->getMaxNumStates();
+ int site = 0;
const int VCSIZE = VectorClass::size();
const int NUM_BITS = VectorClass::size() * UINT_BITS;
@@ -1279,130 +1471,144 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
if (node->isLeaf() && dad) {
// external node
+ vector<Alignment*> *partitions = NULL;
+ if (aln->isSuperAlignment())
+ partitions = &((SuperAlignment*)aln)->partitions;
+ else {
+ partitions = new vector<Alignment*>;
+ partitions->push_back(aln);
+ }
if (aln->ordered_pattern.empty())
aln->orderPatternByNumChars();
int leafid = node->id;
int pars_size = getBitsBlockSize();
memset(dad_branch->partial_pars, 0, pars_size*sizeof(UINT));
-// int ptn;
-// int nptn = aln->size();
int ambi_aa[] = {2, 3, 5, 6, 9, 10}; // {4+8, 32+64, 512+1024};
-// int max_sites = ((aln->num_informative_sites+UINT_BITS-1)/UINT_BITS)*UINT_BITS;
-// UINT *x = dad_branch->partial_pars - (nstates*VCSIZE);
UINT *x = dad_branch->partial_pars;
- Alignment::iterator pat;
- switch (aln->seq_type) {
- case SEQ_DNA:
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < 4) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 4*VCSIZE;
- site = 0;
+ int start_pos = 0;
+
+ for (vector<Alignment*>::iterator alnit = partitions->begin(); alnit != partitions->end(); alnit++) {
+ int end_pos = start_pos + (*alnit)->ordered_pattern.size();
+ switch ((*alnit)->seq_type) {
+ case SEQ_DNA:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < 4) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
}
- x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 4*VCSIZE;
- site = 0;
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT bit1 = (1 << (site%UINT_BITS));
+ UINT *p = x+(site/UINT_BITS);
+ p[0] |= bit1;
+ p[VCSIZE] |= bit1;
+ p[2*VCSIZE] |= bit1;
+ p[3*VCSIZE] |= bit1;
}
- UINT bit1 = (1 << (site%UINT_BITS));
- UINT *p = x+(site/UINT_BITS);
- p[0] |= bit1;
- p[VCSIZE] |= bit1;
- p[2*VCSIZE] |= bit1;
- p[3*VCSIZE] |= bit1;
- }
- } else {
- state -= 3;
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 4*VCSIZE;
- site = 0;
+ } else {
+ state -= 3;
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT *p = x + ((site/UINT_BITS));
+
+ UINT bit1 = (1 << (site%UINT_BITS));
+ for (int i = 0; i < 4; i++)
+ if (state & (1<<i))
+ p[i*VCSIZE] |= bit1;
}
- UINT *p = x + ((site/UINT_BITS));
-
- UINT bit1 = (1 << (site%UINT_BITS));
- for (int i = 0; i < 4; i++)
- if (state & (1<<i))
- p[i*VCSIZE] |= bit1;
}
}
- }
- break;
- case SEQ_PROTEIN:
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < 20) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 20*VCSIZE;
- site = 0;
+ break;
+ case SEQ_PROTEIN:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < 20) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
}
- x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 20*VCSIZE;
- site = 0;
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT bit1 = (1 << (site%UINT_BITS));
+ UINT *p = x+(site/UINT_BITS);
+ for (int i = 0; i < 20; i++)
+ p[i*VCSIZE] |= bit1;
}
- UINT bit1 = (1 << (site%UINT_BITS));
- UINT *p = x+(site/UINT_BITS);
- for (int i = 0; i < 20; i++)
- p[i*VCSIZE] |= bit1;
- }
- } else {
- assert(state < 23);
- state = (state-20)*2;
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 20*VCSIZE;
- site = 0;
+ } else {
+ assert(state < 23);
+ state = (state-20)*2;
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT *p = x + ((site/UINT_BITS));
+ UINT bit1 = (1 << (site%UINT_BITS));
+
+ p[ambi_aa[state]*VCSIZE] |= bit1;
+ p[ambi_aa[state+1]*VCSIZE] |= bit1;
}
- UINT *p = x + ((site/UINT_BITS));
- UINT bit1 = (1 << (site%UINT_BITS));
-
- p[ambi_aa[state]*VCSIZE] |= bit1;
- p[ambi_aa[state+1]*VCSIZE] |= bit1;
}
}
- }
- break;
- default:
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < nstates) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += nstates*VCSIZE;
- site = 0;
+ break;
+ default:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < (*alnit)->num_states) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
}
- x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += nstates*VCSIZE;
- site = 0;
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT bit1 = (1 << (site%UINT_BITS));
+ UINT *p = x+(site/UINT_BITS);
+ for (int i = 0; i < (*alnit)->num_states; i++)
+ p[i*VCSIZE] |= bit1;
}
- UINT bit1 = (1 << (site%UINT_BITS));
- UINT *p = x+(site/UINT_BITS);
- for (int i = 0; i < nstates; i++)
- p[i*VCSIZE] |= bit1;
+ } else {
+ assert(0);
}
- } else {
- assert(0);
}
- }
- break;
- }
+ break;
+ } // end of switch
+ start_pos = end_pos;
+ } // of end FOR LOOP
+
+ assert(start_pos == aln->ordered_pattern.size());
+// assert(site == aln->num_informative_sites % NUM_BITS);
// add dummy states
if (site > 0 && site < NUM_BITS) {
x += site/UINT_BITS;
@@ -1411,6 +1617,8 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
int max_sites = ((site+UINT_BITS-1)/UINT_BITS);
memset(x, 255, (VCSIZE - max_sites)*sizeof(UINT));
}
+ if (!aln->isSuperAlignment())
+ delete partitions;
} else {
// internal node
assert(node->degree() == 3); // it works only for strictly bifurcating tree
@@ -1433,7 +1641,7 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
#pragma omp parallel for private (site) reduction(+: score) if(nsites>200)
#endif
for (site = 0; site<nsites; site++) {
- size_t offset = 4*VCSIZE*site;
+ size_t offset = entry_size*site;
VectorClass *x = (VectorClass*)(left->partial_pars + offset);
VectorClass *y = (VectorClass*)(right->partial_pars + offset);
VectorClass *z = (VectorClass*)(dad_branch->partial_pars + offset);
@@ -1504,7 +1712,7 @@ int PhyloTree::computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNo
if ((node_branch->partial_lh_computed & 2) == 0)
computePartialParsimonyFastSIMD<VectorClass>(node_branch, node);
int site;
- int nstates = aln->num_states;
+ int nstates = aln->getMaxNumStates();
// VectorClass score = 0;
// VectorClass w;
diff --git a/phylokernelavx512.cpp b/phylokernelavx512.cpp
new file mode 100644
index 0000000..3c1e86a
--- /dev/null
+++ b/phylokernelavx512.cpp
@@ -0,0 +1,120 @@
+/*
+ * phylokernelavx512.cpp
+ *
+ * Created on: Sept 25, 2016
+ * Author: minh
+ */
+
+
+#define MAX_VECTOR_SIZE 512 // for VectorClass
+
+#include "vectorclass/vectorclass.h"
+#include "vectorclass/vectormath_exp.h"
+#include "phylokernel.h"
+//#include "phylokernelsafe.h"
+//#include "phylokernelmixture.h"
+//#include "phylokernelmixrate.h"
+//#include "phylokernelsitemodel.h"
+
+#include "phylokernelnew.h"
+#define KERNEL_FIX_STATES
+#include "phylokernelnew.h"
+
+
+#if !defined ( __AVX512F__ ) && !defined ( __AVX512__ )
+#error "You must compile this file with AVX512 enabled!"
+#endif
+
+void PhyloTree::setDotProductAVX512() {
+#ifdef BOOT_VAL_FLOAT
+ dotProduct = &PhyloTree::dotProductSIMD<float, Vec16f>;
+#else
+ dotProduct = &PhyloTree::dotProductSIMD<double, Vec8d>;
+#endif
+ dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec8d>;
+}
+
+void PhyloTree::setLikelihoodKernelAVX512() { // bind the four likelihood kernel pointers to their Vec8d instantiations
+    vector_size = 8; // Vec8d holds 8 doubles; setLikelihoodKernelFMA() records 4 for its Vec4d kernels the same way
+    if (model_factory && model_factory->model->isSiteSpecificModel()) { // site-specific model: instantiations carry one extra trailing 'true' flag
+        switch (aln->num_states) {
+        case 4:
+            computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchSIMD    <Vec8d, NORM_LH, 4, true, true>;
+            computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervSIMD      <Vec8d, NORM_LH, 4, true, true>;
+            computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodSIMD   <Vec8d, NORM_LH, 4, true, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec8d, NORM_LH, 4, true, true>;
+            break;
+        case 20:
+            computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchSIMD    <Vec8d, NORM_LH, 20, true, true>;
+            computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervSIMD      <Vec8d, NORM_LH, 20, true, true>;
+            computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodSIMD   <Vec8d, NORM_LH, 20, true, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec8d, NORM_LH, 20, true, true>;
+            break;
+        default: // any other state count: generic (run-time state count) kernels
+            computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchGenericSIMD    <Vec8d, NORM_LH, true, true>;
+            computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervGenericSIMD      <Vec8d, NORM_LH, true, true>;
+            computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodGenericSIMD   <Vec8d, NORM_LH, true, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec8d, NORM_LH, true, true>;
+            break;
+        }
+        return;
+    }
+
+    if (params->lk_safe_scaling) { // safe scaling requested: SAFE_LH instantiations
+        switch(aln->num_states) {
+        case 2:
+            computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchSIMD<Vec8d, SAFE_LH, 2, true>;
+            computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervSIMD<Vec8d, SAFE_LH, 2, true>;
+            computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodSIMD<Vec8d, SAFE_LH, 2, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec8d, SAFE_LH, 2, true>;
+            break;
+        case 4:
+            computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchSIMD<Vec8d, SAFE_LH, 4, true>;
+            computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervSIMD<Vec8d, SAFE_LH, 4, true>;
+            computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodSIMD<Vec8d, SAFE_LH, 4, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec8d, SAFE_LH, 4, true>;
+            break;
+        case 20:
+            computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchSIMD<Vec8d, SAFE_LH, 20, true>;
+            computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervSIMD<Vec8d, SAFE_LH, 20, true>;
+            computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodSIMD<Vec8d, SAFE_LH, 20, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec8d, SAFE_LH, 20, true>;
+            break;
+        default:
+            computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec8d, SAFE_LH, true>;
+            computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec8d, SAFE_LH, true>;
+            computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec8d, SAFE_LH, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec8d, SAFE_LH, true>;
+            break;
+        }
+        return;
+    }
+
+    switch(aln->num_states) { // remaining case: NORM_LH instantiations
+    case 2:
+        computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchSIMD<Vec8d, NORM_LH, 2, true>;
+        computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervSIMD<Vec8d, NORM_LH, 2, true>;
+        computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodSIMD<Vec8d, NORM_LH, 2, true>;
+        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec8d, NORM_LH, 2, true>;
+        break;
+    case 4:
+        computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchSIMD<Vec8d, NORM_LH, 4, true>;
+        computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervSIMD<Vec8d, NORM_LH, 4, true>;
+        computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodSIMD<Vec8d, NORM_LH, 4, true>;
+        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec8d, NORM_LH, 4, true>;
+        break;
+    case 20:
+        computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchSIMD<Vec8d, NORM_LH, 20, true>;
+        computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervSIMD<Vec8d, NORM_LH, 20, true>;
+        computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodSIMD<Vec8d, NORM_LH, 20, true>;
+        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec8d, NORM_LH, 20, true>;
+        break;
+    default:
+        computeLikelihoodBranchPointer     = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec8d, NORM_LH, true>;
+        computeLikelihoodDervPointer       = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec8d, NORM_LH, true>;
+        computePartialLikelihoodPointer    = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec8d, NORM_LH, true>;
+        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec8d, NORM_LH, true>;
+        break;
+    }
+}
+
diff --git a/phylokernelfma.cpp b/phylokernelfma.cpp
new file mode 100644
index 0000000..1e9909c
--- /dev/null
+++ b/phylokernelfma.cpp
@@ -0,0 +1,164 @@
+/*
+ * phylokernelfma.cpp
+ *
+ * Created on: Sept 25, 2016
+ * Author: minh
+ */
+
+
+#include "vectorclass/vectormath_exp.h"
+#include "vectorclass/vectorclass.h"
+#include "phylokernel.h"
+//#include "phylokernelsafe.h"
+//#include "phylokernelmixture.h"
+//#include "phylokernelmixrate.h"
+//#include "phylokernelsitemodel.h"
+
+#include "phylokernelnew.h"
+#define KERNEL_FIX_STATES
+#include "phylokernelnew.h"
+
+#if !defined(__AVX2__) && !defined(__FMA__)
+#error "You must compile this file with AVX2 or FMA enabled!"
+#endif
+
+void PhyloTree::setDotProductFMA() {
+#ifdef BOOT_VAL_FLOAT
+ dotProduct = &PhyloTree::dotProductSIMD<float, Vec8f>;
+#else
+ dotProduct = &PhyloTree::dotProductSIMD<double, Vec4d>;
+#endif
+ dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec4d>;
+}
+
+void PhyloTree::setLikelihoodKernelFMA() {
+ vector_size = 4;
+// setParsimonyKernelAVX();
+
+ if (model_factory && model_factory->model->isSiteSpecificModel() && (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling)) {
+ // safe site-specific model
+ switch (aln->num_states) {
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec4d, SAFE_LH, 4, true, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec4d, SAFE_LH, 4, true, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec4d, SAFE_LH, 4, true, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 4, true, true>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec4d, SAFE_LH, 20, true, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec4d, SAFE_LH, 20, true, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec4d, SAFE_LH, 20, true, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 20, true, true>;
+ break;
+ default:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD <Vec4d, SAFE_LH, true, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD <Vec4d, SAFE_LH, true, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD <Vec4d, SAFE_LH, true, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, SAFE_LH, true, true>;
+ break;
+ }
+ return;
+ }
+
+ if (model_factory && model_factory->model->isSiteSpecificModel()) {
+ switch (aln->num_states) {
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec4d, NORM_LH, 4, true, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec4d, NORM_LH, 4, true, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec4d, NORM_LH, 4, true, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 4, true, true>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec4d, NORM_LH, 20, true, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec4d, NORM_LH, 20, true, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec4d, NORM_LH, 20, true, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 20, true, true>;
+ break;
+ default:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD <Vec4d, NORM_LH, true, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD <Vec4d, NORM_LH, true, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD <Vec4d, NORM_LH, true, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, NORM_LH, true, true>;
+ break;
+ }
+ return;
+ }
+
+ if (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling) {
+ switch(aln->num_states) {
+ /*
+ case 2:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, SAFE_LH, 2, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, SAFE_LH, 2, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, SAFE_LH, 2, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 2, true>;
+ break;
+ */
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, SAFE_LH, 4, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, SAFE_LH, 4, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, SAFE_LH, 4, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 4, true>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, SAFE_LH, 20, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, SAFE_LH, 20, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, SAFE_LH, 20, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 20, true>;
+ break;
+ /*
+ case 64:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, SAFE_LH, 64, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, SAFE_LH, 64, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, SAFE_LH, 64, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 64, true>;
+ break;
+ */
+ default:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec4d, SAFE_LH, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec4d, SAFE_LH, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec4d, SAFE_LH, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, SAFE_LH, true>;
+ break;
+ }
+ return;
+ }
+
+ switch(aln->num_states) {
+ /*
+ case 2:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, NORM_LH, 2, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, NORM_LH, 2, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, NORM_LH, 2, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 2, true>;
+ break;
+ */
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, NORM_LH, 4, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, NORM_LH, 4, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, NORM_LH, 4, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 4, true>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, NORM_LH, 20, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, NORM_LH, 20, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, NORM_LH, 20, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 20, true>;
+ break;
+ /*
+ case 64:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, NORM_LH, 64, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, NORM_LH, 64, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, NORM_LH, 64, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 64, true>;
+ break;
+ */
+ default:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec4d, NORM_LH, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec4d, NORM_LH, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec4d, NORM_LH, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, NORM_LH, true>;
+ break;
+ }
+}
+
diff --git a/phylokernelmixrate.h b/phylokernelmixrate.h
index a92559f..bdabd64 100644
--- a/phylokernelmixrate.h
+++ b/phylokernelmixrate.h
@@ -23,7 +23,7 @@ template <class VectorClass, const int VCSIZE, const int nstates>
void PhyloTree::computeMixratePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad) {
if (dad_branch->node->degree() > 3) {
// TODO: SIMD version for multifurcating node
- computeMixratePartialLikelihoodEigen(dad_branch, dad);
+ computePartialLikelihoodEigen(dad_branch, dad);
return;
}
diff --git a/phylokernelmixture.h b/phylokernelmixture.h
index 6b08498..992d4a8 100644
--- a/phylokernelmixture.h
+++ b/phylokernelmixture.h
@@ -21,7 +21,7 @@ template <class VectorClass, const int VCSIZE, const int nstates>
void PhyloTree::computeMixturePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad) {
if (dad_branch->node->degree() > 3) {
// TODO: SIMD version for multifurcating node
- computeMixturePartialLikelihoodEigen(dad_branch, dad);
+ computePartialLikelihoodEigen(dad_branch, dad);
return;
}
diff --git a/phylokernelnew.h b/phylokernelnew.h
new file mode 100644
index 0000000..11c29eb
--- /dev/null
+++ b/phylokernelnew.h
@@ -0,0 +1,2802 @@
+/*
+ * phylokernelnew.h
+ * Newly revised kernel based on vectorizing over alignment patterns
+ *
+ * Created on: Sept 23, 2016
+ * Author: minh
+ */
+
+
+#if !defined(PHYLOKERNELNEW_H_) || !defined(PHYLOKERNELNEW_STATE_H_)
+
+#ifdef KERNEL_FIX_STATES
+# define PHYLOKERNELNEW_STATE_H_
+#else
+# define PHYLOKERNELNEW_H_
+#endif
+
+#include "phylotree.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+//#include <thread>
+
+using namespace std;
+
+/*******************************************************
+ *
+ * Helper function for vectors and matrix multiplication
+ *
+ ******************************************************/
+
+/**
+    sum of elements of a vector:
+    X = A[0] + ... + A[N-1]              (template append = false)
+    X = X + A[0] + ... + A[N-1]          (template append = true)
+    @param N number of elements (N >= 1)
+    @param A vector of size N
+    @param[in,out] X receives the sum; with append = true the sum is added to the incoming value of X
+*/
+#ifndef KERNEL_FIX_STATES
+template <class VectorClass, const bool append>
+inline void sumVec(VectorClass *A, VectorClass &X, size_t N)
+{
+    if (N == 1) {
+        if (append) X += A[0]; else X = A[0]; // the single-element shortcut must honour append like every other path
+        return;
+    }
+
+    size_t i;
+    switch (N % 4) {
+    case 0: {
+        VectorClass V[4];
+        V[0] = A[0];
+        V[1] = A[1];
+        V[2] = A[2];
+        V[3] = A[3];
+        for (i = 4; i < N; i+=4) {
+            V[0] += A[i];
+            V[1] += A[i+1];
+            V[2] += A[i+2];
+            V[3] += A[i+3]; // each of the four accumulators owns one lane of the unrolled loop
+        }
+        if (append)
+            X += (V[0] + V[1]) + (V[2] + V[3]);
+        else
+            X = (V[0] + V[1]) + (V[2] + V[3]);
+        break;
+    }
+
+    case 2: {
+        VectorClass V[2];
+        V[0] = A[0];
+        V[1] = A[1];
+        for (i = 2; i < N; i+=2) {
+            V[0] += A[i];
+            V[1] += A[i+1];
+        }
+        if (append)
+            X += V[0] + V[1];
+        else
+            X = V[0] + V[1];
+        break;
+    }
+
+    default: {
+        VectorClass V[2];
+        // odd N >= 3: pairs cover A[0..N-2], the last element A[N-1] is added separately
+        V[0] = A[0];
+        V[1] = A[1];
+        for (i = 2; i < N-1; i+=2) {
+            V[0] += A[i];
+            V[1] += A[i+1];
+        }
+        if (append)
+            X += A[N-1] + V[0] + V[1];
+        else
+            X = A[N-1] + V[0] + V[1];
+        break;
+    }
+    }
+}
+#endif
+
+/**
+ dotProduct of two vectors A, B
+ X = A.B = A[0]*B[0] + ... + A[N-1]*B[N-1]
+ template FMA = true to allow FMA instruction, false otherwise (not consulted by the body below: mul_add() is called in either case)
+ @param N number of elements
+ @param A first vector of size N
+ @param B second vector of size N
+ @param[out] X dot-product of A and B
+*/
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, class Numeric, const size_t N, const bool FMA>
+inline void dotProductVec(Numeric *A, VectorClass *B, VectorClass &X)
+#else
+template <class VectorClass, class Numeric, const bool FMA>
+inline void dotProductVec(Numeric *A, VectorClass *B, VectorClass &X, size_t N)
+#endif
+{
+ size_t i, j;
+ switch (N % 4) {
+ case 0: {
+ VectorClass V[4];
+ for (j = 0; j < 4; j++)
+ V[j] = A[j] * B[j];
+ for (i = 4; i < N; i+=4) {
+ for (j = 0; j < 4; j++)
+ V[j] = mul_add(A[i+j], B[i+j], V[j]);
+ }
+ X = (V[0]+V[1]) + (V[2]+V[3]);
+ break;
+ }
+
+ case 2: {
+ VectorClass V[2];
+ for (j = 0; j < 2; j++)
+ V[j] = A[j] * B[j];
+ for (i = 2; i < N; i+=2) {
+ for (j = 0; j < 2; j++)
+ V[j] = mul_add(A[i+j], B[i+j], V[j]);
+ }
+ X = (V[0]+V[1]);
+ break;
+ }
+
+ default: {
+ // odd number of states
+ VectorClass V[2];
+ for (j = 0; j < 2; j++)
+ V[j] = A[j] * B[j];
+ for (i = 2; i < N-1; i+=2) {
+ for (j = 0; j < 2; j++)
+ V[j] = mul_add(A[i+j], B[i+j], V[j]);
+ }
+ X = mul_add(A[N-1], B[N-1], V[0]+V[1]);
+ break;
+ }
+ }
+}
+
+/**
+ Dual dotProduct of four vectors A, B, C, D to compute X:
+ X = (A.B) * (C.D), where
+ A.B = A[0]*B[0] + ... + A[N-1]*B[N-1]
+ C.D = C[0]*D[0] + ... + C[N-1]*D[N-1]
+ template FMA = true to allow FMA instruction, false otherwise (not consulted by the body below: mul_add() is called in either case)
+ @param N number of elements
+ @param A first vector of size N
+ @param B second vector of size N
+ @param C third vector of size N
+ @param D fourth vector of size N
+ @param[out] X = (A.B) * (C.D)
+*/
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, class Numeric, const size_t N, const bool FMA>
+inline void dotProductDualVec(Numeric *A, VectorClass *B, Numeric *C, VectorClass *D, VectorClass &X)
+#else
+template <class VectorClass, class Numeric, const bool FMA>
+inline void dotProductDualVec(Numeric *A, VectorClass *B, Numeric *C, VectorClass *D, VectorClass &X, size_t N)
+#endif
+{
+ size_t i, j;
+ switch (N % 4) {
+ case 0: {
+ VectorClass AB[4], CD[4];
+ for (j = 0; j < 4; j++) {
+ AB[j] = A[j] * B[j];
+ CD[j] = C[j] * D[j];
+ }
+ for (i = 4; i < N; i+=4) {
+
+ for (j = 0; j < 4; j++) {
+ AB[j] = mul_add(A[i+j], B[i+j], AB[j]);
+ CD[j] = mul_add(C[i+j], D[i+j], CD[j]);
+ }
+ }
+ X = ((AB[0]+AB[1])+(AB[2]+AB[3])) * ((CD[0]+CD[1])+CD[2]+CD[3]);
+ break;
+ }
+
+ case 2: {
+ VectorClass AB[2], CD[2];
+ for (j = 0; j < 2; j++) {
+ AB[j] = A[j] * B[j];
+ CD[j] = C[j] * D[j];
+ }
+ for (i = 2; i < N; i+=2) {
+ for (j = 0; j < 2; j++) {
+ AB[j] = mul_add(A[i+j], B[i+j], AB[j]);
+ CD[j] = mul_add(C[i+j], D[i+j], CD[j]);
+ }
+ }
+ X = ((AB[0]+AB[1])) * ((CD[0]+CD[1]));
+ break;
+ }
+
+ default: {
+ // odd states
+ VectorClass AB[2], CD[2];
+ for (j = 0; j < 2; j++) {
+ AB[j] = A[j] * B[j];
+ CD[j] = C[j] * D[j];
+ }
+ for (i = 2; i < N-1; i+=2) {
+ for (j = 0; j < 2; j++) {
+ AB[j] = mul_add(A[i+j], B[i+j], AB[j]);
+ CD[j] = mul_add(C[i+j], D[i+j], CD[j]);
+ }
+ }
+ AB[0] = mul_add(A[N-1], B[N-1], AB[0]+AB[1]);
+ CD[0] = mul_add(C[N-1], D[N-1], CD[0]+CD[1]);
+ X = AB[0] * CD[0];
+ break;
+ }
+ }
+}
+
+/**
+ compute product of a vector A and a matrix M, resulting in a vector X:
+ X[i] = A[0]*M[i,0] + ... + A[N-1]*M[i,N-1], for all i = 0,...,N-1
+ @param N number of elements
+ @param A input vector of size N
+ @param M input matrix of size N*N
+ @param[out] X output vector of size N
+*/
+// quick unrolling version of multiplying partial_lh with inv_eigenvector
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, class Numeric, const size_t N, const bool FMA>
+inline void productVecMat(VectorClass *A, Numeric *M, VectorClass *X)
+#else
+template <class VectorClass, class Numeric, const bool FMA>
+inline void productVecMat(VectorClass *A, Numeric *M, VectorClass *X, size_t N)
+#endif
+{
+ size_t i, j, x;
+
+ switch (N % 4) {
+ case 0:
+ for (i = 0; i < N; i++) {
+ // manual unrolling
+ VectorClass V[4];
+ for (j = 0; j < 4; j++)
+ V[j] = A[j] * M[j];
+
+ for (x = 4; x < N; x+=4) {
+ for (j = 0; j < 4; j++)
+ V[j] = mul_add(A[x+j], M[x+j], V[j]);
+ }
+ X[i] = (V[0]+V[1])+(V[2]+V[3]);
+ M += N;
+ }
+ break;
+
+ case 2:
+ for (i = 0; i < N; i++) {
+ // manual unrolling
+ VectorClass V[2];
+ for (j = 0; j < 2; j++)
+ V[j] = A[j] * M[j];
+
+ for (x = 2; x < N; x+=2) {
+ for (j = 0; j < 2; j++)
+ V[j] = mul_add(A[x+j], M[x+j], V[j]);
+ }
+ X[i] = (V[0]+V[1]);
+ M += N;
+ }
+ break;
+ default:
+ // odd number of states
+ for (i = 0; i < N; i++) {
+ // manual unrolling
+ VectorClass V[2];
+ for (j = 0; j < 2; j++)
+ V[j] = A[j] * M[j];
+
+ for (x = 2; x < N-1; x+=2) {
+ for (j = 0; j < 2; j++)
+ V[j] = mul_add(A[x+j], M[x+j], V[j]);
+ }
+ X[i] = mul_add(A[N-1], M[N-1], V[0]+V[1]);
+ M += N;
+ }
+ break;
+ }
+}
+
+
+/**
+ compute product of a vector A and a matrix M, resulting in a vector X:
+ X[i] = A[0]*M[i,0] + ... + A[N-1]*M[i,N-1], for all i = 0,...,N-1
+ and also return the maximum of absolute values of X
+ @param N number of elements
+ @param A input vector of size N
+ @param M input matrix of size N*N
+ @param[out] X output vector of size N
+ @param[in,out] Xmax running maximum: every X[i] updates it to max(Xmax, |X[i]|), so the caller must initialise it
+*/
+// quick unrolling version of multiplying partial_lh with inv_eigenvector
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, class Numeric, const size_t N, const bool FMA>
+inline void productVecMat(VectorClass *A, Numeric *M, VectorClass *X, VectorClass &Xmax)
+#else
+template <class VectorClass, class Numeric, const bool FMA>
+inline void productVecMat(VectorClass *A, Numeric *M, VectorClass *X, VectorClass &Xmax, size_t N)
+#endif
+{
+ size_t i, j, x;
+
+ switch (N % 4) {
+ case 0:
+ for (i = 0; i < N; i++) {
+ // manual unrolling
+ VectorClass V[4];
+ for (j = 0; j < 4; j++)
+ V[j] = A[j] * M[j];
+
+ for (x = 4; x < N; x+=4) {
+ for (j = 0; j < 4; j++)
+ V[j] = mul_add(A[x+j], M[x+j], V[j]);
+ }
+ X[i] = (V[0]+V[1])+(V[2]+V[3]);
+ M += N;
+ Xmax = max(Xmax, abs(X[i]));
+ }
+ break;
+
+ case 2:
+ for (i = 0; i < N; i++) {
+ // manual unrolling
+ VectorClass V[2];
+ for (j = 0; j < 2; j++)
+ V[j] = A[j] * M[j];
+
+ for (x = 2; x < N; x+=2) {
+ for (j = 0; j < 2; j++)
+ V[j] = mul_add(A[x+j], M[x+j], V[j]);
+ }
+ X[i] = (V[0]+V[1]);
+ M += N;
+ Xmax = max(Xmax, abs(X[i]));
+ }
+ break;
+
+ default:
+ // odd number of states
+ for (i = 0; i < N; i++) {
+ // manual unrolling
+ VectorClass V[2];
+ for (j = 0; j < 2; j++)
+ V[j] = A[j] * M[j];
+
+ for (x = 2; x < N-1; x+=2) {
+ for (j = 0; j < 2; j++)
+ V[j] = mul_add(A[x+j], M[x+j], V[j]);
+ }
+ X[i] = mul_add(A[N-1], M[N-1], V[0]+V[1]);
+ M += N;
+ Xmax = max(Xmax, abs(X[i]));
+ }
+ break;
+ }
+}
+
+/**
+    compute dot-products of 3 vectors A, B, C with a single vector D and returns X, Y, Z:
+    X = A.D = A[0]*D[0] + ... + A[N-1]*D[N-1]
+    Y = B.D = B[0]*D[0] + ... + B[N-1]*D[N-1]
+    Z = C.D = C[0]*D[0] + ... + C[N-1]*D[N-1]
+    @param N number of elements of A, B, C and D (N >= 2); its parity selects the loop shape
+    @param nstates number of states; kept for interface compatibility, not inspected by the body
+    @param A vector of size N
+    @param B vector of size N
+    @param C vector of size N
+    @param D vector of size N
+    @param[out] X = A.D
+    @param[out] Y = B.D
+    @param[out] Z = C.D
+*/
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, class Numeric, const size_t nstates, const bool FMA>
+inline void dotProductTriple(Numeric *A, Numeric *B, Numeric *C, VectorClass *D,
+    VectorClass &X, VectorClass &Y, VectorClass &Z, size_t N)
+#else
+template <class VectorClass, class Numeric, const bool FMA>
+inline void dotProductTriple(Numeric *A, Numeric *B, Numeric *C, VectorClass *D,
+    VectorClass &X, VectorClass &Y, VectorClass &Z, size_t N, size_t nstates)
+#endif
+{
+    size_t i, j;
+    if (N % 2 == 0) { // the loops run over N, so N (not nstates) must decide whether a tail element exists
+        VectorClass AD[2], BD[2], CD[2];
+        for (j = 0; j < 2; j++) {
+            AD[j] = A[j] * D[j];
+            BD[j] = B[j] * D[j];
+            CD[j] = C[j] * D[j];
+        }
+        for (i = 2; i < N; i+=2) {
+            for (j = 0; j < 2; j++) {
+                AD[j] = mul_add(A[i+j], D[i+j], AD[j]);
+                BD[j] = mul_add(B[i+j], D[i+j], BD[j]);
+                CD[j] = mul_add(C[i+j], D[i+j], CD[j]);
+            }
+        }
+        X = AD[0] + AD[1];
+        Y = BD[0] + BD[1];
+        Z = CD[0] + CD[1];
+    } else {
+        // odd N: pairs cover elements 0..N-2, the last element N-1 is folded in exactly once below
+        VectorClass AD[2], BD[2], CD[2];
+        for (j = 0; j < 2; j++) {
+            AD[j] = A[j] * D[j];
+            BD[j] = B[j] * D[j];
+            CD[j] = C[j] * D[j];
+        }
+        for (i = 2; i < N-1; i+=2) {
+            for (j = 0; j < 2; j++) {
+                AD[j] = mul_add(A[i+j], D[i+j], AD[j]);
+                BD[j] = mul_add(B[i+j], D[i+j], BD[j]);
+                CD[j] = mul_add(C[i+j], D[i+j], CD[j]);
+            }
+        }
+        X = mul_add(A[N-1], D[N-1], AD[0] + AD[1]);
+        Y = mul_add(B[N-1], D[N-1], BD[0] + BD[1]);
+        Z = mul_add(C[N-1], D[N-1], CD[0] + CD[1]);
+    }
+}
+
+
+/**
+ Given three vectors A, B, C, compute X:
+ X = A.B.C = A[0]*B[0]*C[0] + ... + A[N-1]*B[N-1]*C[N-1]
+ @param N number of elements
+ @param A vector of size N
+ @param B vector of size N
+ @param C vector of size N
+ @param[out] X = A.B.C
+*/
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, class Numeric, const size_t N, const bool FMA>
+inline void dotProduct3Vec(Numeric *A, VectorClass *B, VectorClass *C, VectorClass &X)
+#else
+template <class VectorClass, class Numeric, const bool FMA>
+inline void dotProduct3Vec(Numeric *A, VectorClass *B, VectorClass *C, VectorClass &X, size_t N)
+#endif
+{
+ size_t i, j;
+ switch (N % 4) {
+ case 0: {
+ VectorClass V[4];
+ for (j = 0; j < 4; j++)
+ V[j] = A[j] * B[j] * C[j];
+ for (i = 4; i < N; i+=4)
+ for (j = 0; j < 4; j++)
+ V[j] = mul_add(A[i+j]*B[i+j], C[i+j], V[j]);
+ X = (V[0]+V[1])+(V[2]+V[3]);
+ break;
+ }
+
+ case 2: {
+ VectorClass V[2];
+ for (j = 0; j < 2; j++)
+ V[j] = A[j] * B[j] * C[j];
+ for (i = 2; i < N; i+=2)
+ for (j = 0; j < 2; j++)
+ V[j] = mul_add(A[i+j]*B[i+j], C[i+j], V[j]);
+ X = (V[0]+V[1]);
+ break;
+ }
+
+ default: {
+ // odd states
+ VectorClass V[2];
+ for (j = 0; j < 2; j++)
+ V[j] = A[j] * B[j] * C[j];
+ for (i = 2; i < N-1; i+=2)
+ for (j = 0; j < 2; j++)
+ V[j] = mul_add(A[i+j]*B[i+j], C[i+j], V[j]);
+ X = mul_add(A[N-1]*B[N-1], C[N-1], V[0]+V[1]);
+ break;
+ }
+ }
+}
+
+
+/**
+ given three vectors A, B, C and a numeric coefficient D, compute X:
+ X = exp(A[0]*D)*B[0]*C[0] + ... exp(A[N-1]*D)*B[N-1]*C[N-1]
+ @param N number of elements
+ @param A vector of size N
+ @param B vector of size N
+ @param C vector of size N
+ @param D coefficient for A
+ @param[out] X = exp(A[0]*D)*B[0]*C[0] + ... exp(A[N-1]*D)*B[N-1]*C[N-1]
+*/
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, class Numeric, const size_t N, const bool FMA>
+inline void dotProductExp(VectorClass *A, VectorClass *B, VectorClass *C, Numeric D, VectorClass &X)
+#else
+template <class VectorClass, class Numeric, const bool FMA>
+inline void dotProductExp(VectorClass *A, VectorClass *B, VectorClass *C, Numeric D, VectorClass &X, size_t N)
+#endif
+{
+ size_t i;
+ X = exp(A[0]*D)*B[0]*C[0];
+ for (i = 1; i < N; i++)
+ X = mul_add(exp(A[i]*D), B[i]*C[i], X);
+}
+
+
+/**
+ given two vectors A, B and a numeric coefficient D, compute X:
+ X = exp(A[0]*D)*B[0] + ... exp(A[N-1]*D)*B[N-1]
+ @param N number of elements
+ @param A vector of size N
+ @param B vector of size N
+ @param D coefficient for A
+ @param[out] X = exp(A[0]*D)*B[0] + ... exp(A[N-1]*D)*B[N-1]
+*/
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, class Numeric, const size_t N, const bool FMA>
+inline void dotProductExp(VectorClass *A, VectorClass *B, Numeric D, VectorClass &X)
+#else
+template <class VectorClass, class Numeric, const bool FMA>
+inline void dotProductExp(VectorClass *A, VectorClass *B, Numeric D, VectorClass &X, size_t N)
+#endif
+{
+ size_t i;
+ X = exp(A[0]*D)*B[0];
+ for (i = 1; i < N; i++)
+ X = mul_add(exp(A[i]*D), B[i], X);
+}
+
+/**
+    Rescale the partial likelihoods of those lanes of one vector block whose
+    lh_max value has dropped below SCALING_THRESHOLD.
+    A lane x is rescaled only if lh_max[x] is non-zero, lh_max[x] is below the
+    threshold, and the value loaded from invar for that lane equals 0.
+    With SAFE_NUMERIC, nstates entries of the lane are multiplied by
+    SCALING_THRESHOLD_INVER and dad_scale_num[x*ncat_mix] is incremented;
+    otherwise ncat_mix*nstates entries are multiplied and dad_scale_num[x]
+    is incremented.
+    @param lh_max per-lane value compared against SCALING_THRESHOLD
+    @param invar per-lane values, loaded through VectorClass::load_a
+    @param dad_partial_lh partial likelihoods; entry i of lane x is dad_partial_lh[x + i*VectorClass::size()]
+    @param dad_scale_num scaling counters to increment
+    @param ncat_mix multiplier for the entry count / counter stride (see above)
+    @param nstates number of states (a template constant when KERNEL_FIX_STATES is defined)
+*/
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, const bool SAFE_NUMERIC, const size_t nstates>
+inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_partial_lh, UBYTE *dad_scale_num,
+    size_t ncat_mix)
+#else
+template <class VectorClass, const bool SAFE_NUMERIC>
+inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_partial_lh, UBYTE *dad_scale_num,
+    size_t ncat_mix, size_t nstates)
+#endif
+{
+    // the underflow mask is the same expression in both numeric modes
+    auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(invar) == 0.0));
+    if (!horizontal_or(underflown))
+        return; // no lane has underflown
+
+    // the two modes differ only in how many entries are rescaled per lane
+    // and in the stride between the per-lane scaling counters
+    const size_t num_entries = SAFE_NUMERIC ? nstates : ncat_mix * nstates;
+    const size_t scale_step = SAFE_NUMERIC ? ncat_mix : 1;
+
+    for (size_t lane = 0; lane < VectorClass::size(); lane++) {
+        if (!underflown[lane])
+            continue;
+        // BQM 2016-05-03: only scale for non-constant sites
+        double *lane_lh = dad_partial_lh + lane;
+        for (size_t k = 0; k < num_entries; k++)
+            lane_lh[k*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+        dad_scale_num[lane*scale_step] += 1;
+    }
+}
+
+
+/*******************************************************
+ *
+ * Helper function to pre-compute traversal information
+ * and buffer to transition matrix
+ *
+ ******************************************************/
+
+#ifndef KERNEL_FIX_STATES
+inline bool PhyloTree::computeTraversalInfo(PhyloNeighbor *dad_branch, PhyloNode *dad, double* &buffer) {
+/*
+ Recursive helper: visit the subtree behind dad_branch (seen from dad) and append one
+ TraversalInfo entry to traversal_info for every branch that is neither flagged as
+ computed (bit 0 of partial_lh_computed) nor leads to a leaf. Child branches are visited
+ before their parent branch, in descending order of PhyloNeighbor::size.
+ @param dad_branch branch leading from dad into the subtree to process
+ @param dad node on the near side of dad_branch
+ @param buffer running pointer into a scratch area; unless the model is site-specific it is
+        advanced past the space reserved for info.echildren and info.partial_lh_leaves
+ @return the value returned by mem_slots.lock(dad_branch)
+*/
+ size_t nstates = aln->num_states;
+ PhyloNode *node = (PhyloNode*)dad_branch->node;
+
+ if ((dad_branch->partial_lh_computed & 1) || node->isLeaf()) { // already flagged as computed, or a leaf: nothing to schedule
+ return mem_slots.lock(dad_branch);
+ }
+
+
+ size_t num_leaves = 0; // counts the leaf children of node (dad excluded)
+ bool locked[node->degree()]; // locked[k]: return value of the recursive call for the k-th entry of neivec
+ memset(locked, 0, node->degree()); // NOTE(review): clears degree() bytes, i.e. the whole array only if sizeof(bool) == 1
+
+ // sort neighbors in descending size order
+ NeighborVec neivec = node->neighbors;
+ NeighborVec::iterator it, i2;
+ for (it = neivec.begin(); it != neivec.end(); it++)
+ for (i2 = it+1; i2 != neivec.end(); i2++)
+ if (((PhyloNeighbor*)*it)->size < ((PhyloNeighbor*)*i2)->size) {
+ Neighbor *nei = *it;
+ *it = *i2;
+ *i2 = nei;
+ }
+
+
+ // recursive: process every neighbor except dad, remembering the lock result per neighbor
+ for (it = neivec.begin(); it != neivec.end(); it++)
+ if ((*it)->node != dad) {
+ locked[it - neivec.begin()] = computeTraversalInfo((PhyloNeighbor*)(*it), node, buffer);
+ if ((*it)->node->isLeaf())
+ num_leaves++;
+ }
+ dad_branch->partial_lh_computed |= 1; // set bit 0 so this branch is not scheduled again
+
+ // prepare information for this branch
+ TraversalInfo info(dad_branch, dad);
+ info.echildren = info.partial_lh_leaves = NULL;
+
+ // re-orient partial_lh
+ reorientPartialLh(dad_branch, dad);
+
+ if (!dad_branch->partial_lh || mem_slots.locked(dad_branch)) {
+ // still no free entry found, memory saving technique
+ int slot_id = mem_slots.allocate(dad_branch);
+ if (slot_id < 0) { // allocation failed: print the traversal order and the tree before asserting
+ cout << "traversal order:";
+ for (auto it = traversal_info.begin(); it != traversal_info.end(); it++) {
+ it->dad_branch->node->name = convertIntToString(it->dad_branch->size); // NOTE(review): overwrites the node name with the branch size
+ cout << " ";
+ if (it->dad->isLeaf())
+ cout << it->dad->name;
+ else
+ cout << it->dad->id;
+ cout << "->";
+ if (it->dad_branch->node->isLeaf())
+ cout << it->dad_branch->node->name;
+ else
+ cout << it->dad_branch->node->id;
+ if (params->lh_mem_save == LM_MEM_SAVE) {
+ if (it->dad_branch->partial_lh_computed)
+ cout << " [";
+ else
+ cout << " (";
+ cout << mem_slots.findNei(it->dad_branch) - mem_slots.begin();
+ if (it->dad_branch->partial_lh_computed)
+ cout << "]";
+ else
+ cout << ")";
+ }
+ }
+ cout << endl;
+ drawTree(cout);
+ assert(0 && "No free/unlocked mem slot found!"); // NOTE(review): if assert is compiled out (NDEBUG), execution continues with slot_id < 0
+ }
+ } else
+ mem_slots.update(dad_branch);
+
+ if (verbose_mode >= VB_MED && params->lh_mem_save == LM_MEM_SAVE) {
+ int slot_id = mem_slots.findNei(dad_branch) - mem_slots.begin();
+ node->name = convertIntToString(slot_id); // NOTE(review): replaces the node's name with the slot index in this verbose mode
+ cout << "Branch " << dad->id << "-" << node->id << " assigned slot " << slot_id << endl;
+ }
+
+ if (params->lh_mem_save == LM_MEM_SAVE) {
+ // unlock the non-leaf children whose recursive call reported a lock
+ for (it = neivec.begin(); it != neivec.end(); it++)
+ if ((*it)->node != dad) {
+ if (!(*it)->node->isLeaf() && locked[it-neivec.begin()])
+ mem_slots.unlock((PhyloNeighbor*)*it);
+ }
+ }
+
+ if (!model->isSiteSpecificModel()) {
+ //------- normal model -----
+ info.echildren = buffer;
+ size_t block = nstates * ((model_factory->fused_mix_rate) ? site_rate->getNRate() : site_rate->getNRate()*model->getNMixtures());
+ buffer += get_safe_upper_limit(block*nstates*(node->degree()-1)); // block*nstates doubles for each of the degree()-1 children
+ if (num_leaves) {
+ info.partial_lh_leaves = buffer;
+ buffer += get_safe_upper_limit((aln->STATE_UNKNOWN+1)*block*num_leaves); // (STATE_UNKNOWN+1) vectors of size block per leaf child
+ }
+ }
+
+ traversal_info.push_back(info);
+ return mem_slots.lock(dad_branch);
+}
+#endif
+
+
+
+#ifdef KERNEL_FIX_STATES
+template<class VectorClass, const int nstates>
+#else
+template<class VectorClass>
+#endif
+void PhyloTree::computePartialInfo(TraversalInfo &info, VectorClass* buffer) {
+/*
+ Fill the pre-computed buffers of one traversal entry.
+ For every child branch of info.dad_branch->node (info.dad excluded) and every category c,
+ the products evec[x*nstates+i] * exp(eval[i] * rate(c%ncat) * child->length) are written
+ to info.echildren (block*nstates doubles per child).
+ For leaf children, info.partial_lh_leaves additionally receives, for every state listed in
+ aln->seq_states of that leaf, the dot products of those rows with the matching
+ tip_partial_lh vector, plus an all-ones vector at index STATE_UNKNOWN
+ ((STATE_UNKNOWN+1)*block doubles per leaf child).
+ The vectorized branch is taken when nstates is a multiple of VectorClass::size().
+ @param info traversal entry whose echildren / partial_lh_leaves buffers are written
+ @param buffer scratch space holding the exponentials in the vectorized branch
+*/
+#ifndef KERNEL_FIX_STATES
+ size_t nstates = aln->num_states;
+#endif
+
+ size_t c, i, x;
+ size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t block = nstates * ncat_mix; // length of one partial-likelihood vector over all categories
+ size_t tip_block = nstates * model->getNMixtures(); // stride between consecutive states in tip_partial_lh
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix]; // per-category offsets into eval (m*nstates) and evec (m*nstates*nstates)
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat; // m = c when fused, otherwise m = c/ncat
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates;
+ }
+ double *evec = model->getEigenvectors();
+ double *eval = model->getEigenvalues();
+
+ PhyloNode *dad = info.dad, *node = (PhyloNode*)info.dad_branch->node;
+ double *echild = info.echildren; // advanced by block*nstates after each child
+ double *partial_lh_leaf = info.partial_lh_leaves; // advanced by (STATE_UNKNOWN+1)*block after each leaf child
+
+ if (nstates % VectorClass::size() == 0) {
+ // vectorized version
+ VectorClass *expchild = (VectorClass*)buffer;
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ VectorClass *echild_ptr = (VectorClass*)echild;
+ // precompute information buffer
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass len_child = site_rate->getRate(c%ncat) * child->length; // branch length scaled by the rate of category c%ncat
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ double *evec_ptr = evec + mix_addr[c];
+ for (i = 0; i < nstates/VectorClass::size(); i++) {
+ // eval is not aligned! NOTE(review): load_a is used nevertheless - confirm the alignment guarantee
+ expchild[i] = exp(VectorClass().load_a(&eval_ptr[i*VectorClass::size()]) * len_child);
+ }
+ for (x = 0; x < nstates; x++) {
+ for (i = 0; i < nstates/VectorClass::size(); i++) {
+ // evec is not aligned! NOTE(review): load_a is used nevertheless - confirm the alignment guarantee
+ echild_ptr[i] = (VectorClass().load_a(&evec_ptr[x*nstates+i*VectorClass::size()]) * expchild[i]);
+ }
+ echild_ptr += nstates/VectorClass::size();
+ }
+ }
+ // pre compute information for tip
+ if (child->node->isLeaf()) {
+ vector<int>::iterator it; // state iterator; hides the neighbor iterator named `it` used above
+
+ for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
+ int state = (*it);
+ double *this_partial_lh_leaf = partial_lh_leaf + state*block;
+ VectorClass *echild_ptr = (VectorClass*)echild; // restart at the first row of this child
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass *this_tip_partial_lh = (VectorClass*)(tip_partial_lh + state*tip_block + mix_addr_nstates[c]);
+ for (x = 0; x < nstates; x++) {
+ VectorClass vchild = echild_ptr[0] * this_tip_partial_lh[0];
+ for (i = 1; i < nstates/VectorClass::size(); i++) {
+ vchild = mul_add(echild_ptr[i], this_tip_partial_lh[i], vchild);
+ }
+ this_partial_lh_leaf[x] = horizontal_add(vchild); // sum over lanes completes the dot product
+ echild_ptr += nstates/VectorClass::size();
+ }
+ this_partial_lh_leaf += nstates;
+ }
+ }
+ size_t addr = aln->STATE_UNKNOWN * block; // the vector for the unknown state is all ones
+ for (x = 0; x < block; x++) {
+ partial_lh_leaf[addr+x] = 1.0;
+ }
+ partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+ }
+ echild += block*nstates;
+ }
+// aligned_free(expchild);
+ } else {
+ // non-vectorized version
+ double expchild[nstates];
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ // precompute information buffer
+ double *echild_ptr = echild;
+ for (c = 0; c < ncat_mix; c++) {
+ double len_child = site_rate->getRate(c%ncat) * child->length; // branch length scaled by the rate of category c%ncat
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ double *evec_ptr = evec + mix_addr[c];
+ for (i = 0; i < nstates; i++) {
+ expchild[i] = exp(eval_ptr[i]*len_child);
+ }
+ for (x = 0; x < nstates; x++) {
+ for (i = 0; i < nstates; i++) {
+ echild_ptr[i] = evec_ptr[x*nstates+i] * expchild[i];
+ }
+ echild_ptr += nstates;
+ }
+ }
+ // pre compute information for tip
+ if (child->node->isLeaf()) {
+ vector<int>::iterator it; // state iterator; hides the neighbor iterator named `it` used above
+ for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
+ int state = (*it);
+ double *this_partial_lh_leaf = partial_lh_leaf + state*block;
+ double *echild_ptr = echild; // restart at the first row of this child
+ for (c = 0; c < ncat_mix; c++) {
+ double *this_tip_partial_lh = tip_partial_lh + state*tip_block + mix_addr_nstates[c];
+ for (x = 0; x < nstates; x++) {
+ double vchild = echild_ptr[0] * this_tip_partial_lh[0];
+ for (i = 1; i < nstates; i++) {
+ vchild += echild_ptr[i] * this_tip_partial_lh[i];
+ }
+ this_partial_lh_leaf[x] = vchild;
+ echild_ptr += nstates;
+ }
+ this_partial_lh_leaf += nstates;
+ }
+ }
+ size_t addr = aln->STATE_UNKNOWN * block; // the vector for the unknown state is all ones
+ for (x = 0; x < block; x++) {
+ partial_lh_leaf[addr+x] = 1.0;
+ }
+ partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+ }
+ echild += block*nstates;
+ }
+ }
+
+}
+
+#ifndef KERNEL_FIX_STATES
+/**
+    Split a range of patterns into one contiguous sub-range per thread.
+    The element count is first rounded up to a multiple of VectorClass::size(), and
+    every boundary is a multiple of VectorClass::size() as well. Thread t is meant
+    to work on [limits[t], limits[t+1]).
+    If there are fewer vector blocks than threads, the surplus threads receive the
+    empty range [elements, elements), so limits always gets threads+1 new entries
+    and callers can index limits[0..threads] safely.
+    @param threads number of threads (expected to be at least 1)
+    @param elements number of patterns to distribute
+    @param[out] limits receives the threads+1 boundaries; entries are appended, so
+           the vector is expected to be empty on entry
+*/
+template<class VectorClass>
+inline void computeBounds(int threads, size_t elements, vector<size_t> &limits) {
+    const size_t vec_size = VectorClass::size();
+    limits.reserve(threads+1);
+    // round the element count up to a whole number of vectors
+    elements = ((elements+vec_size-1)/vec_size)*vec_size;
+    size_t rest_elem = elements;
+    limits.push_back(0);
+    size_t last = 0;
+    for (int rest_thread = threads; rest_thread > 1; rest_thread--) {
+        // ceil(rest_elem / rest_thread) ...
+        size_t block_size = rest_elem/rest_thread;
+        if (rest_elem % rest_thread != 0) block_size++;
+        // ... padded to the vector size
+        block_size = ((block_size+vec_size-1)/vec_size)*vec_size;
+
+        last += block_size;
+        if (last >= elements)
+            break; // everything is already assigned to earlier threads
+        limits.push_back(last);
+        rest_elem -= block_size;
+    }
+
+    limits.push_back(elements);
+    // The loop above stops early when the work runs out before the threads do.
+    // Previously this left fewer than threads+1 boundaries behind, which tripped
+    // the assertion below (or, with assertions disabled, made callers read past
+    // the end of limits). Pad with empty ranges instead.
+    while (limits.size() < (size_t)threads+1)
+        limits.push_back(elements);
+    assert(limits.size() == (size_t)threads+1);
+}
+#endif
+
+#ifdef KERNEL_FIX_STATES
+template<class VectorClass, const int nstates>
+#else
+template<class VectorClass>
+#endif
+void PhyloTree::computeTraversalInfo(PhyloNode *node, PhyloNode *dad, bool compute_partial_lh) {
+/*
+ Build traversal_info for the branch (node, dad) in both directions and, unless the model
+ is site-specific, fill the per-entry buffers through computePartialInfo.
+ If compute_partial_lh is true, computePartialLikelihood is then run for every collected
+ entry, with the pattern range split across num_threads, and traversal_info is cleared.
+ Returns early, right after the traversal, when no entry was collected.
+ @param node one end of the branch
+ @param dad other end of the branch (the two are swapped internally when
+        params->lh_mem_save == LM_MEM_SAVE and node_size < dad_size)
+ @param compute_partial_lh whether to also compute the partial likelihoods
+*/
+ if (!tip_partial_lh_computed)
+ computeTipPartialLikelihood();
+
+ traversal_info.clear();
+
+ // reserve beginning of buffer_partial_lh for other purpose
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? site_rate->getNRate() : site_rate->getNRate()*model->getNMixtures();
+ size_t block = aln->num_states * ncat_mix;
+ double *buffer = buffer_partial_lh + block*VectorClass::size()*num_threads + get_safe_upper_limit(block)*(aln->STATE_UNKNOWN+2);
+
+ // sort subtrees for mem save technique
+ if (params->lh_mem_save == LM_MEM_SAVE) {
+// sortNeighborBySubtreeSize(node, dad);
+// sortNeighborBySubtreeSize(dad, node);
+ int node_size = node->computeSize(dad);
+ int dad_size = dad->computeSize(node);
+// PhyloNeighbor *dad_branch = (PhyloNeighbor*)dad->findNeighbor(node);
+// PhyloNeighbor *node_branch = (PhyloNeighbor*)node->findNeighbor(dad);
+ if (node_size < dad_size) {
+ // swap node and dad due to tree size
+ PhyloNode *tmp = node;
+ node = dad;
+ dad = tmp;
+ }
+
+ }
+
+ PhyloNeighbor *dad_branch = (PhyloNeighbor*)dad->findNeighbor(node);
+ PhyloNeighbor *node_branch = (PhyloNeighbor*)node->findNeighbor(dad);
+ bool dad_locked = computeTraversalInfo(dad_branch, dad, buffer); // both calls append to traversal_info and advance buffer
+ bool node_locked = computeTraversalInfo(node_branch, node, buffer);
+ if (params->lh_mem_save == LM_MEM_SAVE) {
+ if (dad_locked)
+ mem_slots.unlock(dad_branch);
+ if (node_locked)
+ mem_slots.unlock(node_branch);
+ }
+
+ if (verbose_mode >= VB_DEBUG && traversal_info.size() > 0) {
+ Node *saved = root; // draw the tree with dad as temporary root, then restore
+ root = dad;
+ drawTree(cout);
+ root = saved;
+ }
+
+ if (traversal_info.empty())
+ return;
+
+ if (!model->isSiteSpecificModel()) {
+
+ int num_info = traversal_info.size();
+
+ if (verbose_mode >= VB_DEBUG) {
+ cout << "traversal order:";
+ for (auto it = traversal_info.begin(); it != traversal_info.end(); it++) {
+ cout << " ";
+ if (it->dad->isLeaf())
+ cout << it->dad->name;
+ else
+ cout << it->dad->id;
+ cout << "->";
+ if (it->dad_branch->node->isLeaf())
+ cout << it->dad_branch->node->name;
+ else
+ cout << it->dad_branch->node->id;
+ if (params->lh_mem_save == LM_MEM_SAVE) {
+ if (it->dad_branch->partial_lh_computed)
+ cout << " [";
+ else
+ cout << " (";
+ cout << mem_slots.findNei(it->dad_branch) - mem_slots.begin();
+ if (it->dad_branch->partial_lh_computed)
+ cout << "]";
+ else
+ cout << ")";
+ }
+ }
+ cout << endl;
+ }
+ // with OpenMP every thread gets its own scratch area, offset by aln->num_states VectorClass elements per thread
+#ifdef _OPENMP
+#pragma omp parallel if (num_info >= 3) num_threads(num_threads)
+ {
+ VectorClass *buffer_tmp = (VectorClass*)buffer + aln->num_states*omp_get_thread_num();
+#pragma omp for schedule(static)
+#else
+ VectorClass *buffer_tmp = (VectorClass*)buffer;
+#endif
+ for (int i = 0; i < num_info; i++) {
+ #ifdef KERNEL_FIX_STATES
+ computePartialInfo<VectorClass, nstates>(traversal_info[i], buffer_tmp);
+ #else
+ computePartialInfo<VectorClass>(traversal_info[i], buffer_tmp);
+ #endif
+ }
+#ifdef _OPENMP
+ }
+#endif
+ }
+
+ if (compute_partial_lh) {
+ vector<size_t> limits; // filled by computeBounds; thread t handles patterns [limits[t], limits[t+1])
+ size_t orig_nptn = ((aln->size()+VectorClass::size()-1)/VectorClass::size())*VectorClass::size();
+ size_t nptn = ((orig_nptn+model_factory->unobserved_ptns.size()+VectorClass::size()-1)/VectorClass::size())*VectorClass::size();
+ computeBounds<VectorClass>(num_threads, nptn, limits);
+ // each thread walks all traversal entries in order, restricted to its own pattern range
+ #ifdef _OPENMP
+ #pragma omp parallel for schedule(static, 1) num_threads(num_threads)
+ #endif
+ for (int thread_id = 0; thread_id < num_threads; thread_id++) {
+ for (vector<TraversalInfo>::iterator it = traversal_info.begin(); it != traversal_info.end(); it++)
+ computePartialLikelihood(*it, limits[thread_id], limits[thread_id+1], thread_id);
+ }
+ traversal_info.clear();
+ }
+ return;
+}
+
+/*******************************************************
+ *
+ * NEW! highly-vectorized partial likelihood function
+ *
+ ******************************************************/
+
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA, const bool SITE_MODEL>
+void PhyloTree::computePartialLikelihoodSIMD(TraversalInfo &info, size_t ptn_lower, size_t ptn_upper, int thread_id)
+#else
+template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA, const bool SITE_MODEL>
+void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t ptn_lower, size_t ptn_upper, int thread_id)
+#endif
+{
+
+ PhyloNeighbor *dad_branch = info.dad_branch;
+ PhyloNode *dad = info.dad;
+ // don't recompute the likelihood
+ assert(dad);
+// if (dad_branch->partial_lh_computed & 1)
+// return;
+// dad_branch->partial_lh_computed |= 1;
+ PhyloNode *node = (PhyloNode*)(dad_branch->node);
+
+
+#ifndef KERNEL_FIX_STATES
+ size_t nstates = aln->num_states;
+#endif
+ const size_t states_square = nstates*nstates;
+ size_t orig_nptn = aln->size();
+ size_t max_orig_nptn = ((orig_nptn+VectorClass::size()-1)/VectorClass::size())*VectorClass::size();
+ size_t nptn = max_orig_nptn+model_factory->unobserved_ptns.size();
+// size_t max_nptn = ((nptn+VectorClass::size()-1)/VectorClass::size())*VectorClass::size();
+
+// if (!tip_partial_lh_computed)
+// computeTipPartialLikelihood();
+
+ if (node->isLeaf()) {
+// dad_branch->lh_scale_factor = 0.0;
+ return;
+ }
+
+ size_t ptn, c;
+ size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates;
+ }
+ size_t i, x;
+ size_t block = nstates * ncat_mix;
+// size_t tip_block = nstates * model->getNMixtures();
+ size_t tip_mem_size = max_orig_nptn * nstates;
+// size_t scale_size = SAFE_NUMERIC ? max_nptn * ncat_mix : max_nptn;
+ size_t scale_size = SAFE_NUMERIC ? (ptn_upper-ptn_lower) * ncat_mix : (ptn_upper-ptn_lower);
+
+ double *evec = model->getEigenvectors();
+ double *inv_evec = model->getInverseEigenvectors();
+ assert(inv_evec && evec);
+ double *eval = model->getEigenvalues();
+
+ // internal node
+ PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *nei = (PhyloNeighbor*)(*it);
+ // make sure that the partial_lh of children are different!
+ assert(dad_branch->partial_lh != nei->partial_lh);
+ if (!left) left = nei; else right = nei;
+ }
+
+ // precomputed buffer to save times
+ double *buffer_partial_lh_ptr = buffer_partial_lh + (getBufferPartialLhSize() - (2*block+nstates)*VectorClass::size()*num_threads);
+ double *echildren = NULL;
+ double *partial_lh_leaves = NULL;
+
+ // pre-compute scaled branch length per category
+ double len_children[ncat*(node->degree()-1)]; // +1 in case num_leaves = 0
+ double *len_left = NULL, *len_right = NULL;
+
+ if (SITE_MODEL) {
+ double *len_children_ptr = len_children;
+ FOR_NEIGHBOR_IT(node, dad, it3) {
+ for (c = 0; c < ncat; c++) {
+ len_children_ptr[c] = site_rate->getRate(c) * (*it3)->length;
+ }
+ if (!len_left)
+ len_left = len_children_ptr;
+ else
+ len_right = len_children_ptr;
+ len_children_ptr += ncat;
+ }
+ } else {
+
+ echildren = info.echildren;
+ partial_lh_leaves = info.partial_lh_leaves;
+
+ }
+
+ double *eleft = echildren, *eright = echildren + block*nstates;
+
+ if (!left->node->isLeaf() && right->node->isLeaf()) {
+ PhyloNeighbor *tmp = left;
+ left = right;
+ right = tmp;
+ double *etmp = eleft;
+ eleft = eright;
+ eright = etmp;
+ etmp = len_left;
+ len_left = len_right;
+ len_right = etmp;
+ }
+
+ if (node->degree() > 3) {
+ /*--------------------- multifurcating node ------------------*/
+
+ // now for-loop computing partial_lh over all site-patterns
+ VectorClass *partial_lh_all = (VectorClass*) &buffer_partial_lh_ptr[block*VectorClass::size()*2*thread_id];
+ double *vec_tip = (double*)&partial_lh_all[block];
+
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ for (i = 0; i < block; i++)
+ partial_lh_all[i] = 1.0;
+ UBYTE *scale_dad = NULL;
+ if (SAFE_NUMERIC) {
+ scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ memset(scale_dad, 0, sizeof(UBYTE)*ncat_mix*VectorClass::size());
+ } else
+ memset(&dad_branch->scale_num[ptn], 0, sizeof(UBYTE)*VectorClass::size());
+
+ if (SITE_MODEL) {
+ VectorClass *expchild = partial_lh_all + block;
+ VectorClass *eval_ptr = (VectorClass*) &eval[ptn*nstates];
+ VectorClass *evec_ptr = (VectorClass*) &evec[ptn*states_square];
+ double *len_child = len_children;
+ VectorClass vchild;
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ UBYTE *scale_child = SAFE_NUMERIC ? child->scale_num + ptn*ncat_mix : NULL;
+ VectorClass *partial_lh = partial_lh_all;
+ if (child->node->isLeaf()) {
+ // external node
+ VectorClass *tip_partial_lh_child = (VectorClass*) &tip_partial_lh[child->node->id*tip_mem_size + ptn*nstates];
+ for (c = 0; c < ncat; c++) {
+ for (i = 0; i < nstates; i++)
+ expchild[i] = exp(eval_ptr[i]*len_child[c]) * tip_partial_lh_child[i];
+ for (x = 0; x < nstates; x++) {
+ VectorClass *this_evec = &evec_ptr[x*nstates];
+#ifdef KERNEL_FIX_STATES
+ dotProductVec<VectorClass, VectorClass, nstates, FMA>(expchild, this_evec, vchild);
+#else
+ dotProductVec<VectorClass, VectorClass, FMA>(expchild, this_evec, vchild, nstates);
+#endif
+ partial_lh[x] *= vchild;
+ }
+ partial_lh += nstates;
+ }
+ } else {
+ // internal node
+ VectorClass *partial_lh = partial_lh_all;
+ VectorClass *partial_lh_child = (VectorClass*)(child->partial_lh + ptn*block);
+ if (!SAFE_NUMERIC) {
+ for (i = 0; i < VectorClass::size(); i++)
+ dad_branch->scale_num[ptn+i] += child->scale_num[ptn+i];
+ }
+
+ for (c = 0; c < ncat_mix; c++) {
+ if (SAFE_NUMERIC) {
+ for (x = 0; x < VectorClass::size(); x++)
+ scale_dad[x*ncat_mix+c] += scale_child[x*ncat_mix+c];
+ }
+ // compute real partial likelihood vector
+ for (i = 0; i < nstates; i++)
+ expchild[i] = exp(eval_ptr[i]*len_child[c]) * partial_lh_child[i];
+ for (x = 0; x < nstates; x++) {
+ VectorClass *this_evec = &evec_ptr[x*nstates];
+#ifdef KERNEL_FIX_STATES
+ dotProductVec<VectorClass, VectorClass, nstates, FMA>(expchild, this_evec, vchild);
+#else
+ dotProductVec<VectorClass, VectorClass, FMA>(expchild, this_evec, vchild, nstates);
+#endif
+ partial_lh[x] *= vchild;
+ }
+ partial_lh += nstates;
+ partial_lh_child += nstates;
+ }
+ } // if
+ len_child += ncat;
+ } // FOR_NEIGHBOR
+
+ } else {
+ // non site specific model
+ double *partial_lh_leaf = partial_lh_leaves;
+ double *echild = echildren;
+
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ UBYTE *scale_child = SAFE_NUMERIC ? child->scale_num + ptn*ncat_mix : NULL;
+ if (child->node->isLeaf()) {
+ // external node
+ // load data for tip
+ for (i = 0; i < VectorClass::size(); i++) {
+ double *child_lh;
+ if (ptn+i < orig_nptn)
+ child_lh = partial_lh_leaf + block*(aln->at(ptn+i))[child->node->id];
+ else if (ptn+i < max_orig_nptn)
+ child_lh = partial_lh_leaf + block*aln->STATE_UNKNOWN;
+ else if (ptn+i < nptn)
+ child_lh = partial_lh_leaf + block*model_factory->unobserved_ptns[ptn+i-max_orig_nptn];
+ else
+ child_lh = partial_lh_leaf + block*aln->STATE_UNKNOWN;
+ double *this_vec_tip = vec_tip+i;
+ for (c = 0; c < block; c++) {
+ *this_vec_tip = child_lh[c];
+ this_vec_tip += VectorClass::size();
+ }
+ }
+ VectorClass *vtip = (VectorClass*)vec_tip;
+ for (c = 0; c < block; c++) {
+ // compute real partial likelihood vector
+ partial_lh_all[c] *= vtip[c];
+ }
+ partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+ } else {
+ // internal node
+ VectorClass *partial_lh = partial_lh_all;
+ VectorClass *partial_lh_child = (VectorClass*)(child->partial_lh + ptn*block);
+ if (!SAFE_NUMERIC) {
+ for (i = 0; i < VectorClass::size(); i++)
+ dad_branch->scale_num[ptn+i] += child->scale_num[ptn+i];
+ }
+
+ double *echild_ptr = echild;
+ for (c = 0; c < ncat_mix; c++) {
+ if (SAFE_NUMERIC) {
+ for (x = 0; x < VectorClass::size(); x++)
+ scale_dad[x*ncat_mix+c] += scale_child[x*ncat_mix+c];
+ }
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ VectorClass vchild = echild_ptr[0] * partial_lh_child[0];
+ // double *echild_ptr = echild + (c*nstatesqr+x*nstates);
+ for (i = 1; i < nstates; i++) {
+ vchild = mul_add(echild_ptr[i], partial_lh_child[i], vchild);
+ }
+ echild_ptr += nstates;
+ partial_lh[x] *= vchild;
+ }
+ partial_lh += nstates;
+ partial_lh_child += nstates;
+ }
+ } // if
+ echild += block*nstates;
+ } // FOR_NEIGHBOR
+ } // if SITE_MODEL
+
+
+ // compute dot-product with inv_eigenvector
+ VectorClass *partial_lh_tmp = partial_lh_all;
+ VectorClass *partial_lh = (VectorClass*)(dad_branch->partial_lh + ptn*block);
+ VectorClass lh_max = 0.0;
+ double *inv_evec_ptr = SITE_MODEL ? &inv_evec[ptn*states_square] : NULL;
+ for (c = 0; c < ncat_mix; c++) {
+ if (SAFE_NUMERIC)
+ lh_max = 0.0;
+ if (SITE_MODEL) {
+ // compute dot-product with inv_eigenvector
+#ifdef KERNEL_FIX_STATES
+ productVecMat<VectorClass, VectorClass, nstates, FMA>(partial_lh_tmp, (VectorClass*)inv_evec_ptr, partial_lh);
+#else
+ productVecMat<VectorClass, VectorClass, FMA> (partial_lh_tmp, (VectorClass*)inv_evec_ptr, partial_lh, nstates);
+#endif
+ } else {
+ inv_evec_ptr = inv_evec + mix_addr[c];
+#ifdef KERNEL_FIX_STATES
+ productVecMat<VectorClass, double, nstates, FMA>(partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max);
+#else
+ productVecMat<VectorClass, double, FMA> (partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max, nstates);
+#endif
+ }
+ // check if one should scale partial likelihoods
+ if (SAFE_NUMERIC) {
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+ if (horizontal_or(underflown)) { // at least one site has numerical underflown
+ for (x = 0; x < VectorClass::size(); x++)
+ if (underflown[x]) {
+ // BQM 2016-05-03: only scale for non-constant sites
+ // now do the likelihood scaling
+ double *partial_lh = dad_branch->partial_lh + (ptn*block + c*nstates*VectorClass::size() + x);
+ for (i = 0; i < nstates; i++)
+ partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+ dad_branch->scale_num[(ptn+x)*ncat_mix+c] += 1;
+ }
+ }
+ }
+ partial_lh += nstates;
+ partial_lh_tmp += nstates;
+ }
+
+ if (!SAFE_NUMERIC) {
+ auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+ if (horizontal_or(underflown)) { // at least one site has numerical underflown
+ for (x = 0; x < VectorClass::size(); x++)
+ if (underflown[x]) {
+ double *partial_lh = dad_branch->partial_lh + (ptn*block + x);
+ // now do the likelihood scaling
+ for (i = 0; i < block; i++) {
+ partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+ }
+// sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn+x];
+ dad_branch->scale_num[ptn+x] += 1;
+ }
+ }
+ }
+
+ } // for ptn
+
+ // end multifurcating treatment
+ } else if (left->node->isLeaf() && right->node->isLeaf()) {
+
+ /*--------------------- TIP-TIP (cherry) case ------------------*/
+
+ double *partial_lh_left = SITE_MODEL ? &tip_partial_lh[left->node->id * tip_mem_size] : partial_lh_leaves;
+ double *partial_lh_right = SITE_MODEL ? &tip_partial_lh[right->node->id * tip_mem_size] : partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
+
+ // scale number must be ZERO
+ memset(dad_branch->scale_num + (SAFE_NUMERIC ? ptn_lower*ncat_mix : ptn_lower), 0, scale_size * sizeof(UBYTE));
+ double *vec_left = buffer_partial_lh_ptr + (block*2 + nstates)*VectorClass::size()*thread_id;
+
+ double *vec_right = SITE_MODEL ? &vec_left[nstates*VectorClass::size()] : &vec_left[block*VectorClass::size()];
+ VectorClass *partial_lh_tmp = SITE_MODEL ? (VectorClass*)vec_right+nstates : (VectorClass*)vec_right+block;
+
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ VectorClass *partial_lh = (VectorClass*)(dad_branch->partial_lh + ptn*block);
+
+ if (SITE_MODEL) {
+ VectorClass* expleft = (VectorClass*) vec_left;
+ VectorClass* expright = (VectorClass*) vec_right;
+ VectorClass *vleft = (VectorClass*) &partial_lh_left[ptn*nstates];
+ VectorClass *vright = (VectorClass*) &partial_lh_right[ptn*nstates];
+ VectorClass *eval_ptr = (VectorClass*) &eval[ptn*nstates];
+ VectorClass *evec_ptr = (VectorClass*) &evec[ptn*states_square];
+ VectorClass *inv_evec_ptr = (VectorClass*) &inv_evec[ptn*states_square];
+ for (c = 0; c < ncat; c++) {
+ for (i = 0; i < nstates; i++) {
+ expleft[i] = exp(eval_ptr[i]*len_left[c]) * vleft[i];
+ expright[i] = exp(eval_ptr[i]*len_right[c]) * vright[i];
+
+ }
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ VectorClass *this_evec = evec_ptr + x*nstates;
+#ifdef KERNEL_FIX_STATES
+ dotProductDualVec<VectorClass, VectorClass, nstates, FMA>(this_evec, expleft, this_evec, expright, partial_lh_tmp[x]);
+#else
+ dotProductDualVec<VectorClass, VectorClass, FMA>(this_evec, expleft, this_evec, expright, partial_lh_tmp[x], nstates);
+#endif
+ }
+ // compute dot-product with inv_eigenvector
+#ifdef KERNEL_FIX_STATES
+ productVecMat<VectorClass, VectorClass, nstates, FMA>(partial_lh_tmp, inv_evec_ptr, partial_lh);
+#else
+ productVecMat<VectorClass, VectorClass, FMA> (partial_lh_tmp, inv_evec_ptr, partial_lh, nstates);
+#endif
+ partial_lh += nstates;
+ } // FOR category
+ } else {
+ VectorClass *vleft = (VectorClass*)vec_left;
+ VectorClass *vright = (VectorClass*)vec_right;
+ // load data for tip
+ for (x = 0; x < VectorClass::size(); x++) {
+ double *tip_left, *tip_right;
+ if (ptn+x < orig_nptn) {
+ tip_left = partial_lh_left + block * (aln->at(ptn+x))[left->node->id];
+ tip_right = partial_lh_right + block * (aln->at(ptn+x))[right->node->id];
+ } else if (ptn+x < max_orig_nptn) {
+ tip_left = partial_lh_left + block * aln->STATE_UNKNOWN;
+ tip_right = partial_lh_right + block * aln->STATE_UNKNOWN;
+ } else if (ptn+x < nptn) {
+ tip_left = partial_lh_left + block * model_factory->unobserved_ptns[ptn+x-max_orig_nptn];
+ tip_right = partial_lh_right + block * model_factory->unobserved_ptns[ptn+x-max_orig_nptn];
+ } else {
+ tip_left = partial_lh_left + block * aln->STATE_UNKNOWN;
+ tip_right = partial_lh_right + block * aln->STATE_UNKNOWN;
+ }
+ double *this_vec_left = vec_left+x;
+ double *this_vec_right = vec_right+x;
+ for (i = 0; i < block; i++) {
+ *this_vec_left = tip_left[i];
+ *this_vec_right = tip_right[i];
+ this_vec_left += VectorClass::size();
+ this_vec_right += VectorClass::size();
+ }
+ }
+
+
+ for (c = 0; c < ncat_mix; c++) {
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ partial_lh_tmp[x] = vleft[x] * vright[x];
+ }
+
+ // compute dot-product with inv_eigenvector
+#ifdef KERNEL_FIX_STATES
+ productVecMat<VectorClass, double, nstates, FMA>(partial_lh_tmp, inv_evec_ptr, partial_lh);
+#else
+ productVecMat<VectorClass, double, FMA> (partial_lh_tmp, inv_evec_ptr, partial_lh, nstates);
+#endif
+
+ // increase pointer
+ vleft += nstates;
+ vright += nstates;
+ partial_lh += nstates;
+ } // FOR category
+ } // IF SITE_MODEL
+ } // FOR LOOP
+
+
+ } else if (left->node->isLeaf() && !right->node->isLeaf()) {
+
+ /*--------------------- TIP-INTERNAL NODE case ------------------*/
+
+ // only take scale_num from the right subtree
+ memcpy(
+ dad_branch->scale_num + (SAFE_NUMERIC ? ptn_lower*ncat_mix : ptn_lower),
+ right->scale_num + (SAFE_NUMERIC ? ptn_lower*ncat_mix : ptn_lower),
+ scale_size * sizeof(UBYTE));
+
+ double *partial_lh_left = SITE_MODEL ? &tip_partial_lh[left->node->id * tip_mem_size] : partial_lh_leaves;
+
+
+ double *vec_left = buffer_partial_lh_ptr + (2*block+nstates)*VectorClass::size()*thread_id;
+ VectorClass *partial_lh_tmp = SITE_MODEL ? (VectorClass*)vec_left+2*nstates : (VectorClass*)vec_left+block;
+
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ VectorClass *partial_lh = (VectorClass*)(dad_branch->partial_lh + ptn*block);
+ VectorClass *partial_lh_right = (VectorClass*)(right->partial_lh + ptn*block);
+// memset(partial_lh, 0, sizeof(VectorClass)*block);
+ VectorClass lh_max = 0.0;
+
+ if (SITE_MODEL) {
+ VectorClass *expleft = (VectorClass*)vec_left;
+ VectorClass *expright = expleft+nstates;
+ VectorClass *vleft = (VectorClass*)&partial_lh_left[ptn*nstates];
+ VectorClass *eval_ptr = (VectorClass*) &eval[ptn*nstates];
+ VectorClass *evec_ptr = (VectorClass*) &evec[ptn*states_square];
+ VectorClass *inv_evec_ptr = (VectorClass*) &inv_evec[ptn*states_square];
+ for (c = 0; c < ncat; c++) {
+ for (i = 0; i < nstates; i++) {
+ expleft[i] = exp(eval_ptr[i]*len_left[c]) * vleft[i];
+ expright[i] = exp(eval_ptr[i]*len_right[c]) * partial_lh_right[i];
+ }
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ VectorClass *this_evec = evec_ptr + x*nstates;
+#ifdef KERNEL_FIX_STATES
+ dotProductDualVec<VectorClass, VectorClass, nstates, FMA>(this_evec, expleft, this_evec, expright, partial_lh_tmp[x]);
+#else
+ dotProductDualVec<VectorClass, VectorClass, FMA>(this_evec, expleft, this_evec, expright, partial_lh_tmp[x], nstates);
+#endif
+ }
+ // compute dot-product with inv_eigenvector
+#ifdef KERNEL_FIX_STATES
+ productVecMat<VectorClass, VectorClass, nstates, FMA>(partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max);
+#else
+ productVecMat<VectorClass, VectorClass, FMA> (partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max, nstates);
+#endif
+ // check if one should scale partial likelihoods
+ if (SAFE_NUMERIC) {
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+ if (horizontal_or(underflown)) { // at least one site has numerical underflown
+ for (x = 0; x < VectorClass::size(); x++)
+ if (underflown[x]) {
+ // BQM 2016-05-03: only scale for non-constant sites
+ // now do the likelihood scaling
+ double *partial_lh = dad_branch->partial_lh + (ptn*block + c*nstates*VectorClass::size() + x);
+ for (i = 0; i < nstates; i++)
+ partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+ dad_branch->scale_num[(ptn+x)*ncat_mix+c] += 1;
+ }
+ }
+ }
+ partial_lh_right += nstates;
+ partial_lh += nstates;
+ } // FOR category
+
+ } else {
+ VectorClass *vleft = (VectorClass*)vec_left;
+ // load data for tip
+ for (x = 0; x < VectorClass::size(); x++) {
+ double *tip;
+ if (ptn+x < orig_nptn) {
+ tip = partial_lh_left + block*(aln->at(ptn+x))[left->node->id];
+ } else if (ptn+x < max_orig_nptn) {
+ tip = partial_lh_left + block*aln->STATE_UNKNOWN;
+ } else if (ptn+x < nptn) {
+ tip = partial_lh_left + block*model_factory->unobserved_ptns[ptn+x-max_orig_nptn];
+ } else {
+ tip = partial_lh_left + block*aln->STATE_UNKNOWN;
+ }
+ double *this_vec_left = vec_left+x;
+ for (i = 0; i < block; i++) {
+ *this_vec_left = tip[i];
+ this_vec_left += VectorClass::size();
+ }
+ }
+
+ double *eright_ptr = eright;
+ for (c = 0; c < ncat_mix; c++) {
+ if (SAFE_NUMERIC)
+ lh_max = 0.0;
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ VectorClass vright;
+ #ifdef KERNEL_FIX_STATES
+ dotProductVec<VectorClass, double, nstates, FMA>(eright_ptr, partial_lh_right, vright);
+ #else
+ dotProductVec<VectorClass, double, FMA>(eright_ptr, partial_lh_right, vright, nstates);
+ #endif
+ eright_ptr += nstates;
+ partial_lh_tmp[x] = vleft[x] * (vright);
+ }
+
+ // compute dot-product with inv_eigenvector
+ #ifdef KERNEL_FIX_STATES
+ productVecMat<VectorClass, double, nstates, FMA>(partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max);
+ #else
+ productVecMat<VectorClass, double, FMA> (partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max, nstates);
+ #endif
+ // check if one should scale partial likelihoods
+ if (SAFE_NUMERIC) {
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+ if (horizontal_or(underflown)) { // at least one site has numerical underflown
+ for (x = 0; x < VectorClass::size(); x++)
+ if (underflown[x]) {
+ // BQM 2016-05-03: only scale for non-constant sites
+ // now do the likelihood scaling
+ double *partial_lh = dad_branch->partial_lh + (ptn*block + c*nstates*VectorClass::size() + x);
+ for (i = 0; i < nstates; i++)
+ partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+ dad_branch->scale_num[(ptn+x)*ncat_mix+c] += 1;
+ }
+ }
+ }
+ vleft += nstates;
+ partial_lh_right += nstates;
+ partial_lh += nstates;
+ } // FOR category
+ } // IF SITE_MODEL
+
+ if (!SAFE_NUMERIC) {
+ auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+ if (horizontal_or(underflown)) { // at least one site has numerical underflown
+ for (x = 0; x < VectorClass::size(); x++)
+ if (underflown[x]) {
+ double *partial_lh = dad_branch->partial_lh + (ptn*block + x);
+ // now do the likelihood scaling
+ for (i = 0; i < block; i++) {
+ partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+ }
+// sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn+x];
+ dad_branch->scale_num[ptn+x] += 1;
+ }
+ }
+ }
+
+ } // big for loop over ptn
+
+ } else {
+
+ /*--------------------- INTERNAL-INTERNAL NODE case ------------------*/
+
+ VectorClass *partial_lh_tmp = (VectorClass*)buffer_partial_lh_ptr + (2*block+nstates)*thread_id;
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ VectorClass *partial_lh = (VectorClass*)(dad_branch->partial_lh + ptn*block);
+ VectorClass *partial_lh_left = (VectorClass*)(left->partial_lh + ptn*block);
+ VectorClass *partial_lh_right = (VectorClass*)(right->partial_lh + ptn*block);
+ VectorClass lh_max = 0.0;
+ UBYTE *scale_dad, *scale_left, *scale_right;
+
+ if (SAFE_NUMERIC) {
+ size_t addr = ptn*ncat_mix;
+ scale_dad = dad_branch->scale_num + addr;
+ scale_left = left->scale_num + addr;
+ scale_right = right->scale_num + addr;
+ } else {
+ scale_dad = dad_branch->scale_num + ptn;
+ scale_left = left->scale_num + ptn;
+ scale_right = right->scale_num + ptn;
+ for (i = 0; i < VectorClass::size(); i++)
+ scale_dad[i] = scale_left[i] + scale_right[i];
+ }
+
+ double *eleft_ptr = eleft;
+ double *eright_ptr = eright;
+ VectorClass *expleft, *expright, *eval_ptr, *evec_ptr, *inv_evec_ptr;
+ if (SITE_MODEL) {
+ expleft = partial_lh_tmp + nstates;
+ expright = expleft + nstates;
+ eval_ptr = (VectorClass*) &eval[ptn*nstates];
+ evec_ptr = (VectorClass*) &evec[ptn*states_square];
+ inv_evec_ptr = (VectorClass*) &inv_evec[ptn*states_square];
+ }
+
+ for (c = 0; c < ncat_mix; c++) {
+ if (SAFE_NUMERIC) {
+ lh_max = 0.0;
+ for (x = 0; x < VectorClass::size(); x++)
+ scale_dad[x*ncat_mix] = scale_left[x*ncat_mix] + scale_right[x*ncat_mix];
+ }
+
+ if (SITE_MODEL) {
+ // site-specific model
+ for (i = 0; i < nstates; i++) {
+ expleft[i] = exp(eval_ptr[i]*len_left[c]) * partial_lh_left[i];
+ expright[i] = exp(eval_ptr[i]*len_right[c]) * partial_lh_right[i];
+ }
+ for (x = 0; x < nstates; x++) {
+ VectorClass *this_evec = evec_ptr + x*nstates;
+#ifdef KERNEL_FIX_STATES
+ dotProductDualVec<VectorClass, VectorClass, nstates, FMA>(this_evec, expleft, this_evec, expright, partial_lh_tmp[x]);
+#else
+ dotProductDualVec<VectorClass, VectorClass, FMA>(this_evec, expleft, this_evec, expright, partial_lh_tmp[x], nstates);
+#endif
+ }
+#ifdef KERNEL_FIX_STATES
+ productVecMat<VectorClass, VectorClass, nstates, FMA>(partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max);
+#else
+ productVecMat<VectorClass, VectorClass, FMA> (partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max, nstates);
+#endif
+ } else {
+ // normal model
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+#ifdef KERNEL_FIX_STATES
+ dotProductDualVec<VectorClass, double, nstates, FMA>(eleft_ptr, partial_lh_left, eright_ptr, partial_lh_right, partial_lh_tmp[x]);
+#else
+ dotProductDualVec<VectorClass, double, FMA>(eleft_ptr, partial_lh_left, eright_ptr, partial_lh_right, partial_lh_tmp[x], nstates);
+#endif
+ eleft_ptr += nstates;
+ eright_ptr += nstates;
+ }
+
+ // compute dot-product with inv_eigenvector
+#ifdef KERNEL_FIX_STATES
+ productVecMat<VectorClass, double, nstates, FMA>(partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max);
+#else
+ productVecMat<VectorClass, double, FMA> (partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max, nstates);
+#endif
+ }
+
+ // check if one should scale partial likelihoods
+ if (SAFE_NUMERIC) {
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+ if (horizontal_or(underflown))
+ for (x = 0; x < VectorClass::size(); x++)
+ if (underflown[x]) {
+ // BQM 2016-05-03: only scale for non-constant sites
+ // now do the likelihood scaling
+ double *partial_lh = dad_branch->partial_lh + (ptn*block + c*nstates*VectorClass::size() + x);
+ for (i = 0; i < nstates; i++)
+ partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+ scale_dad[x*ncat_mix] += 1;
+ }
+ scale_dad++;
+ scale_left++;
+ scale_right++;
+ }
+ partial_lh_left += nstates;
+ partial_lh_right += nstates;
+ partial_lh += nstates;
+ }
+
+ if (!SAFE_NUMERIC) {
+ // check if one should scale partial likelihoods
+ auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+ if (horizontal_or(underflown)) { // at least one site has numerical underflown
+ for (x = 0; x < VectorClass::size(); x++)
+ if (underflown[x]) {
+ double *partial_lh = dad_branch->partial_lh + (ptn*block + x);
+ // now do the likelihood scaling
+ for (i = 0; i < block; i++) {
+ partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+ }
+// sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn+x];
+ dad_branch->scale_num[ptn+x] += 1;
+ }
+ }
+ }
+
+ } // big for loop over ptn
+
+ }
+}
+
+/*******************************************************
+ *
+ * NEW! highly-vectorized log-likelihood derivative function
+ *
+ ******************************************************/
+
+/**
+ * Fill theta_all and buffer_scale_all for the patterns in
+ * [ptn_lower, ptn_upper), walking them in steps of VectorClass::size().
+ *
+ * Steps, in order:
+ *  1. call computePartialLikelihood() for every entry of traversal_info
+ *     on this pattern range;
+ *  2. write to theta_all the element-wise product of the two partial
+ *     likelihood vectors of the branch: the gathered tip vector times
+ *     dad_branch->partial_lh when dad->isLeaf(), otherwise
+ *     node_branch->partial_lh times dad_branch->partial_lh;
+ *  3. write to buffer_scale_all, per pattern, a scale count multiplied
+ *     by LOG_SCALING_THRESHOLD.
+ *
+ * With SAFE_NUMERIC the scale counts are read per (pattern, category):
+ * the minimum over categories is what goes to buffer_scale_all; theta
+ * entries of a category whose count is exactly minimum+1 are multiplied
+ * by SCALING_THRESHOLD, and those with a larger count are set to 0.
+ * Without SAFE_NUMERIC there is one count per pattern and theta is left
+ * untouched.
+ *
+ * @param dad_branch  neighbor record; dad_branch->node is taken as the
+ *                    node at the other end of the branch from dad
+ * @param dad         node whose isLeaf() selects the tip or internal path
+ * @param ptn_lower   first pattern index processed (inclusive)
+ * @param ptn_upper   pattern index bound (exclusive)
+ * @param thread_id   offsets this call's slice of the scratch buffer and
+ *                    is forwarded to computePartialLikelihood()
+ *
+ * NOTE(review): the aligned vector casts below presumably rely on
+ * ptn_lower being a multiple of VectorClass::size() and on the buffers
+ * being padded to the vector width -- confirm against the caller.
+ */
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA, const bool SITE_MODEL>
+void PhyloTree::computeLikelihoodBufferSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, size_t ptn_lower, size_t ptn_upper, int thread_id)
+#else
+template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA, const bool SITE_MODEL>
+void PhyloTree::computeLikelihoodBufferGenericSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, size_t ptn_lower, size_t ptn_upper, int thread_id)
+#endif
+{
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); // presumably the same branch as seen from node -- confirm findNeighbor()
+
+#ifndef KERNEL_FIX_STATES
+ size_t nstates = aln->num_states;
+#endif
+ size_t orig_nptn = aln->size();
+ size_t max_orig_nptn = ((orig_nptn+VectorClass::size()-1)/VectorClass::size())*VectorClass::size(); // orig_nptn rounded up to a multiple of the vector width
+ size_t nptn = max_orig_nptn+model_factory->unobserved_ptns.size(); // padded pattern count plus the unobserved patterns
+ size_t ptn, i, c;
+ size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+
+ size_t block = ncat_mix * nstates; // number of values per pattern in partial_lh / theta_all
+ size_t tip_block = nstates * model->getNMixtures(); // number of values per state code in tip_partial_lh
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom; // index of the nstates-wide tip slice used by category c
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates; // NOTE(review): mix_addr is not read again in this function
+ }
+
+ // reserve 3*block for computeLikelihoodDerv
+ double *buffer_partial_lh_ptr = buffer_partial_lh + 3*get_safe_upper_limit(block);
+
+ // first compute partial_lh
+ for (vector<TraversalInfo>::iterator it = traversal_info.begin(); it != traversal_info.end(); it++)
+ computePartialLikelihood(*it, ptn_lower, ptn_upper, thread_id);
+
+ if (dad->isLeaf()) {
+ // special treatment for TIP-INTERNAL NODE case
+ double *tip_partial_lh_node = &tip_partial_lh[dad->id * max_orig_nptn*nstates]; // only read below when SITE_MODEL is set
+
+ double *vec_tip = buffer_partial_lh_ptr + tip_block*VectorClass::size()*thread_id; // scratch slice selected by thread_id
+
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block);
+ VectorClass *theta = (VectorClass*)(theta_all + ptn*block);
+ //load tip vector
+ if (!SITE_MODEL)
+ for (i = 0; i < VectorClass::size(); i++) {
+ double *this_tip_partial_lh;
+ if (ptn+i < orig_nptn)
+ this_tip_partial_lh = tip_partial_lh + tip_block*(aln->at(ptn+i))[dad->id]; // row for the state code of dad at pattern ptn+i
+ else if (ptn+i < max_orig_nptn)
+ this_tip_partial_lh = tip_partial_lh + tip_block*aln->STATE_UNKNOWN; // padding lane: use the STATE_UNKNOWN row
+ else if (ptn+i < nptn)
+ this_tip_partial_lh = tip_partial_lh + tip_block*model_factory->unobserved_ptns[ptn+i-max_orig_nptn];
+ else
+ this_tip_partial_lh = tip_partial_lh + tip_block*aln->STATE_UNKNOWN; // lane beyond nptn: use the STATE_UNKNOWN row
+ double *this_vec_tip = vec_tip+i; // lane i of every vector in vec_tip
+ for (c = 0; c < tip_block; c++) {
+ *this_vec_tip = this_tip_partial_lh[c];
+ this_vec_tip += VectorClass::size(); // stride of one vector: values of pattern i are interleaved with the other lanes
+ }
+
+ }
+ VectorClass *lh_tip;
+ if (SITE_MODEL)
+ lh_tip = (VectorClass*)&tip_partial_lh_node[ptn*nstates];
+ for (c = 0; c < ncat_mix; c++) {
+ if (!SITE_MODEL)
+ lh_tip = (VectorClass*)(vec_tip + mix_addr_nstates[c]*VectorClass::size());
+ for (i = 0; i < nstates; i++) {
+ theta[i] = lh_tip[i] * partial_lh_dad[i];
+ }
+ partial_lh_dad += nstates;
+ theta += nstates;
+ }
+ if (SAFE_NUMERIC) {
+ // numerical scaling per category
+ UBYTE *scale_dad;
+ UBYTE min_scale;
+ for (i = 0; i < VectorClass::size(); i++) {
+ scale_dad = dad_branch->scale_num+(ptn+i)*ncat_mix;
+ min_scale = scale_dad[0];
+ for (c = 1; c < ncat_mix; c++)
+ min_scale = min(min_scale, scale_dad[c]);
+
+ buffer_scale_all[ptn+i] = min_scale;
+
+ for (c = 0; c < ncat_mix; c++) {
+ if (scale_dad[c] == min_scale+1) { // one count above the minimum: apply one SCALING_THRESHOLD factor
+ double *this_theta = &theta_all[ptn*block + c*nstates*VectorClass::size() + i];
+ for (size_t x = 0; x < nstates; x++) {
+ this_theta[x*VectorClass::size()] *= SCALING_THRESHOLD;
+ }
+ } else if (scale_dad[c] > min_scale+1) { // more than one count above the minimum: entries are zeroed
+ double *this_theta = &theta_all[ptn*block + c*nstates*VectorClass::size() + i];
+ for (size_t x = 0; x < nstates; x++) {
+ this_theta[x*VectorClass::size()] = 0.0;
+ }
+ }
+ }
+ }
+ } else {
+ // normal scaling
+ for (i = 0; i < VectorClass::size(); i++)
+ buffer_scale_all[ptn+i] = dad_branch->scale_num[ptn+i];
+ }
+ VectorClass *buf = (VectorClass*)(buffer_scale_all+ptn);
+ *buf *= LOG_SCALING_THRESHOLD; // turn the stored counts into count * LOG_SCALING_THRESHOLD
+
+ } // FOR PTN LOOP
+// aligned_free(vec_tip);
+ } else {
+ //------- both dad and node are internal nodes --------//
+
+ // now compute theta
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ VectorClass *theta = (VectorClass*)(theta_all + ptn*block);
+ VectorClass *partial_lh_node = (VectorClass*)(node_branch->partial_lh + ptn*block);
+ VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block);
+ for (i = 0; i < block; i++)
+ theta[i] = partial_lh_node[i] * partial_lh_dad[i];
+
+ if (SAFE_NUMERIC) {
+ // numerical scaling per category
+ UBYTE min_scale;
+ UBYTE sum_scale[ncat_mix]; // per-category sum of the counts of both sides (stored as UBYTE)
+ size_t ptn_ncat = ptn*ncat_mix;
+ UBYTE *scale_dad = dad_branch->scale_num + ptn_ncat;
+ UBYTE *scale_node = node_branch->scale_num + ptn_ncat;
+
+ for (i = 0; i < VectorClass::size(); i++) {
+ min_scale = sum_scale[0] = scale_dad[0] + scale_node[0];
+ for (c = 1; c < ncat_mix; c++) {
+ sum_scale[c] = scale_dad[c] + scale_node[c];
+ min_scale = min(min_scale, sum_scale[c]);
+ }
+ buffer_scale_all[ptn+i] = min_scale;
+
+ for (c = 0; c < ncat_mix; c++) {
+ if (sum_scale[c] == min_scale+1) { // one count above the minimum: apply one SCALING_THRESHOLD factor
+ double *this_theta = &theta_all[ptn*block + c*nstates*VectorClass::size() + i];
+ for (size_t x = 0; x < nstates; x++) {
+ this_theta[x*VectorClass::size()] *= SCALING_THRESHOLD;
+ }
+ } else if (sum_scale[c] > min_scale+1) { // more than one count above the minimum: entries are zeroed
+ double *this_theta = &theta_all[ptn*block + c*nstates*VectorClass::size() + i];
+ for (size_t x = 0; x < nstates; x++) {
+ this_theta[x*VectorClass::size()] = 0.0;
+ }
+ }
+ }
+ scale_dad += ncat_mix; // advance to the counts of the next pattern in this vector
+ scale_node += ncat_mix;
+ }
+ } else {
+ for (i = 0; i < VectorClass::size(); i++)
+ buffer_scale_all[ptn+i] = dad_branch->scale_num[ptn+i] + node_branch->scale_num[ptn+i];
+ }
+ VectorClass *buf = (VectorClass*)(buffer_scale_all+ptn);
+ *buf *= LOG_SCALING_THRESHOLD; // turn the stored counts into count * LOG_SCALING_THRESHOLD
+ } // FOR ptn
+ } // internal node
+}
+/**
+ * Compute the two values df and ddf for the branch described by
+ * dad_branch / dad and return them through the reference parameters.
+ *
+ * Outline of what the body does:
+ *  - calls initializeAllPartialLh() when central_partial_lh is null;
+ *  - if dad_branch->node is a leaf, swaps the roles so that dad is the leaf;
+ *  - calls computeTraversalInfo() for (node, dad);
+ *  - unless SITE_MODEL, precomputes for every category c and state i
+ *      cof  = eigenvalue * rate
+ *      val0 = exp(cof * length) * prop
+ *      val1 = cof * val0
+ *      val2 = cof * val1
+ *    i.e. val0 together with its first and second derivative with
+ *    respect to the branch length;
+ *  - for every thread id, fills theta_all through the likelihood-buffer
+ *    kernel when theta_computed is false, then accumulates per-pattern
+ *    contributions and merges them inside a critical section;
+ *  - sets theta_computed to true;
+ *  - when model_factory->unobserved_ptns is non-empty, adds a correction
+ *    term built from the sums over the unobserved patterns;
+ *  - if df is NaN or infinite: calls outError() when SAFE_NUMERIC is
+ *    false, otherwise prints a warning to cerr and sets df = ddf = 0.
+ *
+ * @param dad_branch  neighbor record; dad_branch->node is the other end
+ *                    of the branch from dad
+ * @param dad         node at one end of the branch
+ * @param df          output: sum over patterns of freq * df_ptn / lh_ptn
+ *                    (plus the unobserved-pattern correction)
+ * @param ddf         output: sum over patterns of
+ *                    freq * (ddf_ptn / lh_ptn - (df_ptn / lh_ptn)^2)
+ *                    (plus the unobserved-pattern correction)
+ *
+ * NOTE(review): whether outError() returns is not visible here; if it
+ * does not, the final NaN/inf check is only reachable with SAFE_NUMERIC
+ * or after the correction term turned df non-finite -- confirm.
+ */
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA, const bool SITE_MODEL>
+void PhyloTree::computeLikelihoodDervSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf)
+#else
+template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA, const bool SITE_MODEL>
+void PhyloTree::computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf)
+#endif
+{
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
+ if (!central_partial_lh)
+ initializeAllPartialLh();
+ if (node->isLeaf()) { // swap both pairs so that a leaf endpoint ends up in dad
+ PhyloNode *tmp_node = dad;
+ dad = node;
+ node = tmp_node;
+ PhyloNeighbor *tmp_nei = dad_branch;
+ dad_branch = node_branch;
+ node_branch = tmp_nei;
+ }
+
+#ifdef KERNEL_FIX_STATES
+ computeTraversalInfo<VectorClass, nstates>(node, dad, false);
+#else
+ computeTraversalInfo<VectorClass>(node, dad, false);
+#endif
+
+//
+// if ((dad_branch->partial_lh_computed & 1) == 0)
+// computePartialLikelihood(dad_branch, dad);
+// if ((node_branch->partial_lh_computed & 1) == 0)
+// computePartialLikelihood(node_branch, node);
+
+#ifndef KERNEL_FIX_STATES
+ size_t nstates = aln->num_states;
+#endif
+ size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+
+ size_t block = ncat_mix * nstates; // number of values per pattern in theta_all
+// size_t tip_block = nstates * model->getNMixtures();
+ size_t ptn; // for big data size > 4GB memory required
+ size_t c, i;
+ size_t orig_nptn = aln->size();
+ size_t max_orig_nptn = ((orig_nptn+VectorClass::size()-1)/VectorClass::size())*VectorClass::size(); // orig_nptn rounded up to a multiple of the vector width
+ size_t nptn = max_orig_nptn+model_factory->unobserved_ptns.size();
+ bool isASC = model_factory->unobserved_ptns.size() > 0; // true when there are unobserved patterns to correct for
+
+
+
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates; // offset of the eigenvalues used for category c
+ mix_addr[c] = mix_addr_nstates[c]*nstates; // NOTE(review): mix_addr is not read again in this function
+ }
+
+ double *eval = model->getEigenvalues();
+ assert(eval);
+
+ double *buffer_partial_lh_ptr = buffer_partial_lh;
+ vector<size_t> limits;
+ computeBounds<VectorClass>(num_threads, nptn, limits); // limits[t], limits[t+1] are read below as the pattern range of thread t
+
+ assert(theta_all);
+
+ double *val0 = NULL;
+ double *val1 = NULL;
+ double *val2 = NULL;
+ double cat_rate[ncat];
+ double cat_prop[ncat];
+
+
+ if (SITE_MODEL) {
+ for (c = 0; c < ncat; c++) {
+ cat_rate[c] = site_rate->getRate(c);
+ cat_prop[c] = site_rate->getProp(c);
+ }
+ } else {
+ val0 = buffer_partial_lh_ptr; // val0, val1, val2 occupy three consecutive get_safe_upper_limit(block) slots of buffer_partial_lh
+ val1 = val0 + get_safe_upper_limit(block);
+ val2 = val1 + get_safe_upper_limit(block);
+ if (nstates % VectorClass::size() == 0) { // states fill whole vectors: vectorized fill
+ VectorClass *vc_val0 = (VectorClass*)val0;
+ VectorClass *vc_val1 = (VectorClass*)val1;
+ VectorClass *vc_val2 = (VectorClass*)val2;
+
+ double len = dad_branch->length;
+ size_t loop_size = nstates/VectorClass::size();
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ VectorClass *eval_ptr = (VectorClass*)(eval + mix_addr_nstates[c]);
+ size_t mycat = c%ncat;
+ double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
+ double myrate = site_rate->getRate(mycat);
+ for (i = 0; i < loop_size; i++) {
+ VectorClass cof = eval_ptr[i] * myrate;
+ VectorClass val = exp(cof*len) * prop;
+ VectorClass val1_ = cof*val; // first derivative of val with respect to len
+ vc_val0[i] = val;
+ vc_val1[i] = val1_;
+ vc_val2[i] = cof*val1_; // second derivative of val with respect to len
+ }
+ vc_val0 += loop_size;
+ vc_val1 += loop_size;
+ vc_val2 += loop_size;
+ }
+ } else { // scalar fill with the same formulas
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ size_t mycat = c%ncat;
+ double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
+ size_t addr = c*nstates;
+ for (i = 0; i < nstates; i++) {
+ double cof = eval_ptr[i]*site_rate->getRate(mycat);
+ double val = exp(cof*dad_branch->length) * prop;
+ double val1_ = cof*val;
+ val0[addr+i] = val;
+ val1[addr+i] = val1_;
+ val2[addr+i] = cof*val1_;
+ }
+ }
+ }
+ }
+
+ double dad_length = dad_branch->length;
+
+ VectorClass all_df = 0.0, all_ddf = 0.0, all_prob_const = 0.0, all_df_const = 0.0, all_ddf_const = 0.0; // totals merged from all threads
+// double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
+
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 1) private(ptn, i, c) num_threads(num_threads)
+#endif
+ for (int thread_id = 0; thread_id < num_threads; thread_id++) {
+ VectorClass my_df(0.0), my_ddf(0.0), vc_prob_const(0.0), vc_df_const(0.0), vc_ddf_const(0.0); // accumulators local to this iteration
+ size_t ptn_lower = limits[thread_id];
+ size_t ptn_upper = limits[thread_id+1];
+
+ if (!theta_computed) // theta_all is (re)filled only when it is not flagged as computed
+ #ifdef KERNEL_FIX_STATES
+ computeLikelihoodBufferSIMD<VectorClass, SAFE_NUMERIC, nstates, FMA, SITE_MODEL>(dad_branch, dad, ptn_lower, ptn_upper, thread_id);
+ #else
+ computeLikelihoodBufferGenericSIMD<VectorClass, SAFE_NUMERIC, FMA, SITE_MODEL>(dad_branch, dad, ptn_lower, ptn_upper, thread_id);
+ #endif
+
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ VectorClass lh_ptn;
+ //lh_ptn.load_a(&ptn_invar[ptn]);
+ VectorClass *theta = (VectorClass*)(theta_all + ptn*block);
+ VectorClass df_ptn, ddf_ptn;
+
+ if (SITE_MODEL) {
+ VectorClass* eval_ptr = (VectorClass*) &eval[ptn*nstates]; // here eval is indexed by pattern
+ lh_ptn = 0.0; df_ptn = 0.0; ddf_ptn = 0.0;
+ for (c = 0; c < ncat; c++) {
+ VectorClass lh_cat(0.0), df_cat(0.0), ddf_cat(0.0);
+ for (i = 0; i < nstates; i++) {
+ VectorClass cof = eval_ptr[i] * cat_rate[c];
+ VectorClass val = exp(cof*dad_length)*theta[i];
+ VectorClass val1 = cof*val;
+ lh_cat += val;
+ df_cat += val1;
+ ddf_cat = mul_add(cof, val1, ddf_cat); // ddf_cat += cof*val1
+ }
+ lh_ptn = mul_add(cat_prop[c], lh_cat, lh_ptn); // weight each category by cat_prop[c]
+ df_ptn = mul_add(cat_prop[c], df_cat, df_ptn);
+ ddf_ptn = mul_add(cat_prop[c], ddf_cat, ddf_ptn);
+ theta += nstates;
+
+ }
+ } else {
+ #ifdef KERNEL_FIX_STATES
+ dotProductTriple<VectorClass, double, nstates, FMA>(val0, val1, val2, theta, lh_ptn, df_ptn, ddf_ptn, block); // presumably the dot products of theta with val0, val1, val2 -- confirm in the helper
+ #else
+ dotProductTriple<VectorClass, double, FMA>(val0, val1, val2, theta, lh_ptn, df_ptn, ddf_ptn, block, nstates);
+ #endif
+ }
+ lh_ptn = abs(lh_ptn + VectorClass().load_a(&ptn_invar[ptn]));
+
+ if (ptn < orig_nptn) { // vector starts inside the alignment patterns
+ lh_ptn = 1.0 / lh_ptn; // from here on lh_ptn holds the reciprocal
+ VectorClass df_frac = df_ptn * lh_ptn;
+ VectorClass ddf_frac = ddf_ptn * lh_ptn;
+ VectorClass freq;
+ freq.load_a(&ptn_freq[ptn]);
+ VectorClass tmp1 = df_frac * freq;
+ VectorClass tmp2 = ddf_frac * freq;
+ my_df += tmp1;
+ my_ddf += nmul_add(tmp1, df_frac, tmp2); // adds tmp2 - tmp1*df_frac
+ } else {
+ // ascertainment bias correction
+ if (ptn+VectorClass::size() > nptn) {
+ // cutoff the last entries if going beyond
+ lh_ptn.cutoff(nptn-ptn);
+ df_ptn.cutoff(nptn-ptn);
+ ddf_ptn.cutoff(nptn-ptn);
+ }
+ if (horizontal_or(VectorClass().load_a(&buffer_scale_all[ptn]) != 0.0)) {
+ // some entries are rescaled
+ double *lh_ptn_dbl = (double*)&lh_ptn;
+ double *df_ptn_dbl = (double*)&df_ptn;
+ double *ddf_ptn_dbl = (double*)&ddf_ptn;
+ for (i = 0; i < VectorClass::size(); i++)
+ if (buffer_scale_all[ptn+i] != 0.0) { // NOTE(review): a single SCALING_THRESHOLD factor is applied whatever the stored value is -- confirm these patterns are scaled at most once
+ lh_ptn_dbl[i] *= SCALING_THRESHOLD;
+ df_ptn_dbl[i] *= SCALING_THRESHOLD;
+ ddf_ptn_dbl[i] *= SCALING_THRESHOLD;
+ }
+ }
+
+ vc_prob_const += lh_ptn;
+ vc_df_const += df_ptn;
+ vc_ddf_const += ddf_ptn;
+ }
+ } // FOR ptn
+ #ifdef _OPENMP
+ #pragma omp critical
+ #endif
+ { // merge this iteration's partial sums into the shared totals
+ all_df += my_df;
+ all_ddf += my_ddf;
+ if (isASC) {
+ all_prob_const += vc_prob_const;
+ all_df_const += vc_df_const;
+ all_ddf_const += vc_ddf_const;
+ }
+ }
+ } // FOR thread
+
+ // mark buffer as computed
+ theta_computed = true;
+
+ df = horizontal_add(all_df);
+ ddf = horizontal_add(all_ddf);
+
+ if (!SAFE_NUMERIC && (std::isnan(df) || std::isinf(df)))
+ outError("Numerical underflow (lh-derivative). Run again with the safe likelihood kernel via `-safe` option");
+
+ if (isASC) {
+ double prob_const = 0.0, df_const = 0.0, ddf_const = 0.0;
+ prob_const = horizontal_add(all_prob_const);
+ df_const = horizontal_add(all_df_const);
+ ddf_const = horizontal_add(all_ddf_const);
+ // ascertainment bias correction
+ prob_const = 1.0 - prob_const; // one minus the summed value of the unobserved patterns
+ double df_frac = df_const / prob_const;
+ double ddf_frac = ddf_const / prob_const;
+ int nsites = aln->getNSite();
+ df += nsites * df_frac;
+ ddf += nsites *(ddf_frac + df_frac*df_frac);
+ }
+
+ if (std::isnan(df) || std::isinf(df)) { // non-fatal fallback: report and return zeros
+ cerr << "WARNING: Numerical underflow for lh-derivative" << endl;
+ df = ddf = 0.0;
+ }
+}
+
+
+
+
+/*******************************************************
+ *
+ * NEW! highly-vectorized log-likelihood function
+ *
+ ******************************************************/
+
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA, const bool SITE_MODEL>
+double PhyloTree::computeLikelihoodBranchSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad)
+#else
+template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA, const bool SITE_MODEL>
+double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad)
+#endif
+{
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
+ if (!central_partial_lh)
+ initializeAllPartialLh();
+ if (node->isLeaf()) {
+ PhyloNode *tmp_node = dad;
+ dad = node;
+ node = tmp_node;
+ PhyloNeighbor *tmp_nei = dad_branch;
+ dad_branch = node_branch;
+ node_branch = tmp_nei;
+ }
+
+#ifdef KERNEL_FIX_STATES
+ computeTraversalInfo<VectorClass, nstates>(node, dad, false);
+#else
+ computeTraversalInfo<VectorClass>(node, dad, false);
+#endif
+// if ((dad_branch->partial_lh_computed & 1) == 0)
+// computePartialLikelihood(dad_branch, dad);
+// if ((node_branch->partial_lh_computed & 1) == 0)
+// computePartialLikelihood(node_branch, node);
+// double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
+ double tree_lh = 0.0;
+#ifndef KERNEL_FIX_STATES
+ size_t nstates = aln->num_states;
+#endif
+ size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+
+ size_t block = ncat_mix * nstates;
+ size_t tip_block = nstates * model->getNMixtures();
+ size_t ptn; // for big data size > 4GB memory required
+ size_t c, i;
+ size_t orig_nptn = aln->size();
+ size_t max_orig_nptn = ((orig_nptn+VectorClass::size()-1)/VectorClass::size())*VectorClass::size();
+ size_t nptn = max_orig_nptn+model_factory->unobserved_ptns.size();
+ size_t tip_mem_size = max_orig_nptn * nstates;
+ bool isASC = model_factory->unobserved_ptns.size() > 0;
+
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+
+ double *eval = model->getEigenvalues();
+ assert(eval);
+
+// double *val = aligned_alloc<double>(block);
+ double *val = NULL;
+ double *buffer_partial_lh_ptr = buffer_partial_lh;
+
+
+ double cat_length[ncat];
+ double cat_prop[ncat];
+ if (SITE_MODEL) {
+ for (c = 0; c < ncat; c++) {
+ cat_length[c] = site_rate->getRate(c) * dad_branch->length;
+ cat_prop[c] = site_rate->getProp(c);
+ }
+ } else {
+ val = buffer_partial_lh_ptr;
+ buffer_partial_lh_ptr += get_safe_upper_limit(block);
+ if (nstates % VectorClass::size() == 0) {
+ size_t loop_size = nstates / VectorClass::size();
+ for (c = 0; c < ncat_mix; c++) {
+ size_t mycat = c%ncat;
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates;
+ VectorClass *eval_ptr = (VectorClass*)(eval + mix_addr_nstates[c]);
+ double len = site_rate->getRate(mycat)*dad_branch->length;
+ double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
+ VectorClass *this_val = (VectorClass*)(val + c*nstates);
+ for (i = 0; i < loop_size; i++)
+ this_val[i] = exp(eval_ptr[i]*len) * prop;
+ }
+ } else {
+ for (c = 0; c < ncat_mix; c++) {
+ size_t mycat = c%ncat;
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ double len = site_rate->getRate(mycat)*dad_branch->length;
+ double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
+ double *this_val = val + c*nstates;
+ for (i = 0; i < nstates; i++)
+ this_val[i] = exp(eval_ptr[i]*len) * prop;
+ }
+ }
+ }
+
+ VectorClass all_tree_lh(0.0);
+ VectorClass all_prob_const(0.0);
+
+ vector<size_t> limits;
+ computeBounds<VectorClass>(num_threads, nptn, limits);
+
+ if (dad->isLeaf()) {
+ // special treatment for TIP-INTERNAL NODE case
+// double *partial_lh_node = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
+ double *partial_lh_node;
+ if (SITE_MODEL)
+ partial_lh_node = &tip_partial_lh[dad->id * tip_mem_size];
+ else {
+ partial_lh_node = buffer_partial_lh_ptr;
+ buffer_partial_lh_ptr += get_safe_upper_limit((aln->STATE_UNKNOWN+1)*block);
+ }
+
+ if (!SITE_MODEL) {
+ IntVector states_dad = aln->seq_states[dad->id];
+ states_dad.push_back(aln->STATE_UNKNOWN);
+ // precompute information from one tip
+ if (nstates % VectorClass::size() == 0) {
+ // vectorized version
+ for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
+ double *lh_node = partial_lh_node + (*it)*block;
+ double *lh_tip = tip_partial_lh + (*it)*tip_block;
+ double *vc_val_tmp = val;
+ for (c = 0; c < ncat_mix; c++) {
+ double *this_lh_tip = lh_tip + mix_addr_nstates[c];
+ for (i = 0; i < nstates; i+=VectorClass::size()) {
+ (VectorClass().load_a(&vc_val_tmp[i]) * VectorClass().load_a(&this_lh_tip[i])).store_a(&lh_node[i]);
+ }
+ lh_node += nstates;
+ vc_val_tmp += nstates;
+ }
+ }
+ } else {
+ // non-vectorized version
+ for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
+ double *lh_node = partial_lh_node +(*it)*block;
+ double *val_tmp = val;
+ double *this_tip_partial_lh = tip_partial_lh + (*it)*tip_block;
+ for (c = 0; c < ncat_mix; c++) {
+ double *lh_tip = this_tip_partial_lh + mix_addr_nstates[c];
+ for (i = 0; i < nstates; i++) {
+ lh_node[i] = val_tmp[i] * lh_tip[i];
+ }
+ lh_node += nstates;
+ val_tmp += nstates;
+ }
+ }
+ }
+ }
+
+ // now do the real computation
+#ifdef _OPENMP
+#pragma omp parallel for private(ptn, i, c) schedule(static, 1) num_threads(num_threads)
+#endif
+ for (int thread_id = 0; thread_id < num_threads; thread_id++) {
+
+ VectorClass vc_tree_lh(0.0), vc_prob_const(0.0);
+
+ size_t ptn_lower = limits[thread_id];
+ size_t ptn_upper = limits[thread_id+1];
+
+ // reset memory for _pattern_lh_cat
+ memset(_pattern_lh_cat + ptn_lower*ncat_mix, 0, sizeof(double)*(ptn_upper-ptn_lower)*ncat_mix);
+
+ // first compute partial_lh
+ for (vector<TraversalInfo>::iterator it = traversal_info.begin(); it != traversal_info.end(); it++)
+ computePartialLikelihood(*it, ptn_lower, ptn_upper, thread_id);
+
+ double *vec_tip = buffer_partial_lh_ptr + block*VectorClass::size()*thread_id;
+
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ VectorClass lh_ptn;
+ lh_ptn.load_a(&ptn_invar[ptn]);
+ VectorClass *lh_cat = (VectorClass*)(_pattern_lh_cat + ptn*ncat_mix);
+ VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block);
+ VectorClass *lh_node = SITE_MODEL ? (VectorClass*)&partial_lh_node[ptn*nstates] : (VectorClass*)vec_tip;
+
+ if (SITE_MODEL) {
+ // site-specific model
+ VectorClass* eval_ptr = (VectorClass*) &eval[ptn*nstates];
+ for (c = 0; c < ncat; c++) {
+ #ifdef KERNEL_FIX_STATES
+ dotProductExp<VectorClass, double, nstates, FMA>(eval_ptr, lh_node, partial_lh_dad, cat_length[c], lh_cat[c]);
+ #else
+ dotProductExp<VectorClass, double, FMA>(eval_ptr, lh_node, partial_lh_dad, cat_length[c], lh_cat[c], nstates);
+ #endif
+ if (SAFE_NUMERIC)
+ lh_cat[c] *= cat_prop[c];
+ else
+ lh_ptn += (lh_cat[c] *= cat_prop[c]);
+
+ partial_lh_dad += nstates;
+ }
+ } else { // normal model
+ //load tip vector
+ for (i = 0; i < VectorClass::size(); i++) {
+ double *lh_tip;
+ if (ptn+i < orig_nptn)
+ lh_tip = partial_lh_node + block*(aln->at(ptn+i))[dad->id];
+ else if (ptn+i < max_orig_nptn)
+ lh_tip = partial_lh_node + block*aln->STATE_UNKNOWN;
+ else if (ptn+i < nptn)
+ lh_tip = partial_lh_node + block*model_factory->unobserved_ptns[ptn+i-max_orig_nptn];
+ else
+ lh_tip = partial_lh_node + block*aln->STATE_UNKNOWN;
+
+ double *this_vec_tip = vec_tip+i;
+ for (c = 0; c < block; c++) {
+ *this_vec_tip = lh_tip[c];
+ this_vec_tip += VectorClass::size();
+ }
+
+ }
+ // compute likelihood per category
+ for (c = 0; c < ncat_mix; c++) {
+ #ifdef KERNEL_FIX_STATES
+ dotProductVec<VectorClass, VectorClass, nstates, FMA>(lh_node, partial_lh_dad, lh_cat[c]);
+ #else
+ dotProductVec<VectorClass, VectorClass, FMA>(lh_node, partial_lh_dad, lh_cat[c], nstates);
+ #endif
+ if (!SAFE_NUMERIC)
+ lh_ptn += lh_cat[c];
+ lh_node += nstates;
+ partial_lh_dad += nstates;
+ }
+ } // if SITE_MODEL
+
+ // compute scaling factor per pattern
+ VectorClass vc_min_scale(0.0);
+ double* vc_min_scale_ptr = (double*)&vc_min_scale;
+ if (SAFE_NUMERIC) {
+ // numerical scaling per category
+ UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ UBYTE min_scale;
+ for (i = 0; i < VectorClass::size(); i++) {
+ // scale_dad = dad_branch->scale_num+(ptn+i)*ncat_mix;
+ min_scale = scale_dad[0];
+ for (c = 1; c < ncat_mix; c++)
+ min_scale = min(min_scale, scale_dad[c]);
+
+ vc_min_scale_ptr[i] = min_scale;
+
+ double *this_lh_cat = &_pattern_lh_cat[ptn*ncat_mix + i];
+ for (c = 0; c < ncat_mix; c++) {
+ // rescale lh_cat if neccessary
+ if (scale_dad[c] == min_scale+1) {
+ this_lh_cat[c*VectorClass::size()] *= SCALING_THRESHOLD;
+ } else if (scale_dad[c] > min_scale+1) {
+ this_lh_cat[c*VectorClass::size()] = 0.0;
+ }
+ }
+ scale_dad += ncat_mix;
+ }
+ // now take the sum of (rescaled) lh_cat
+ sumVec<VectorClass, true>(lh_cat, lh_ptn, ncat_mix);
+
+ } else {
+ for (i = 0; i < VectorClass::size(); i++) {
+ vc_min_scale_ptr[i] = dad_branch->scale_num[ptn+i];
+ }
+ }
+ vc_min_scale *= LOG_SCALING_THRESHOLD;
+
+ lh_ptn = abs(lh_ptn);
+ if (ptn < orig_nptn) {
+ lh_ptn = log(lh_ptn) + vc_min_scale;
+ lh_ptn.store_a(&_pattern_lh[ptn]);
+ vc_tree_lh = mul_add(lh_ptn, VectorClass().load_a(&ptn_freq[ptn]), vc_tree_lh);
+ } else {
+ // ascertainment bias correction
+ if (ptn+VectorClass::size() > nptn) {
+ // cutoff the last entries if going beyond
+ lh_ptn.cutoff(nptn-ptn);
+ }
+ // bugfix 2016-01-21, prob_const can be rescaled
+ if (horizontal_or(vc_min_scale != 0.0)) {
+ // some entries are rescaled
+ double *lh_ptn_dbl = (double*)&lh_ptn;
+ for (i = 0; i < VectorClass::size(); i++)
+ if (vc_min_scale_ptr[i] != 0.0)
+ lh_ptn_dbl[i] *= SCALING_THRESHOLD;
+ }
+ vc_prob_const += lh_ptn;
+ }
+ } // FOR PTN
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+ {
+ all_tree_lh += vc_tree_lh;
+ if (isASC)
+ all_prob_const += vc_prob_const;
+ }
+ } // FOR thread
+
+ } else {
+
+// assert(0 && "Don't compute tree log-likelihood from internal branch!");
+ //-------- both dad and node are internal nodes -----------/
+
+#ifdef _OPENMP
+#pragma omp parallel for private(ptn, i, c) schedule(static, 1) num_threads(num_threads)
+#endif
+ for (int thread_id = 0; thread_id < num_threads; thread_id++) {
+
+ size_t ptn_lower = limits[thread_id];
+ size_t ptn_upper = limits[thread_id+1];
+
+ VectorClass vc_tree_lh(0.0), vc_prob_const(0.0);
+
+ // reset memory for _pattern_lh_cat
+ memset(_pattern_lh_cat + ptn_lower*ncat_mix, 0, sizeof(double)*(ptn_upper-ptn_lower)*ncat_mix);
+
+ // first compute partial_lh
+ for (vector<TraversalInfo>::iterator it = traversal_info.begin(); it != traversal_info.end(); it++)
+ computePartialLikelihood(*it, ptn_lower, ptn_upper, thread_id);
+
+ for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
+ VectorClass lh_ptn;
+ lh_ptn.load_a(&ptn_invar[ptn]);
+ VectorClass *lh_cat = (VectorClass*)(_pattern_lh_cat + ptn*ncat_mix);
+ VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block);
+ VectorClass *partial_lh_node = (VectorClass*)(node_branch->partial_lh + ptn*block);
+
+ // compute likelihood per category
+ if (SITE_MODEL) {
+ VectorClass* eval_ptr = (VectorClass*) &eval[ptn*nstates];
+ for (c = 0; c < ncat; c++) {
+ #ifdef KERNEL_FIX_STATES
+ dotProductExp<VectorClass, double, nstates, FMA>(eval_ptr, partial_lh_node, partial_lh_dad, cat_length[c], lh_cat[c]);
+ #else
+ dotProductExp<VectorClass, double, FMA>(eval_ptr, partial_lh_node, partial_lh_dad, cat_length[c], lh_cat[c], nstates);
+ #endif
+ if (SAFE_NUMERIC)
+ lh_cat[c] *= cat_prop[c];
+ else
+ lh_ptn += (lh_cat[c] *= cat_prop[c]);
+ partial_lh_node += nstates;
+ partial_lh_dad += nstates;
+ }
+ } else {
+ double *val_tmp = val;
+ for (c = 0; c < ncat_mix; c++) {
+ #ifdef KERNEL_FIX_STATES
+ dotProduct3Vec<VectorClass, double, nstates, FMA>(val_tmp, partial_lh_node, partial_lh_dad, lh_cat[c]);
+ #else
+ dotProduct3Vec<VectorClass, double, FMA>(val_tmp, partial_lh_node, partial_lh_dad, lh_cat[c], nstates);
+ #endif
+ if (!SAFE_NUMERIC)
+ lh_ptn += lh_cat[c];
+ partial_lh_node += nstates;
+ partial_lh_dad += nstates;
+ val_tmp += nstates;
+ }
+ } // if SITE MODEL
+
+
+ // compute the scaling factor per pattern
+ VectorClass vc_min_scale(0.0);
+ double* vc_min_scale_ptr = (double*)&vc_min_scale;
+ if (SAFE_NUMERIC) {
+ UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ UBYTE *scale_node = node_branch->scale_num + ptn*ncat_mix;
+ UBYTE sum_scale[ncat_mix];
+ UBYTE min_scale;
+
+ for (i = 0; i < VectorClass::size(); i++) {
+ min_scale = sum_scale[0] = scale_dad[0] + scale_node[0];
+ for (c = 1; c < ncat_mix; c++) {
+ sum_scale[c] = scale_dad[c] + scale_node[c];
+ min_scale = min(min_scale, sum_scale[c]);
+ }
+ vc_min_scale_ptr[i] = min_scale;
+ double *this_lh_cat = &_pattern_lh_cat[ptn*ncat_mix + i];
+ for (c = 0; c < ncat_mix; c++) {
+ if (sum_scale[c] == min_scale+1) {
+ this_lh_cat[c*VectorClass::size()] *= SCALING_THRESHOLD;
+ } else if (sum_scale[c] > min_scale+1) {
+ // reset if category is scaled a lot
+ this_lh_cat[c*VectorClass::size()] = 0.0;
+ }
+ }
+ scale_dad += ncat_mix;
+ scale_node += ncat_mix;
+ }
+ sumVec<VectorClass, true>(lh_cat, lh_ptn, ncat_mix);
+ } else {
+ for (i = 0; i < VectorClass::size(); i++) {
+ vc_min_scale_ptr[i] = dad_branch->scale_num[ptn+i] + node_branch->scale_num[ptn+i];
+ }
+ } // if SAFE_NUMERIC
+ vc_min_scale *= LOG_SCALING_THRESHOLD;
+
+ lh_ptn = abs(lh_ptn);
+
+ if (ptn < orig_nptn) {
+ lh_ptn = log(lh_ptn) + vc_min_scale;
+ lh_ptn.store_a(&_pattern_lh[ptn]);
+ vc_tree_lh = mul_add(lh_ptn, VectorClass().load_a(&ptn_freq[ptn]), vc_tree_lh);
+ } else {
+ // ascertainment bias correction
+ if (ptn+VectorClass::size() > nptn) {
+ // cutoff the last entries if going beyond
+ lh_ptn.cutoff(nptn-ptn);
+ }
+ // bugfix 2016-01-21, prob_const can be rescaled
+ if (horizontal_or(vc_min_scale != 0.0)) {
+ // some entries are rescaled
+ double *lh_ptn_dbl = (double*)&lh_ptn;
+ for (i = 0; i < VectorClass::size(); i++)
+ if (vc_min_scale_ptr[i] != 0.0)
+ lh_ptn_dbl[i] *= SCALING_THRESHOLD;
+ }
+ vc_prob_const += lh_ptn;
+ }
+ } // FOR LOOP ptn
+#ifdef _OPENMP
+#pragma omp critical
+#endif
+ {
+ all_tree_lh += vc_tree_lh;
+ if (isASC)
+ all_prob_const += vc_prob_const;
+ }
+ } // FOR thread
+ } // else
+
+ tree_lh += horizontal_add(all_tree_lh);
+
+ if (!SAFE_NUMERIC && (std::isnan(tree_lh) || std::isinf(tree_lh)))
+ outError("Numerical underflow (lh-branch). Run again with the safe likelihood kernel via `-safe` option");
+
+ assert(!std::isnan(tree_lh) && !std::isinf(tree_lh) && "Numerical underflow for lh-branch");
+
+ if (isASC) {
+ // ascertainment bias correction
+ double prob_const = horizontal_add(all_prob_const);
+ if (prob_const >= 1.0 || prob_const < 0.0) {
+ printTree(cout, WT_TAXON_ID + WT_BR_LEN + WT_NEWLINE);
+ model->writeInfo(cout);
+ }
+ assert(prob_const < 1.0 && prob_const >= 0.0);
+
+ // BQM 2015-10-11: fix this those functions using _pattern_lh_cat
+// double inv_const = 1.0 / (1.0-prob_const);
+// size_t nptn_cat = orig_nptn*ncat;
+// for (ptn = 0; ptn < nptn_cat; ptn++)
+// _pattern_lh_cat[ptn] *= inv_const;
+
+ prob_const = log(1.0 - prob_const);
+ for (ptn = 0; ptn < orig_nptn; ptn+=VectorClass::size())
+ (VectorClass().load_a(&_pattern_lh[ptn])-prob_const).store_a(&_pattern_lh[ptn]);
+// _pattern_lh[ptn] -= prob_const;
+ tree_lh -= aln->getNSite()*prob_const;
+ assert(!std::isnan(tree_lh) && !std::isinf(tree_lh));
+ }
+
+ return tree_lh;
+}
+
+
+/*******************************************************
+ * computeLikelihoodFromBufferSIMD / computeLikelihoodFromBufferGenericSIMD
+ * NEW! highly-vectorized log-likelihood from buffer
+ * Returns the sum over observed patterns of ptn_freq * (log|lh_ptn| + buffer_scale_all), ASC-corrected when unobserved patterns exist.
+ ******************************************************/
+ // Side effects: stores per-pattern log-likelihoods in _pattern_lh; overwrites buffer_partial_lh unless SITE_MODEL.
+#ifdef KERNEL_FIX_STATES
+template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA, const bool SITE_MODEL>
+double PhyloTree::computeLikelihoodFromBufferSIMD()
+#else
+template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA, const bool SITE_MODEL>
+double PhyloTree::computeLikelihoodFromBufferGenericSIMD()
+#endif
+{
+ // Template flags as used here: SITE_MODEL selects per-pattern eigenvalues (eval indexed by ptn) instead of one coefficient table;
+ assert(theta_all && theta_computed); // precondition: theta_all exists and is flagged as computed
+ // FMA is forwarded to the dot-product helpers; SAFE_NUMERIC only disables the underflow outError() further down.
+// double tree_lh = current_it->lh_scale_factor + current_it_back->lh_scale_factor;
+
+#ifndef KERNEL_FIX_STATES
+ size_t nstates = aln->num_states; // runtime state count; a template constant when KERNEL_FIX_STATES is defined
+#endif
+ size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures(); // rate categories times mixture classes unless the two are fused
+
+ size_t block = ncat_mix * nstates; // per-pattern stride used to index theta_all
+// size_t tip_block = nstates * model->getNMixtures();
+ size_t ptn; // for big data size > 4GB memory required
+ size_t c, i;
+ size_t orig_nptn = aln->size();
+ size_t max_orig_nptn = ((orig_nptn+VectorClass::size()-1)/VectorClass::size())*VectorClass::size(); // orig_nptn rounded up to a multiple of the vector width
+ size_t nptn = max_orig_nptn+model_factory->unobserved_ptns.size(); // padded observed patterns followed by the unobserved ones
+ bool isASC = model_factory->unobserved_ptns.size() > 0;
+ // mix_addr_nstates[c] is the offset of category c's mixture class into eval; mix_addr is filled here but never read in this function
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom; // mixture class that category c belongs to
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates;
+ }
+
+ double *eval = model->getEigenvalues();
+ assert(eval);
+
+ double *val0 = NULL;
+ double cat_length[ncat];
+ double cat_prop[ncat];
+ // SITE_MODEL needs only per-category lengths and proportions up front; otherwise val0 gets exp(eval*rate*length)*prop for every (category, state)
+ if (SITE_MODEL) {
+ for (c = 0; c < ncat; c++) {
+ cat_length[c] = site_rate->getRate(c) * current_it->length;
+ cat_prop[c] = site_rate->getProp(c);
+ }
+ } else {
+ val0 = buffer_partial_lh; // val0 aliases buffer_partial_lh, which is overwritten below
+ if (nstates % VectorClass::size() == 0) { // vectorized fill when the states divide evenly into vectors
+ VectorClass *vc_val0 = (VectorClass*)val0;
+ size_t loop_size = nstates / VectorClass::size();
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ VectorClass *eval_ptr = (VectorClass*)(eval + mix_addr_nstates[c]);
+ size_t mycat = c%ncat;
+ double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
+ double len = site_rate->getRate(mycat) * current_it->length;
+ for (i = 0; i < loop_size; i++) {
+ vc_val0[i] = exp(eval_ptr[i] * len) * prop;
+ }
+ vc_val0 += loop_size;
+ }
+ } else { // scalar fill of the same quantity, one state at a time
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ size_t mycat = c%ncat;
+ double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
+ size_t addr = c*nstates;
+ for (i = 0; i < nstates; i++) {
+ double cof = eval_ptr[i]*site_rate->getRate(mycat);
+ double val = exp(cof*current_it->length) * prop;
+ val0[addr+i] = val;
+ }
+ }
+ }
+ }
+
+// double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
+
+ VectorClass all_tree_lh(0.0), all_prob_const(0.0); // totals merged from the per-thread accumulators
+
+#ifdef _OPENMP
+#pragma omp parallel private(ptn, i, c) num_threads(num_threads)
+ {
+#endif
+ VectorClass vc_tree_lh(0.0), vc_prob_const(0.0); // per-thread accumulators (a single pair when OpenMP is off)
+#ifdef _OPENMP
+#pragma omp for schedule(static) nowait
+#endif
+ for (ptn = 0; ptn < nptn; ptn+=VectorClass::size()) { // one vector of patterns per iteration
+ VectorClass lh_ptn;
+ VectorClass *theta = (VectorClass*)(theta_all + ptn*block);
+ if (SITE_MODEL) {
+ VectorClass *eval_ptr = (VectorClass*)&eval[ptn*nstates]; // eval is indexed per pattern in this mode
+ lh_ptn.load_a(&ptn_invar[ptn]); // start from ptn_invar, then add the category terms
+ for (c = 0; c < ncat; c++) {
+ VectorClass lh_cat;
+#ifdef KERNEL_FIX_STATES
+ dotProductExp<VectorClass, double, nstates, FMA>(eval_ptr, theta, cat_length[c], lh_cat);
+#else
+ dotProductExp<VectorClass, double, FMA>(eval_ptr, theta, cat_length[c], lh_cat, nstates);
+#endif
+ lh_ptn = mul_add(lh_cat, cat_prop[c], lh_ptn); // lh_ptn += lh_cat * cat_prop[c]
+ theta += nstates;
+ }
+ } else {
+ dotProductVec<VectorClass, double, FMA>(val0, theta, lh_ptn, block); // NOTE(review): presumably lh_ptn = sum_k val0[k]*theta[k] over the block; confirm in dotProductVec
+ lh_ptn += VectorClass().load_a(&ptn_invar[ptn]);
+ }
+
+ if (ptn < orig_nptn) { // observed patterns; ptn is a multiple of the vector width, so this also covers the padding up to max_orig_nptn
+ lh_ptn = log(abs(lh_ptn)) + VectorClass().load_a(&buffer_scale_all[ptn]);
+ lh_ptn.store_a(&_pattern_lh[ptn]);
+ vc_tree_lh = mul_add(lh_ptn, VectorClass().load_a(&ptn_freq[ptn]), vc_tree_lh); // vc_tree_lh += lh_ptn * ptn_freq
+ } else { // unobserved patterns: accumulate raw likelihoods (no abs() is applied on this path)
+ // bugfix 2016-01-21, prob_const can be rescaled
+// if (min_scale >= 1)
+// lh_ptn *= SCALING_THRESHOLD;
+// _pattern_lh[ptn] = lh_ptn;
+ // ascertainment bias correction
+ if (ptn+VectorClass::size() > nptn) {
+ // cutoff the last entries if going beyond
+ lh_ptn.cutoff(nptn-ptn);
+ }
+ if (horizontal_or(VectorClass().load_a(&buffer_scale_all[ptn]) != 0.0)) {
+ // some entries are rescaled
+ double *lh_ptn_dbl = (double*)&lh_ptn;
+ for (i = 0; i < VectorClass::size(); i++)
+ if (buffer_scale_all[ptn+i] != 0.0)
+ lh_ptn_dbl[i] *= SCALING_THRESHOLD; // applied once per entry, whatever the magnitude of buffer_scale_all
+ }
+ vc_prob_const += lh_ptn;
+ }
+ }
+#ifdef _OPENMP
+#pragma omp critical
+ {
+ all_tree_lh += vc_tree_lh;
+ if (isASC)
+ all_prob_const += vc_prob_const;
+ }
+ }
+#else
+ all_tree_lh = vc_tree_lh;
+ all_prob_const = vc_prob_const;
+#endif
+
+ double tree_lh = horizontal_add(all_tree_lh); // collapse the vector lanes into one scalar
+
+ if (!SAFE_NUMERIC && (std::isnan(tree_lh) || std::isinf(tree_lh)))
+ outError("Numerical underflow (lh-from-buffer). Run again with the safe likelihood kernel via `-safe` option");
+
+ assert(!std::isnan(tree_lh) && !std::isinf(tree_lh) && "Numerical underflow for lh-from-buffer");
+
+ if (isASC) {
+ // ascertainment bias correction
+ double prob_const = horizontal_add(all_prob_const); // summed likelihood of the unobserved patterns
+ if (prob_const >= 1.0 || prob_const < 0.0) { // dump tree and model info before the assert below fires
+ printTree(cout, WT_TAXON_ID + WT_BR_LEN + WT_NEWLINE);
+ model->writeInfo(cout);
+ }
+ assert(prob_const < 1.0 && prob_const >= 0.0);
+
+ // BQM 2015-10-11: fix this those functions using _pattern_lh_cat
+// double inv_const = 1.0 / (1.0-prob_const);
+// size_t nptn_cat = orig_nptn*ncat;
+// for (ptn = 0; ptn < nptn_cat; ptn++)
+// _pattern_lh_cat[ptn] *= inv_const;
+ // from here on prob_const holds log(1 - summed unobserved likelihood); it is subtracted from every pattern and, times the site count, from the total
+ prob_const = log(1.0 - prob_const);
+ for (ptn = 0; ptn < orig_nptn; ptn+=VectorClass::size())
+ (VectorClass().load_a(&_pattern_lh[ptn])-prob_const).store_a(&_pattern_lh[ptn]);
+// _pattern_lh[ptn] -= prob_const;
+ tree_lh -= aln->getNSite()*prob_const;
+ assert(!std::isnan(tree_lh) && !std::isinf(tree_lh));
+ }
+
+ return tree_lh;
+}
+
+
+#endif //PHYLOKERNELNEW_H_
diff --git a/phylokernel.h b/phylokernelsafe.h
similarity index 52%
copy from phylokernel.h
copy to phylokernelsafe.h
index e0b9f7c..b378fbb 100644
--- a/phylokernel.h
+++ b/phylokernelsafe.h
@@ -1,17 +1,21 @@
/*
- * phylokernel.h
+ * phylokernelsafe.h
+ * Safe likelihood kernel that scales likelihood per category
*
- * Created on: Dec 14, 2014
+ * Created on: Sept 23, 2016
* Author: minh
*/
-#ifndef PHYLOKERNEL_H_
-#define PHYLOKERNEL_H_
+#ifndef PHYLOKERNELSAFE_H_
+#define PHYLOKERNELSAFE_H_
#include "phylotree.h"
-#include "vectorclass/vectorclass.h"
-#include "vectorclass/vectormath_exp.h"
+//#include "vectorclass/vectorclass.h"
+//#include "vectorclass/vectormath_exp.h"
+#include "superalignment.h"
+
+#ifdef __SSE__
inline Vec2d horizontal_add(Vec2d x[2]) {
#if INSTRSET >= 3 // SSE3
return _mm_hadd_pd(x[0],x[1]);
@@ -20,7 +24,7 @@ inline Vec2d horizontal_add(Vec2d x[2]) {
Vec2d help1 = _mm_shuffle_pd(x[0], x[1], _MM_SHUFFLE2(1,1));
return _mm_add_pd(help0, help1);
#else
-#error "You must compile with SSE3 enabled!"
+#error "You must compile with SSE2 enabled!"
#endif
}
@@ -30,6 +34,8 @@ inline double horizontal_max(Vec2d const &a) {
return max(x[0],x[1]);
}
+#endif
+
#ifdef __AVX__
inline Vec4d horizontal_add(Vec4d x[4]) {
@@ -74,12 +80,6 @@ Numeric PhyloTree::dotProductSIMD(Numeric *x, Numeric *y, int size) {
template <class VectorClass, const int VCSIZE, const int nstates>
void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad) {
- if (dad_branch->node->degree() > 3) {
- // TODO: SIMD version for multifurcating node
- computePartialLikelihoodEigen(dad_branch, dad);
- return;
- }
-
// don't recompute the likelihood
assert(dad);
if (dad_branch->partial_lh_computed & 1)
@@ -101,33 +101,39 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
}
size_t ptn, c;
- size_t orig_ntn = aln->size();
+ size_t orig_nptn = aln->size();
size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
assert(nstates == aln->num_states && nstates >= VCSIZE && VCSIZE == VectorClass().size());
assert(model->isReversible()); // only works with reversible model!
const size_t nstatesqr=nstates*nstates;
size_t i, x, j;
- size_t block = nstates * ncat;
+ size_t block = nstates * ncat_mix;
+ size_t tip_block = nstates * model->getNMixtures();
+ size_t scale_size = nptn * ncat_mix;
+
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = m*nstatesqr;
+ }
// internal node
- assert(node->degree() == 3); // it works only for strictly bifurcating tree
+ dad_branch->lh_scale_factor = 0.0;
PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
+ int num_leaves = 0;
FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *nei = (PhyloNeighbor*)*it;
if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
+ if ((nei->partial_lh_computed & 1) == 0)
+ computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(nei, node);
+ dad_branch->lh_scale_factor += nei->lh_scale_factor;
+ if ((*it)->node->isLeaf()) num_leaves++;
}
- if (!left->node->isLeaf() && right->node->isLeaf()) {
- // swap left and right
- PhyloNeighbor *tmp = left;
- left = right;
- right = tmp;
- }
- if ((left->partial_lh_computed & 1) == 0)
- computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(left, node);
- if ((right->partial_lh_computed & 1) == 0)
- computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(right, node);
-
if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
// re-orient partial_lh
bool done = false;
@@ -149,93 +155,181 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
double *evec = model->getEigenvectors();
double *inv_evec = model->getInverseEigenvectors();
- VectorClass vc_inv_evec[nstates*nstates/VCSIZE];
assert(inv_evec && evec);
- for (i = 0; i < nstates; i++) {
- for (x = 0; x < nstates/VCSIZE; x++)
- // inv_evec is not aligned!
- vc_inv_evec[i*nstates/VCSIZE+x].load_a(&inv_evec[i*nstates+x*VCSIZE]);
- }
+// for (i = 0; i < tip_block; i++) {
+// for (x = 0; x < nstates/VCSIZE; x++)
+// // inv_evec is not aligned!
+// vc_inv_evec[i*nstates/VCSIZE+x].load_a(&inv_evec[i*nstates+x*VCSIZE]);
+// }
double *eval = model->getEigenvalues();
- dad_branch->lh_scale_factor = left->lh_scale_factor + right->lh_scale_factor;
- VectorClass *eleft = (VectorClass*)aligned_alloc<double>(block*nstates);
- VectorClass *eright = (VectorClass*)aligned_alloc<double>(block*nstates);
+ VectorClass *echildren = aligned_alloc<VectorClass>(block*nstates/VCSIZE*(node->degree()-1));
+ double *partial_lh_leaves = NULL;
+ if (num_leaves > 0)
+ partial_lh_leaves = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block*num_leaves);
+ VectorClass *echild = echildren;
+ double *partial_lh_leaf = partial_lh_leaves;
+
+
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ VectorClass expchild[nstates/VCSIZE];
+ PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ VectorClass *echild_ptr = echild;
+ // precompute information buffer
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass len_child = site_rate->getRate(c%ncat) * child->length;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ double *evec_ptr = evec + mix_addr[c];
+ for (i = 0; i < nstates/VCSIZE; i++) {
+ // eval is not aligned!
+ expchild[i] = exp(VectorClass().load_a(&eval_ptr[i*VCSIZE]) * len_child);
+ }
+ for (x = 0; x < nstates; x++) {
+ for (i = 0; i < nstates/VCSIZE; i++) {
+ // evec is not be aligned!
+ echild_ptr[i] = (VectorClass().load_a(&evec_ptr[x*nstates+i*VCSIZE]) * expchild[i]);
+ }
+ echild_ptr += nstates/VCSIZE;
+ }
+ }
- // precompute information buffer
- for (c = 0; c < ncat; c++) {
- VectorClass vc_evec;
- VectorClass expleft[nstates/VCSIZE];
- VectorClass expright[nstates/VCSIZE];
- double len_left = site_rate->getRate(c) * left->length;
- double len_right = site_rate->getRate(c) * right->length;
- for (i = 0; i < nstates/VCSIZE; i++) {
- // eval is not aligned!
- expleft[i] = exp(VectorClass().load_a(&eval[i*VCSIZE]) * VectorClass(len_left));
- expright[i] = exp(VectorClass().load_a(&eval[i*VCSIZE]) * VectorClass(len_right));
- }
- for (x = 0; x < nstates; x++)
- for (i = 0; i < nstates/VCSIZE; i++) {
- // evec is not be aligned!
- vc_evec.load_a(&evec[x*nstates+i*VCSIZE]);
- eleft[c*nstatesqr/VCSIZE+x*nstates/VCSIZE+i] = (vc_evec * expleft[i]);
- eright[c*nstatesqr/VCSIZE+x*nstates/VCSIZE+i] = (vc_evec * expright[i]);
- }
+ // pre compute information for tip
+ if (child->node->isLeaf()) {
+ vector<int>::iterator it;
+ for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
+ int state = (*it);
+ double *this_partial_lh_leaf = partial_lh_leaf + state*block;
+ VectorClass *echild_ptr = echild;
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass *this_tip_partial_lh = (VectorClass*)(tip_partial_lh + state*tip_block + mix_addr_nstates[c]);
+ for (x = 0; x < nstates; x++) {
+ VectorClass vchild = 0.0;
+ for (i = 0; i < nstates/VCSIZE; i++) {
+ vchild += echild_ptr[i] * this_tip_partial_lh[i];
+ }
+ this_partial_lh_leaf[x] = horizontal_add(vchild);
+ echild_ptr += nstates/VCSIZE;
+ }
+ this_partial_lh_leaf += nstates;
+ }
+ }
+ size_t addr = aln->STATE_UNKNOWN * block;
+ for (x = 0; x < block; x++) {
+ partial_lh_leaf[addr+x] = 1.0;
+ }
+ partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+ }
+ echild += block*nstates/VCSIZE;
+ }
+
+ VectorClass *eleft = echildren, *eright = echildren + block*nstates/VCSIZE;
+
+ if (!left->node->isLeaf() && right->node->isLeaf()) {
+ PhyloNeighbor *tmp = left;
+ left = right;
+ right = tmp;
+ VectorClass *etmp = eleft;
+ eleft = eright;
+ eright = etmp;
}
+
+
+ if (node->degree() > 3) {
- if (left->node->isLeaf() && right->node->isLeaf()) {
- // special treatment for TIP-TIP (cherry) case
-
- // pre compute information for both tips
- double *partial_lh_left = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
- double *partial_lh_right = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
-
- vector<int>::iterator it;
- for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
- int state = (*it);
- VectorClass vc_partial_lh_tmp[nstates/VCSIZE];
- VectorClass vleft[VCSIZE];
- size_t addr = state*nstates;
- for (i = 0; i < nstates/VCSIZE; i++)
- vc_partial_lh_tmp[i].load_a(&tip_partial_lh[addr+i*VCSIZE]);
- for (x = 0; x < block; x+=VCSIZE) {
- addr = x*nstates/VCSIZE;
- for (j = 0; j < VCSIZE; j++)
- vleft[j] = eleft[addr+j*nstates/VCSIZE] * vc_partial_lh_tmp[0];
- for (i = 1; i < nstates/VCSIZE; i++) {
- for (j = 0; j < VCSIZE; j++)
- vleft[j] = mul_add(eleft[addr+j*nstates/VCSIZE+i], vc_partial_lh_tmp[i], vleft[j]);
- }
- horizontal_add(vleft).store_a(&partial_lh_left[state*block+x]);
- }
- }
+ /*--------------------- multifurcating node ------------------*/
+ // now for-loop computing partial_lh over all site-patterns
+#ifdef _OPENMP
+#pragma omp parallel for private(ptn, c, x, i) schedule(static)
+#endif
+ for (ptn = 0; ptn < nptn; ptn++) {
+ double partial_lh_all[block];
+ for (i = 0; i < block; i++)
+ partial_lh_all[i] = 1.0;
+ UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ memset(scale_dad, 0, sizeof(UBYTE)*ncat_mix);
+
+ double *partial_lh_leaf = partial_lh_leaves;
+ double *echild = (double*)echildren;
+
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ UBYTE *scale_child = child->scale_num + ptn*ncat_mix;
+ if (child->node->isLeaf()) {
+ // external node
+ int state_child = (ptn < orig_nptn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
+ double *child_lh = partial_lh_leaf + state_child*block;
+ for (c = 0; c < block; c++) {
+ // compute real partial likelihood vector
+ partial_lh_all[c] *= child_lh[c];
+ }
+ partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+ } else {
+ // internal node
+ double *partial_lh = partial_lh_all;
+ double *partial_lh_child = child->partial_lh + ptn*block;
+
+ double *echild_ptr = echild;
+ for (c = 0; c < ncat_mix; c++) {
+ scale_dad[c] += scale_child[c];
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ double vchild = 0.0;
+// double *echild_ptr = echild + (c*nstatesqr+x*nstates);
+ for (i = 0; i < nstates; i++) {
+ vchild += echild_ptr[i] * partial_lh_child[i];
+ }
+ echild_ptr += nstates;
+ partial_lh[x] *= vchild;
+ }
+ partial_lh += nstates;
+ partial_lh_child += nstates;
+ }
+ } // if
+ echild += block*nstates;
+ } // FOR_NEIGHBOR
+
+
+ // compute dot-product with inv_eigenvector
+ double *partial_lh_tmp = partial_lh_all;
+ double *partial_lh = dad_branch->partial_lh + ptn*block;
+ for (c = 0; c < ncat_mix; c++) {
+ double lh_max = 0.0;
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
+ for (i = 0; i < nstates; i++) {
+ double res = 0.0;
+ for (x = 0; x < nstates; x++) {
+ res += partial_lh_tmp[x]*inv_evec_ptr[x];
+ }
+ inv_evec_ptr += nstates;
+ partial_lh[i] = res;
+ lh_max = max(lh_max, fabs(res));
+ }
+ // check if one should scale partial likelihoods
+ if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
+ if (ptn_invar[ptn] == 0.0) {
+ // now do the likelihood scaling
+ for (i = 0; i < nstates; i++)
+ partial_lh[i] *= SCALING_THRESHOLD_INVER;
+ scale_dad[c] += 1;
+ }
+ }
+ partial_lh += nstates;
+ partial_lh_tmp += nstates;
+ }
- for (it = aln->seq_states[right->node->id].begin(); it != aln->seq_states[right->node->id].end(); it++) {
- int state = (*it);
- VectorClass vright[VCSIZE];
- VectorClass vc_partial_lh_tmp[nstates/VCSIZE];
+ } // for ptn
- for (i = 0; i < nstates/VCSIZE; i++)
- vc_partial_lh_tmp[i].load_a(&tip_partial_lh[state*nstates+i*VCSIZE]);
- for (x = 0; x < block; x+=VCSIZE) {
- for (j = 0; j < VCSIZE; j++)
- vright[j] = eright[(x+j)*nstates/VCSIZE] * vc_partial_lh_tmp[0];
- for (i = 1; i < nstates/VCSIZE; i++) {
- for (j = 0; j < VCSIZE; j++)
- vright[j] = mul_add(eright[(x+j)*nstates/VCSIZE+i], vc_partial_lh_tmp[i], vright[j]);
- }
- horizontal_add(vright).store_a(&partial_lh_right[state*block+x]);
- }
- }
+ // end multifurcating treatment
+ } else if (left->node->isLeaf() && right->node->isLeaf()) {
+ // special treatment for TIP-TIP (cherry) case
- size_t addr_unknown = aln->STATE_UNKNOWN * block;
- for (x = 0; x < block; x++) {
- partial_lh_left[addr_unknown+x] = 1.0;
- partial_lh_right[addr_unknown+x] = 1.0;
- }
+ // pre compute information for both tips
+ double *partial_lh_left = partial_lh_leaves;
+ double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
// assign pointers for left and right partial_lh
+ /*
double **lh_left_ptr = aligned_alloc<double*>(nptn);
double **lh_right_ptr = aligned_alloc<double*>(nptn);
for (ptn = 0; ptn < orig_ntn; ptn++) {
@@ -246,9 +340,10 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
lh_left_ptr[ptn] = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
lh_right_ptr[ptn] = &partial_lh_right[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
}
+ */
// scale number must be ZERO
- memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
+ memset(dad_branch->scale_num, 0, scale_size * sizeof(UBYTE));
VectorClass vc_partial_lh_tmp[nstates/VCSIZE];
VectorClass res[VCSIZE];
@@ -258,9 +353,17 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
for (ptn = 0; ptn < nptn; ptn++) {
double *partial_lh = dad_branch->partial_lh + ptn*block;
- double *lh_left = lh_left_ptr[ptn];
- double *lh_right = lh_right_ptr[ptn];
- for (c = 0; c < ncat; c++) {
+ double *lh_left;
+ double *lh_right;
+ if (ptn < orig_nptn) {
+ lh_left = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
+ lh_right = &partial_lh_right[block * (aln->at(ptn))[right->node->id]];
+ } else {
+ lh_left = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_nptn]];
+ lh_right = &partial_lh_right[block * model_factory->unobserved_ptns[ptn-orig_nptn]];
+ }
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass *vc_inv_evec_ptr = (VectorClass*)(inv_evec + mix_addr[c]);
// compute real partial likelihood vector
for (x = 0; x < nstates/VCSIZE; x++) {
@@ -269,11 +372,11 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
// compute dot-product with inv_eigenvector
for (i = 0; i < nstates; i+=VCSIZE) {
for (j = 0; j < VCSIZE; j++) {
- res[j] = vc_partial_lh_tmp[0] * vc_inv_evec[(i+j)*nstates/VCSIZE];
+ res[j] = vc_partial_lh_tmp[0] * vc_inv_evec_ptr[(i+j)*nstates/VCSIZE];
}
for (x = 1; x < nstates/VCSIZE; x++)
for (j = 0; j < VCSIZE; j++) {
- res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec[(i+j)*nstates/VCSIZE+x], res[j]);
+ res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec_ptr[(i+j)*nstates/VCSIZE+x], res[j]);
}
horizontal_add(res).store_a(&partial_lh[i]);
}
@@ -284,43 +387,19 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
}
}
- aligned_free(lh_left_ptr);
- aligned_free(lh_right_ptr);
- aligned_free(partial_lh_right);
- aligned_free(partial_lh_left);
+ //aligned_free(lh_right_ptr);
+ //aligned_free(lh_left_ptr);
} else if (left->node->isLeaf() && !right->node->isLeaf()) {
// special treatment to TIP-INTERNAL NODE case
// only take scale_num from the right subtree
- memcpy(dad_branch->scale_num, right->scale_num, nptn * sizeof(UBYTE));
+ memcpy(dad_branch->scale_num, right->scale_num, scale_size * sizeof(UBYTE));
// pre compute information for left tip
- double *partial_lh_left = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
+ double *partial_lh_left = partial_lh_leaves;
- vector<int>::iterator it;
- for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
- int state = (*it);
- VectorClass vc_tip_lh[nstates/VCSIZE];
- VectorClass vleft[VCSIZE];
- for (i = 0; i < nstates/VCSIZE; i++)
- vc_tip_lh[i].load_a(&tip_partial_lh[state*nstates+i*VCSIZE]);
- for (x = 0; x < block; x+=VCSIZE) {
- for (j = 0; j < VCSIZE; j++)
- vleft[j] = eleft[(x+j)*nstates/VCSIZE] * vc_tip_lh[0];
- for (i = 1; i < nstates/VCSIZE; i++) {
- for (j = 0; j < VCSIZE; j++)
- vleft[j] = mul_add(eleft[(x+j)*nstates/VCSIZE+i], vc_tip_lh[i], vleft[j]);
- }
- horizontal_add(vleft).store_a(&partial_lh_left[state*block+x]);
- }
- }
-
- size_t addr_unknown = aln->STATE_UNKNOWN * block;
- for (x = 0; x < block; x++) {
- partial_lh_left[addr_unknown+x] = 1.0;
- }
-
// assign pointers for partial_lh_left
+ /*
double **lh_left_ptr = aligned_alloc<double*>(nptn);
for (ptn = 0; ptn < orig_ntn; ptn++) {
lh_left_ptr[ptn] = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
@@ -328,8 +407,7 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
for (ptn = orig_ntn; ptn < nptn; ptn++) {
lh_left_ptr[ptn] = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_ntn]];
}
-
- double sum_scale = 0.0;
+ */
VectorClass vc_lh_right[nstates/VCSIZE];
VectorClass vc_partial_lh_tmp[nstates/VCSIZE];
VectorClass res[VCSIZE];
@@ -337,15 +415,22 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
VectorClass vright[VCSIZE];
#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sum_scale) private (ptn, c, x, i, j, vc_lh_right, vc_partial_lh_tmp, res, vc_max, vright)
+#pragma omp parallel for private (ptn, c, x, i, j, vc_lh_right, vc_partial_lh_tmp, res, vc_max, vright)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double *partial_lh = dad_branch->partial_lh + ptn*block;
double *partial_lh_right = right->partial_lh + ptn*block;
- double *lh_left = lh_left_ptr[ptn];
- vc_max = 0.0;
- for (c = 0; c < ncat; c++) {
+ double *lh_left;
+ if (ptn < orig_nptn) {
+ lh_left = &partial_lh_left[block * (aln->at(ptn))[left->node->id]];
+ } else {
+ lh_left = &partial_lh_left[block * model_factory->unobserved_ptns[ptn-orig_nptn]];
+ }
+
+ for (c = 0; c < ncat_mix; c++) {
+ vc_max = 0.0;
+ VectorClass *vc_inv_evec_ptr = (VectorClass*)(inv_evec + mix_addr[c]);
// compute real partial likelihood vector
for (i = 0; i < nstates/VCSIZE; i++)
vc_lh_right[i].load_a(&partial_lh_right[i*VCSIZE]);
@@ -365,46 +450,37 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
// compute dot-product with inv_eigenvector
for (i = 0; i < nstates; i+=VCSIZE) {
for (j = 0; j < VCSIZE; j++) {
- res[j] = vc_partial_lh_tmp[0] * vc_inv_evec[(i+j)*nstates/VCSIZE];
+ res[j] = vc_partial_lh_tmp[0] * vc_inv_evec_ptr[(i+j)*nstates/VCSIZE];
}
for (x = 1; x < nstates/VCSIZE; x++) {
for (j = 0; j < VCSIZE; j++) {
- res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec[(i+j)*nstates/VCSIZE+x], res[j]);
+ res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec_ptr[(i+j)*nstates/VCSIZE+x], res[j]);
}
}
VectorClass sum_res = horizontal_add(res);
sum_res.store_a(&partial_lh[i]);
vc_max = max(vc_max, abs(sum_res)); // take the maximum for scaling check
}
+ // check if one should scale partial likelihoods
+ double lh_max = horizontal_max(vc_max);
+ if (lh_max < SCALING_THRESHOLD && ptn_invar[ptn] == 0.0 && lh_max != 0.0) {
+ // now do the likelihood scaling
+ VectorClass scale_thres(SCALING_THRESHOLD_INVER);
+ for (i = 0; i < block; i+=VCSIZE) {
+ (VectorClass().load_a(&partial_lh[i]) * scale_thres).store_a(&partial_lh[i]);
+ }
+ dad_branch->scale_num[ptn*ncat_mix+c] += 1;
+ }
lh_left += nstates;
partial_lh_right += nstates;
partial_lh += nstates;
}
- // check if one should scale partial likelihoods
- double lh_max = horizontal_max(vc_max);
- if (lh_max < SCALING_THRESHOLD && ptn_invar[ptn] == 0.0) {
- // now do the likelihood scaling
- partial_lh -= block; // revert its pointer
- VectorClass scale_thres(SCALING_THRESHOLD_INVER);
- for (i = 0; i < block; i+=VCSIZE) {
- (VectorClass().load_a(&partial_lh[i]) * scale_thres).store_a(&partial_lh[i]);
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- partial_lh += block; // increase the pointer again
- }
}
- dad_branch->lh_scale_factor += sum_scale;
-
- aligned_free(lh_left_ptr);
- aligned_free(partial_lh_left);
} else {
// both left and right are internal node
- double sum_scale = 0.0;
VectorClass vc_max; // maximum of partial likelihood, for scaling check
VectorClass vc_partial_lh_tmp[nstates/VCSIZE];
VectorClass vc_lh_left[nstates/VCSIZE], vc_lh_right[nstates/VCSIZE];
@@ -412,16 +488,20 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
VectorClass vleft[VCSIZE], vright[VCSIZE];
#ifdef _OPENMP
-#pragma omp parallel for reduction (+: sum_scale) private(ptn, c, x, i, j, vc_max, vc_partial_lh_tmp, vc_lh_left, vc_lh_right, res, vleft, vright)
+#pragma omp parallel for private(ptn, c, x, i, j, vc_max, vc_partial_lh_tmp, vc_lh_left, vc_lh_right, res, vleft, vright)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double *partial_lh = dad_branch->partial_lh + ptn*block;
double *partial_lh_left = left->partial_lh + ptn*block;
double *partial_lh_right = right->partial_lh + ptn*block;
-
- dad_branch->scale_num[ptn] = left->scale_num[ptn] + right->scale_num[ptn];
- vc_max = 0.0;
- for (c = 0; c < ncat; c++) {
+ UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ UBYTE *scale_left = left->scale_num + ptn*ncat_mix;
+ UBYTE *scale_right = right->scale_num + ptn*ncat_mix;
+
+ for (c = 0; c < ncat_mix; c++) {
+ scale_dad[c] = scale_left[c] + scale_right[c];
+ vc_max = 0.0;
+ VectorClass *vc_inv_evec_ptr = (VectorClass*)(inv_evec + mix_addr[c]);
// compute real partial likelihood vector
for (i = 0; i < nstates/VCSIZE; i++) {
vc_lh_left[i].load_a(&partial_lh_left[i*VCSIZE]);
@@ -447,43 +527,40 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
// compute dot-product with inv_eigenvector
for (i = 0; i < nstates; i+=VCSIZE) {
for (j = 0; j < VCSIZE; j++) {
- res[j] = vc_partial_lh_tmp[0] * vc_inv_evec[(i+j)*nstates/VCSIZE];
+ res[j] = vc_partial_lh_tmp[0] * vc_inv_evec_ptr[(i+j)*nstates/VCSIZE];
}
for (x = 1; x < nstates/VCSIZE; x++)
for (j = 0; j < VCSIZE; j++)
- res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec[(i+j)*nstates/VCSIZE+x], res[j]);
+ res[j] = mul_add(vc_partial_lh_tmp[x], vc_inv_evec_ptr[(i+j)*nstates/VCSIZE+x], res[j]);
VectorClass sum_res = horizontal_add(res);
sum_res.store_a(&partial_lh[i]);
vc_max = max(vc_max, abs(sum_res)); // take the maximum for scaling check
}
+ // check if one should scale partial likelihoods
+ double lh_max = horizontal_max(vc_max);
+ if (lh_max < SCALING_THRESHOLD && ptn_invar[ptn] == 0.0 && lh_max != 0.0) {
+ // now do the likelihood scaling
+ VectorClass scale_thres(SCALING_THRESHOLD_INVER);
+ for (i = 0; i < block; i+=VCSIZE) {
+ (VectorClass().load_a(&partial_lh[i]) * scale_thres).store_a(&partial_lh[i]);
+ }
+ // unobserved const pattern will never have underflow
+ scale_dad[c] += 1;
+ }
partial_lh += nstates;
partial_lh_left += nstates;
partial_lh_right += nstates;
}
- // check if one should scale partial likelihoods
- double lh_max = horizontal_max(vc_max);
- if (lh_max < SCALING_THRESHOLD && ptn_invar[ptn] == 0.0) {
- // now do the likelihood scaling
- partial_lh -= block; // revert its pointer
- VectorClass scale_thres(SCALING_THRESHOLD_INVER);
- for (i = 0; i < block; i+=VCSIZE) {
- (VectorClass().load_a(&partial_lh[i]) * scale_thres).store_a(&partial_lh[i]);
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- partial_lh += block; // increase the pointer again
- }
}
- dad_branch->lh_scale_factor += sum_scale;
}
- aligned_free(eright);
- aligned_free(eleft);
+ if (partial_lh_leaves)
+ aligned_free(partial_lh_leaves);
+ aligned_free(echildren);
}
template <class VectorClass, const int VCSIZE, const int nstates>
@@ -506,14 +583,19 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(node_branch, node);
df = ddf = 0.0;
size_t ncat = site_rate->getNRate();
-
- size_t block = ncat * nstates;
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t block = ncat_mix * nstates;
+ size_t tip_block = nstates * model->getNMixtures();
size_t ptn; // for big data size > 4GB memory required
size_t c, i, j;
size_t orig_nptn = aln->size();
size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
size_t maxptn = ((nptn+VCSIZE-1)/VCSIZE)*VCSIZE;
maxptn = max(maxptn, aln->size()+((model_factory->unobserved_ptns.size()+VCSIZE-1)/VCSIZE)*VCSIZE);
+
+ size_t mix_addr_nstates[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+
double *eval = model->getEigenvalues();
assert(eval);
@@ -522,11 +604,15 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
VectorClass *vc_val2 = (VectorClass*)aligned_alloc<double>(block);
VectorClass vc_len = dad_branch->length;
- for (c = 0; c < ncat; c++) {
- VectorClass vc_rate = site_rate->getRate(c);
- VectorClass vc_prop = site_rate->getProp(c);
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ size_t mycat = c%ncat;
+ double *eval_ptr = eval + m*nstates;
+ VectorClass vc_rate = site_rate->getRate(mycat);
+ VectorClass vc_prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
for (i = 0; i < nstates/VCSIZE; i++) {
- VectorClass cof = VectorClass().load_a(&eval[i*VCSIZE]) * vc_rate;
+ VectorClass cof = VectorClass().load_a(&eval_ptr[i*VCSIZE]) * vc_rate;
VectorClass val = exp(cof*vc_len) * vc_prop;
VectorClass val1_ = cof*val;
vc_val0[c*nstates/VCSIZE+i] = val;
@@ -538,41 +624,79 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
assert(theta_all);
if (!theta_computed) {
theta_computed = true;
+ double scale_all = 0.0;
// precompute theta for fast branch length optimization
if (dad->isLeaf()) {
// special treatment for TIP-INTERNAL NODE case
#ifdef _OPENMP
-#pragma omp parallel for private(ptn, i)
+#pragma omp parallel for private(ptn, i, c) reduction(+: scale_all)
#endif
- for (ptn = 0; ptn < orig_nptn; ptn++) {
+ for (ptn = 0; ptn < nptn; ptn++) {
double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
+ UBYTE *scale_dad = dad_branch->scale_num+ptn*ncat_mix;
double *theta = theta_all + ptn*block;
- double *lh_dad = &tip_partial_lh[(aln->at(ptn))[dad->id] * nstates];
- for (i = 0; i < block; i+=VCSIZE) {
- (VectorClass().load_a(&lh_dad[i%nstates]) * VectorClass().load_a(&partial_lh_dad[i])).store_a(&theta[i]);
- }
- }
- // ascertainment bias correction
- for (ptn = orig_nptn; ptn < nptn; ptn++) {
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- double *theta = theta_all + ptn*block;
- double *lh_dad = &tip_partial_lh[model_factory->unobserved_ptns[ptn-orig_nptn] * nstates];
- for (i = 0; i < block; i+=VCSIZE) {
- (VectorClass().load_a(&lh_dad[i%nstates]) * VectorClass().load_a(&partial_lh_dad[i])).store_a(&theta[i]);
- }
+ double *this_tip_partial_lh = tip_partial_lh + tip_block*((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]);
+ UBYTE min_scale = scale_dad[0];
+ for (c = 1; c < ncat_mix; c++)
+ min_scale = min(min_scale, scale_dad[c]);
+
+ scale_all += (double)min_scale;
+
+ for (c = 0; c < ncat_mix; c++) {
+ double *lh_dad = this_tip_partial_lh + mix_addr_nstates[c];
+ if (scale_dad[c] == min_scale) {
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ (VectorClass().load_a(&lh_dad[i]) * VectorClass().load_a(&partial_lh_dad[i])).store_a(&theta[i]);
+ }
+ } else if (scale_dad[c] == min_scale+1) {
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ (VectorClass().load_a(&lh_dad[i]) * VectorClass().load_a(&partial_lh_dad[i]) * VectorClass(SCALING_THRESHOLD)).store_a(&theta[i]);
+ }
+ } else {
+ memset(theta, 0, sizeof(double)*nstates);
+ }
+ partial_lh_dad += nstates;
+ theta += nstates;
+ }
}
} else {
// both dad and node are internal nodes
- double *partial_lh_node = node_branch->partial_lh;
- double *partial_lh_dad = dad_branch->partial_lh;
- size_t all_entries = nptn*block;
#ifdef _OPENMP
-#pragma omp parallel for private(i)
+#pragma omp parallel for private(i, c) reduction(+: scale_all)
#endif
- for (i = 0; i < all_entries; i+=VCSIZE) {
- (VectorClass().load_a(&partial_lh_node[i]) * VectorClass().load_a(&partial_lh_dad[i]))
- .store_a(&theta_all[i]);
+ for (ptn = 0; ptn < nptn; ptn++) {
+ double *theta = theta_all + ptn*block;
+ double *partial_lh_node = node_branch->partial_lh + ptn*block;
+ double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
+
+ size_t ptn_ncat = ptn*ncat_mix;
+ UBYTE *scale_dad = dad_branch->scale_num + ptn_ncat;
+ UBYTE *scale_node = node_branch->scale_num + ptn_ncat;
+ UBYTE sum_scale[ncat_mix];
+ UBYTE min_scale = sum_scale[0] = scale_dad[0] + scale_node[0];
+ for (c = 1; c < ncat_mix; c++) {
+ sum_scale[c] = scale_dad[c] + scale_node[c];
+ min_scale = min(min_scale, sum_scale[c]);
+ }
+ scale_all += (double)min_scale;
+
+ for (c = 0; c < ncat_mix; c++) {
+ if (sum_scale[c] == min_scale) {
+ for (i = 0; i < nstates; i++) {
+ (VectorClass().load_a(&partial_lh_node[i]) * VectorClass().load_a(&partial_lh_dad[i])).store_a(&theta[i]);
+ }
+ } else if (sum_scale[c] == min_scale+1) {
+ for (i = 0; i < nstates; i++) {
+ (VectorClass().load_a(&partial_lh_node[i]) * VectorClass().load_a(&partial_lh_dad[i]) * VectorClass(SCALING_THRESHOLD)).store_a(&theta[i]);
+ }
+ } else {
+ memset(theta, 0, sizeof(double)*nstates);
+ }
+ theta += nstates;
+ partial_lh_dad += nstates;
+ partial_lh_node += nstates;
+ }
}
}
if (nptn < maxptn) {
@@ -580,6 +704,7 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
for (ptn = nptn; ptn < maxptn; ptn++)
memcpy(&theta_all[ptn*block], theta_all, block*sizeof(double));
}
+ buffer_scale_all = scale_all*LOG_SCALING_THRESHOLD;
}
@@ -648,12 +773,8 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
#endif
df = horizontal_add(df_final);
ddf = horizontal_add(ddf_final);
- if (isnan(df) || isinf(df)) {
- df = 0.0;
- ddf = 0.0;
-// outWarning("Numerical instability (some site-likelihood = 0)");
- }
-
+
+ assert(!isnan(df) && !isinf(df) && "Numerical underflow for SIMD lh-derivative");
// assert(isnormal(tree_lh));
if (orig_nptn < nptn) {
@@ -754,8 +875,12 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
computePartialLikelihoodEigenSIMD<VectorClass, VCSIZE, nstates>(node_branch, node);
double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ size_t mix_addr_nstates[ncat_mix];
- size_t block = ncat * nstates;
+ size_t block = ncat_mix * nstates;
+ size_t tip_block = nstates * model->getNMixtures();
size_t ptn; // for big data size > 4GB memory required
size_t c, i, j;
size_t orig_nptn = aln->size();
@@ -768,13 +893,16 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
VectorClass *vc_val = (VectorClass*)aligned_alloc<double>(block);
- for (c = 0; c < ncat; c++) {
- double len = site_rate->getRate(c)*dad_branch->length;
- VectorClass vc_len(len);
- VectorClass vc_prop(site_rate->getProp(c));
+ for (c = 0; c < ncat_mix; c++) {
+ size_t mycat = c%ncat;
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ VectorClass vc_len(site_rate->getRate(mycat)*dad_branch->length);
+ VectorClass vc_prop(site_rate->getProp(c) * model->getMixtureWeight(m));
for (i = 0; i < nstates/VCSIZE; i++) {
// eval is not aligned!
- vc_val[c*nstates/VCSIZE+i] = exp(VectorClass().load_a(&eval[i*VCSIZE]) * vc_len) * vc_prop;
+ vc_val[c*nstates/VCSIZE+i] = exp(VectorClass().load_a(&eval_ptr[i*VCSIZE]) * vc_len) * vc_prop;
}
}
@@ -782,55 +910,84 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
if (dad->isLeaf()) {
// special treatment for TIP-INTERNAL NODE case
- VectorClass vc_tip_partial_lh[nstates];
- VectorClass vc_partial_lh_dad[VCSIZE], vc_ptn[VCSIZE];
+
+ // precompute information from one tip
+ double *partial_lh_node = aligned_alloc<double>((aln->STATE_UNKNOWN+1)*block);
+ IntVector states_dad = aln->seq_states[dad->id];
+ states_dad.push_back(aln->STATE_UNKNOWN);
+ for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
+ double *lh_node = partial_lh_node + (*it)*block;
+ double *lh_tip = tip_partial_lh + (*it)*tip_block;
+ VectorClass *vc_val_tmp = vc_val;
+ for (c = 0; c < ncat_mix; c++) {
+ double *this_lh_tip = lh_tip + mix_addr_nstates[c];
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ (vc_val_tmp[i/VCSIZE] * VectorClass().load_a(&this_lh_tip[i])).store_a(&lh_node[i]);
+ }
+ lh_node += nstates;
+ vc_val_tmp += nstates/VCSIZE;
+ }
+ }
+
+
+ //VectorClass vc_tip_partial_lh[nstates];
+ //VectorClass vc_partial_lh_dad[VCSIZE]
+ VectorClass vc_ptn[VCSIZE];
VectorClass lh_final(0.0), vc_freq;
VectorClass lh_ptn; // store likelihoods of VCSIZE consecutive patterns
- double **lh_states_dad = aligned_alloc<double*>(maxptn);
- for (ptn = 0; ptn < orig_nptn; ptn++)
- lh_states_dad[ptn] = &tip_partial_lh[(aln->at(ptn))[dad->id] * nstates];
- for (ptn = orig_nptn; ptn < nptn; ptn++)
- lh_states_dad[ptn] = &tip_partial_lh[model_factory->unobserved_ptns[ptn-orig_nptn] * nstates];
- // initialize beyond #patterns for efficiency
- for (ptn = nptn; ptn < maxptn; ptn++)
- lh_states_dad[ptn] = &tip_partial_lh[aln->STATE_UNKNOWN * nstates];
+ int *ptn_states_dad = aligned_alloc<int>(maxptn);
+ for (ptn = 0; ptn < orig_nptn; ptn++)
+ ptn_states_dad[ptn] = (aln->at(ptn))[dad->id];
+ for (ptn = orig_nptn; ptn < nptn; ptn++)
+ ptn_states_dad[ptn] = model_factory->unobserved_ptns[ptn-orig_nptn];
+ // initialize beyond #patterns for efficiency
+ for (ptn = nptn; ptn < maxptn; ptn++)
+ ptn_states_dad[ptn] = aln->STATE_UNKNOWN;
// copy dummy values because VectorClass will access beyond nptn
for (ptn = nptn; ptn < maxptn; ptn++)
memcpy(&dad_branch->partial_lh[ptn*block], dad_branch->partial_lh, block*sizeof(double));
#ifdef _OPENMP
-#pragma omp parallel private(ptn, i, j, vc_tip_partial_lh, vc_partial_lh_dad, vc_ptn, vc_freq, lh_ptn)
+#pragma omp parallel private(ptn, i, j, vc_ptn, vc_freq, lh_ptn)
{
VectorClass lh_final_th = 0.0;
#pragma omp for nowait
#endif
// main loop over all patterns with a step size of VCSIZE
for (ptn = 0; ptn < orig_nptn; ptn+=VCSIZE) {
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
-
- // initialize vc_tip_partial_lh
+ //double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
+ VectorClass vc_scale;
for (j = 0; j < VCSIZE; j++) {
- double *lh_dad = lh_states_dad[ptn+j];
- for (i = 0; i < nstates/VCSIZE; i++) {
- vc_tip_partial_lh[j*(nstates/VCSIZE)+i].load_a(&lh_dad[i*VCSIZE]);
- }
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block]);
- vc_ptn[j] = vc_val[0] * vc_tip_partial_lh[j*(nstates/VCSIZE)] * vc_partial_lh_dad[j];
+ vc_ptn[j] = 0.0;
+ double *partial_lh_dad = dad_branch->partial_lh + (ptn+j)*block;
+ UBYTE *scale_dad = dad_branch->scale_num + (ptn+j)*ncat_mix;
+ // determine the min scaling
+ UBYTE min_scale = *min_element(scale_dad, scale_dad+ncat_mix);
+ vc_scale.insert(j, (double)min_scale);
+
+ double *lh_node = &partial_lh_node[ptn_states_dad[ptn+j]*block];
+
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass this_vc_ptn = 0.0;
+ if (scale_dad[c] <= min_scale+1) {
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ this_vc_ptn = mul_add(VectorClass().load_a(&lh_node[i]), VectorClass().load_a(&partial_lh_dad[i]), this_vc_ptn);
+ }
+ if (scale_dad[c] == min_scale)
+ vc_ptn[j] += this_vc_ptn;
+ else
+ vc_ptn[j] += this_vc_ptn * VectorClass(SCALING_THRESHOLD);
+ }
+ lh_node += nstates;
+ partial_lh_dad += nstates;
+ }
}
- // compute vc_ptn
- for (i = 1; i < block/VCSIZE; i++)
- for (j = 0; j < VCSIZE; j++) {
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block+i*VCSIZE]);
- vc_ptn[j] = mul_add(vc_val[i] * vc_tip_partial_lh[j*(nstates/VCSIZE)+i%(nstates/VCSIZE)],
- vc_partial_lh_dad[j], vc_ptn[j]);
- }
-
vc_freq.load_a(&ptn_freq[ptn]);
lh_ptn = horizontal_add(vc_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
- lh_ptn = log(abs(lh_ptn));
+ lh_ptn = log(abs(lh_ptn)) + vc_scale*LOG_SCALING_THRESHOLD;
lh_ptn.store_a(&_pattern_lh[ptn]);
// multiply with pattern frequency
@@ -849,57 +1006,46 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
}
#endif
tree_lh += horizontal_add(lh_final);
- if (isnan(tree_lh) || isinf(tree_lh)) {
- cout << "WARNING: Numerical underflow caused by alignment sites";
- i = aln->getNSite();
- for (j = 0; j < i; j++) {
- ptn = aln->getPatternID(j);
- if (isnan(_pattern_lh[ptn]) || isinf(_pattern_lh[ptn])) {
- cout << " " << j+1;
- }
- }
- tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
- for (ptn = 0; ptn < orig_nptn; ptn++) {
- if (isnan(_pattern_lh[ptn]) || isinf(_pattern_lh[ptn])) {
- _pattern_lh[ptn] = LOG_SCALING_THRESHOLD*4; // log(2^(-1024))
- }
- tree_lh += _pattern_lh[ptn] * ptn_freq[ptn];
- }
- cout << endl;
-// cout << "WARNING: Tree log-likelihood is set to " << tree_lh << endl;
- }
- // ascertainment bias correction
+ assert(!isnan(tree_lh) & !isinf(tree_lh) && "Numerical underflow for SIMD lh-branch");
+
+ // ascertainment bias correction
if (orig_nptn < nptn) {
lh_final = 0.0;
lh_ptn = 0.0;
for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
- double *partial_lh_dad = &dad_branch->partial_lh[ptn*block];
+// double *partial_lh_dad = &dad_branch->partial_lh[ptn*block];
+ VectorClass vc_scale;
lh_final += lh_ptn;
-
- // initialize vc_tip_partial_lh
for (j = 0; j < VCSIZE; j++) {
- double *lh_dad = lh_states_dad[ptn+j];
- for (i = 0; i < nstates/VCSIZE; i++) {
- vc_tip_partial_lh[j*(nstates/VCSIZE)+i].load(&lh_dad[i*VCSIZE]); // lh_dad is not aligned!
- }
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block]);
- vc_ptn[j] = vc_val[0] * vc_tip_partial_lh[j*(nstates/VCSIZE)] * vc_partial_lh_dad[j];
+ vc_ptn[j] = 0.0;
+ double *partial_lh_dad = dad_branch->partial_lh + (ptn+j)*block;
+ UBYTE *scale_dad = dad_branch->scale_num + (ptn+j)*ncat_mix;
+ // determine the min scaling
+ UBYTE min_scale = *min_element(scale_dad, scale_dad+ncat_mix);
+ vc_scale.insert(j, min_scale);
+
+ int state_dad = ptn_states_dad[ptn+j];
+ double *lh_node = &partial_lh_node[state_dad*block];
+
+ for (c = 0; c < ncat_mix; c++) {
+ VectorClass this_vc_ptn = 0.0;
+ if (scale_dad[c] <= min_scale+1) {
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ this_vc_ptn = mul_add(VectorClass().load_a(&lh_node[i]), VectorClass().load_a(&partial_lh_dad[i]), this_vc_ptn);
+ }
+ if (scale_dad[c] == min_scale)
+ vc_ptn[j] += this_vc_ptn;
+ else
+ vc_ptn[j] += this_vc_ptn * VectorClass(SCALING_THRESHOLD);
+ }
+ lh_node += nstates;
+ partial_lh_dad += nstates;
+ }
+ // bugfix 2016-01-21, prob_const can be rescaled
+ if (min_scale >= 1)
+ vc_ptn[j] = vc_ptn[j] * VectorClass(SCALING_THRESHOLD);
}
-
- // compute vc_ptn
- for (i = 1; i < block/VCSIZE; i++)
- for (j = 0; j < VCSIZE; j++) {
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[j*block+i*VCSIZE]);
- vc_ptn[j] = mul_add(vc_val[i] * vc_tip_partial_lh[j*(nstates/VCSIZE)+i%(nstates/VCSIZE)],
- vc_partial_lh_dad[j], vc_ptn[j]);
- }
-
- // bugfix 2016-01-21, prob_const can be rescaled
- for (j = 0; j < VCSIZE; j++)
- if (dad_branch->scale_num[ptn+j] >= 1)
- vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
-
// ptn_invar[ptn] is not aligned
lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
}
@@ -911,7 +1057,9 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
default: assert(0); break;
}
}
- aligned_free(lh_states_dad);
+ aligned_free(ptn_states_dad);
+ aligned_free(partial_lh_node);
+
} else {
// both dad and node are internal nodes
VectorClass vc_partial_lh_node[VCSIZE];
@@ -932,25 +1080,46 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
#pragma omp for nowait
#endif
for (ptn = 0; ptn < orig_nptn; ptn+=VCSIZE) {
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- double *partial_lh_node = node_branch->partial_lh + ptn*block;
- for (j = 0; j < VCSIZE; j++)
- vc_ptn[j] = 0.0;
+ VectorClass vc_scale;
+ for (j = 0; j < VCSIZE; j++) {
+ vc_ptn[j] = 0.0;
+ double *partial_lh_dad = dad_branch->partial_lh + (ptn+j)*block;
+ double *partial_lh_node = node_branch->partial_lh + (ptn+j)*block;
+ VectorClass *val_tmp = vc_val;
+ UBYTE *scale_dad = dad_branch->scale_num + (ptn+j)*ncat_mix;
+ UBYTE *scale_node = node_branch->scale_num + (ptn+j)*ncat_mix;
+ // determine the min scaling
+ UBYTE sum_scale[ncat_mix];
+ UBYTE min_scale = sum_scale[0] = scale_dad[0]+scale_node[0];
+ for (c = 1; c < ncat_mix; c++) {
+ sum_scale[c] = scale_dad[c] + scale_node[c];
+ min_scale = min(min_scale, sum_scale[c]);
+ }
+ vc_scale.insert(j, min_scale);
- for (i = 0; i < block; i+=VCSIZE) {
- for (j = 0; j < VCSIZE; j++) {
- vc_partial_lh_node[j].load_a(&partial_lh_node[i+j*block]);
- vc_partial_lh_dad[j].load_a(&partial_lh_dad[i+j*block]);
- vc_ptn[j] = mul_add(vc_val[i/VCSIZE] * vc_partial_lh_node[j], vc_partial_lh_dad[j], vc_ptn[j]);
- }
+ for (c = 0; c < ncat_mix; c++) {
+ if (sum_scale[c] <= min_scale+1) {
+ VectorClass this_vc_ptn = 0.0;
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ this_vc_ptn = mul_add(VectorClass().load_a(&partial_lh_node[i]) * VectorClass().load_a(&partial_lh_dad[i]), val_tmp[i/VCSIZE], this_vc_ptn);
+ }
+ if (sum_scale[c] == min_scale)
+ vc_ptn[j] += this_vc_ptn;
+ else
+ vc_ptn[j] += this_vc_ptn * VectorClass(SCALING_THRESHOLD);
+ }
+ partial_lh_node += nstates;
+ partial_lh_dad += nstates;
+ val_tmp += nstates/VCSIZE;
+ }
}
vc_freq.load_a(&ptn_freq[ptn]);
lh_ptn = horizontal_add(vc_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
- lh_ptn = log(abs(lh_ptn));
+ lh_ptn = log(abs(lh_ptn)) + vc_scale*LOG_SCALING_THRESHOLD;
lh_ptn.store_a(&_pattern_lh[ptn]);
#ifdef _OPENMP
lh_final_th = mul_add(lh_ptn, vc_freq, lh_final_th);
@@ -973,12 +1142,47 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
// ascertainment bias correction
lh_final = 0.0;
lh_ptn = 0.0;
- double *partial_lh_node = &node_branch->partial_lh[orig_nptn*block];
- double *partial_lh_dad = &dad_branch->partial_lh[orig_nptn*block];
for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
lh_final += lh_ptn;
+ VectorClass vc_scale;
+ for (j = 0; j < VCSIZE; j++) {
+ vc_ptn[j] = 0.0;
+ double *partial_lh_dad = dad_branch->partial_lh + (ptn+j)*block;
+ double *partial_lh_node = node_branch->partial_lh + (ptn+j)*block;
+ VectorClass *val_tmp = vc_val;
+ UBYTE *scale_dad = dad_branch->scale_num + (ptn+j)*ncat_mix;
+ UBYTE *scale_node = node_branch->scale_num + (ptn+j)*ncat_mix;
+ // determine the min scaling
+ UBYTE sum_scale[ncat_mix];
+ UBYTE min_scale = sum_scale[0] = scale_dad[0]+scale_node[0];
+ for (c = 1; c < ncat_mix; c++) {
+ sum_scale[c] = scale_dad[c] + scale_node[c];
+ min_scale = min(min_scale, sum_scale[c]);
+ }
+ vc_scale.insert(j, min_scale);
+
+ for (c = 0; c < ncat_mix; c++) {
+ if (sum_scale[c] <= min_scale+1) {
+ VectorClass this_vc_ptn = 0.0;
+ for (i = 0; i < nstates; i+=VCSIZE) {
+ this_vc_ptn = mul_add(VectorClass().load_a(&partial_lh_node[i]) * VectorClass().load_a(&partial_lh_dad[i]), val_tmp[i/VCSIZE], this_vc_ptn);
+ }
+ if (sum_scale[c] == min_scale)
+ vc_ptn[j] += this_vc_ptn;
+ else
+ vc_ptn[j] += this_vc_ptn * VectorClass(SCALING_THRESHOLD);
+ }
+ partial_lh_node += nstates;
+ partial_lh_dad += nstates;
+ val_tmp += nstates/VCSIZE;
+ }
+ if (min_scale >= 1)
+ vc_ptn[j] *= VectorClass(SCALING_THRESHOLD);
+ }
+
+ /*
for (j = 0; j < VCSIZE; j++)
vc_ptn[j] = 0.0;
@@ -990,15 +1194,14 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
}
}
+
// bugfix 2016-01-21, prob_const can be rescaled
for (j = 0; j < VCSIZE; j++)
if (dad_branch->scale_num[ptn+j] + node_branch->scale_num[ptn+j] >= 1)
vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
-
+ */
// ptn_invar[ptn] is not aligned
lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
- partial_lh_node += block*VCSIZE;
- partial_lh_dad += block*VCSIZE;
}
switch ((nptn-orig_nptn)%VCSIZE) {
case 0: prob_const = horizontal_add(lh_final+lh_ptn); break;
@@ -1032,7 +1235,10 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
double tree_lh = current_it->lh_scale_factor + current_it_back->lh_scale_factor;
size_t ncat = site_rate->getNRate();
- size_t block = ncat * nstates;
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+
+ size_t block = ncat_mix * nstates;
size_t ptn; // for big data size > 4GB memory required
size_t c, i, j;
size_t orig_nptn = aln->size();
@@ -1044,11 +1250,14 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
VectorClass *vc_val0 = (VectorClass*)aligned_alloc<double>(block);
VectorClass vc_len = current_it->length;
- for (c = 0; c < ncat; c++) {
- VectorClass vc_rate = site_rate->getRate(c);
- VectorClass vc_prop = site_rate->getProp(c);
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ double *eval_ptr = eval + (m)*nstates;
+ size_t mycat = c%ncat;
+ VectorClass vc_rate = site_rate->getRate(mycat);
+ VectorClass vc_prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
for (i = 0; i < nstates/VCSIZE; i++) {
- VectorClass cof = VectorClass().load_a(&eval[i*VCSIZE]) * vc_rate;
+ VectorClass cof = VectorClass().load_a(&eval_ptr[i*VCSIZE]) * vc_rate;
VectorClass val = exp(cof*vc_len) * vc_prop;
vc_val0[c*nstates/VCSIZE+i] = val;
}
@@ -1100,30 +1309,9 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
}
}
#endif
- tree_lh += horizontal_add(lh_final);
- if (isnan(tree_lh) || isinf(tree_lh)) {
- cout << "WARNING: Numerical underflow caused by alignment sites";
- i = aln->getNSite();
- for (j = 0, c = 0; j < i; j++) {
- ptn = aln->getPatternID(j);
- if (isnan(_pattern_lh[ptn]) || isinf(_pattern_lh[ptn])) {
- cout << " " << j+1;
- c++;
- if (c >= 10) {
- cout << " ...";
- break;
- }
- }
- }
- cout << endl;
- tree_lh = current_it->lh_scale_factor + current_it_back->lh_scale_factor;
- for (ptn = 0; ptn < orig_nptn; ptn++) {
- if (isnan(_pattern_lh[ptn]) || isinf(_pattern_lh[ptn])) {
- _pattern_lh[ptn] = LOG_SCALING_THRESHOLD*4; // log(2^(-1024))
- }
- tree_lh += _pattern_lh[ptn] * ptn_freq[ptn];
- }
- }
+ tree_lh += horizontal_add(lh_final) + buffer_scale_all;
+
+ assert(!isnan(tree_lh) && !isinf(tree_lh) && "Numerical underflow for SIMD lh-FromBuffer");
if (orig_nptn < nptn) {
// ascertaiment bias correction
@@ -1132,41 +1320,43 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
double prob_const;// df_const, ddf_const;
double *theta = &theta_all[orig_nptn*block];
- UBYTE sum_scale_num[nstates+VCSIZE];
+ UBYTE sum_scale_num[(nstates+VCSIZE)*ncat_mix];
memset(sum_scale_num, 0, sizeof(UBYTE)*(nstates+VCSIZE));
if (current_it->node->isLeaf())
- memcpy(sum_scale_num, current_it_back->scale_num+orig_nptn, sizeof(UBYTE)*(nptn-orig_nptn));
+ memcpy(sum_scale_num, current_it_back->scale_num+orig_nptn*ncat_mix, sizeof(UBYTE)*(nptn-orig_nptn)*ncat_mix);
else if (current_it_back->node->isLeaf())
- memcpy(sum_scale_num, current_it->scale_num+orig_nptn, sizeof(UBYTE)*(nptn-orig_nptn));
+ memcpy(sum_scale_num, current_it->scale_num+orig_nptn*ncat_mix, sizeof(UBYTE)*(nptn-orig_nptn)*ncat_mix);
else {
- for (ptn = orig_nptn; ptn < nptn; ptn++)
- sum_scale_num[ptn-orig_nptn] = current_it->scale_num[ptn] + current_it_back->scale_num[ptn];
+ UBYTE *cur_scale_num = current_it->scale_num + orig_nptn*ncat_mix;
+ UBYTE *back_scale_num = current_it_back->scale_num + orig_nptn*ncat_mix;
+ c = (nptn-orig_nptn)*ncat_mix;
+ for (i = 0; i < c; i++)
+ sum_scale_num[i] = cur_scale_num[i] + back_scale_num[i];
}
- for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
- lh_final += lh_ptn;
-
+ for (ptn = orig_nptn; ptn < nptn; ptn++) {
+ //lh_final += lh_ptn;
// initialization
- for (i = 0; i < VCSIZE; i++) {
- vc_ptn[i] = vc_val0[0] * VectorClass().load_a(theta+i*block);
- }
+ VectorClass this_vc_ptn = vc_val0[0] * VectorClass().load_a(theta);
+
+ UBYTE *this_sum_scale = sum_scale_num + (ptn-orig_nptn)*ncat_mix;
+ UBYTE min_scale = *min_element(this_sum_scale, this_sum_scale + ncat_mix);
for (i = 1; i < block/VCSIZE; i++) {
- for (j = 0; j < VCSIZE; j++) {
- vc_ptn[j] = mul_add(VectorClass().load_a(&theta[i*VCSIZE+j*block]), vc_val0[i], vc_ptn[j]);
- }
+ this_vc_ptn = mul_add(VectorClass().load_a(&theta[i*VCSIZE]), vc_val0[i], this_vc_ptn);
}
- theta += block*VCSIZE;
+ theta += block;
// bugfix 2016-01-21, prob_const can be rescaled
- for (j = 0; j < VCSIZE; j++)
- if (sum_scale_num[ptn+j-orig_nptn] >= 1)
- vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+ if (min_scale >= 1)
+ this_vc_ptn *= VectorClass(SCALING_THRESHOLD);
- // ptn_invar[ptn] is not aligned
- lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
+ // no +I for +ASC!
+ prob_const = horizontal_add(this_vc_ptn);
}
+
+ /*
switch ((nptn-orig_nptn) % VCSIZE) {
case 0:
prob_const = horizontal_add(lh_final+lh_ptn);
@@ -1184,6 +1374,7 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
assert(0);
break;
}
+ */
prob_const = log(1.0 - prob_const);
tree_lh -= aln->getNSite() * prob_const;
for (ptn = 0; ptn < orig_nptn; ptn++)
@@ -1270,8 +1461,8 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
if (dad_branch->partial_lh_computed & 2)
return;
Node *node = dad_branch->node;
- int nstates = aln->num_states;
- int site;
+ int nstates = aln->getMaxNumStates();
+ int site = 0;
const int VCSIZE = VectorClass::size();
const int NUM_BITS = VectorClass::size() * UINT_BITS;
@@ -1279,130 +1470,144 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
if (node->isLeaf() && dad) {
// external node
+ vector<Alignment*> *partitions = NULL;
+ if (aln->isSuperAlignment())
+ partitions = &((SuperAlignment*)aln)->partitions;
+ else {
+ partitions = new vector<Alignment*>;
+ partitions->push_back(aln);
+ }
if (aln->ordered_pattern.empty())
aln->orderPatternByNumChars();
int leafid = node->id;
int pars_size = getBitsBlockSize();
memset(dad_branch->partial_pars, 0, pars_size*sizeof(UINT));
-// int ptn;
-// int nptn = aln->size();
int ambi_aa[] = {2, 3, 5, 6, 9, 10}; // {4+8, 32+64, 512+1024};
-// int max_sites = ((aln->num_informative_sites+UINT_BITS-1)/UINT_BITS)*UINT_BITS;
-// UINT *x = dad_branch->partial_pars - (nstates*VCSIZE);
UINT *x = dad_branch->partial_pars;
- Alignment::iterator pat;
- switch (aln->seq_type) {
- case SEQ_DNA:
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < 4) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 4*VCSIZE;
- site = 0;
+ int start_pos = 0;
+
+ for (vector<Alignment*>::iterator alnit = partitions->begin(); alnit != partitions->end(); alnit++) {
+ int end_pos = start_pos + (*alnit)->ordered_pattern.size();
+ switch ((*alnit)->seq_type) {
+ case SEQ_DNA:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < 4) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
}
- x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 4*VCSIZE;
- site = 0;
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT bit1 = (1 << (site%UINT_BITS));
+ UINT *p = x+(site/UINT_BITS);
+ p[0] |= bit1;
+ p[VCSIZE] |= bit1;
+ p[2*VCSIZE] |= bit1;
+ p[3*VCSIZE] |= bit1;
}
- UINT bit1 = (1 << (site%UINT_BITS));
- UINT *p = x+(site/UINT_BITS);
- p[0] |= bit1;
- p[VCSIZE] |= bit1;
- p[2*VCSIZE] |= bit1;
- p[3*VCSIZE] |= bit1;
- }
- } else {
- state -= 3;
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 4*VCSIZE;
- site = 0;
+ } else {
+ state -= 3;
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT *p = x + ((site/UINT_BITS));
+
+ UINT bit1 = (1 << (site%UINT_BITS));
+ for (int i = 0; i < 4; i++)
+ if (state & (1<<i))
+ p[i*VCSIZE] |= bit1;
}
- UINT *p = x + ((site/UINT_BITS));
-
- UINT bit1 = (1 << (site%UINT_BITS));
- for (int i = 0; i < 4; i++)
- if (state & (1<<i))
- p[i*VCSIZE] |= bit1;
}
}
- }
- break;
- case SEQ_PROTEIN:
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < 20) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 20*VCSIZE;
- site = 0;
+ break;
+ case SEQ_PROTEIN:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < 20) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
}
- x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 20*VCSIZE;
- site = 0;
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT bit1 = (1 << (site%UINT_BITS));
+ UINT *p = x+(site/UINT_BITS);
+ for (int i = 0; i < 20; i++)
+ p[i*VCSIZE] |= bit1;
}
- UINT bit1 = (1 << (site%UINT_BITS));
- UINT *p = x+(site/UINT_BITS);
- for (int i = 0; i < 20; i++)
- p[i*VCSIZE] |= bit1;
- }
- } else {
- assert(state < 23);
- state = (state-20)*2;
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += 20*VCSIZE;
- site = 0;
+ } else {
+ assert(state < 23);
+ state = (state-20)*2;
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT *p = x + ((site/UINT_BITS));
+ UINT bit1 = (1 << (site%UINT_BITS));
+
+ p[ambi_aa[state]*VCSIZE] |= bit1;
+ p[ambi_aa[state+1]*VCSIZE] |= bit1;
}
- UINT *p = x + ((site/UINT_BITS));
- UINT bit1 = (1 << (site%UINT_BITS));
-
- p[ambi_aa[state]*VCSIZE] |= bit1;
- p[ambi_aa[state+1]*VCSIZE] |= bit1;
}
}
- }
- break;
- default:
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < nstates) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += nstates*VCSIZE;
- site = 0;
+ break;
+ default:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < (*alnit)->num_states) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
}
- x[state*VCSIZE + site/UINT_BITS] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- if (site == NUM_BITS) {
- x += nstates*VCSIZE;
- site = 0;
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ if (site == NUM_BITS) {
+ x += nstates*VCSIZE;
+ site = 0;
+ }
+ UINT bit1 = (1 << (site%UINT_BITS));
+ UINT *p = x+(site/UINT_BITS);
+ for (int i = 0; i < (*alnit)->num_states; i++)
+ p[i*VCSIZE] |= bit1;
}
- UINT bit1 = (1 << (site%UINT_BITS));
- UINT *p = x+(site/UINT_BITS);
- for (int i = 0; i < nstates; i++)
- p[i*VCSIZE] |= bit1;
+ } else {
+ assert(0);
}
- } else {
- assert(0);
}
- }
- break;
- }
+ break;
+ } // end of switch
+ start_pos = end_pos;
+ } // end of FOR loop over partitions
+
+ assert(start_pos == aln->ordered_pattern.size());
+// assert(site == aln->num_informative_sites % NUM_BITS);
// add dummy states
if (site > 0 && site < NUM_BITS) {
x += site/UINT_BITS;
@@ -1411,6 +1616,8 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
int max_sites = ((site+UINT_BITS-1)/UINT_BITS);
memset(x, 255, (VCSIZE - max_sites)*sizeof(UINT));
}
+ if (!aln->isSuperAlignment())
+ delete partitions;
} else {
// internal node
assert(node->degree() == 3); // it works only for strictly bifurcating tree
@@ -1433,7 +1640,7 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
#pragma omp parallel for private (site) reduction(+: score) if(nsites>200)
#endif
for (site = 0; site<nsites; site++) {
- size_t offset = 4*VCSIZE*site;
+ size_t offset = entry_size*site;
VectorClass *x = (VectorClass*)(left->partial_pars + offset);
VectorClass *y = (VectorClass*)(right->partial_pars + offset);
VectorClass *z = (VectorClass*)(dad_branch->partial_pars + offset);
@@ -1504,7 +1711,7 @@ int PhyloTree::computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNo
if ((node_branch->partial_lh_computed & 2) == 0)
computePartialParsimonyFastSIMD<VectorClass>(node_branch, node);
int site;
- int nstates = aln->num_states;
+ int nstates = aln->getMaxNumStates();
// VectorClass score = 0;
// VectorClass w;
@@ -1576,4 +1783,4 @@ int PhyloTree::computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNo
}
-#endif /* PHYLOKERNEL_H_ */
+#endif /* PHYLOKERNELSAFE_H_ */
diff --git a/phylokernelsitemodel.cpp b/phylokernelsitemodel.cpp
index d681d6f..da70ef0 100644
--- a/phylokernelsitemodel.cpp
+++ b/phylokernelsitemodel.cpp
@@ -590,7 +590,7 @@ double PhyloTree::computeSitemodelLikelihoodBranchEigen(PhyloNeighbor *dad_branc
size_t nptn = aln->size();
- memset(_pattern_lh_cat, 0, nptn*ncat*sizeof(double));
+ memset(_pattern_lh_cat, 0, sizeof(double)*nptn*ncat);
ModelSet *models = (ModelSet*)model;
if (dad->isLeaf()) {
@@ -610,7 +610,7 @@ double PhyloTree::computeSitemodelLikelihoodBranchEigen(PhyloNeighbor *dad_branc
double len = site_rate->getRate(c)*dad_branch->length;
double prop = site_rate->getProp(c);
for (i = 0; i < nstates; i++) {
- *lh_cat += exp(eval[i]*len) * partial_lh_node[i] * partial_lh_dad[i];
+ *lh_cat += (exp(eval[i]*len) * partial_lh_node[i] * partial_lh_dad[i]);
}
*lh_cat *= prop;
lh_ptn += *lh_cat;
@@ -640,7 +640,7 @@ double PhyloTree::computeSitemodelLikelihoodBranchEigen(PhyloNeighbor *dad_branc
double len = site_rate->getRate(c)*dad_branch->length;
double prop = site_rate->getProp(c);
for (i = 0; i < nstates; i++) {
- *lh_cat += exp(eval[i]*len) * partial_lh_node[i] * partial_lh_dad[i];
+ *lh_cat += (exp(eval[i]*len) * partial_lh_node[i] * partial_lh_dad[i]);
}
*lh_cat *= prop;
lh_ptn += *lh_cat;
diff --git a/phylokernelsse.cpp b/phylokernelsse.cpp
new file mode 100644
index 0000000..bb3ee7f
--- /dev/null
+++ b/phylokernelsse.cpp
@@ -0,0 +1,169 @@
+/*
+ * phylokernelsse.cpp
+ *
+ * Created on: Sept 25, 2016
+ * Author: minh
+ */
+
+
+#include "vectorclass/vectormath_exp.h"
+#include "vectorclass/vectorclass.h"
+#include "phylokernel.h"
+//#include "phylokernelsafe.h"
+//#include "phylokernelmixture.h"
+//#include "phylokernelmixrate.h"
+//#include "phylokernelsitemodel.h"
+
+#include "phylokernelnew.h"
+#define KERNEL_FIX_STATES
+#include "phylokernelnew.h"
+
+
+#if !defined ( __SSE2__ ) && !defined ( __x86_64__ )
+#error "You must compile this file with SSE2 enabled!"
+#endif
+
+void PhyloTree::setParsimonyKernelSSE() { // point both parsimony function pointers at the Vec4ui instantiations of the fast SIMD routines
+ computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchFastSIMD<Vec4ui>; // branch-level parsimony routine
+ computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyFastSIMD<Vec4ui>; // partial-parsimony routine
+}
+
+void PhyloTree::setDotProductSSE() { // point dotProduct and dotProductDouble at SIMD dot-product instantiations
+#ifdef BOOT_VAL_FLOAT
+ dotProduct = &PhyloTree::dotProductSIMD<float, Vec4f>; // BOOT_VAL_FLOAT builds: float elements with Vec4f
+#else
+ dotProduct = &PhyloTree::dotProductSIMD<double, Vec2d>; // otherwise: double elements with Vec2d
+#endif
+ dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec2d>; // always the double/Vec2d instantiation, regardless of BOOT_VAL_FLOAT
+}
+
+void PhyloTree::setLikelihoodKernelSSE() { // select the Vec2d likelihood kernels (branch, derivative, partial, from-buffer) and the parsimony kernels
+ vector_size = 2; // goes with the Vec2d instantiations below (presumably two doubles per vector; confirm)
+ setParsimonyKernelSSE(); // parsimony pointers are set unconditionally, before any early return
+ // Case 1: site-specific model AND safe scaling (lk_safe_scaling set, or leafNum >= numseq_safe_scaling) -> SAFE_LH kernels
+ if (model_factory && model_factory->model->isSiteSpecificModel() && (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling)) {
+ switch (aln->num_states) {
+ case 4: // instantiations with the state count fixed to 4
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec2d, SAFE_LH, 4, false, true>; // NOTE(review): meaning of the trailing `false, true` template arguments is not visible here; they occur only in the site-specific branches
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec2d, SAFE_LH, 4, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec2d, SAFE_LH, 4, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, SAFE_LH, 4, false, true>;
+ break;
+ case 20: // instantiations with the state count fixed to 20
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec2d, SAFE_LH, 20, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec2d, SAFE_LH, 20, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec2d, SAFE_LH, 20, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, SAFE_LH, 20, false, true>;
+ break;
+ default: // any other state count: generic kernels without a compile-time state count
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD <Vec2d, SAFE_LH, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD <Vec2d, SAFE_LH, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD <Vec2d, SAFE_LH, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec2d, SAFE_LH, false, true>;
+ break;
+ }
+ return;
+ }
+ // Case 2: site-specific model without safe scaling -> same layout as case 1 but with NORM_LH
+ if (model_factory && model_factory->model->isSiteSpecificModel()) {
+ switch (aln->num_states) {
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec2d, NORM_LH, 4, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec2d, NORM_LH, 4, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec2d, NORM_LH, 4, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, NORM_LH, 4, false, true>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec2d, NORM_LH, 20, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec2d, NORM_LH, 20, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec2d, NORM_LH, 20, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, NORM_LH, 20, false, true>;
+ break;
+ default:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD <Vec2d, NORM_LH, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD <Vec2d, NORM_LH, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD <Vec2d, NORM_LH, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec2d, NORM_LH, false, true>;
+ break;
+ }
+ return;
+ }
+ // Case 3: not a site-specific model (or no model_factory), safe scaling requested -> SAFE_LH kernels with default trailing template arguments
+ if (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling) {
+ switch(aln->num_states) {
+ /*
+ case 2:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec2d, SAFE_LH, 2>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec2d, SAFE_LH, 2>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec2d, SAFE_LH, 2>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, SAFE_LH, 2>;
+ break;
+ */
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec2d, SAFE_LH, 4>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec2d, SAFE_LH, 4>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec2d, SAFE_LH, 4>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, SAFE_LH, 4>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec2d, SAFE_LH, 20>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec2d, SAFE_LH, 20>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec2d, SAFE_LH, 20>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, SAFE_LH, 20>;
+ break;
+ /*
+ case 64:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec2d, SAFE_LH, 64>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec2d, SAFE_LH, 64>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec2d, SAFE_LH, 64>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, SAFE_LH, 64>;
+ break;
+ */
+ default: // with the 2- and 64-state cases commented out, those state counts also land here
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec2d, SAFE_LH>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec2d, SAFE_LH>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec2d, SAFE_LH>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec2d, SAFE_LH>;
+ break;
+ }
+ return;
+ }
+ // Case 4 (fallthrough): no site-specific model and no safe scaling -> NORM_LH kernels
+ switch(aln->num_states) {
+ /*
+ case 2:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec2d, NORM_LH, 2>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec2d, NORM_LH, 2>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec2d, NORM_LH, 2>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, NORM_LH, 2>;
+ break;
+ */
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec2d, NORM_LH, 4>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec2d, NORM_LH, 4>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec2d, NORM_LH, 4>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, NORM_LH, 4>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec2d, NORM_LH, 20>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec2d, NORM_LH, 20>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec2d, NORM_LH, 20>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, NORM_LH, 20>;
+ break;
+ /*
+ case 64:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec2d, NORM_LH, 64>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec2d, NORM_LH, 64>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec2d, NORM_LH, 64>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, NORM_LH, 64>;
+ break;
+ */
+ default:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec2d, NORM_LH>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec2d, NORM_LH>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec2d, NORM_LH>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec2d, NORM_LH>;
+ break;
+ }
+}
+
diff --git a/phylonode.cpp b/phylonode.cpp
index fa39d84..c33de75 100644
--- a/phylonode.cpp
+++ b/phylonode.cpp
@@ -19,33 +19,15 @@ void PhyloNeighbor::clearForwardPartialLh(Node *dad) {
((PhyloNeighbor*)*it)->clearForwardPartialLh(node);
}
-void PhyloNeighbor::reorientPartialLh(Node *dad) {
- if (partial_lh)
- return;
- bool done = false;
- FOR_NEIGHBOR_IT(node, dad, it) {
- PhyloNeighbor *backnei = (PhyloNeighbor*)(*it)->node->findNeighbor(node);
- if (backnei->partial_lh) {
- partial_lh = backnei->partial_lh;
- scale_num = backnei->scale_num;
- backnei->partial_lh = NULL;
- backnei->scale_num = NULL;
- backnei->partial_lh_computed &= ~1; // clear bit
- done = true;
- break;
- }
- }
- assert(done && "partial_lh is not re-oriented");
-}
-
-
void PhyloNode::clearReversePartialLh(PhyloNode *dad) {
// PhyloNeighbor *node_nei = (PhyloNeighbor*)findNeighbor(dad);
// assert(node_nei);
// node_nei->partial_lh_computed = 0;
for (NeighborVec::iterator it = neighbors.begin(); it != neighbors.end(); it ++)
if ((*it)->node != dad) {
- ((PhyloNeighbor*)(*it)->node->findNeighbor(this))->partial_lh_computed = 0;
+ PhyloNeighbor *nei = (PhyloNeighbor*)(*it)->node->findNeighbor(this);
+ nei->partial_lh_computed = 0;
+ nei->size = 0;
((PhyloNode*)(*it)->node)->clearReversePartialLh(this);
}
}
@@ -55,10 +37,17 @@ void PhyloNode::clearAllPartialLh(bool make_null, PhyloNode *dad) {
node_nei->partial_lh_computed = 0;
if (make_null) node_nei->partial_lh = NULL;
+
+ if (Params::getInstance().lh_mem_save == LM_MEM_SAVE)
+ node_nei->size = 0;
+
node_nei = (PhyloNeighbor*)dad->findNeighbor(this);
node_nei->partial_lh_computed = 0;
if (make_null) node_nei->partial_lh = NULL;
+ if (Params::getInstance().lh_mem_save == LM_MEM_SAVE)
+ node_nei->size = 0;
+
for (NeighborVec::iterator it = neighbors.begin(); it != neighbors.end(); it ++)
if ((*it)->node != dad)
((PhyloNode*)(*it)->node)->clearAllPartialLh(make_null, this);
@@ -94,3 +83,21 @@ void PhyloNode::init() {
void PhyloNode::addNeighbor(Node *node, double length, int id) {
neighbors.push_back(new PhyloNeighbor(node, length, id));
}
+
+
+int PhyloNode::computeSize(Node *dad) { // return the size stored on dad's neighbor record for this node, computing and caching it on first use
+ PhyloNeighbor *nei = (PhyloNeighbor*)dad->findNeighbor(this); // NOTE(review): result is dereferenced without a NULL check; assumes dad is adjacent to this node
+ if (nei->size > 0) // a positive value is treated as "already computed"
+ return nei->size;
+
+ if (isLeaf()) {
+ nei->size = 1; // a leaf contributes exactly one
+ return nei->size;
+ }
+ nei->size = 0; // reset before accumulating over the subtrees
+ FOR_NEIGHBOR_IT(this, dad, it) { // presumably visits every neighbor of this node except dad; confirm against the macro definition
+ nei->size += ((PhyloNode*)(*it)->node)->computeSize(this); // recurse with this node as the new dad and add the result
+ }
+ return nei->size;
+}
+
diff --git a/phylonode.h b/phylonode.h
index 5bc4563..0e9ff79 100644
--- a/phylonode.h
+++ b/phylonode.h
@@ -14,7 +14,7 @@
#include "node.h"
-typedef short int UBYTE;
+typedef unsigned short UBYTE;
/**
A neighbor in a phylogenetic tree
@@ -26,6 +26,7 @@ class PhyloNeighbor : public Neighbor {
friend class PhyloTree;
friend class IQTree;
friend class PhyloSuperTree;
+ friend class MemSlotVector;
public:
friend class TinaTree;
@@ -42,6 +43,7 @@ public:
partial_lh_computed = 0;
lh_scale_factor = 0.0;
partial_pars = NULL;
+ size = 0;
}
/**
@@ -56,6 +58,7 @@ public:
partial_lh_computed = 0;
lh_scale_factor = 0.0;
partial_pars = NULL;
+ size = 0;
}
/**
@@ -79,10 +82,11 @@ public:
void clearForwardPartialLh(Node *dad);
/**
+ DEPRECATED, moved to PhyloTree
if partial_lh is NULL, reorient partial_lh (LM_PER_NODE technique)
@param dad dad of this neighbor
*/
- void reorientPartialLh(Node *dad);
+// void reorientPartialLh(Node *dad);
/**
* For Upper Bounds analysis: get partial likelihood and lh scale factor
@@ -99,6 +103,10 @@ public:
return partial_lh_computed;
}
+ int getSize() {
+ return size;
+ }
+
private:
/**
@@ -126,6 +134,9 @@ private:
*/
UINT *partial_pars;
+ /** size of subtree below this neighbor in terms of number of taxa */
+ int size;
+
};
/**
@@ -189,6 +200,13 @@ public:
void computeReversePartialLh(PhyloNode *dad);
+ /**
+ compute the size (#taxa) of the subtree rooted at this node
+ using buffered 'size' attribute if computed beforehand
+ @param dad dad of this node
+ */
+ int computeSize(Node *dad);
+
};
diff --git a/phylosupertree.cpp b/phylosupertree.cpp
index 9dbf3e1..8fb8918 100644
--- a/phylosupertree.cpp
+++ b/phylosupertree.cpp
@@ -551,8 +551,9 @@ PhyloSuperTree::PhyloSuperTree(Params ¶ms) : IQTree() {
#ifdef _OPENMP
if (params.num_threads > size()) {
- outWarning("More threads (" + convertIntToString(params.num_threads) + ") than number of partitions (" + convertIntToString(size()) + ") might not be necessary.");
- outWarning("You are recommended to rerun with '-nt " + convertIntToString(size()) + "' and see if this is faster");
+ cout << "Info: multi-threading strategy over alignment sites" << endl;
+ } else {
+ cout << "Info: multi-threading strategy over partitions" << endl;
}
#endif
cout << endl;
@@ -568,34 +569,23 @@ void PhyloSuperTree::setParams(Params* params) {
void PhyloSuperTree::initSettings(Params ¶ms) {
IQTree::initSettings(params);
+ num_threads = (size() >= params.num_threads) ? params.num_threads : 1;
for (iterator it = begin(); it != end(); it++) {
(*it)->params = ¶ms;
- (*it)->setLikelihoodKernel(params.SSE);
+ (*it)->setLikelihoodKernel(params.SSE, (size() >= params.num_threads) ? 1 : params.num_threads);
(*it)->optimize_by_newton = params.optimize_by_newton;
}
}
-void PhyloSuperTree::setLikelihoodKernel(LikelihoodKernel lk) {
- PhyloTree::setLikelihoodKernel(lk);
+void PhyloSuperTree::setLikelihoodKernel(LikelihoodKernel lk, int num_threads) {
+ PhyloTree::setLikelihoodKernel(lk, (size() >= num_threads) ? num_threads : 1);
for (iterator it = begin(); it != end(); it++)
- (*it)->setLikelihoodKernel(lk);
+ (*it)->setLikelihoodKernel(lk, (size() >= num_threads) ? 1 : num_threads);
}
void PhyloSuperTree::changeLikelihoodKernel(LikelihoodKernel lk) {
PhyloTree::changeLikelihoodKernel(lk);
-// if ((sse == LK_EIGEN || sse == LK_EIGEN_SSE) && (lk == LK_NORMAL || lk == LK_SSE)) {
-// // need to increase the memory usage when changing from new kernel to old kernel
-// setLikelihoodKernel(lk);
-// for (iterator it = begin(); it != end(); it++)
-// (*it)->setLikelihoodKernel(lk);
-// deleteAllPartialLh();
-// initializeAllPartialLh();
-// clearAllPartialLH();
-// } else {
-// for (iterator it = begin(); it != end(); it++)
-// (*it)->setLikelihoodKernel(lk);
-// }
}
string PhyloSuperTree::getTreeString() {
@@ -891,7 +881,7 @@ void PhyloSuperTree::clearAllPartialLH(bool make_null) {
}
}
-int PhyloSuperTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
+int PhyloSuperTree::computeParsimonyBranchObsolete(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
int score = 0, part = 0;
SuperNeighbor *dad_nei = (SuperNeighbor*)dad_branch;
SuperNeighbor *node_nei = (SuperNeighbor*)(dad_branch->node->findNeighbor(dad));
@@ -967,7 +957,7 @@ double PhyloSuperTree::computeLikelihood(double *pattern_lh) {
} else {
if (part_order.empty()) computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= params->num_threads)
+ #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(num_threads > 1)
#endif
for (int j = 0; j < ntrees; j++) {
int i = part_order[j];
@@ -1028,7 +1018,7 @@ double PhyloSuperTree::optimizeAllBranches(int my_iterations, double tolerance,
int ntrees = size();
if (part_order.empty()) computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= params->num_threads)
+ #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(num_threads > 1)
#endif
for (int j = 0; j < ntrees; j++) {
int i = part_order[j];
@@ -1123,6 +1113,26 @@ NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NN
break;
}
+ // check for compatibility with constraint tree
+ bool nni_ok[2] = {true, true};
+ int nniid = 0;
+ FOR_NEIGHBOR(node2, node1, node2_it) {
+ NNIMove nni;
+ nni.node1 = node1;
+ nni.node2 = node2;
+ nni.node1Nei_it = node1->findNeighborIt(node1_nei->node);
+ nni.node2Nei_it = node2_it;
+ nni_ok[nniid++] = constraintTree.isCompatible(nni);
+ }
+ assert(nniid == 2);
+ myMove.node1 = myMove.node2 = NULL;
+ myMove.newloglh = -DBL_MAX;
+ // return if both NNIs do not satisfy constraint
+ if (!nni_ok[0] && !nni_ok[1]) {
+ assert(!nniMoves);
+ return myMove;
+ }
+
//double bestScore = optimizeOneBranch(node1, node2, false);
int ntrees = size(), part;
@@ -1131,7 +1141,7 @@ NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NN
if (part_order.empty()) computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: nni_score1, nni_score2, local_totalNNIs, local_evalNNIs) private(part) schedule(dynamic) if(ntrees >= params->num_threads)
+ #pragma omp parallel for reduction(+: nni_score1, nni_score2, local_totalNNIs, local_evalNNIs) private(part) schedule(dynamic) if(num_threads>1)
#endif
for (int treeid = 0; treeid < ntrees; treeid++) {
part = part_order_by_nptn[treeid];
@@ -1201,6 +1211,9 @@ NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NN
totalNNIs += local_totalNNIs;
evalNNIs += local_evalNNIs;
double nni_scores[2] = {nni_score1, nni_score2};
+
+ if (!nni_ok[0]) nni_scores[0] = -DBL_MAX;
+ if (!nni_ok[1]) nni_scores[1] = -DBL_MAX;
myMove.node1Nei_it = node1->findNeighborIt(node1_nei->node);
myMove.node1 = node1;
@@ -1221,8 +1234,9 @@ NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NN
//now setup pattern likelihoods per partition
double *save_lh_factor = new double [ntrees];
double *save_lh_factor_back = new double [ntrees];
- int nnino = 0;
- FOR_NEIGHBOR(node2, node1, node2_it) {
+ nniid = 0;
+ FOR_NEIGHBOR(node2, node1, node2_it) if (nni_ok[nniid])
+ {
// do the NNI
node2_nei = (SuperNeighbor*)(*node2_it);
@@ -1242,18 +1256,18 @@ NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NN
if (!is_nni)
memcpy(at(part)->_pattern_lh, part_info[part].cur_ptnlh, at(part)->getAlnNPattern() * sizeof(double));
else
- memcpy(at(part)->_pattern_lh, part_info[part].nniMoves[nnino].ptnlh, at(part)->getAlnNPattern() * sizeof(double));
+ memcpy(at(part)->_pattern_lh, part_info[part].nniMoves[nniid].ptnlh, at(part)->getAlnNPattern() * sizeof(double));
save_lh_factor[part] = at(part)->current_it->lh_scale_factor;
save_lh_factor_back[part] = at(part)->current_it_back->lh_scale_factor;
at(part)->current_it->lh_scale_factor = 0.0;
at(part)->current_it_back->lh_scale_factor = 0.0;
}
if (nniMoves) {
- nniMoves[nnino].newloglh = nni_scores[nnino];
- computePatternLikelihood(nniMoves[nnino].ptnlh, &nni_scores[nnino]);
+ nniMoves[nniid].newloglh = nni_scores[nniid];
+ computePatternLikelihood(nniMoves[nniid].ptnlh, &nni_scores[nniid]);
}
if (save_all_trees == 2)
- saveCurrentTree(nni_scores[nnino]);
+ saveCurrentTree(nni_scores[nniid]);
// restore information
for (part = 0; part < ntrees; part++) {
@@ -1266,7 +1280,7 @@ NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NN
node1_nei->node->updateNeighbor(node2, node1);
node2->updateNeighbor(node2_it, node2_nei);
node2_nei->node->updateNeighbor(node1, node2);
- nnino++;
+ nniid++;
}
@@ -1433,12 +1447,12 @@ PhyloTree *PhyloSuperTree::extractSubtree(IntVector &ids) {
return tree;
}
-uint64_t PhyloSuperTree::getMemoryRequired(size_t ncategory) {
+uint64_t PhyloSuperTree::getMemoryRequired(size_t ncategory, bool full_mem) {
// uint64_t mem_size = PhyloTree::getMemoryRequired(ncategory);
// supertree does not need any memory for likelihood vectors!
uint64_t mem_size = 0;
for (iterator it = begin(); it != end(); it++)
- mem_size += (*it)->getMemoryRequired(ncategory);
+ mem_size += (*it)->getMemoryRequired(ncategory, full_mem);
return mem_size;
}
diff --git a/phylosupertree.h b/phylosupertree.h
index e850977..d21ea97 100644
--- a/phylosupertree.h
+++ b/phylosupertree.h
@@ -96,7 +96,7 @@ public:
*/
virtual void initSettings(Params& params);
- virtual void setLikelihoodKernel(LikelihoodKernel lk);
+ virtual void setLikelihoodKernel(LikelihoodKernel lk, int num_threads);
virtual void changeLikelihoodKernel(LikelihoodKernel lk);
@@ -265,6 +265,7 @@ public:
virtual void changeNNIBrans(NNIMove nnimove);
/**
+ OBSOLETE!
* Restore the branch lengths from the saved values
* @param node the current node of the post-order tree traversal
* @param dad the dad of that node used to direct the traversal
@@ -321,7 +322,7 @@ public:
* compute the memory size required for storing partial likelihood vectors
* @return memory size required in bytes
*/
- virtual uint64_t getMemoryRequired(size_t ncategory = 1);
+ virtual uint64_t getMemoryRequired(size_t ncategory = 1, bool full_mem = false);
/**
* count the number of super branches that map to no branches in gene trees
@@ -338,7 +339,7 @@ public:
*/
virtual int fixNegativeBranch(bool force = false, Node *node = NULL, Node *dad = NULL);
- virtual int computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
+ virtual int computeParsimonyBranchObsolete(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
/** True when mixed codon with other data type */
bool rescale_codon_brlen;
diff --git a/phylosupertreeplen.cpp b/phylosupertreeplen.cpp
index 929be9a..753d84f 100644
--- a/phylosupertreeplen.cpp
+++ b/phylosupertreeplen.cpp
@@ -129,7 +129,7 @@ double PartitionModelPlen::optimizeParameters(int fixed_len, bool write_info, do
cur_lh = 0.0;
if (tree->part_order.empty()) tree->computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: cur_lh) schedule(dynamic) if(ntrees >= tree->params->num_threads)
+ #pragma omp parallel for reduction(+: cur_lh) schedule(dynamic) if(tree->num_threads > 1)
#endif
for (int partid = 0; partid < ntrees; partid++) {
int part = tree->part_order[partid];
@@ -229,15 +229,28 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon)
double score = 0.0;
double nsites = tree->getAlnNSite();
+ DoubleVector brlen;
+ brlen.resize(tree->branchNum);
+ tree->getBranchLengths(brlen);
+ double max_brlen = 0.0;
+ for (i = 0; i < brlen.size(); i++)
+ if (brlen[i] > max_brlen)
+ max_brlen = brlen[i];
+
if (tree->part_order.empty()) tree->computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: score) private(i) schedule(dynamic) if(tree->size() >= tree->params->num_threads)
+ #pragma omp parallel for reduction(+: score) private(i) schedule(dynamic) if(tree->num_threads > 1)
#endif
for (int j = 0; j < tree->size(); j++) {
int i = tree->part_order[j];
+ double min_scaling = 1.0/tree->at(i)->getAlnNSite();
double max_scaling = nsites / tree->at(i)->getAlnNSite();
- tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(1.0/tree->at(i)->getAlnNSite(), tree->part_info[i].part_rate, max_scaling, gradient_epsilon);
+ if (max_scaling < tree->part_info[i].part_rate)
+ max_scaling = tree->part_info[i].part_rate;
+ if (min_scaling > tree->part_info[i].part_rate)
+ min_scaling = tree->part_info[i].part_rate;
+ tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(min_scaling, tree->part_info[i].part_rate, max_scaling, gradient_epsilon);
score += tree->part_info[i].cur_score;
}
// now normalize the rates
@@ -251,6 +264,12 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon)
nsite += tree->at(i)->aln->getNSite();
}
sum /= nsite;
+
+ if (sum > tree->params->max_branch_length / max_brlen) {
+ cerr << endl << "ERROR: Too high (saturated) partition rates of the proportion partition model!"
+ << endl << "Please switch to the edge-equal partition model via -q option instead of -spp" << endl << endl;
+ exit(EXIT_FAILURE);
+ }
tree->scaleLength(sum);
sum = 1.0/sum;
for (i = 0; i < tree->size(); i++)
@@ -324,6 +343,8 @@ void PhyloSuperTreePlen::deleteAllPartialLh() {
(*it)->_pattern_lh = NULL;
(*it)->_pattern_lh_cat = NULL;
(*it)->theta_all = NULL;
+ (*it)->buffer_scale_all = NULL;
+ (*it)->buffer_partial_lh = NULL;
(*it)->ptn_freq = NULL;
(*it)->ptn_freq_computed = false;
(*it)->ptn_invar = NULL;
@@ -343,6 +364,8 @@ PhyloSuperTreePlen::~PhyloSuperTreePlen()
(*it)->_pattern_lh = NULL;
(*it)->_pattern_lh_cat = NULL;
(*it)->theta_all = NULL;
+ (*it)->buffer_scale_all = NULL;
+ (*it)->buffer_partial_lh = NULL;
(*it)->ptn_freq = NULL;
(*it)->ptn_freq_computed = false;
(*it)->ptn_invar = NULL;
@@ -453,7 +476,7 @@ void PhyloSuperTreePlen::optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, b
if (part_order.empty()) computePartitionOrder();
// bug fix: assign cur_score into part_info
#ifdef _OPENMP
- #pragma omp parallel for private(part) schedule(dynamic) if(size() >= params->num_threads)
+ #pragma omp parallel for private(part) schedule(dynamic) if(num_threads > 1)
#endif
for (int partid = 0; partid < size(); partid++) {
part = part_order_by_nptn[partid];
@@ -493,7 +516,7 @@ double PhyloSuperTreePlen::computeFunction(double value) {
if (part_order.empty()) computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= params->num_threads)
+ #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(num_threads > 1)
#endif
for (int partid = 0; partid < ntrees; partid++) {
int part = part_order_by_nptn[partid];
@@ -551,7 +574,7 @@ void PhyloSuperTreePlen::computeFuncDerv(double value, double &df_ret, double &d
if (part_order.empty()) computePartitionOrder();
#ifdef _OPENMP
- #pragma omp parallel for reduction(+: df, ddf) schedule(dynamic) if(ntrees >= params->num_threads)
+ #pragma omp parallel for reduction(+: df, ddf) schedule(dynamic) if(num_threads > 1)
#endif
for (int partid = 0; partid < ntrees; partid++) {
int part = part_order_by_nptn[partid];
@@ -567,6 +590,7 @@ void PhyloSuperTreePlen::computeFuncDerv(double value, double &df_ret, double &d
if(nei1_part->length<-1e-4){
cout<<"lambda = "<<lambda<<endl;
cout<<"NEGATIVE BRANCH len = "<<nei1_part->length<<endl<<" rate = "<<part_info[part].part_rate<<endl;
+ assert(0);
outError("shit!! ",__func__);
}
at(part)->computeLikelihoodDerv(nei2_part,(PhyloNode*)nei1_part->node, df_aux, ddf_aux);
@@ -628,10 +652,20 @@ NNIMove PhyloSuperTreePlen::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2
// Initialize node1 and node2 in nniMoves
nniMoves[0].node1 = nniMoves[1].node1 = node1;
nniMoves[0].node2 = nniMoves[1].node2 = node2;
+ nniMoves[0].newloglh = nniMoves[1].newloglh = -DBL_MAX;
+
+ // check for compatibility with constraint
+ // check for consistency with constraint tree
+ for (cnt = 0; cnt < 2; cnt++) {
+ if (!constraintTree.isCompatible(nniMoves[cnt])) {
+ nniMoves[cnt].node1 = nniMoves[cnt].node2 = NULL;
+ }
+ }
//--------------------------------------------------------------------------
- this->swapNNIBranch(0.0, node1, node2, &nni_param, nniMoves);
+ if (nniMoves[0].node1 || nniMoves[1].node1)
+ this->swapNNIBranch(0.0, node1, node2, &nni_param, nniMoves);
// restore curScore
@@ -651,8 +685,8 @@ NNIMove PhyloSuperTreePlen::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2
return myMove;
}
-void PhyloSuperTreePlen::doNNIs(int nni2apply, bool changeBran) {
- IQTree::doNNIs(nni2apply, changeBran);
+void PhyloSuperTreePlen::doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran) {
+ IQTree::doNNIs(compatibleNNIs, changeBran);
mapBranchLen();
//clearAllPartialLH();
}
@@ -954,6 +988,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
//evalNNIs++;
//part_info[part].evalNNIs++;
+ int mem_id = 0;
+
// one branch optimization ------------------------------------------------------------------
for(id = 0; id < 2; id++){
/*
@@ -969,8 +1005,11 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
// Create a new PhyloNeighbor, with new partial lhs, scale number and set the branch id as before
*sub_saved_it[part*6 + id] = new PhyloNeighbor(nei_link, saved_nei[id]->link_neighbors[part]->length);
- ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (id*total_block_size + lh_addr);
- ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (id*total_scale_block_size + scale_addr);
+ if (saved_nei[id]->link_neighbors[part]->partial_lh) {
+ ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (mem_id*total_block_size + lh_addr);
+ ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (mem_id*total_scale_block_size + scale_addr);
+ mem_id++;
+ }
(*sub_saved_it[part*6 + id])->id = saved_nei[id]->link_neighbors[part]->id;
// update link_neighbor[part]: for New SuperNeighbor we set the corresponding new PhyloNeighbor on partition part
@@ -984,8 +1023,11 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
node_link = ((SuperNeighbor*)(*node_nei_it[id-2]))->link_neighbors[part]->node;
sub_saved_it[part*6 + id] = node_link->findNeighborIt(nei_link);
*sub_saved_it[part*6 + id] = new PhyloNeighbor(nei_link, saved_nei[id]->link_neighbors[part]->length);
- ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (id*total_block_size + lh_addr);
- ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (id*total_scale_block_size + scale_addr);
+ if (saved_nei[id]->link_neighbors[part]->partial_lh) {
+ ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (mem_id*total_block_size + lh_addr);
+ ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (mem_id*total_scale_block_size + scale_addr);
+ mem_id++;
+ }
(*sub_saved_it[part*6 + id])->id = saved_nei[id]->link_neighbors[part]->id;
// update link_neighbor[part]
@@ -993,8 +1035,11 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
}
}
+ assert(mem_id == 2);
+
} else if(is_nni[part]==NNI_ONE_EPSILON){
+ int mem_id = 0;
// Make sure to update all the necessary link_neighbors and take care of branch lengths
// (increase/decrease by central branch where necessary).
@@ -1028,8 +1073,11 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
sub_saved_branch[6*part + id] = nei->link_neighbors[part]->length;
*sub_saved_it[part*6 + id] = new PhyloNeighbor(nei_link, nei->link_neighbors[part]->length);
- ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (id*total_block_size + lh_addr);
- ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (id*total_scale_block_size + scale_addr);
+ if (nei->link_neighbors[part]->partial_lh) {
+ ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (mem_id*total_block_size + lh_addr);
+ ((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (mem_id*total_scale_block_size + scale_addr);
+ mem_id++;
+ }
(*sub_saved_it[part*6 + id])->id = nei->link_neighbors[part]->id;
// If nni5 we update the link neighbors already here, otherwise
@@ -1043,6 +1091,7 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
id_eps[part] = id;
}
}
+ assert(mem_id == 1);
}else if(is_nni[part]==NNI_THREE_EPSILON && params->nni5){
// you fill out link neighbors vector for newly allocated SuperNeighbors
for(id = 2; id < 6; id++){
@@ -1093,7 +1142,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
* - restore if necessary.
*===========================================================================================*/
int cnt;
- for (cnt = 0; cnt < 2; cnt++) {
+ for (cnt = 0; cnt < 2; cnt++) if (nniMoves[cnt].node1) // only if nniMove satisfy constraint
+ {
//cout<<"NNI Loop-----------------------------NNI."<<cnt<<endl;
NeighborVec::iterator node1_it = nniMoves[cnt].node1Nei_it;
@@ -1129,6 +1179,10 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
//cout<<part<<"- NO_EPS: do NNI swap"<<endl;
//allNNIcases_computed[0] += 1;
+ // reorient partial_lh before swap
+ reorientPartialLh((PhyloNeighbor*)node1_link[part]->findNeighbor(node2_link[part]), node1_link[part]);
+ reorientPartialLh((PhyloNeighbor*)node2_link[part]->findNeighbor(node1_link[part]), node2_link[part]);
+
// Do NNI swap on partition
node1_link[part]->updateNeighbor(node1_link_it[part], node2_link_nei[part]);
node2_link_nei[part]->node->updateNeighbor(node2_link[part], node1_link[part]);
@@ -1420,6 +1474,11 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
for(part = 0; part < ntrees; part++){
if(is_nni[part]==NNI_NO_EPSILON){
+
+ // reorient partial_lh before swap
+ reorientPartialLh((PhyloNeighbor*)node1_link[part]->findNeighbor(node2_link[part]), node1_link[part]);
+ reorientPartialLh((PhyloNeighbor*)node2_link[part]->findNeighbor(node1_link[part]), node2_link[part]);
+
node1_link[part]->updateNeighbor(node1_link_it[part], node1_link_nei[part]);
node1_link_nei[part]->node->updateNeighbor(node2_link[part], node1_link[part]);
node2_link[part]->updateNeighbor(node2_link_it[part], node2_link_nei[part]);
@@ -1752,10 +1811,17 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
block_size.resize(ntrees);
scale_block_size.resize(ntrees);
- vector<uint64_t> mem_size, lh_cat_size;
+ vector<uint64_t> mem_size, lh_cat_size, buffer_size;
mem_size.resize(ntrees);
lh_cat_size.resize(ntrees);
- uint64_t total_mem_size = 0, total_block_size = 0, total_lh_cat_size = 0;
+ buffer_size.resize(ntrees);
+
+ uint64_t
+ total_mem_size = 0,
+ total_block_size = 0,
+ total_scale_block_size = 0,
+ total_lh_cat_size = 0,
+ total_buffer_size = 0;
if (part_order.empty())
computePartitionOrder();
@@ -1763,20 +1829,21 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
for (partid = 0; partid < ntrees; partid++) {
part = part_order[partid];
it = begin() + part;
- size_t nptn = (*it)->getAlnNPattern() + (*it)->aln->num_states; // extra #numStates for ascertainment bias correction
- if (instruction_set >= 7)
- mem_size[part] = ((nptn +3)/4)*4;
- else
- mem_size[part] = ((nptn % 2) == 0) ? nptn : (nptn + 1);
- scale_block_size[part] = nptn;
- block_size[part] = mem_size[part] * (*it)->aln->num_states * (*it)->getRate()->getNRate() *
+ // extra #numStates for ascertainment bias correction
+ mem_size[part] = get_safe_upper_limit((*it)->getAlnNPattern()) + get_safe_upper_limit((*it)->aln->num_states);
+ size_t mem_cat_size = mem_size[part] * (*it)->getRate()->getNRate() *
(((*it)->model_factory->fused_mix_rate)? 1 : (*it)->getModel()->getNMixtures());
+ block_size[part] = mem_cat_size * (*it)->aln->num_states;
+ scale_block_size[part] = mem_cat_size;
+
lh_cat_size[part] = mem_size[part] * (*it)->getRate()->getNDiscreteRate() *
(((*it)->model_factory->fused_mix_rate)? 1 : (*it)->getModel()->getNMixtures());
total_mem_size += mem_size[part];
total_block_size += block_size[part];
+ total_scale_block_size += scale_block_size[part];
total_lh_cat_size += lh_cat_size[part];
+ total_buffer_size += (buffer_size[part] = (*it)->getBufferPartialLhSize());
}
if (!_pattern_lh)
@@ -1787,7 +1854,13 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
at(part_order[0])->_pattern_lh_cat = _pattern_lh_cat;
if (!theta_all)
theta_all = aligned_alloc<double>(total_block_size);
+ if (!buffer_scale_all)
+ buffer_scale_all = aligned_alloc<double>(total_mem_size);
+ if (!buffer_partial_lh)
+ buffer_partial_lh = aligned_alloc<double>(total_buffer_size);
at(part_order[0])->theta_all = theta_all;
+ at(part_order[0])->buffer_scale_all = buffer_scale_all;
+ at(part_order[0])->buffer_partial_lh = buffer_partial_lh;
if (!ptn_freq) {
ptn_freq = aligned_alloc<double>(total_mem_size);
ptn_freq_computed = false;
@@ -1798,14 +1871,15 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
ptn_invar = aligned_alloc<double>(total_mem_size);
at(part_order[0])->ptn_invar = ptn_invar;
- size_t IT_NUM = (params->nni5) ? 6 : 2;
+// size_t IT_NUM = (params->nni5) ? 6 : 2;
+ size_t IT_NUM = 2;
if (!nni_partial_lh) {
nni_partial_lh = aligned_alloc<double>(IT_NUM*total_block_size);
}
at(part_order[0])->nni_partial_lh = nni_partial_lh;
if (!nni_scale_num) {
- nni_scale_num = aligned_alloc<UBYTE>(IT_NUM*total_mem_size);
+ nni_scale_num = aligned_alloc<UBYTE>(IT_NUM*total_scale_block_size);
}
at(part_order[0])->nni_scale_num = nni_scale_num;
@@ -1816,11 +1890,13 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
(*it)->_pattern_lh = (*prev_it)->_pattern_lh + mem_size[part];
(*it)->_pattern_lh_cat = (*prev_it)->_pattern_lh_cat + lh_cat_size[part];
(*it)->theta_all = (*prev_it)->theta_all + block_size[part];
+ (*it)->buffer_scale_all = (*prev_it)->buffer_scale_all + mem_size[part];
+ (*it)->buffer_partial_lh = (*prev_it)->buffer_partial_lh + buffer_size[part];
(*it)->ptn_freq = (*prev_it)->ptn_freq + mem_size[part];
(*it)->ptn_freq_computed = false;
(*it)->ptn_invar = (*prev_it)->ptn_invar + mem_size[part];
(*it)->nni_partial_lh = (*prev_it)->nni_partial_lh + IT_NUM*block_size[part];
- (*it)->nni_scale_num = (*prev_it)->nni_scale_num + IT_NUM*mem_size[part];
+ (*it)->nni_scale_num = (*prev_it)->nni_scale_num + IT_NUM*scale_block_size[part];
}
// compute total memory for all partitions
@@ -1873,6 +1949,32 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
tip_partial_lh_size = ((tip_partial_lh_size+3)/4)*4;
lh_addr += tip_partial_lh_size;
}
+
+ // 2016-09-29: redirect partial_lh when root does not occur in partition tree
+ SuperNeighbor *root_nei = (SuperNeighbor*)root->neighbors[0];
+ for (it = begin(), part = 0; it != end(); it++, part++) {
+ if (root_nei->link_neighbors[part])
+ continue;
+ NodeVector nodes;
+ (*it)->getInternalNodes(nodes);
+ for (NodeVector::iterator nit = nodes.begin(); nit != nodes.end(); nit++) {
+ bool has_partial_lh = false;
+ FOR_NEIGHBOR_IT(*nit, NULL, neiit)
+ if ( ((PhyloNeighbor*)(*neiit)->node->findNeighbor(*nit))->partial_lh) {
+ has_partial_lh = true;
+ break;
+ }
+ if (has_partial_lh)
+ continue;
+ // add partial_lh
+ PhyloNeighbor *back_nei = (PhyloNeighbor*)(*nit)->neighbors[0]->node->findNeighbor(*nit);
+ back_nei->partial_lh = lh_addr;
+ back_nei->scale_num = scale_addr;
+ lh_addr = lh_addr + block_size[part];
+ scale_addr = scale_addr + scale_block_size[part];
+ }
+ }
+
}
void PhyloSuperTreePlen::initializeAllPartialLh(double* &lh_addr, UBYTE* &scale_addr, UINT* &pars_addr, PhyloNode *node, PhyloNode *dad) {
@@ -1889,7 +1991,7 @@ void PhyloSuperTreePlen::initializeAllPartialLh(double* &lh_addr, UBYTE* &scale_
PhyloNeighbor *nei_part_back = nei_back->link_neighbors[part];
- if (params->lh_mem_save == LM_PER_NODE && (sse == LK_EIGEN || sse == LK_EIGEN_SSE)) {
+ if (params->lh_mem_save == LM_PER_NODE) {
if (!nei_part_back->node->isLeaf()) {
if (!nei_part_back->partial_lh) {
nei_part_back->partial_lh = lh_addr;
@@ -1904,7 +2006,7 @@ void PhyloSuperTreePlen::initializeAllPartialLh(double* &lh_addr, UBYTE* &scale_
// nei_part->partial_lh = NULL;
// nei_part->scale_num = NULL;
} else {
- if (nei_part->node->isLeaf() && (sse == LK_EIGEN || sse == LK_EIGEN_SSE)) {
+ if (nei_part->node->isLeaf()) {
nei_part->partial_lh = NULL; // do not allocate memory for tip, use tip_partial_lh instead
nei_part->scale_num = NULL;
} else if (!nei_part->partial_lh) {
@@ -1917,7 +2019,7 @@ void PhyloSuperTreePlen::initializeAllPartialLh(double* &lh_addr, UBYTE* &scale_
// pars_addr += partial_pars_entries[part];
nei_part = nei_back->link_neighbors[part];
- if (nei_part->node->isLeaf() && (sse == LK_EIGEN || sse == LK_EIGEN_SSE)) {
+ if (nei_part->node->isLeaf()) {
nei_part->partial_lh = NULL; // do not allocate memory for tip, use tip_partial_lh instead
nei_part->scale_num = NULL;
} else if (!nei_part->partial_lh) {
diff --git a/phylosupertreeplen.h b/phylosupertreeplen.h
index 9c844d3..88dd177 100644
--- a/phylosupertreeplen.h
+++ b/phylosupertreeplen.h
@@ -313,12 +313,13 @@ public:
@param move the single NNI
*/
virtual void doNNI(NNIMove &move, bool clearLH = true);
- /**
- apply nni2apply NNIs from the non-conflicting NNI list
- @param nni2apply number of NNIs to apply from the list
+
+ /**
+ apply NNIs from the non-conflicting NNI list
+ @param compatibleNNIs vector of all compatible NNIs
@param changeBran whether or not the computed branch lengths should be applied
*/
- virtual void doNNIs(int nni2apply, bool changeBran = true);
+ virtual void doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran = true);
/**
* Apply 5 new branch lengths stored in the NNI move
diff --git a/phylotesting.cpp b/phylotesting.cpp
index 0a318cd..2ddc637 100644
--- a/phylotesting.cpp
+++ b/phylotesting.cpp
@@ -35,7 +35,7 @@
#include "phyloanalysis.h"
#include "gsl/mygsl.h"
-#include "vectorclass/vectorclass.h"
+//#include "vectorclass/vectorclass.h"
/******* Binary model set ******/
@@ -241,6 +241,59 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh,
delete[] pattern_lh;
}
+/**
+ * Write one line with the likelihood score of every partition of a super tree.
+ *
+ * For each partition the score is the sum, over that partition's patterns, of
+ * the per-pattern value multiplied by the pattern frequency (ptn_freq).
+ *
+ * @param filename output file name
+ * @param tree     tree to report on; must be a super tree (asserted)
+ * @param ptn_lh   per-pattern values stored partition after partition, or NULL
+ *                 to have them computed here with computePatternLikelihood()
+ * @param append   true: append one line to the file;
+ *                 false: (re)create the file and first write the header line
+ *                 "1 <number of partitions>"
+ * @param linename label printed at the start of the line; NULL prints "Part_Lh"
+ */
+void printPartitionLh(const char*filename, PhyloTree *tree, double *ptn_lh,
+    bool append, const char *linename) {
+
+    assert(tree->isSuperTree());
+    PhyloSuperTree *stree = (PhyloSuperTree*)tree;
+    size_t i;
+    double *pattern_lh;
+    if (!ptn_lh) {
+        // caller supplied nothing: compute into a buffer owned by this function
+        pattern_lh = new double[tree->getAlnNPattern()];
+        tree->computePatternLikelihood(pattern_lh);
+    } else
+        pattern_lh = ptn_lh;
+
+    // Heap-backed and zero-initialised. A runtime-sized stack array is not
+    // standard C++ and its size would grow with the number of partitions.
+    vector<double> partition_lh(stree->size(), 0.0);
+    size_t part;
+    double *pattern_lh_ptr = pattern_lh;
+    for (part = 0; part < stree->size(); part++) {
+        size_t nptn = stree->at(part)->getAlnNPattern();
+        for (i = 0; i < nptn; i++)
+            partition_lh[part] += pattern_lh_ptr[i] * stree->at(part)->ptn_freq[i];
+        // the values of the next partition follow directly
+        pattern_lh_ptr += nptn;
+    }
+
+    try {
+        ofstream out;
+        out.exceptions(ios::failbit | ios::badbit);
+        if (append) {
+            out.open(filename, ios::out | ios::app);
+        } else {
+            out.open(filename);
+            out << 1 << " " << stree->size() << endl;
+        }
+        if (!linename)
+            out << "Part_Lh ";
+        else {
+            out.width(10);
+            out << left << linename;
+        }
+        for (i = 0; i < stree->size(); i++)
+            out << " " << partition_lh[i];
+        out << endl;
+        out.close();
+        if (!append)
+            cout << "Partition log-likelihoods printed to " << filename << endl;
+    } catch (ios::failure &) {
+        // caught by reference so the exception object is not copied or sliced
+        outError(ERR_WRITE_OUTPUT, filename);
+    }
+
+    // release the buffer only when it was allocated here
+    if (!ptn_lh)
+        delete[] pattern_lh;
+}
+
void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl) {
if (tree->isSuperTree()) {
@@ -267,7 +320,7 @@ void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl)
double *pattern_lh, *pattern_lh_cat;
int i;
pattern_lh = new double[tree->getAlnNPattern()];
- pattern_lh_cat = new double[tree->getAlnNPattern()*ncat];
+ pattern_lh_cat = new double[((size_t)tree->getAlnNPattern())*ncat];
tree->computePatternLikelihood(pattern_lh, NULL, pattern_lh_cat, wsl);
@@ -339,6 +392,120 @@ void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl)
}
+/**
+ * Print ancestral state probabilities and ancestral sequences for every
+ * internal node of the tree.
+ *
+ * Two files are written: <out_prefix>.ancestralprob (one row per node and site
+ * with the chosen state and the probability of each state) and
+ * <out_prefix>.ancestralseq (one sequence per node, plus a "_joint" sequence
+ * per node in joint mode).
+ *
+ * A marginal state whose best probability is below
+ * tree->params->min_ancestral_prob is reported as STATE_INVALID.
+ * Internal nodes whose name is empty or does not start with a letter are
+ * renamed to "Node<k>" as a side effect.
+ *
+ * @param out_prefix prefix of the two output file names
+ * @param tree       tree whose internal nodes are reconstructed
+ * @param ast        NOTE(review): not read here; the mode is taken from
+ *                   tree->params->print_ancestral_sequence instead. Confirm
+ *                   that callers always pass the same value.
+ */
+void printAncestralSequences(const char *out_prefix, PhyloTree *tree, AncestralSeqType ast) {
+
+    int i, j, nsites = tree->getAlnNSite(), nstates = tree->aln->num_states, nptn = tree->getAlnNPattern();
+
+    const bool print_joint = (tree->params->print_ancestral_sequence == AST_JOINT);
+
+    int *joint_ancestral = NULL;
+    // declared outside the try block so they are released on the error path too
+    double *marginal_ancestral_prob = NULL;
+    int *marginal_ancestral_seq = NULL;
+
+    if (print_joint) {
+        // size computed in size_t: nptn * leafNum can exceed the range of int
+        joint_ancestral = new int[(size_t)nptn * tree->leafNum];
+        tree->computeJointAncestralSequences(joint_ancestral);
+    }
+
+    string filename = (string)out_prefix + ".ancestralprob";
+    string filenameseq = (string)out_prefix + ".ancestralseq";
+
+    try {
+        ofstream out;
+        out.exceptions(ios::failbit | ios::badbit);
+        out.open(filename.c_str());
+
+        ofstream outseq;
+        outseq.exceptions(ios::failbit | ios::badbit);
+        outseq.open(filenameseq.c_str());
+
+        NodeVector nodes;
+        tree->getInternalNodes(nodes);
+        IntVector pattern_index;
+        tree->aln->getSitePatternIndex(pattern_index);
+
+        // NOTE(review): sized with the model's num_states but indexed below with
+        // the alignment's num_states; presumably the two agree - confirm.
+        marginal_ancestral_prob = new double[(size_t)nptn * tree->getModel()->num_states];
+        marginal_ancestral_seq = new int[nptn];
+
+        // NOTE(review): in joint mode every row carries one more column (the
+        // joint state) than this header names.
+        out << "Node\tSite\tMargin";
+        for (i = 0; i < nstates; i++)
+            out << "\tp_" << tree->aln->convertStateBackStr(i);
+        out << endl;
+
+        // joint mode prints two sequences (marginal and joint) per internal node
+        if (print_joint)
+            outseq << 2*(tree->nodeNum-tree->leafNum) << " " << nsites << endl;
+        else
+            outseq << (tree->nodeNum-tree->leafNum) << " " << nsites << endl;
+
+        int name_width = max(tree->aln->getMaxSeqNameLength(),6)+10;
+
+        for (NodeVector::iterator it = nodes.begin(); it != nodes.end(); it++) {
+            PhyloNode *node = (PhyloNode*)(*it);
+            PhyloNode *dad = (PhyloNode*)node->neighbors[0]->node;
+            tree->computeMarginalAncestralProbability((PhyloNeighbor*)dad->findNeighbor(node), dad, marginal_ancestral_prob);
+
+            // only form the pointer when the joint buffer exists: arithmetic on
+            // a NULL pointer is undefined even if the result is never used
+            int *joint_ancestral_node = NULL;
+            if (joint_ancestral)
+                joint_ancestral_node = joint_ancestral + (int64_t)(node->id - tree->leafNum) * nptn;
+
+            // compute state with highest probability
+            for (i = 0; i < nptn; i++) {
+                double *prob = marginal_ancestral_prob + ((size_t)i * nstates);
+                int state_best = 0;
+                for (j = 1; j < nstates; j++)
+                    if (prob[j] > prob[state_best])
+                        state_best = j;
+                // too uncertain: do not report a state for this pattern
+                if (prob[state_best] < tree->params->min_ancestral_prob)
+                    state_best = STATE_INVALID;
+                marginal_ancestral_seq[i] = state_best;
+            }
+
+            // set node name if necessary
+            // (cast: isalpha() is undefined for negative char values)
+            if (node->name.empty() || !isalpha((unsigned char)node->name[0])) {
+                node->name = "Node" + convertIntToString(node->id-tree->leafNum+1);
+            }
+
+            // print ancestral state probabilities
+            for (i = 0; i < nsites; i++) {
+                int ptn = pattern_index[i];
+                out << node->name << "\t" << i+1 << "\t";
+                if (print_joint)
+                    out << tree->aln->convertStateBackStr(joint_ancestral_node[ptn]) << "\t";
+                out << tree->aln->convertStateBackStr(marginal_ancestral_seq[ptn]);
+                for (j = 0; j < nstates; j++) {
+                    out << "\t" << marginal_ancestral_prob[(size_t)ptn * nstates + j];
+                }
+                out << endl;
+            }
+
+            // print ancestral sequences
+            outseq.width(name_width);
+            outseq << left << (node->name+"_marginal") << " ";
+            for (i = 0; i < nsites; i++)
+                outseq << tree->aln->convertStateBackStr(marginal_ancestral_seq[pattern_index[i]]);
+            outseq << endl;
+
+            if (print_joint) {
+                outseq.width(name_width);
+                outseq << left << (node->name+"_joint") << " ";
+                for (i = 0; i < nsites; i++)
+                    outseq << tree->aln->convertStateBackStr(joint_ancestral_node[pattern_index[i]]);
+                outseq << endl;
+            }
+        }
+
+        out.close();
+        outseq.close();
+        cout << "Ancestral state probabilities printed to " << filename << endl;
+        cout << "Ancestral sequences printed to " << filenameseq << endl;
+
+    } catch (ios::failure &) {
+        // caught by reference so the exception object is not copied or sliced
+        outError(ERR_WRITE_OUTPUT, filename);
+    }
+
+    // delete[] on NULL is a no-op, so no guards are needed
+    delete[] marginal_ancestral_seq;
+    delete[] marginal_ancestral_prob;
+    delete[] joint_ancestral;
+
+}
+
void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl) {
if (wsl == WSL_NONE || wsl == WSL_SITE)
@@ -357,7 +524,7 @@ void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType ws
}
}
size_t cat, ncat = tree->getNumLhCat(wsl);
- double *ptn_prob_cat = new double[tree->getAlnNPattern()*ncat];
+ double *ptn_prob_cat = new double[((size_t)tree->getAlnNPattern())*ncat];
tree->computePatternProbabilityCategory(ptn_prob_cat, wsl);
try {
@@ -415,7 +582,7 @@ void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freq
if (state_freqs) {
ptn_state_freq = state_freqs;
} else {
- ptn_state_freq = new double[tree->getAlnNPattern() * nstates];
+ ptn_state_freq = new double[((size_t)tree->getAlnNPattern()) * nstates];
tree->computePatternStateFreq(ptn_state_freq);
}
@@ -650,8 +817,8 @@ int getModelList(Params ¶ms, Alignment *aln, StrVector &models, bool separat
}
}
- bool with_new = params.model_name.find("NEW") != string::npos;
- bool with_asc = params.model_name.find("ASC") != string::npos;
+ bool with_new = params.model_name.find("NEW") != string::npos;
+ bool with_asc = params.model_name.find("ASC") != string::npos;
// if (seq_type == SEQ_CODON) {
// for (i = 0; i < noptions; i++)
@@ -849,6 +1016,7 @@ void mergePartitions(PhyloSuperTree* super_tree, vector<IntVector> &gene_sets, S
part_info.push_back(info);
Alignment *aln = super_aln->concatenateAlignments(*it);
PhyloTree *tree = super_tree->extractSubtree(*it);
+ tree->setParams(super_tree->params);
tree->setAlignment(aln);
tree_vec.push_back(tree);
}
@@ -861,6 +1029,7 @@ void mergePartitions(PhyloSuperTree* super_tree, vector<IntVector> &gene_sets, S
delete super_tree->aln;
super_tree->aln = new SuperAlignment(super_tree);
+ super_tree->setAlignment(super_tree->aln);
}
void printModelFile(ostream &fmodel, Params ¶ms, PhyloTree *tree, ModelInfo &info, string &set_name) {
@@ -916,7 +1085,7 @@ void printModelFile(ostream &fmodel, Params ¶ms, PhyloTree *tree, ModelInfo
* @param model_info (IN/OUT) all model information
* @return total number of parameters
*/
-void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel, ModelsBlock *models_block ) {
+void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel, ModelsBlock *models_block, int num_threads) {
// params.print_partition_info = true;
// params.print_conaln = true;
int i = 0;
@@ -927,15 +1096,25 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInf
double lhsum = 0.0;
int dfsum = 0;
int ssize = in_tree->getAlnNSite();
- int num_model = 0;
- int total_num_model = in_tree->size();
+ int64_t num_model = 0;
+ int64_t total_num_model = in_tree->size();
if (params.model_name.find("LINK") != string::npos || params.model_name.find("MERGE") != string::npos) {
double p = params.partfinder_rcluster/100.0;
total_num_model += round(in_tree->size()*(in_tree->size()-1)*p/2);
for (i = in_tree->size()-2; i > 0; i--)
total_num_model += max(round(i*p), 1.0);
}
-
+
+
+#ifdef _OPENMP
+ if (num_threads <= 0) {
+ // partition selection scales well with many cores
+ num_threads = min((int64_t)countPhysicalCPUCores(), total_num_model);
+ omp_set_num_threads(num_threads);
+ cout << "NUMBER OF THREADS FOR PARTITION FINDING: " << num_threads << endl;
+ }
+#endif
+
double start_time = getRealTime();
cout << "Selecting individual models for " << in_tree->size() << " charsets using " << criterionName(params.model_test_criterion) << "..." << endl;
@@ -957,7 +1136,7 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInf
dist[i] = -((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states;
}
- if (params.num_threads > 1)
+ if (num_threads > 1)
{
quicksort(dist, 0, in_tree->size()-1, distID);
if (verbose_mode >= VB_MED) {
@@ -981,7 +1160,7 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInf
stringstream this_fmodel;
// do the computation
//#ifdef _OPENMP
- string model = testModel(params, this_tree, part_model_info, this_fmodel, models_block, in_tree->part_info[i].name);
+ string model = testModel(params, this_tree, part_model_info, this_fmodel, models_block, 1, in_tree->part_info[i].name);
//#else
// string model = testModel(params, this_tree, part_model_info, fmodel, in_tree->part_info[i].name);
//#endif
@@ -1078,7 +1257,7 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInf
this_aln = in_tree->at(distID[i] & ((1<<16)-1))->aln;
dist[i] -= ((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states;
}
- if (params.num_threads > 1 && num_pairs >= 1)
+ if (num_threads > 1 && num_pairs >= 1)
quicksort(dist, 0, num_pairs-1, distID);
#ifdef _OPENMP
@@ -1127,7 +1306,7 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInf
tree->setCheckpoint(new Checkpoint());
}
//#ifdef _OPENMP
- model = testModel(params, tree, part_model_info, this_fmodel, models_block, set_name);
+ model = testModel(params, tree, part_model_info, this_fmodel, models_block, 1, set_name);
//#else
// model = testModel(params, tree, part_model_info, fmodel, set_name);
//#endif
@@ -1160,7 +1339,7 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInf
cout.width(11);
cout << score << " " << set_name;
if (num_model >= 10) {
- double remain_time = max(total_num_model-num_model, 0)*(getRealTime()-start_time)/num_model;
+ double remain_time = max(total_num_model-num_model, (int64_t)0)*(getRealTime()-start_time)/num_model;
cout << "\t" << convert_time(getRealTime()-start_time) << " ("
<< convert_time(remain_time) << " left)";
}
@@ -1233,7 +1412,8 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, vector<ModelInf
delete [] distID;
delete [] dist;
- mergePartitions(in_tree, gene_sets, model_names);
+ if (gene_sets.size() < in_tree->size())
+ mergePartitions(in_tree, gene_sets, model_names);
in_tree->printBestPartition((string(params.out_prefix) + ".best_scheme.nex").c_str());
in_tree->printBestPartitionRaxml((string(params.out_prefix) + ".best_scheme").c_str());
}
@@ -1253,7 +1433,7 @@ bool isMixtureModel(ModelsBlock *models_block, string &model_str) {
}
string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel, ModelsBlock *models_block,
- string set_name, bool print_mem_usage)
+ int num_threads, string set_name, bool print_mem_usage)
{
SeqType seq_type = in_tree->aln->seq_type;
if (in_tree->isSuperTree())
@@ -1283,14 +1463,13 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in
#endif
}
-
string best_model = "";
/* first check the model file */
if (in_tree->isSuperTree()) {
// select model for each partition
PhyloSuperTree *stree = (PhyloSuperTree*)in_tree;
- testPartitionModel(params, stree, model_info, fmodel, models_block);
+ testPartitionModel(params, stree, model_info, fmodel, models_block, num_threads);
// stree->linkTrees();
stree->mapTrees();
string res_models = "";
@@ -1302,26 +1481,25 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in
}
in_tree->optimize_by_newton = params.optimize_by_newton;
- in_tree->setLikelihoodKernel(params.SSE);
+ in_tree->setLikelihoodKernel(params.SSE, num_threads);
// int num_rate_classes = 3 + params.max_rate_cats;
RateHeterogeneity ** rate_class = new RateHeterogeneity*[4];
rate_class[0] = new RateHeterogeneity();
- rate_class[1] = new RateInvar(-1, NULL);
- rate_class[2] = new RateGamma(params.num_rate_cats, params.gamma_shape, params.gamma_median, NULL);
- rate_class[3] = new RateGammaInvar(params.num_rate_cats, params.gamma_shape, params.gamma_median, -1, params.optimize_alg_gammai, NULL, false);
+ rate_class[1] = new RateInvar(params.p_invar_sites, in_tree);
+ rate_class[2] = new RateGamma(params.num_rate_cats, params.gamma_shape, params.gamma_median, in_tree);
+ rate_class[3] = new RateGammaInvar(params.num_rate_cats, params.gamma_shape, params.gamma_median, -1, params.optimize_alg_gammai, in_tree, false);
RateFree ** rate_class_free = new RateFree*[params.max_rate_cats-1];
for (model = 0; model < params.max_rate_cats-1; model++)
- rate_class_free[model] = new RateFree(model+2, params.gamma_shape, "", false, params.optimize_alg, NULL);
+ rate_class_free[model] = new RateFree(model+2, params.gamma_shape, "", false, params.optimize_alg, in_tree);
RateFreeInvar ** rate_class_freeinvar = new RateFreeInvar*[params.max_rate_cats-1];
for (model = 0; model < params.max_rate_cats-1; model++) {
- rate_class_freeinvar[model] = new RateFreeInvar(model+2, params.gamma_shape, "", false, in_tree->aln->frac_const_sites/2.0, params.optimize_alg, NULL);
- rate_class_freeinvar[model]->setFixPInvar(false);
+ rate_class_freeinvar[model] = new RateFreeInvar(model+2, params.gamma_shape, "", false, params.p_invar_sites, params.optimize_alg, in_tree);
}
@@ -1415,7 +1593,7 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in
}
} else {
// kernel might be changed if mixture model was tested
- in_tree->setLikelihoodKernel(params.SSE);
+ in_tree->setLikelihoodKernel(params.SSE, num_threads);
// normal model
if (model_names[model].find("+ASC") != string::npos) {
model_fac->unobserved_ptns = in_tree->aln->getUnobservedConstPatterns();
@@ -1506,6 +1684,13 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in
tree->clearAllPartialLH();
+#ifdef _OPENMP
+ if (num_threads <= 0) {
+ num_threads = tree->testNumThreads();
+ omp_set_num_threads(num_threads);
+ }
+#endif
+
// optimize model parameters
ModelInfo info;
@@ -1560,6 +1745,13 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in
// set checkpoint
iqtree->setCheckpoint(in_tree->getCheckpoint());
iqtree->num_precision = in_tree->num_precision;
+
+ // clear all checkpointed information
+ Checkpoint *newCheckpoint = new Checkpoint;
+ iqtree->getCheckpoint()->getSubCheckpoint(newCheckpoint, "iqtree");
+ iqtree->getCheckpoint()->clear();
+ iqtree->getCheckpoint()->insert(newCheckpoint->begin(), newCheckpoint->end());
+ delete newCheckpoint;
cout << endl << "===> Testing model " << model+1 << ": " << params.model_name << endl;
runTreeReconstruction(params, original_model, *iqtree, model_info);
@@ -1571,7 +1763,7 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in
tree = iqtree;
// clear all checkpointed information
- Checkpoint *newCheckpoint = new Checkpoint;
+ newCheckpoint = new Checkpoint;
tree->getCheckpoint()->getSubCheckpoint(newCheckpoint, "iqtree");
tree->getCheckpoint()->clear();
tree->getCheckpoint()->insert(newCheckpoint->begin(), newCheckpoint->end());
@@ -1594,6 +1786,8 @@ string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_in
tree->fixNegativeBranch(true);
tree->clearAllPartialLH();
}
+ if (verbose_mode >= VB_MED)
+ cout << "Optimizing model " << info.name << endl;
info.logl = tree->getModelFactory()->optimizeParameters(false, false, TOL_LIKELIHOOD_MODELTEST, TOL_GRADIENT_MODELTEST);
info.tree_len = tree->treeLength();
if (prev_model_id >= 0) {
@@ -2259,14 +2453,7 @@ void performAUTest(Params ¶ms, PhyloTree *tree, double *pattern_lhs, vector<
for (ptn = 0; ptn < nptn; ptn++)
tree_lh += pattern_lh[ptn] * boot_sample_dbl[ptn];
} else {
-#ifdef BINARY32
- tree_lh = tree->dotProductSIMD<double, Vec2d, 2>(pattern_lh, boot_sample_dbl, nptn);
-#else
- if (instruction_set >= 7)
- tree_lh = tree->dotProductSIMD<double, Vec4d, 4>(pattern_lh, boot_sample_dbl, nptn);
- else
- tree_lh = tree->dotProductSIMD<double, Vec2d, 2>(pattern_lh, boot_sample_dbl, nptn);
-#endif
+ tree_lh = tree->dotProductDoubleCall(pattern_lh, boot_sample_dbl, nptn);
}
// rescale lh
tree_lh /= r[k];
@@ -2468,7 +2655,7 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
cout << endl;
//MTreeSet trees(params.treeset_file, params.is_rooted, params.tree_burnin, params.tree_max_count);
cout << "Reading trees in " << params.treeset_file << " ..." << endl;
- int ntrees = countDistinctTrees(params.treeset_file, params.is_rooted, tree, distinct_ids, params.distinct_trees);
+ size_t ntrees = countDistinctTrees(params.treeset_file, params.is_rooted, tree, distinct_ids, params.distinct_trees);
if (ntrees < distinct_ids.size()) {
cout << "WARNING: " << distinct_ids.size() << " trees detected but only " << ntrees << " distinct trees will be evaluated" << endl;
} else {
@@ -2498,10 +2685,22 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
site_lh_out.close();
}
+ if (params.print_partition_lh && !tree->isSuperTree()) {
+ outWarning("-wpl does not work with non-partition model");
+ params.print_partition_lh = false;
+ }
+ string part_lh_file = params.out_prefix;
+ part_lh_file += ".partlh";
+ if (params.print_partition_lh) {
+ ofstream part_lh_out(part_lh_file.c_str());
+ part_lh_out << ntrees << " " << ((PhyloSuperTree*)tree)->size() << endl;
+ part_lh_out.close();
+ }
+
double time_start = getRealTime();
int *boot_samples = NULL;
- int boot;
+ size_t boot;
//double *saved_tree_lhs = NULL;
double *tree_lhs = NULL; // RELL score matrix of size #trees x #replicates
double *pattern_lh = NULL;
@@ -2509,8 +2708,8 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
double *orig_tree_lh = NULL; // Original tree log-likelihoods
double *max_lh = NULL;
double *lhdiff_weights = NULL;
- int nptn = tree->getAlnNPattern();
- int maxnptn = get_safe_upper_limit(nptn);
+ size_t nptn = tree->getAlnNPattern();
+ size_t maxnptn = get_safe_upper_limit(nptn);
if (params.topotest_replicates && ntrees > 1) {
size_t mem_size = (size_t)params.topotest_replicates*nptn*sizeof(int) +
@@ -2524,8 +2723,22 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
cout << "Creating " << params.topotest_replicates << " bootstrap replicates..." << endl;
if (!(boot_samples = new int [params.topotest_replicates*nptn]))
outError(ERR_NO_MEMORY);
+#ifdef _OPENMP
+ #pragma omp parallel private(boot) if(nptn > 10000)
+ {
+ int *rstream;
+ init_random(params.ran_seed + omp_get_thread_num(), false, &rstream);
+ #pragma omp for schedule(static)
+#else
+ int *rstream = randstream;
+#endif
for (boot = 0; boot < params.topotest_replicates; boot++)
- tree->aln->createBootstrapAlignment(boot_samples + (boot*nptn), params.bootstrap_spec);
+ tree->aln->createBootstrapAlignment(boot_samples + (boot*nptn), params.bootstrap_spec, rstream);
+#ifdef _OPENMP
+ finish_random(rstream);
+ }
+#endif
+ cout << "done" << endl;
//if (!(saved_tree_lhs = new double [ntrees * params.topotest_replicates]))
// outError(ERR_NO_MEMORY);
if (!(tree_lhs = new double [ntrees * params.topotest_replicates]))
@@ -2566,13 +2779,6 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
tree->setRootNode(params.root);
if (tree->isSuperTree())
((PhyloSuperTree*) tree)->mapTrees();
-// if ((tree->sse == LK_EIGEN || tree->sse == LK_EIGEN_SSE) && !tree->isBifurcating()) {
-// cout << "NOTE: Changing to old kernel as user tree is multifurcating" << endl;
-// if (tree->sse == LK_EIGEN)
-// tree->changeLikelihoodKernel(LK_NORMAL);
-// else
-// tree->changeLikelihoodKernel(LK_SSE);
-// }
tree->initializeAllPartialLh();
tree->fixNegativeBranch(false);
@@ -2600,6 +2806,10 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
string tree_name = "Tree" + convertIntToString(tree_index+1);
printSiteLh(site_lh_file.c_str(), tree, pattern_lh, true, tree_name.c_str());
}
+ if (params.print_partition_lh) {
+ string tree_name = "Tree" + convertIntToString(tree_index+1);
+ printPartitionLh(part_lh_file.c_str(), tree, pattern_lh, true, tree_name.c_str());
+ }
info[tid].logl = tree->getCurScore();
if (!params.topotest_replicates || ntrees <= 1) {
@@ -2612,7 +2822,7 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
for (boot = 0; boot < params.topotest_replicates; boot++) {
double lh = 0.0;
int *this_boot_sample = boot_samples + (boot*nptn);
- for (int ptn = 0; ptn < nptn; ptn++)
+ for (size_t ptn = 0; ptn < nptn; ptn++)
lh += pattern_lh[ptn] * this_boot_sample[ptn];
tree_lhs_offset[boot] = lh;
}
@@ -2693,9 +2903,9 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
}
double orig_max_lh = orig_tree_lh[0];
- int orig_max_id = 0;
+ size_t orig_max_id = 0;
double orig_2ndmax_lh = -DBL_MAX;
- int orig_2ndmax_id = -1;
+ size_t orig_2ndmax_id = -1;
// find the max tree ID
for (tid = 1; tid < ntrees; tid++)
if (orig_max_lh < orig_tree_lh[tid]) {
@@ -2716,7 +2926,7 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
// SH compute original deviation from max_lh
info[tid].kh_pvalue = 0.0;
info[tid].sh_pvalue = 0.0;
- int max_id = (tid != orig_max_id) ? orig_max_id : orig_2ndmax_id;
+ size_t max_id = (tid != orig_max_id) ? orig_max_id : orig_2ndmax_id;
double orig_diff = orig_tree_lh[max_id] - orig_tree_lh[tid] - avg_lh[tid];
double *max_kh = tree_lhs + (max_id * params.topotest_replicates);
for (boot = 0; boot < params.topotest_replicates; boot++) {
@@ -2752,7 +2962,7 @@ void evaluateTrees(Params ¶ms, IQTree *tree, vector<TreeInfo> &info, IntVect
info[tid].wkh_pvalue = 0.0;
info[tid].wsh_pvalue = 0.0;
double worig_diff = -DBL_MAX;
- int max_id = -1;
+ size_t max_id = -1;
for (tid2 = 0; tid2 < ntrees; tid2++)
if (tid2 != tid) {
double wdiff = (orig_tree_lh[tid2] - orig_tree_lh[tid])*lhdiff_weights[tid*ntrees+tid2];
diff --git a/phylotesting.h b/phylotesting.h
index 657a06d..63f9889 100644
--- a/phylotesting.h
+++ b/phylotesting.h
@@ -69,7 +69,7 @@ bool checkModelFile(string model_file, bool is_partitioned, vector<ModelInfo> &i
@return name of best-fit-model
*/
string testModel(Params ¶ms, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel,
- ModelsBlock *models_block, string set_name = "", bool print_mem_usage = false);
+ ModelsBlock *models_block, int num_threads, string set_name = "", bool print_mem_usage = false);
/**
* print site log likelihoods to a fileExists
@@ -83,6 +83,17 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh = NULL,
bool append = false, const char *linename = NULL);
/**
+ * print partition log likelihoods to a file
+ * @param filename output file name
+ * @param tree phylogenetic tree
+ * @param ptn_lh pattern log-likelihoods, will be computed if NULL
+ * @param append TRUE to append to existing file, FALSE otherwise
+ * @param linename name of the line, default "Site_Lh" if NULL
+ */
+void printPartitionLh(const char*filename, PhyloTree *tree, double *ptn_lh = NULL,
+ bool append = false, const char *linename = NULL);
+
+/**
* print site log likelihoods per category to a file
* @param filename output file name
* @param tree phylogenetic tree
@@ -104,6 +115,14 @@ void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType ws
void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freqs = NULL);
/**
+ print ancestral sequences
+ @param filename output file name
+ @param tree phylogenetic tree
+ @param ast either AST_MARGINAL or AST_JOINT
+*/
+void printAncestralSequences(const char*filename, PhyloTree *tree, AncestralSeqType ast);
+
+/**
* Evaluate user-trees with possibility of tree topology tests
* @param params program parameters
* @param tree current tree
diff --git a/phylotree.cpp b/phylotree.cpp
index b9143ce..5cd4c55 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -1,15 +1,24 @@
-//
-// C++ Implementation: phylotree
-//
-// Description:
-//
-//
-// Author: BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui at univie.ac.at>, (C) 2008
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-
+/***************************************************************************
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
#include "phylotree.h"
#include "bionj.h"
//#include "rateheterogeneity.h"
@@ -21,8 +30,11 @@
#include "phylosupertree.h"
#include "phylosupertreeplen.h"
#include "upperbounds.h"
+#include "MPIHelper.h"
#include "model/modelmixture.h"
+const int LH_MIN_CONST = 1;
+
//const static int BINARY_SCALE = floor(log2(1/SCALING_THRESHOLD));
//const static double LOG_BINARY_SCALE = -(log(2) * BINARY_SCALE);
@@ -70,26 +82,24 @@ void PhyloTree::init() {
nni_scale_num = NULL;
central_partial_pars = NULL;
model_factory = NULL;
-// tmp_partial_lh1 = NULL;
-// tmp_partial_lh2 = NULL;
-// tmp_anscentral_state_prob1 = NULL;
-// tmp_anscentral_state_prob2 = NULL;
- //tmp_ptn_rates = NULL;
- //state_freqs = NULL;
-// tmp_scale_num1 = NULL;
-// tmp_scale_num2 = NULL;
discard_saturated_site = true;
_pattern_lh = NULL;
_pattern_lh_cat = NULL;
//root_state = STATE_UNKNOWN;
root_state = 126;
theta_all = NULL;
+ buffer_scale_all = NULL;
+ buffer_partial_lh = NULL;
ptn_freq = NULL;
ptn_invar = NULL;
subTreeDistComputed = false;
dist_matrix = NULL;
var_matrix = NULL;
- setLikelihoodKernel(LK_EIGEN_SSE); // FOR TUNG: you forgot to initialize this variable!
+ params = NULL;
+ setLikelihoodKernel(LK_EIGEN_SSE, 1); // FOR TUNG: you forgot to initialize this variable!
+ sse = LK_EIGEN_SSE;
+ num_threads = 0;
+ max_lh_slots = 0;
save_all_trees = 0;
nodeBranchDists = NULL;
// FOR: upper bounds
@@ -110,6 +120,7 @@ void PhyloTree::init() {
current_scaling = 1.0;
is_opt_scaling = false;
num_partial_lh_computations = 0;
+ vector_size = 0;
}
PhyloTree::PhyloTree(Alignment *aln) : MTree(), CheckpointFactory() {
@@ -117,6 +128,15 @@ PhyloTree::PhyloTree(Alignment *aln) : MTree(), CheckpointFactory() {
this->aln = aln;
}
+// Build a tree by reading treeString (isRooted is forwarded to readTree()) and attach the alignment aln.
+PhyloTree::PhyloTree(string& treeString, Alignment* aln, bool isRooted) : MTree() {
+    init(); // must run first: it NULLs the buffer pointers that ~PhyloTree() frees whenever they are non-NULL
+    stringstream str(treeString); // same stream set-up as readTreeString(); reading starts at position 0
+    freeNode();
+    readTree(str, isRooted);
+    setAlignment(aln);
+}
+
void PhyloTree::saveCheckpoint() {
checkpoint->startStruct("PhyloTree");
StrVector leafNames;
@@ -191,20 +211,6 @@ PhyloTree::~PhyloTree() {
if (site_rate)
delete site_rate;
site_rate = NULL;
-// if (tmp_scale_num1)
-// delete[] tmp_scale_num1;
-// if (tmp_scale_num2)
-// delete[] tmp_scale_num2;
-// if (tmp_partial_lh1)
-// delete[] tmp_partial_lh1;
-// if (tmp_partial_lh2)
-// delete[] tmp_partial_lh2;
-// if (tmp_anscentral_state_prob1)
-// delete[] tmp_anscentral_state_prob1;
-// if (tmp_anscentral_state_prob2)
-// delete[] tmp_anscentral_state_prob2;
- //if (tmp_ptn_rates)
- // delete [] tmp_ptn_rates;
if (_pattern_lh_cat)
aligned_free(_pattern_lh_cat);
_pattern_lh_cat = NULL;
@@ -216,6 +222,12 @@ PhyloTree::~PhyloTree() {
if (theta_all)
aligned_free(theta_all);
theta_all = NULL;
+ if (buffer_scale_all)
+ aligned_free(buffer_scale_all);
+ buffer_scale_all = NULL;
+ if (buffer_partial_lh)
+ aligned_free(buffer_partial_lh);
+ buffer_partial_lh = NULL;
if (ptn_freq)
aligned_free(ptn_freq);
ptn_freq = NULL;
@@ -330,7 +342,10 @@ void PhyloTree::setAlignment(Alignment *alignment) {
node->id = seq;
}
}
- if (err) outError("Tree taxa and alignment sequence do not match (see above)");
+ if (err) {
+ printTree(cout, WT_NEWLINE);
+ outError("Tree taxa and alignment sequence do not match (see above)");
+ }
StrVector taxname;
getTaxaName(taxname);
for (StrVector::iterator it = taxname.begin(); it != taxname.end(); it++)
@@ -351,31 +366,30 @@ void PhyloTree::setRootNode(const char *my_root) {
assert(root);
}
-void PhyloTree::setParams(Params* params) {
- this->params = params;
-}
+//void PhyloTree::setParams(Params* params) {
+// this->params = params;
+//}
void PhyloTree::readTreeString(const string &tree_string) {
stringstream str(tree_string);
-// str(tree_string);
-// str.seekg(0, ios::beg);
freeNode();
// bug fix 2016-04-14: in case taxon name happens to be ID
MTree::readTree(str, rooted);
assignLeafNames();
-// setAlignment(aln);
- setRootNode(params->root);
+ setRootNode(Params::getInstance().root);
if (isSuperTree()) {
((PhyloSuperTree*) this)->mapTrees();
}
- if (params->pll) {
+ if (Params::getInstance().pll) {
pllReadNewick(getTreeString());
}
resetCurScore();
-// lhComputed = false;
+ if (Params::getInstance().fixStableSplits || Params::getInstance().adaptPertubation) {
+ buildNodeSplit();
+ }
}
void PhyloTree::readTreeStringSeqName(const string &tree_string) {
@@ -396,6 +410,9 @@ void PhyloTree::readTreeStringSeqName(const string &tree_string) {
}
resetCurScore();
// lhComputed = false;
+ if (params->fixStableSplits) {
+ buildNodeSplit();
+ }
}
int PhyloTree::wrapperFixNegativeBranch(bool force_change) {
@@ -440,11 +457,16 @@ string PhyloTree::getTreeString() {
return tree_stream.str();
}
-string PhyloTree::getTopology() {
+string PhyloTree::getTopologyString(bool printBranchLength) {
stringstream tree_stream;
// important: to make topology string unique
setRootNode(params->root);
- printTree(tree_stream, WT_TAXON_ID + WT_SORT_TAXA);
+ //printTree(tree_stream, WT_TAXON_ID + WT_SORT_TAXA);
+ if (printBranchLength) {
+ printTree(tree_stream, WT_SORT_TAXA + WT_BR_LEN + WT_TAXON_ID);
+ } else {
+ printTree(tree_stream, WT_SORT_TAXA);
+ }
return tree_stream.str();
}
@@ -466,7 +488,7 @@ void PhyloTree::setModel(ModelSubst *amodel) {
void PhyloTree::setModelFactory(ModelFactory *model_fac) {
model_factory = model_fac;
if (model_factory && (model_factory->model->isMixture() || model_factory->model->isSiteSpecificModel()))
- setLikelihoodKernel(sse);
+ setLikelihoodKernel(sse, num_threads);
}
void PhyloTree::setRate(RateHeterogeneity *rate) {
@@ -501,6 +523,7 @@ void PhyloTree::clearAllPartialLH(bool make_null) {
current_it = current_it_back = NULL;
}
+/*
void PhyloTree::computeAllPartialLh(PhyloNode *node, PhyloNode *dad) {
if (!node) node = (PhyloNode*)root;
FOR_NEIGHBOR_IT(node, dad, it) {
@@ -512,6 +535,7 @@ void PhyloTree::computeAllPartialLh(PhyloNode *node, PhyloNode *dad) {
computeAllPartialLh((PhyloNode*)(*it)->node, node);
}
}
+*/
string PhyloTree::getModelName() {
string name = model->getName();
@@ -625,7 +649,7 @@ void PhyloTree::initializeAllPartialPars(int &index, PhyloNode *node, PhyloNode
size_t PhyloTree::getBitsBlockSize() {
// reserve the last entry for parsimony score
// return (aln->num_states * aln->size() + UINT_BITS - 1) / UINT_BITS + 1;
- size_t len = aln->num_states * ((max(aln->size(), (size_t)aln->num_informative_sites) + SIMD_BITS - 1) / UINT_BITS) + 4;
+ size_t len = aln->getMaxNumStates() * ((max(aln->size(), (size_t)aln->num_informative_sites) + SIMD_BITS - 1) / UINT_BITS) + 4;
len = ((len+7)/8)*8;
return len;
}
@@ -664,19 +688,23 @@ int PhyloTree::computeParsimony() {
likelihood function
****************************************************************************/
+// Number of double entries allocated for buffer_partial_lh (see initializeAllPartialLh()).
+size_t PhyloTree::getBufferPartialLhSize() {
+    const size_t VECTOR_SIZE = 8; // TODO, adjusted
+    const size_t nstates = model->num_states, nseq = aln->getNSeq();
+    const size_t block = nstates * (site_rate->getNRate() * ((model_factory->fused_mix_rate) ? 1 : model->getNMixtures()));
+    size_t total = get_safe_upper_limit(block * nstates * 2 * nseq) + get_safe_upper_limit(block * (nseq+1) * (aln->STATE_UNKNOWN+1));
+    total += (block*2 + nstates) * VECTOR_SIZE * num_threads; // share that scales with num_threads
+    return total;
+}
+
void PhyloTree::initializeAllPartialLh() {
int index, indexlh;
int numStates = model->num_states;
// Minh's question: why getAlnNSite() but not getAlnNPattern() ?
//size_t mem_size = ((getAlnNSite() % 2) == 0) ? getAlnNSite() : (getAlnNSite() + 1);
- size_t nptn = getAlnNPattern() + numStates; // extra #numStates for ascertainment bias correction
-
- size_t mem_size;
- if (instruction_set >= 7)
- mem_size = ((nptn +3)/4)*4;
- else
- mem_size = ((nptn % 2) == 0) ? nptn : (nptn + 1);
-
+ // extra #numStates for ascertainment bias correction
+ size_t mem_size = get_safe_upper_limit(getAlnNPattern()) + get_safe_upper_limit(numStates);
size_t block_size = mem_size * numStates * site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures());
// make sure _pattern_lh size is divisible by 4 (e.g., 9->12, 14->16)
if (!_pattern_lh)
@@ -685,32 +713,26 @@ void PhyloTree::initializeAllPartialLh() {
_pattern_lh_cat = aligned_alloc<double>(mem_size * site_rate->getNDiscreteRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures()));
if (!theta_all)
theta_all = aligned_alloc<double>(block_size);
+ if (!buffer_scale_all)
+ buffer_scale_all = aligned_alloc<double>(mem_size);
+ if (!buffer_partial_lh) {
+ buffer_partial_lh = aligned_alloc<double>(getBufferPartialLhSize());
+ }
if (!ptn_freq) {
ptn_freq = aligned_alloc<double>(mem_size);
ptn_freq_computed = false;
}
if (!ptn_invar)
ptn_invar = aligned_alloc<double>(mem_size);
- bool benchmark_mem = (!central_partial_lh && verbose_mode >= VB_MED);
- if (benchmark_mem) {
- cout << "Measuring run time for allocating " << getMemoryRequired() << " bytes RAM" << endl;
- }
- double cpu_start_time = getCPUTime();
- double wall_start_time = getRealTime();
initializeAllPartialLh(index, indexlh);
- if (benchmark_mem) {
- cout << "CPU time for initializeAllPartialLh: " << getCPUTime() - cpu_start_time << " sec" << endl;
- cout << "Wall-clock time for initializeAllPartialLh: " << getRealTime() - wall_start_time << " sec" << endl;
- }
+ if (params->lh_mem_save == LM_MEM_SAVE)
+ mem_slots.init(this, max_lh_slots);
+
assert(index == (nodeNum - 1) * 2);
- if (sse == LK_EIGEN || sse == LK_EIGEN_SSE) {
- if (params->lh_mem_save == LM_PER_NODE) {
- assert(indexlh == nodeNum-leafNum);
- } else {
- assert(indexlh == (nodeNum-1)*2-leafNum);
- }
- } else
- assert(indexlh == (nodeNum-1)*2);
+ if (params->lh_mem_save == LM_PER_NODE) {
+ assert(indexlh == nodeNum-leafNum);
+ }
+
clearAllPartialLH();
}
@@ -739,7 +761,10 @@ void PhyloTree::deleteAllPartialLh() {
aligned_free(ptn_freq);
if (theta_all)
aligned_free(theta_all);
-
+ if (buffer_scale_all)
+ aligned_free(buffer_scale_all);
+ if (buffer_partial_lh)
+ aligned_free(buffer_partial_lh);
if (_pattern_lh_cat)
aligned_free(_pattern_lh_cat);
if (_pattern_lh)
@@ -752,6 +777,8 @@ void PhyloTree::deleteAllPartialLh() {
ptn_freq = NULL;
ptn_freq_computed = false;
theta_all = NULL;
+ buffer_scale_all = NULL;
+ buffer_partial_lh = NULL;
_pattern_lh_cat = NULL;
_pattern_lh = NULL;
@@ -760,103 +787,106 @@ void PhyloTree::deleteAllPartialLh() {
clearAllPartialLH();
}
-uint64_t PhyloTree::getMemoryRequired(size_t ncategory) {
- size_t nptn = aln->getNPattern() + aln->num_states; // +num_states for ascertainment bias correction
- uint64_t block_size;
- if (instruction_set >= 7)
- // block size must be divisible by 4
- block_size = ((nptn+3)/4)*4;
- else
- // block size must be divisible by 2
- block_size = ((nptn % 2) == 0) ? nptn : (nptn + 1);
- block_size = block_size * aln->num_states;
+uint64_t PhyloTree::getMemoryRequired(size_t ncategory, bool full_mem) {
+ // +num_states for ascertainment bias correction
+ int64_t nptn = get_safe_upper_limit(aln->getNPattern()) + get_safe_upper_limit(aln->num_states);
+ int64_t scale_block_size = nptn;
if (site_rate)
- block_size *= site_rate->getNRate();
+ scale_block_size *= site_rate->getNRate();
else
- block_size *= ncategory;
+ scale_block_size *= ncategory;
if (model && !model_factory->fused_mix_rate)
- block_size *= model->getNMixtures();
- uint64_t mem_size = ((uint64_t) leafNum*4) * block_size *sizeof(double) + 2 + (leafNum) * 4 * nptn * sizeof(UBYTE);
- if (params->SSE == LK_EIGEN || params->SSE == LK_EIGEN_SSE) {
- mem_size -= ((uint64_t)leafNum) * ((uint64_t)block_size*sizeof(double) + nptn * sizeof(UBYTE));
- if (params->lh_mem_save == LM_PER_NODE) {
- mem_size -= ((uint64_t)leafNum*2 - 4) * ((uint64_t)block_size*sizeof(double) + nptn * sizeof(UBYTE));
- }
- }
- uint64_t tip_partial_lh_size;
+ scale_block_size *= model->getNMixtures();
+
+ int64_t block_size = scale_block_size * aln->num_states;
+
+ int64_t mem_size;
+ // memory for tip_partial_lh
if (model)
- tip_partial_lh_size = aln->num_states * (aln->STATE_UNKNOWN+1) * model->getNMixtures() * sizeof(double);
+ mem_size = aln->num_states * (aln->STATE_UNKNOWN+1) * model->getNMixtures() * sizeof(double);
else
- tip_partial_lh_size = aln->num_states * (aln->STATE_UNKNOWN+1) * sizeof(double);
- mem_size += tip_partial_lh_size;
+ mem_size = aln->num_states * (aln->STATE_UNKNOWN+1) * sizeof(double);
+
+ // memory for UFBoot
if (params->gbo_replicates)
mem_size += params->gbo_replicates*nptn*sizeof(BootValType);
+
+ // memory for model
if (model)
mem_size += model->getMemoryRequired();
+
+ int64_t lh_scale_size = block_size * sizeof(double) + scale_block_size * sizeof(UBYTE);
+
+ max_lh_slots = leafNum-2;
+
+ if (!full_mem && params->lh_mem_save == LM_MEM_SAVE) {
+ int64_t min_lh_slots = log2(leafNum)+LH_MIN_CONST;
+ if (params->max_mem_size == 0.0) {
+ max_lh_slots = min_lh_slots;
+ } else if (params->max_mem_size <= 1) {
+ max_lh_slots = floor(params->max_mem_size*(leafNum-2));
+ } else {
+ int64_t rest_mem = params->max_mem_size - mem_size;
+
+ // include 2 blocks for nni_partial_lh
+ max_lh_slots = rest_mem / lh_scale_size - 2;
+
+ // more RAM than required: cap at leafNum-2 slots, as for LM_PER_NODE
+ if (max_lh_slots > leafNum-2)
+ max_lh_slots = leafNum-2;
+ }
+ if (max_lh_slots < min_lh_slots) {
+ cout << "WARNING: Too low -mem, automatically increased to " << (mem_size + (min_lh_slots+2)*lh_scale_size)/1048576.0 << " MB" << endl;
+ max_lh_slots = min_lh_slots;
+ }
+ }
+
+ // also count MEM for nni_partial_lh
+ mem_size += (max_lh_slots+2) * lh_scale_size;
+
+
return mem_size;
}
void PhyloTree::getMemoryRequired(uint64_t &partial_lh_entries, uint64_t &scale_num_entries, uint64_t &partial_pars_entries) {
- size_t nptn = aln->getNPattern() + aln->num_states; // +num_states for ascertainment bias correction
- uint64_t block_size;
- if (instruction_set >= 7)
- // block size must be divisible by 4
- block_size = ((nptn+3)/4)*4;
- else
- // block size must be divisible by 2
- block_size = ((nptn % 2) == 0) ? nptn : (nptn + 1);
+ // +num_states for ascertainment bias correction
+ uint64_t block_size = get_safe_upper_limit(aln->getNPattern()) + get_safe_upper_limit(aln->num_states);
+ size_t scale_size = block_size;
block_size = block_size * aln->num_states;
- if (site_rate)
+ if (site_rate) {
block_size *= site_rate->getNRate();
- if (model && !model_factory->fused_mix_rate)
+ scale_size *= site_rate->getNRate();
+ }
+ if (model && !model_factory->fused_mix_rate) {
block_size *= model->getNMixtures();
+ scale_size *= model->getNMixtures();
+ }
uint64_t tip_partial_lh_size = aln->num_states * (aln->STATE_UNKNOWN+1) * model->getNMixtures();
- if (sse == LK_EIGEN || sse == LK_EIGEN_SSE) {
- if (params->lh_mem_save == LM_PER_NODE)
- partial_lh_entries = ((uint64_t)leafNum - 2) * (uint64_t) block_size + 4 + tip_partial_lh_size;
- else
- partial_lh_entries = ((uint64_t)leafNum * 3 - 6) * (uint64_t) block_size + 4 + tip_partial_lh_size;
- } else
- partial_lh_entries = ((uint64_t)leafNum * 4 - 6) * (uint64_t) block_size + 4 + tip_partial_lh_size;
-
- if (sse == LK_EIGEN || sse == LK_EIGEN_SSE) {
- if (params->lh_mem_save == LM_PER_NODE)
- scale_num_entries = (leafNum - 2) * nptn;
- else
- scale_num_entries = (leafNum*3 - 4) * nptn;
- } else
- scale_num_entries = (leafNum*4 - 4) * nptn;
+ // TODO mem save
+ partial_lh_entries = ((uint64_t)leafNum - 2) * (uint64_t) block_size + 4 + tip_partial_lh_size;
+ scale_num_entries = (leafNum - 2) * scale_size;
size_t pars_block_size = getBitsBlockSize();
partial_pars_entries = (leafNum - 1) * 4 * pars_block_size;
}
void PhyloTree::initializeAllPartialLh(int &index, int &indexlh, PhyloNode *node, PhyloNode *dad) {
- size_t pars_block_size = getBitsBlockSize();
- size_t nptn = aln->size()+aln->num_states; // +num_states for ascertainment bias correction
- size_t block_size;
- if (instruction_set >= 7)
- // block size must be divisible by 4
- nptn = ((nptn+3)/4)*4;
- else
- // block size must be divisible by 2
- nptn = ((nptn % 2) == 0) ? nptn : (nptn + 1);
+ uint64_t pars_block_size = getBitsBlockSize();
+ // +num_states for ascertainment bias correction
+ size_t nptn = get_safe_upper_limit(aln->size())+ get_safe_upper_limit(aln->num_states);
+ uint64_t block_size;
+ uint64_t scale_block_size = nptn * site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures());
+ block_size = scale_block_size * model->num_states;
- size_t scale_block_size = nptn;
-// size_t tip_block_size = nptn * model->num_states;
-
- block_size = nptn * model->num_states * site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures());
if (!node) {
node = (PhyloNode*) root;
// allocate the big central partial likelihoods memory
+// size_t IT_NUM = (params->nni5) ? 6 : 2;
+ size_t IT_NUM = 2;
if (!nni_partial_lh) {
// allocate memory only once!
-// intptr_t MEM_ALIGNMENT = (instruction_set >= 7) ? 32 : 16;
-// nni_partial_lh = aligned_alloc<double>(IT_NUM*partial_lh_size+MEM_ALIGNMENT/sizeof(double));
-// nni_scale_num = aligned_alloc<UBYTE>(IT_NUM*scale_num_size+MEM_ALIGNMENT/sizeof(UBYTE));
- size_t IT_NUM = (params->nni5) ? 6 : 2;
nni_partial_lh = aligned_alloc<double>(IT_NUM*block_size);
nni_scale_num = aligned_alloc<UBYTE>(IT_NUM*scale_block_size);
}
@@ -864,16 +894,14 @@ void PhyloTree::initializeAllPartialLh(int &index, int &indexlh, PhyloNode *node
if (!central_partial_lh) {
uint64_t tip_partial_lh_size = aln->num_states * (aln->STATE_UNKNOWN+1) * model->getNMixtures();
- if (model->isSiteSpecificModel() && (sse == LK_EIGEN || sse == LK_EIGEN_SSE))
+ if (model->isSiteSpecificModel())
tip_partial_lh_size = get_safe_upper_limit(aln->size()) * model->num_states * leafNum;
- uint64_t mem_size = ((uint64_t)leafNum * 4 - 6) * (uint64_t) block_size + 2 + tip_partial_lh_size;
- if (sse == LK_EIGEN || sse == LK_EIGEN_SSE) {
- if (params->lh_mem_save == LM_PER_NODE) {
- mem_size -= ((uint64_t)leafNum * 3 - 4) * (uint64_t)block_size;
- } else {
- mem_size -= (uint64_t)leafNum * (uint64_t)block_size;
- }
- }
+
+ if (max_lh_slots == 0)
+ getMemoryRequired();
+
+ uint64_t mem_size = (uint64_t)max_lh_slots * block_size + 4 + tip_partial_lh_size;
+
if (verbose_mode >= VB_MED)
cout << "Allocating " << mem_size * sizeof(double) << " bytes for partial likelihood vectors" << endl;
try {
@@ -886,24 +914,15 @@ void PhyloTree::initializeAllPartialLh(int &index, int &indexlh, PhyloNode *node
}
// now always assign tip_partial_lh
- if (sse == LK_EIGEN || sse == LK_EIGEN_SSE) {
- if (params->lh_mem_save == LM_PER_NODE) {
- tip_partial_lh = central_partial_lh + ((nodeNum - leafNum)*block_size);
- } else {
- tip_partial_lh = central_partial_lh + (((nodeNum - 1)*2-leafNum)*block_size);
- }
- } else
- tip_partial_lh = central_partial_lh + (((nodeNum - 1)*2)*block_size);
+ if (params->lh_mem_save == LM_PER_NODE) {
+ tip_partial_lh = central_partial_lh + ((nodeNum - leafNum)*block_size);
+ } else {
+ tip_partial_lh = central_partial_lh + (max_lh_slots*block_size);
+ }
if (!central_scale_num) {
- uint64_t mem_size = (leafNum - 1) * 4 * scale_block_size;
- if (sse == LK_EIGEN || sse == LK_EIGEN_SSE) {
- if (params->lh_mem_save == LM_PER_NODE) {
- mem_size -= ((uint64_t)leafNum*3 - 2) * (uint64_t) scale_block_size;
- } else {
- mem_size -= (uint64_t)leafNum * (uint64_t) scale_block_size;
- }
- }
+ uint64_t mem_size = max_lh_slots * scale_block_size;
+
if (verbose_mode >= VB_MED)
cout << "Allocating " << mem_size * sizeof(UBYTE) << " bytes for scale num vectors" << endl;
try {
@@ -943,7 +962,7 @@ void PhyloTree::initializeAllPartialLh(int &index, int &indexlh, PhyloNode *node
assert(index < nodeNum * 2 - 1);
// now initialize partial_lh and scale_num
- if (params->lh_mem_save == LM_PER_NODE && (sse == LK_EIGEN || sse == LK_EIGEN_SSE)) {
+ if (params->lh_mem_save == LM_PER_NODE) {
if (!node->isLeaf()) { // only allocate memory to internal node
nei->partial_lh = NULL; // do not allocate memory for tip, use tip_partial_lh instead
nei->scale_num = NULL;
@@ -957,24 +976,18 @@ void PhyloTree::initializeAllPartialLh(int &index, int &indexlh, PhyloNode *node
nei2->partial_lh = NULL;
}
} else {
- if (nei->node->isLeaf() && (sse == LK_EIGEN || sse == LK_EIGEN_SSE)) {
- nei->partial_lh = NULL; // do not allocate memory for tip, use tip_partial_lh instead
- nei->scale_num = NULL;
- } else {
- nei->scale_num = central_scale_num + (indexlh * scale_block_size);
- nei->partial_lh = central_partial_lh + (indexlh * block_size);
- indexlh++;
- }
- if (nei2->node->isLeaf() && (sse == LK_EIGEN || sse == LK_EIGEN_SSE)) {
- nei2->partial_lh = NULL; // do not allocate memory for tip, use tip_partial_lh instead
- nei2->scale_num = NULL;
- } else {
- nei2->scale_num = central_scale_num + ((indexlh) * scale_block_size);
- nei2->partial_lh = central_partial_lh + (indexlh * block_size);
- indexlh++;
- }
+ nei->partial_lh = NULL;
+ nei->scale_num = NULL;
+ nei2->scale_num = NULL;
+ nei2->partial_lh = NULL;
}
-
+
+ // zero memory to allocate contiguous chunk of memory
+// if (nei->partial_lh)
+// memset(nei->partial_lh, 0, block_size*sizeof(double));
+// if (nei2->partial_lh)
+// memset(nei2->partial_lh, 0, block_size*sizeof(double));
+
// if (model->isSiteSpecificModel() && (sse == LK_EIGEN || sse == LK_EIGEN_SSE)) {
// // allocate tip memory for this model
// if (node->isLeaf()) {
@@ -989,32 +1002,31 @@ void PhyloTree::initializeAllPartialLh(int &index, int &indexlh, PhyloNode *node
}
double *PhyloTree::newPartialLh() {
- double *ret = aligned_alloc<double>((aln->size()+aln->num_states+3) * aln->num_states * site_rate->getNRate() *
- ((model_factory->fused_mix_rate)? 1 : model->getNMixtures()));
- return ret;
+ return aligned_alloc<double>(getPartialLhSize());
}
-size_t PhyloTree::getPartialLhBytes() {
- size_t nptn = aln->size()+aln->num_states; // +num_states for ascertainment bias correction
- size_t block_size;
- if (instruction_set >= 7)
- // block size must be divisible by 4
- block_size = ((nptn+3)/4)*4;
- else
- // block size must be divisible by 2
- block_size = ((nptn % 2) == 0) ? nptn : (nptn + 1);
+size_t PhyloTree::getPartialLhSize() {
+ // +num_states for ascertainment bias correction
+ size_t block_size = get_safe_upper_limit(aln->size())+get_safe_upper_limit(aln->num_states);
+ block_size *= model->num_states * site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures());
+ return block_size;
+}
- block_size = block_size * model->num_states * site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures());
+size_t PhyloTree::getPartialLhBytes() {
+ // size in bytes; the +num_states padding for ascertainment bias correction is already applied in getPartialLhSize()
+ return getPartialLhSize() * sizeof(double);
+}
- return block_size * sizeof(double);
+size_t PhyloTree::getScaleNumSize() {
+ return (get_safe_upper_limit(aln->size())+get_safe_upper_limit(aln->num_states)) * site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures());
}
size_t PhyloTree::getScaleNumBytes() {
- return (aln->size()+aln->num_states) * sizeof(UBYTE);
+ return getScaleNumSize()*sizeof(UBYTE);
}
UBYTE *PhyloTree::newScaleNum() {
- return aligned_alloc<UBYTE>(aln->size()+aln->num_states);
+ return aligned_alloc<UBYTE>(getScaleNumSize());
}
Node *findFirstFarLeaf(Node *node, Node *dad = NULL) {
@@ -1108,58 +1120,90 @@ int PhyloTree::getNumLhCat(SiteLoglType wsl) {
}
}
+void PhyloTree::transformPatternLhCat() {
+ if (vector_size == 1)
+ return;
+
+ size_t nptn = ((aln->size()+vector_size-1)/vector_size)*vector_size;
+// size_t nstates = aln->num_states;
+ size_t ncat = site_rate->getNRate();
+ if (!model_factory->fused_mix_rate) ncat *= model->getNMixtures();
+
+ double *mem = aligned_alloc<double>(nptn*ncat);
+ memcpy(mem, _pattern_lh_cat, nptn*ncat*sizeof(double));
+ double *memptr = mem;
+
+ size_t ptn, cat, i;
+ for (ptn = 0; ptn < nptn; ptn+=vector_size) {
+ double *lh_cat_ptr = &_pattern_lh_cat[ptn*ncat];
+ for (cat = 0; cat < ncat; cat++) {
+ for (i = 0; i < vector_size; i++)
+ lh_cat_ptr[i*ncat+cat] = memptr[i];
+ memptr += vector_size;
+ }
+ }
+ aligned_free(mem);
+}
+
double PhyloTree::computePatternLhCat(SiteLoglType wsl) {
if (!current_it) {
Node *leaf = findFirstFarLeaf(root);
current_it = (PhyloNeighbor*)leaf->neighbors[0];
current_it_back = (PhyloNeighbor*)current_it->node->findNeighbor(leaf);
}
-// if (sse == LK_NORMAL || sse == LK_SSE) {
-// if (getModel()->isMixture())
-// outError("Naive kernel does not support mixture models, contact author if you really need this feature");
-// return computeLikelihoodBranchNaive(current_it, (PhyloNode*)current_it_back->node);
-// } else
- if (!getModel()->isMixture())
- return computeLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node);
+
+ double score;
+
+ score = computeLikelihoodBranch(current_it, (PhyloNode*)current_it_back->node);
+ // TODO: SIMD aware
+ transformPatternLhCat();
+ /*
+ if (getModel()->isSiteSpecificModel()) {
+ score = computeLikelihoodBranch(current_it, (PhyloNode*)current_it_back->node);
+ } else if (!getModel()->isMixture())
+ score = computeLikelihoodBranch(current_it, (PhyloNode*)current_it_back->node);
else if (getModelFactory()->fused_mix_rate)
- return computeMixrateLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node);
+ score = computeLikelihoodBranch(current_it, (PhyloNode*)current_it_back->node);
else {
- double score = computeMixtureLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node);
- if (wsl == WSL_MIXTURE_RATECAT) return score;
-
- double *lh_cat = _pattern_lh_cat;
- double *lh_res = _pattern_lh_cat;
- size_t ptn, nptn = aln->getNPattern();
- size_t m, nmixture = getModel()->getNMixtures();
- size_t c, ncat = getRate()->getNRate();
- if (wsl == WSL_MIXTURE && ncat > 1) {
- // transform to lh per mixture class
- for (ptn = 0; ptn < nptn; ptn++) {
- for (m = 0; m < nmixture; m++) {
- double lh = lh_cat[0];
- for (c = 1; c < ncat; c++)
- lh += lh_cat[c];
- lh_res[m] = lh;
- lh_cat += ncat;
+ score = computeLikelihoodBranch(current_it, (PhyloNode*)current_it_back->node);
+ */
+ if (!getModel()->isSiteSpecificModel() && getModel()->isMixture() && !getModelFactory()->fused_mix_rate) {
+ if (wsl == WSL_MIXTURE || wsl == WSL_RATECAT) {
+ double *lh_cat = _pattern_lh_cat;
+ double *lh_res = _pattern_lh_cat;
+ size_t ptn, nptn = aln->getNPattern();
+ size_t m, nmixture = getModel()->getNMixtures();
+ size_t c, ncat = getRate()->getNRate();
+ if (wsl == WSL_MIXTURE && ncat > 1) {
+ // transform to lh per mixture class
+ for (ptn = 0; ptn < nptn; ptn++) {
+ for (m = 0; m < nmixture; m++) {
+ double lh = lh_cat[0];
+ for (c = 1; c < ncat; c++)
+ lh += lh_cat[c];
+ lh_res[m] = lh;
+ lh_cat += ncat;
+ }
+ lh_res += nmixture;
}
- lh_res += nmixture;
- }
- } else if (wsl == WSL_RATECAT && nmixture > 1) {
- // transform to lh per rate category
- for (ptn = 0; ptn < nptn; ptn++) {
- if (lh_res != lh_cat)
- memcpy(lh_res, lh_cat, ncat*sizeof(double));
- lh_cat += ncat;
- for (m = 1; m < nmixture; m++) {
- for (c = 0; c < ncat; c++)
- lh_res[c] += lh_cat[c];
+ } else if (wsl == WSL_RATECAT && nmixture > 1) {
+ // transform to lh per rate category
+ for (ptn = 0; ptn < nptn; ptn++) {
+ if (lh_res != lh_cat)
+ memcpy(lh_res, lh_cat, ncat*sizeof(double));
lh_cat += ncat;
+ for (m = 1; m < nmixture; m++) {
+ for (c = 0; c < ncat; c++)
+ lh_res[c] += lh_cat[c];
+ lh_cat += ncat;
+ }
+ lh_res += ncat;
}
- lh_res += ncat;
}
}
- return score;
}
+
+ return score;
}
void PhyloTree::computePatternStateFreq(double *ptn_state_freq) {
@@ -1255,7 +1299,12 @@ void PhyloTree::computePatternLikelihood(double *ptn_lh, double *cur_logl, doubl
} else {
memmove(ptn_lh, _pattern_lh, nptn * sizeof(double));
}
- if (ptn_lh_cat) {
+
+ if (!ptn_lh_cat)
+ return;
+
+ /*
+ if (ptn_lh_cat && model->isSiteSpecificModel()) {
int offset = 0;
if (sum_scaling == 0.0) {
int nptncat = nptn * ncat;
@@ -1282,7 +1331,74 @@ void PhyloTree::computePatternLikelihood(double *ptn_lh, double *cur_logl, doubl
ptn_lh_cat[offset] = log(_pattern_lh_cat[offset]) + scale;
}
}
+ return;
+ }
+ */
+
+ // New kernel
+ int ptn;
+ PhyloNeighbor *nei1 = current_it;
+ PhyloNeighbor *nei2 = current_it_back;
+ if (!nei1->node->isLeaf() && nei2->node->isLeaf()) {
+ // exchange
+ PhyloNeighbor *tmp = nei1;
+ nei1 = nei2;
+ nei2 = tmp;
+ }
+ if (nei1->node->isLeaf()) {
+ // external branch
+ double *lh_cat = _pattern_lh_cat;
+ double *out_lh_cat = ptn_lh_cat;
+ UBYTE *nei2_scale = nei2->scale_num;
+ if (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling) {
+ // per-category scaling
+ for (ptn = 0; ptn < nptn; ptn++) {
+ for (i = 0; i < ncat; i++) {
+ out_lh_cat[i] = log(lh_cat[i]) + nei2_scale[i] * LOG_SCALING_THRESHOLD;
+ }
+ lh_cat += ncat;
+ out_lh_cat += ncat;
+ nei2_scale += ncat;
+ }
+ } else {
+ // normal scaling
+ for (ptn = 0; ptn < nptn; ptn++) {
+ double scale = nei2_scale[ptn] * LOG_SCALING_THRESHOLD;
+ for (i = 0; i < ncat; i++)
+ out_lh_cat[i] = log(lh_cat[i]) + scale;
+ lh_cat += ncat;
+ out_lh_cat += ncat;
+ }
+ }
+ } else {
+ // internal branch
+ double *lh_cat = _pattern_lh_cat;
+ double *out_lh_cat = ptn_lh_cat;
+ UBYTE *nei1_scale = nei1->scale_num;
+ UBYTE *nei2_scale = nei2->scale_num;
+ if (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling) {
+ // per-category scaling
+ for (ptn = 0; ptn < nptn; ptn++) {
+ for (i = 0; i < ncat; i++) {
+ out_lh_cat[i] = log(lh_cat[i]) + (nei1_scale[i]+nei2_scale[i]) * LOG_SCALING_THRESHOLD;
+ }
+ lh_cat += ncat;
+ out_lh_cat += ncat;
+ nei1_scale += ncat;
+ nei2_scale += ncat;
+ }
+ } else {
+ // normal scaling
+ for (ptn = 0; ptn < nptn; ptn++) {
+ double scale = (nei1_scale[ptn] + nei2_scale[ptn]) * LOG_SCALING_THRESHOLD;
+ for (i = 0; i < ncat; i++)
+ out_lh_cat[i] = log(lh_cat[i]) + scale;
+ lh_cat += ncat;
+ out_lh_cat += ncat;
+ }
+ }
}
+
// if (cur_logl) {
// double check_score = 0.0;
// for (int i = 0; i < nptn; i++) {
@@ -2010,10 +2126,11 @@ double PhyloTree::computeBayesianBranchLength(PhyloNeighbor *dad_branch, PhyloNo
if (node->isLeaf() || dad->isLeaf()) {
return -1.0;
}*/
- if ((dad_branch->partial_lh_computed & 1) == 0)
- computePartialLikelihood(dad_branch, dad);
- if ((node_branch->partial_lh_computed & 1) == 0)
- computePartialLikelihood(node_branch, node);
+ // TODO
+// if ((dad_branch->partial_lh_computed & 1) == 0)
+// computePartialLikelihood(dad_branch, dad);
+// if ((node_branch->partial_lh_computed & 1) == 0)
+// computePartialLikelihood(node_branch, node);
// now combine likelihood at the branch
int nstates = aln->num_states;
int numCat = site_rate->getNRate();
@@ -2227,6 +2344,7 @@ void PhyloTree::optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, bool clear
double ferror, optx;
assert(current_len >= 0.0);
theta_computed = false;
+// mem_slots.cleanup();
if (optimize_by_newton) {
// Newton-Raphson method
optx = minimizeNewton(params->min_branch_length, current_len, params->max_branch_length, params->min_branch_length, negative_lh, maxNRStep);
@@ -2342,8 +2460,12 @@ double PhyloTree::optimizeAllBranches(int my_iterations, double tolerance, int m
// printTree(cout, WT_BR_LEN+WT_NEWLINE);
// }
- for (int j = 0; j < nodes.size(); j++)
+ for (int j = 0; j < nodes.size(); j++) {
optimizeOneBranch((PhyloNode*)nodes[j], (PhyloNode*)nodes2[j]);
+ if (verbose_mode >= VB_MAX) {
+ cout << "Branch " << nodes[j]->id << " " << nodes2[j]->id << ": " << computeLikelihoodFromBuffer() << endl;
+ }
+ }
// if (i == 0)
// optimizeOneBranch((PhyloNode*)nodes[0], (PhyloNode*)nodes2[0]);
@@ -2796,52 +2918,52 @@ int PhyloTree::fixNegativeBranch(bool force, Node *node, Node *dad) {
Nearest Neighbor Interchange by maximum likelihood
****************************************************************************/
-void PhyloTree::doOneRandomNNI(Node *node1, Node *node2) {
- assert(isInnerBranch(node1, node2));
- Neighbor *node1Nei = NULL;
- Neighbor *node2Nei = NULL;
- // randomly choose one neighbor from node1 and one neighbor from node2
- bool chooseNext = false;
- FOR_NEIGHBOR_IT(node1, node2, it){
- if (chooseNext) {
- node1Nei = (*it);
- break;
- }
-
- int randNum = random_int(2); // randNum is either 0 or 1
- if (randNum == 0) {
- node1Nei = (*it);
- break;
- } else {
- chooseNext = true;
- }
+void PhyloTree::doOneRandomNNI(Branch branch) {
+ assert(isInnerBranch(branch.first, branch.second));
+ NNIMove nni;
+ nni.node1 = (PhyloNode*) branch.first;
+ nni.node2 = (PhyloNode*) branch.second;
+ FOR_NEIGHBOR_IT(branch.first, branch.second, node1NeiIt) {
+ nni.node1Nei_it = node1NeiIt;
+ break;
}
- chooseNext = false;
- FOR_NEIGHBOR_IT(node2, node1, it){
- if (chooseNext) {
- node2Nei = (*it);
- break;
- }
- int randNum = random_int(2);
- if (randNum == 0) {
- node2Nei = (*it);
+ int randInt = random_int(branch.second->neighbors.size()-1);
+ int cnt = 0;
+ FOR_NEIGHBOR_IT(branch.second, branch.first, node2NeiIt) {
+ if (cnt == randInt) {
+ nni.node2Nei_it = node2NeiIt;
break;
} else {
- chooseNext = true;
+ cnt++;
}
}
- assert(node1Nei != NULL && node2Nei != NULL);
-
- NeighborVec::iterator node1NeiIt = node1->findNeighborIt(node1Nei->node);
- NeighborVec::iterator node2NeiIt = node2->findNeighborIt(node2Nei->node);
- assert(node1NeiIt != node1->neighbors.end());
- assert(node1NeiIt != node2->neighbors.end());
-
- node1->updateNeighbor(node1NeiIt, node2Nei);
- node2Nei->node->updateNeighbor(node2, node1);
+ if (constraintTree.isCompatible(nni))
+ doNNI(nni, true);
+}
- node2->updateNeighbor(node2NeiIt, node1Nei);
- node1Nei->node->updateNeighbor(node1, node2);
+
+NNIMove PhyloTree::getRandomNNI(Branch &branch) {
+ assert(isInnerBranch(branch.first, branch.second));
+ NNIMove nni;
+ nni.node1 = (PhyloNode*) branch.first;
+ nni.node2 = (PhyloNode*) branch.second;
+
+ FOR_NEIGHBOR_IT(branch.first, branch.second, node1NeiIt) {
+ nni.node1Nei_it = node1NeiIt;
+ break;
+ }
+ int randInt = random_int(branch.second->neighbors.size()-1);
+ int cnt = 0;
+ FOR_NEIGHBOR_IT(branch.second, branch.first, node2NeiIt) {
+ if (cnt == randInt) {
+ nni.node2Nei_it = node2NeiIt;
+ break;
+ } else {
+ cnt++;
+ }
+ }
+ nni.newloglh = 0.0;
+ return nni;
}
void PhyloTree::doNNI(NNIMove &move, bool clearLH) {
@@ -2871,9 +2993,9 @@ void PhyloTree::doNNI(NNIMove &move, bool clearLH) {
PhyloNeighbor *node21_it = (PhyloNeighbor*) node2->findNeighbor(node1); // return neighbor of node2 which points to node 1
// reorient partial_lh before swap
- if (params->lh_mem_save == LM_PER_NODE && !isSuperTree() && (sse == LK_EIGEN || sse == LK_EIGEN_SSE)) {
- node12_it->reorientPartialLh(node1);
- node21_it->reorientPartialLh(node2);
+ if (!isSuperTree()) {
+ reorientPartialLh(node12_it, node1);
+ reorientPartialLh(node21_it, node2);
}
// do the NNI swap
@@ -2896,11 +3018,14 @@ void PhyloTree::doNNI(NNIMove &move, bool clearLH) {
outError("Wrong ID");
}*/
+ PhyloNeighbor *nei12 = (PhyloNeighbor*) node1->findNeighbor(node2); // return neighbor of node1 which points to node 2
+ PhyloNeighbor *nei21 = (PhyloNeighbor*) node2->findNeighbor(node1); // return neighbor of node2 which points to node 1
if (clearLH) {
// clear partial likelihood vector
- node12_it->clearPartialLh();
- node21_it->clearPartialLh();
+ nei12->clearPartialLh();
+ nei21->clearPartialLh();
+ nei12->size = nei21->size = 0;
node2->clearReversePartialLh(node1);
node1->clearReversePartialLh(node2);
@@ -2911,6 +3036,20 @@ void PhyloTree::doNNI(NNIMove &move, bool clearLH) {
if (params->leastSquareNNI) {
updateSubtreeDists(move);
}
+
+ // update the splits stored in the two neighbors of the central branch
+ if (nei12->split != NULL || nei21->split != NULL) {
+ delete nei12->split;
+ nei12->split = new Split(leafNum);
+ delete nei21->split;
+ nei21->split = new Split(leafNum);
+
+ FOR_NEIGHBOR_IT(nei12->node, node1, it)
+ *(nei12->split) += *((*it)->split);
+
+ FOR_NEIGHBOR_IT(nei21->node, node2, it)
+ *(nei21->split) += *((*it)->split);
+ }
}
void PhyloTree::changeNNIBrans(NNIMove nnimove) {
@@ -2953,13 +3092,15 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
size_t partial_lh_size = getPartialLhBytes()/sizeof(double);
size_t scale_num_size = getScaleNumBytes()/sizeof(UBYTE);
+
// Upper Bounds ---------------
- totalNNIub += 2;
+ /*
if(params->upper_bound_NNI){
+ totalNNIub += 2;
NNIMove resMove;
resMove = getBestNNIForBranUB(node1,node2,this);
- /* if UB is smaller than the current likelihood, then we don't recompute the likelihood of the swapped topology.
- * Otherwise, follow the normal procedure: evaluate NNIs and compute the likelihood.*/
+ // if UB is smaller than the current likelihood, then we don't recompute the likelihood of the swapped topology.
+ // Otherwise, follow the normal procedure: evaluate NNIs and compute the likelihood.
// here, we skip NNI is its UB n times worse than the curLikelihood
if( resMove.newloglh < (1+params->upper_bound_frac)*this->curScore){
@@ -2967,7 +3108,8 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
return resMove;
}
}
-
+ */
+
//-----------------------------
NeighborVec::iterator it;
@@ -2988,14 +3130,20 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
assert(id == IT_NUM);
Neighbor *saved_nei[6];
+ int mem_id = 0;
// save Neighbor and allocate new Neighbor pointer
for (id = 0; id < IT_NUM; id++) {
saved_nei[id] = (*saved_it[id]);
*saved_it[id] = new PhyloNeighbor(saved_nei[id]->node, saved_nei[id]->length);
- ((PhyloNeighbor*) (*saved_it[id]))->partial_lh = nni_partial_lh + id*partial_lh_size;
- ((PhyloNeighbor*) (*saved_it[id]))->scale_num = nni_scale_num + id*scale_num_size;
+ if (((PhyloNeighbor*)saved_nei[id])->partial_lh) {
+ ((PhyloNeighbor*) (*saved_it[id]))->partial_lh = nni_partial_lh + mem_id*partial_lh_size;
+ ((PhyloNeighbor*) (*saved_it[id]))->scale_num = nni_scale_num + mem_id*scale_num_size;
+ mem_id++;
+ mem_slots.addSpecialNei((PhyloNeighbor*)*saved_it[id]);
+ }
// ((PhyloNeighbor*) (*saved_it[id]))->scale_num = newScaleNum();
}
+ assert(mem_id == 2);
// get the Neighbor again since it is replaced for saving purpose
PhyloNeighbor* node12_it = (PhyloNeighbor*) node1->findNeighbor(node2);
@@ -3037,22 +3185,36 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
// Initialize node1 and node2 in nniMoves
nniMoves[0].node1 = nniMoves[1].node1 = node1;
nniMoves[0].node2 = nniMoves[1].node2 = node2;
+ nniMoves[0].newloglh = nniMoves[1].newloglh = -DBL_MAX;
double backupScore = curScore;
- for (cnt = 0; cnt < 2; cnt++) {
+ for (cnt = 0; cnt < 2; cnt++) if (constraintTree.isCompatible(nniMoves[cnt]))
+ {
// do the NNI swap
NeighborVec::iterator node1_it = nniMoves[cnt].node1Nei_it;
NeighborVec::iterator node2_it = nniMoves[cnt].node2Nei_it;
Neighbor *node1_nei = *node1_it;
Neighbor *node2_nei = *node2_it;
+ // reorient partial_lh before swap
+ if (!isSuperTree()) {
+ reorientPartialLh(node12_it, node1);
+ reorientPartialLh(node21_it, node2);
+ }
+
node1->updateNeighbor(node1_it, node2_nei);
node2_nei->node->updateNeighbor(node2, node1);
node2->updateNeighbor(node2_it, node1_nei);
node1_nei->node->updateNeighbor(node1, node2);
+ if (params->lh_mem_save == LM_MEM_SAVE) {
+ // reset subtree size to change traversal order
+ for (id = 0; id < IT_NUM; id++)
+ ((PhyloNeighbor*)*saved_it[id])->size = 0;
+ }
+
// clear partial likelihood vector
node12_it->clearPartialLh();
node21_it->clearPartialLh();
@@ -3088,6 +3250,8 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
node12_it->clearPartialLh();
}
double score = computeLikelihoodFromBuffer();
+ if (verbose_mode >= VB_DEBUG)
+ cout << "NNI " << node1->id << " - " << node2->id << ": " << score << endl;
nniMoves[cnt].newloglh = score;
// compute the pattern likelihoods if wanted
if (nniMoves[cnt].ptnlh)
@@ -3097,6 +3261,12 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
saveCurrentTree(score); // BQM: for new bootstrap
}
+ // reorient partial_lh before swapping back
+ if (!isSuperTree()) {
+ reorientPartialLh(node12_it, node1);
+ reorientPartialLh(node21_it, node2);
+ }
+
// else, swap back, also recover the branch lengths
node1->updateNeighbor(node1_it, node1_nei);
node1_nei->node->updateNeighbor(node2, node1);
@@ -3110,12 +3280,21 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
for (id = IT_NUM-1; id >= 0; id--) {
// aligned_free(((PhyloNeighbor*) *saved_it[id])->scale_num);
//delete[] ((PhyloNeighbor*) *saved_it[id])->partial_lh;
+// if (((PhyloNeighbor*)saved_nei[id])->partial_lh) {
+// if (saved_nei[id]->node == node1)
+// mem_slots.restore(node21_it, (PhyloNeighbor*)saved_nei[id]);
+// else
+// mem_slots.restore(node12_it, (PhyloNeighbor*)saved_nei[id]);
+// }
if (*saved_it[id] == current_it) current_it = (PhyloNeighbor*) saved_nei[id];
if (*saved_it[id] == current_it_back) current_it_back = (PhyloNeighbor*) saved_nei[id];
delete (*saved_it[id]);
(*saved_it[id]) = saved_nei[id];
}
+
+ mem_slots.eraseSpecialNei();
+
// aligned_free(new_partial_lh);
// restore the length of 4 branches around node1, node2
@@ -4469,36 +4648,89 @@ void PhyloTree::computeSeqIdentityAlongTree() {
}
void PhyloTree::generateRandomTree(TreeGenType tree_type) {
+ if (!constraintTree.empty() && tree_type != YULE_HARDING)
+ outError("Only Yule-Harding ramdom tree supported with constraint tree");
assert(aln);
int orig_size = params->sub_size;
params->sub_size = aln->getNSeq();
MExtTree ext_tree;
- switch (tree_type) {
- case YULE_HARDING:
- ext_tree.generateYuleHarding(*params);
- break;
- case UNIFORM:
- ext_tree.generateUniform(params->sub_size);
- break;
- case CATERPILLAR:
- ext_tree.generateCaterpillar(params->sub_size);
- break;
- case BALANCED:
- ext_tree.generateBalanced(params->sub_size);
- break;
- case STAR_TREE:
- ext_tree.generateStarTree(*params);
- break;
- default:
- break;
- }
+ if (constraintTree.empty()) {
+ switch (tree_type) {
+ case YULE_HARDING:
+ ext_tree.generateYuleHarding(*params);
+ break;
+ case UNIFORM:
+ ext_tree.generateUniform(params->sub_size);
+ break;
+ case CATERPILLAR:
+ ext_tree.generateCaterpillar(params->sub_size);
+ break;
+ case BALANCED:
+ ext_tree.generateBalanced(params->sub_size);
+ break;
+ case STAR_TREE:
+ ext_tree.generateStarTree(*params);
+ break;
+ default:
+ break;
+ }
+ NodeVector taxa;
+ ext_tree.getTaxa(taxa);
+ assert(taxa.size() == aln->getNSeq());
+ for (NodeVector::iterator it = taxa.begin(); it != taxa.end(); it++)
+ (*it)->name = aln->getSeqName((*it)->id);
+ } else {
+ ext_tree.generateConstrainedYuleHarding(*params, &constraintTree, aln->getSeqNames());
+ }
params->sub_size = orig_size;
- NodeVector taxa;
- ext_tree.getTaxa(taxa);
- assert(taxa.size() == aln->getNSeq());
- for (NodeVector::iterator it = taxa.begin(); it != taxa.end(); it++)
- (*it)->name = aln->getSeqName((*it)->id);
stringstream str;
ext_tree.printTree(str);
PhyloTree::readTreeStringSeqName(str.str());
}
+
+/*
+void PhyloTree::sortNeighborBySubtreeSize(PhyloNode *node, PhyloNode *dad) {
+
+ // already sorted, return
+ PhyloNeighbor *nei = (PhyloNeighbor*)dad->findNeighbor(node);
+ if (nei->size >= 1)
+ return;
+
+ if (dad && node->isLeaf()) {
+ nei->size = 1;
+ return;
+ }
+
+ nei->size = 0;
+ FOR_NEIGHBOR_DECLARE(node, dad, it) {
+ sortNeighborBySubtreeSize((PhyloNode*)(*it)->node, node);
+ nei->size += ((PhyloNeighbor*)*it)->size;
+ }
+
+ // sort neighbors in descending order of sub-tree size
+ FOR_NEIGHBOR(node, dad, it)
+ for (NeighborVec::iterator it2 = it+1; it2 != node->neighbors.end(); it2++)
+ if ((*it2)->node != dad && ((PhyloNeighbor*)*it)->size < ((PhyloNeighbor*)*it2)->size) {
+ Neighbor *nei;
+ nei = *it;
+ *it = *it2;
+ *it2 = nei;
+ }
+}
+*/
+
+void PhyloTree::reorientPartialLh(PhyloNeighbor* dad_branch, Node *dad) {
+ if (dad_branch->partial_lh)
+ return;
+ Node * node = dad_branch->node;
+ FOR_NEIGHBOR_IT(node, dad, it) {
+ PhyloNeighbor *backnei = (PhyloNeighbor*)(*it)->node->findNeighbor(node);
+ if (backnei->partial_lh) {
+ mem_slots.takeover(dad_branch, backnei);
+ break;
+ }
+ }
+ if (params->lh_mem_save == LM_PER_NODE)
+ assert(dad_branch->partial_lh && "partial_lh is not re-oriented");
+}
+
diff --git a/phylotree.h b/phylotree.h
index 69187c3..09f66a9 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -1,14 +1,24 @@
-//
-// C++ Interface: phylotree
-//
-// Description:
-//
-//
-// Author: BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui at univie.ac.at>, (C) 2008
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
+/***************************************************************************
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
#ifndef PHYLOTREE_H
#define PHYLOTREE_H
@@ -30,8 +40,11 @@
#include "phylonode.h"
#include "optimization.h"
#include "model/rateheterogeneity.h"
+#include "candidateset.h"
#include "pll/pll.h"
#include "checkpoint.h"
+#include "constrainttree.h"
+#include "memslot.h"
#define BOOT_VAL_FLOAT
#define BootValType float
@@ -39,6 +52,8 @@
extern int instruction_set;
+#define SAFE_LH true // safe likelihood scaling to avoid numerical underflow for ultra large trees
+#define NORM_LH false // normal likelihood scaling
const double TOL_BRANCH_LEN = 0.000001; // NEVER TOUCH THIS CONSTANT AGAIN PLEASE!
const double TOL_LIKELIHOOD = 0.001; // NEVER TOUCH THIS CONSTANT AGAIN PLEASE!
@@ -58,6 +73,10 @@ const int SPR_DEPTH = 2;
//using namespace Eigen;
inline size_t get_safe_upper_limit(size_t cur_limit) {
+ if (instruction_set >= 9)
+ // AVX-512
+ return ((cur_limit+7)/8)*8;
+ else
if (instruction_set >= 7)
// AVX
return ((cur_limit+3)/4)*4;
@@ -67,6 +86,10 @@ inline size_t get_safe_upper_limit(size_t cur_limit) {
}
inline size_t get_safe_upper_limit_float(size_t cur_limit) {
+ if (instruction_set >= 9)
+ // AVX-512
+ return ((cur_limit+15)/16)*16;
+ else
if (instruction_set >= 7)
// AVX
return ((cur_limit+7)/8)*8;
@@ -89,7 +112,7 @@ inline size_t get_safe_upper_limit_float(size_t cur_limit) {
template< class T>
inline T *aligned_alloc(size_t size) {
- size_t MEM_ALIGNMENT = (instruction_set >= 7) ? 32 : 16;
+ size_t MEM_ALIGNMENT = (instruction_set >= 9) ? 64 : ((instruction_set >= 7) ? 32 : 16);
void *mem;
#if defined WIN32 || defined _WIN32 || defined __WIN32__
@@ -150,6 +173,30 @@ typedef std::map< int, PhyloNode* > IntPhyloNodeMap;
const int MAX_SPR_MOVES = 20;
+struct NNIMove {
+
+ // Two nodes representing the central branch
+ PhyloNode *node1, *node2;
+
+ // Roots of the two subtree that are swapped
+ NeighborVec::iterator node1Nei_it, node2Nei_it;
+
+ // log-likelihood of the tree after applying the NNI
+ double newloglh;
+
+ int swap_id;
+
+ // new branch lengths of 5 branches corresponding to the NNI
+ double newLen[5];
+
+ // pattern likelihoods
+ double *ptnlh;
+
+ bool operator<(const NNIMove & rhs) const {
+ return newloglh > rhs.newloglh;
+ }
+};
+
/**
an SPR move.
*/
@@ -204,34 +251,6 @@ struct SwapNNIParam {
double *nni2_ptnlh;
};
-struct NNIMove {
- // Two nodes representing the central branch
- PhyloNode *node1, *node2;
- // Roots of the two subtree that are swapped
- NeighborVec::iterator node1Nei_it, node2Nei_it;
-
- // log-likelihood of the tree after applying the NNI
- double newloglh;
-
- int swap_id;
-
- // old branch lengths of 5 branches before doing NNI
- //double oldLen[5];
-
- // new branch lengths of 5 branches corresponding to the NNI
- double newLen[5];
-
- // pattern likelihoods
- double *ptnlh;
-
- bool operator<(const NNIMove & rhs) const {
- return newloglh > rhs.newloglh;
- //return delta > rhs.delta;
- }
-};
-
-
-
struct LeafFreq {
int leaf_id;
@@ -299,6 +318,28 @@ struct SeqQuartetInfo {
// END definitions for likelihood mapping (HAS)
// ********************************************
+
+// ********************************************
+// BEGIN traversal information
+// ********************************************
+
+class TraversalInfo {
+public:
+ PhyloNeighbor *dad_branch;
+ PhyloNode *dad;
+ double *echildren;
+ double *partial_lh_leaves;
+
+ TraversalInfo(PhyloNeighbor *dad_branch, PhyloNode *dad) {
+ this->dad = dad;
+ this->dad_branch = dad_branch;
+ }
+};
+
+// ********************************************
+// END traversal information
+// ********************************************
+
/**
Phylogenetic Tree class
@@ -313,6 +354,8 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory {
friend class RateKategory;
friend class ModelMixture;
friend class RateFree;
+ friend class MemSlotVector;
+ friend class ModelFactory;
public:
/**
@@ -328,6 +371,12 @@ public:
*/
PhyloTree(Alignment *aln);
+ /**
+ * Create a phylotree from the tree string and assign alignment.
+ * Taxa IDs are numbered according to their orders in the alignment.
+ */
+ PhyloTree(string& treeString, Alignment *aln, bool isRooted);
+
void init();
/**
@@ -483,7 +532,7 @@ public:
/****************************************************************************
Dot product
****************************************************************************/
- template <class Numeric, class VectorClass, const int VCSIZE>
+ template <class Numeric, class VectorClass>
Numeric dotProductSIMD(Numeric *x, Numeric *y, int size);
typedef BootValType (PhyloTree::*DotProductType)(BootValType *x, BootValType *y, int size);
@@ -492,11 +541,21 @@ public:
typedef double (PhyloTree::*DotProductDoubleType)(double *x, double *y, int size);
DotProductDoubleType dotProductDouble;
+ double dotProductDoubleCall(double *x, double *y, int size);
+
#if defined(BINARY32) || defined(__NOAVX__)
void setDotProductAVX() {}
+ void setDotProductFMA() {}
#else
void setDotProductAVX();
+ void setDotProductFMA();
+#ifdef INCLUDE_AVX512
+ void setDotProductAVX512();
+#endif
#endif
+
+ void setDotProductSSE();
+
/**
this function return the parsimony or likelihood score of the tree. Default is
to compute the parsimony score. Override this function if you define a new
@@ -577,10 +636,14 @@ public:
virtual void setParsimonyKernelAVX();
#endif
+ virtual void setParsimonyKernelSSE();
+
/****************************************************************************
likelihood function
****************************************************************************/
+ size_t getBufferPartialLhSize();
+
/**
initialize partial_lh vector of all PhyloNeighbors, allocating central_partial_lh
*/
@@ -623,6 +686,7 @@ public:
/** get the number of bytes occupied by partial_lh */
size_t getPartialLhBytes();
+ size_t getPartialLhSize();
/**
allocate memory for a scale num vector
@@ -631,6 +695,7 @@ public:
/** get the number of bytes occupied by scale_num */
size_t getScaleNumBytes();
+ size_t getScaleNumSize();
/**
* this stores partial_lh for each state at the leaves of the tree because they are the same between leaves
@@ -641,38 +706,84 @@ public:
bool ptn_freq_computed;
+ /** vector size used by SIMD kernel */
+ size_t vector_size;
+
+ /** number of threads used for likelihood kernel */
+ int num_threads;
+
+
+ /****************************************************************************
+ helper functions for computing tree traversal
+ ****************************************************************************/
+
+
+ /**
+ compute traversal_info of a subtree
+ */
+ inline bool computeTraversalInfo(PhyloNeighbor *dad_branch, PhyloNode *dad, double* &buffer);
+
+
+ /**
+ compute traversal_info of both subtrees
+ */
+ template<class VectorClass, const int nstates>
+ void computeTraversalInfo(PhyloNode *node, PhyloNode *dad, bool compute_partial_lh);
+ template<class VectorClass>
+ void computeTraversalInfo(PhyloNode *node, PhyloNode *dad, bool compute_partial_lh);
+
+ /**
+ precompute info for models
+ */
+ template<class VectorClass, const int nstates>
+ void computePartialInfo(TraversalInfo &info, VectorClass* buffer);
+ template<class VectorClass>
+ void computePartialInfo(TraversalInfo &info, VectorClass* buffer);
+
+ /**
+ sort neighbors in descending order of subtree size (number of leaves within subtree)
+ @param node the starting node, NULL to start from the root
+ @param dad dad of the node, used to direct the search
+ */
+ void sortNeighborBySubtreeSize(PhyloNode *node, PhyloNode *dad);
+
/****************************************************************************
computing partial (conditional) likelihood of subtrees
****************************************************************************/
+ /** transform _pattern_lh_cat from "interleaved" to "sequential", due to vector_size > 1 */
+ void transformPatternLhCat();
+
void computeTipPartialLikelihood();
void computePtnInvar();
void computePtnFreq();
+
/**
compute the partial likelihood at a subtree
@param dad_branch the branch leading to the subtree
@param dad its dad, used to direct the tranversal
*/
- virtual void computePartialLikelihood(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
- typedef void (PhyloTree::*ComputePartialLikelihoodType)(PhyloNeighbor *, PhyloNode *);
+ virtual void computePartialLikelihood(TraversalInfo &info, size_t ptn_left, size_t ptn_right, int thread_id);
+ typedef void (PhyloTree::*ComputePartialLikelihoodType)(TraversalInfo &info, size_t ptn_left, size_t ptn_right, int thread_id);
ComputePartialLikelihoodType computePartialLikelihoodPointer;
//template <const int nstates>
- void computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
-
- //template <const int nstates>
- void computeMixturePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
+// void computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
- //template <const int nstates>
- void computeMixratePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
+// void computeSitemodelPartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
- void computeSitemodelPartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
+// template <class VectorClass, const int VCSIZE, const int nstates>
+// void computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
- template <class VectorClass, const int VCSIZE, const int nstates>
- void computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
+ template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA = false, const bool SITE_MODEL = false>
+ void computePartialLikelihoodSIMD(TraversalInfo &info, size_t ptn_left, size_t ptn_right, int thread_id);
+ template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA = false, const bool SITE_MODEL = false>
+ void computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t ptn_left, size_t ptn_right, int thread_id);
+
+ /*
template <class VectorClass, const int VCSIZE, const int nstates>
void computeMixratePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
@@ -681,6 +792,7 @@ public:
template <class VectorClass, const int VCSIZE, const int nstates>
void computeSitemodelPartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
+ */
/****************************************************************************
computing likelihood on a branch
@@ -705,19 +817,20 @@ public:
// inline double computeLikelihoodBranchFast(PhyloNeighbor *dad_branch, PhyloNode *dad);
//template <const int nstates>
- double computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad);
+// double computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad);
- //template <const int nstates>
- double computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad);
+// double computeSitemodelLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad);
- //template <const int nstates>
- double computeMixrateLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad);
+// template <class VectorClass, const int VCSIZE, const int nstates>
+// double computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad);
- double computeSitemodelLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad);
+ template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA = false, const bool SITE_MODEL = false>
+ double computeLikelihoodBranchSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad);
- template <class VectorClass, const int VCSIZE, const int nstates>
- double computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad);
+ template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA = false, const bool SITE_MODEL = false>
+ double computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad);
+ /*
template <class VectorClass, const int VCSIZE, const int nstates>
double computeMixrateLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad);
@@ -726,6 +839,7 @@ public:
template <class VectorClass, const int VCSIZE, const int nstates>
double computeSitemodelLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad);
+ */
/****************************************************************************
computing likelihood on a branch using buffer
@@ -742,9 +856,16 @@ public:
typedef double (PhyloTree::*ComputeLikelihoodFromBufferType)();
ComputeLikelihoodFromBufferType computeLikelihoodFromBufferPointer;
- template <class VectorClass, const int VCSIZE, const int nstates>
- double computeLikelihoodFromBufferEigenSIMD();
+// template <class VectorClass, const int VCSIZE, const int nstates>
+// double computeLikelihoodFromBufferEigenSIMD();
+
+ template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA = false, const bool SITE_MODEL = false>
+ double computeLikelihoodFromBufferSIMD();
+
+ template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA = false, const bool SITE_MODEL = false>
+ double computeLikelihoodFromBufferGenericSIMD();
+ /*
template <class VectorClass, const int VCSIZE, const int nstates>
double computeMixrateLikelihoodFromBufferEigenSIMD();
@@ -755,6 +876,7 @@ public:
double computeSitemodelLikelihoodFromBufferEigenSIMD();
double computeSitemodelLikelihoodFromBufferEigen();
+ */
/**
compute tree likelihood when a branch length collapses to zero
@@ -798,6 +920,43 @@ public:
*/
void computePatternStateFreq(double *ptn_state_freq);
+ /****************************************************************************
+ ancestral sequence reconstruction
+ ****************************************************************************/
+
+ /**
+ compute ancestral sequence probability for an internal node by marginal reconstruction
+ (Yang, Kumar and Nei 1995)
+ @param dad_branch branch leading to an internal node where to obtain ancestral sequence
+ @param dad dad of the target internal node
+ @param[out] ptn_ancestral_prob pattern ancestral probability vector of dad_branch->node
+ */
+ void computeMarginalAncestralProbability(PhyloNeighbor *dad_branch, PhyloNode *dad, double *ptn_ancestral_prob);
+
+ /**
+ compute the joint ancestral states at a pattern (Pupko et al. 2000)
+ */
+ void computeJointAncestralSequences(int *ancestral_seqs);
+
+ /**
+ * compute max ancestral likelihood according to
+ * step 1-3 of the dynamic programming algorithm of Pupko et al. 2000, MBE 17:890-896
+ * @param dad_branch branch leading to an internal node where to obtain ancestral sequence
+ * @param dad dad of the target internal node
+ * @param[out] C array storing all information about max ancestral states
+ */
+ void computeAncestralLikelihood(PhyloNeighbor *dad_branch, PhyloNode *dad, int *C);
+
+ /**
+ * compute max ancestral states according to
+ * step 4-5 of the dynamic programming algorithm of Pupko et al. 2000, MBE 17:890-896
+ * @param dad_branch branch leading to an internal node where to obtain ancestral sequence
+ * @param dad dad of the target internal node
+ * @param C array storing all information about max ancestral states
+ * @param[out] ancestral_seqs array of size nptn*nnode for ancestral sequences at all internal nodes
+ */
+ void computeAncestralState(PhyloNeighbor *dad_branch, PhyloNode *dad, int *C, int *ancestral_seqs);
+
/**
compute pattern likelihoods only if the accumulated scaling factor is non-zero.
Otherwise, copy the pattern_lh attribute
@@ -915,11 +1074,13 @@ public:
@param tree_string tree string to read from
*/
void readTreeFile(const string &file_name);
-
- /**
+
+ /*
refactored 2015-12-22: Taxon IDs instead of Taxon names to save space!
* Return the tree string contining taxon IDs and branch lengths
* @return
+ * @param format (WT_TAXON_ID, WT_BR_LEN, ...)
+ * @return the tree string with the specified format
*/
virtual string getTreeString();
@@ -940,8 +1101,10 @@ public:
/**
* Return the sorted topology without branch length, used to compare tree topology
+ * @param
+ * printBranchLength true/false
*/
- string getTopology();
+ string getTopologyString(bool printBranchLength);
bool checkEqualScalingFactor(double &sum_scaling, PhyloNode *node = NULL, PhyloNode *dad = NULL);
@@ -951,19 +1114,27 @@ public:
****************************************************************************/
//template <const int nstates>
- void computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
+// void computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
- //template <const int nstates>
- void computeMixtureLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
+// void computeSitemodelLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
- //template <const int nstates>
- void computeMixrateLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
+// template <class VectorClass, const int VCSIZE, const int nstates>
+// void computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
- void computeSitemodelLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
+ template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA = false, const bool SITE_MODEL = false>
+ void computeLikelihoodBufferSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, size_t ptn_lower, size_t ptn_upper, int thread_id);
- template <class VectorClass, const int VCSIZE, const int nstates>
- void computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
+ template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA = false, const bool SITE_MODEL = false>
+ void computeLikelihoodBufferGenericSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, size_t ptn_lower, size_t ptn_upper, int thread_id);
+
+
+ template <class VectorClass, const bool SAFE_NUMERIC, const int nstates, const bool FMA = false, const bool SITE_MODEL = false>
+ void computeLikelihoodDervSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
+
+ template <class VectorClass, const bool SAFE_NUMERIC, const bool FMA = false, const bool SITE_MODEL = false>
+ void computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
+ /*
template <class VectorClass, const int VCSIZE, const int nstates>
void computeMixrateLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
@@ -972,6 +1143,7 @@ public:
template <class VectorClass, const int VCSIZE, const int nstates>
void computeSitemodelLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
+ */
/**
compute tree likelihood and derivatives on a branch. used to optimize branch length
@@ -990,6 +1162,9 @@ public:
Stepwise addition (greedy) by maximum parsimony
****************************************************************************/
+ /** constraint tree used to guide tree search */
+ ConstraintTree constraintTree;
+
/**
FAST VERSION: used internally by computeParsimonyTree() to find the best target branch to add into the tree
@param added_node node to add
@@ -1118,6 +1293,11 @@ public:
*/
double* theta_all;
+ /** total scaling buffer */
+ double *buffer_scale_all;
+
+ /** buffer used when computing partial_lh, to avoid repeated mem allocation */
+ double *buffer_partial_lh;
/**
* frequencies of alignment patterns, used as buffer for likelihood computation
@@ -1131,6 +1311,9 @@ public:
*/
double *ptn_invar;
+ vector<TraversalInfo> traversal_info;
+
+
/****************************************************************************
Nearest Neighbor Interchange by maximum likelihood
****************************************************************************/
@@ -1146,7 +1329,7 @@ public:
search by a nearest neigbor interchange
@return the likelihood of the tree
*/
- double optimizeNNI();
+ //double optimizeNNI();
/**
search by a nearest neigbor interchange
@@ -1180,12 +1363,20 @@ public:
virtual void doNNI(NNIMove &move, bool clearLH = true);
/**
+ * [DEPRECATED]
* Randomly choose perform an NNI, out of the two defined by branch node1-node2.
* This function also clear the corresponding partial likelihood vectors
- * @param node1 one node of the branch
- * @param node2 one node of the branch
+ *
+ * @param branch on which a random NNI is done
*/
- void doOneRandomNNI(Node *node1, Node *node2);
+ void doOneRandomNNI(Branch branch);
+
+ /**
+ * Get a random NNI from an internal branch, checking for consistency with constraintTree
+ * @param branch the internal branch
+ * @return an NNIMove, node1 and node2 are set to NULL if not consistent with constraintTree
+ */
+ NNIMove getRandomNNI(Branch& branch);
/**
@@ -1312,6 +1503,12 @@ public:
*/
void generateRandomTree(TreeGenType tree_type);
+
+ /**
+ test the best number of threads
+ */
+ int testNumThreads();
+
/****************************************************************************
Subtree Pruning and Regrafting by maximum likelihood
NOTE: NOT DONE YET
@@ -1464,13 +1661,20 @@ public:
virtual void changeLikelihoodKernel(LikelihoodKernel lk);
- virtual void setLikelihoodKernel(LikelihoodKernel lk);
+ virtual void setLikelihoodKernel(LikelihoodKernel lk, int num_threads);
#if defined(BINARY32) || defined(__NOAVX__)
virtual void setLikelihoodKernelAVX() {}
+ virtual void setLikelihoodKernelFMA() {}
#else
virtual void setLikelihoodKernelAVX();
+ virtual void setLikelihoodKernelFMA();
+#ifdef INCLUDE_AVX512
+ virtual void setLikelihoodKernelAVX512();
#endif
+#endif
+ virtual void setLikelihoodKernelSSE();
+
/****************************************************************************
Public variables
****************************************************************************/
@@ -1539,11 +1743,6 @@ public:
double minStateFreq;
- /*
- * Store the all the parameters for the program
- */
- Params* params;
-
/** sequence names that were removed */
StrVector removed_seqs;
@@ -1582,7 +1781,7 @@ public:
* compute the memory size required for storing partial likelihood vectors
* @return memory size required in bytes
*/
- virtual uint64_t getMemoryRequired(size_t ncategory = 1);
+ virtual uint64_t getMemoryRequired(size_t ncategory = 1, bool full_mem = false);
void getMemoryRequired(uint64_t &partial_lh_entries, uint64_t &scale_num_entries, uint64_t &partial_pars_entries);
@@ -1601,10 +1800,7 @@ public:
void approxAllBranches(PhyloNode *node = NULL, PhyloNode *dad = NULL);
- /** set pointer of params variable */
- virtual void setParams(Params* params);
-
- double getCurScore() {
+ double getCurScore() {
return curScore;
}
@@ -1622,7 +1818,6 @@ public:
curScore = -DBL_MAX;
if (model)
initializeAllPartialLh();
-// clearAllPartialLH();
}
void computeSeqIdentityAlongTree(Split &resp, Node *node = NULL, Node *dad = NULL);
@@ -1630,6 +1825,7 @@ public:
double *getPatternLhCatPointer() { return _pattern_lh_cat; }
+
protected:
/**
@@ -1638,11 +1834,6 @@ protected:
pllInstance *pllInst;
/**
- * Whether the partial likelihood vectors have been computed for PLL
- */
-// bool lhComputed;
-
- /**
* PLL data structure for alignment
*/
pllAlignmentData *pllAlignment;
@@ -1663,7 +1854,7 @@ protected:
bool subTreeDistComputed;
/**
- * Map data structure to store distance between subtree.
+ * Map data structure to store the distance between subtrees.
* The key is a string which is constructed by concatenating IDs of
* the 2 nodes, e.g. 15-16
*/
@@ -1691,10 +1882,9 @@ protected:
double *_pattern_lh;
/**
- internal pattern likelihoods per category, always stored after calling computeLikelihood()
- or related functions. Note that scaling factors are not incorporated here.
- If you want to get real pattern likelihoods, please use computePatternLikelihood()
- */
+ internal pattern likelihoods per category,
+ only stored after calling non-SSE computeLikelihood, for efficiency purposes
+ */
double *_pattern_lh_cat;
/**
@@ -1760,6 +1950,16 @@ protected:
*/
UINT *central_partial_pars;
+ void reorientPartialLh(PhyloNeighbor* dad_branch, Node *dad);
+
+ //----------- memory saving technique ------//
+
+ /** maximum number of partial_lh_slots */
+ int64_t max_lh_slots;
+
+ /** mapping from ... to memory slots (comment truncated in the original; TODO confirm what is mapped) */
+ MemSlotVector mem_slots;
+
/**
TRUE to discard saturated for Meyer & von Haeseler (2003) model
*/
@@ -1812,7 +2012,7 @@ protected:
* Current score of the tree;
*/
double curScore;
-
+
/** current best parsimony score */
UINT best_pars_score;
diff --git a/phylotreeavx.cpp b/phylotreeavx.cpp
index b4d0245..ff39ddf 100644
--- a/phylotreeavx.cpp
+++ b/phylotreeavx.cpp
@@ -6,11 +6,17 @@
*/
-#include "phylokernel.h"
-#include "phylokernelmixture.h"
-#include "phylokernelmixrate.h"
-#include "phylokernelsitemodel.h"
+#include "vectorclass/vectormath_exp.h"
#include "vectorclass/vectorclass.h"
+#include "phylokernel.h"
+//#include "phylokernelsafe.h"
+//#include "phylokernelmixture.h"
+//#include "phylokernelmixrate.h"
+//#include "phylokernelsitemodel.h"
+
+#include "phylokernelnew.h"
+#define KERNEL_FIX_STATES
+#include "phylokernelnew.h"
#ifndef __AVX__
#error "You must compile this file with AVX enabled!"
@@ -23,112 +29,139 @@ void PhyloTree::setParsimonyKernelAVX() {
void PhyloTree::setDotProductAVX() {
#ifdef BOOT_VAL_FLOAT
- dotProduct = &PhyloTree::dotProductSIMD<float, Vec8f, 8>;
+ dotProduct = &PhyloTree::dotProductSIMD<float, Vec8f>;
#else
- dotProduct = &PhyloTree::dotProductSIMD<double, Vec4d, 4>;
+ dotProduct = &PhyloTree::dotProductSIMD<double, Vec4d>;
#endif
-
- dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec4d, 4>;
+ dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec4d>;
}
void PhyloTree::setLikelihoodKernelAVX() {
+ vector_size = 4;
setParsimonyKernelAVX();
+
+ if (model_factory && model_factory->model->isSiteSpecificModel() && (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling)) {
+ switch (aln->num_states) {
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec4d, SAFE_LH, 4, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec4d, SAFE_LH, 4, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec4d, SAFE_LH, 4, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 4, false, true>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec4d, SAFE_LH, 20, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec4d, SAFE_LH, 20, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec4d, SAFE_LH, 20, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 20, false, true>;
+ break;
+ default:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD <Vec4d, SAFE_LH, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD <Vec4d, SAFE_LH, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD <Vec4d, SAFE_LH, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, SAFE_LH, false, true>;
+ break;
+ }
+ return;
+ }
+
if (model_factory && model_factory->model->isSiteSpecificModel()) {
switch (aln->num_states) {
case 4:
- computeLikelihoodBranchPointer = &PhyloTree::computeSitemodelLikelihoodBranchEigenSIMD<Vec4d, 4, 4>;
- computeLikelihoodDervPointer = &PhyloTree::computeSitemodelLikelihoodDervEigenSIMD<Vec4d, 4, 4>;
- computePartialLikelihoodPointer = &PhyloTree::computeSitemodelPartialLikelihoodEigenSIMD<Vec4d, 4, 4>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeSitemodelLikelihoodFromBufferEigenSIMD<Vec4d, 4, 4>;
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec4d, NORM_LH, 4, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec4d, NORM_LH, 4, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec4d, NORM_LH, 4, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 4, false, true>;
break;
case 20:
- computeLikelihoodBranchPointer = &PhyloTree::computeSitemodelLikelihoodBranchEigenSIMD<Vec4d, 4, 20>;
- computeLikelihoodDervPointer = &PhyloTree::computeSitemodelLikelihoodDervEigenSIMD<Vec4d, 4, 20>;
- computePartialLikelihoodPointer = &PhyloTree::computeSitemodelPartialLikelihoodEigenSIMD<Vec4d, 4, 20>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeSitemodelLikelihoodFromBufferEigenSIMD<Vec4d, 4, 20>;
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD <Vec4d, NORM_LH, 20, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD <Vec4d, NORM_LH, 20, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD <Vec4d, NORM_LH, 20, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 20, false, true>;
break;
default:
- computeLikelihoodBranchPointer = &PhyloTree::computeSitemodelLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeSitemodelLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computeSitemodelPartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeSitemodelLikelihoodFromBufferEigen;
- break;
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD <Vec4d, NORM_LH, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD <Vec4d, NORM_LH, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD <Vec4d, NORM_LH, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, NORM_LH, false, true>;
+ break;
}
return;
}
+ if (params->lk_safe_scaling || leafNum >= params->numseq_safe_scaling) {
switch(aln->num_states) {
+ /*
+ case 2:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, SAFE_LH, 2>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, SAFE_LH, 2>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, SAFE_LH, 2>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 2>;
+ break;
+ */
+ case 4:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, SAFE_LH, 4>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, SAFE_LH, 4>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, SAFE_LH, 4>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 4>;
+ break;
+ case 20:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, SAFE_LH, 20>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, SAFE_LH, 20>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, SAFE_LH, 20>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 20>;
+ break;
+ /*
+ case 64:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, SAFE_LH, 64>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, SAFE_LH, 64>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, SAFE_LH, 64>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, SAFE_LH, 64>;
+ break;
+ */
+ default:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec4d, SAFE_LH>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec4d, SAFE_LH>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec4d, SAFE_LH>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, SAFE_LH>;
+ break;
+ }
+ return;
+ }
+
+ switch(aln->num_states) {
+ /*
+ case 2:
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, NORM_LH, 2>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, NORM_LH, 2>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, NORM_LH, 2>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 2>;
+ break;
+ */
case 4:
- if (model_factory && model_factory->model->isMixture()) {
- if (model_factory->fused_mix_rate) {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixrateLikelihoodBranchEigenSIMD<Vec4d, 4, 4>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixrateLikelihoodDervEigenSIMD<Vec4d, 4, 4>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixratePartialLikelihoodEigenSIMD<Vec4d, 4, 4>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixrateLikelihoodFromBufferEigenSIMD<Vec4d, 4, 4>;
-// cout << "Fast-AVX-semi-mixture" << endl;
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixtureLikelihoodBranchEigenSIMD<Vec4d, 4, 4>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixtureLikelihoodDervEigenSIMD<Vec4d, 4, 4>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixturePartialLikelihoodEigenSIMD<Vec4d, 4, 4>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixtureLikelihoodFromBufferEigenSIMD<Vec4d, 4, 4>;
-// cout << "Fast-AVX-mixture" << endl;
- }
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigenSIMD<Vec4d, 4, 4>;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigenSIMD<Vec4d, 4, 4>;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigenSIMD<Vec4d, 4, 4>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferEigenSIMD<Vec4d, 4, 4>;
-// cout << "Fast-AVX" << endl;
- }
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, NORM_LH, 4>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, NORM_LH, 4>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, NORM_LH, 4>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 4>;
break;
case 20:
- if (model_factory && model_factory->model->isMixture()) {
- if (model_factory->fused_mix_rate) {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixrateLikelihoodBranchEigenSIMD<Vec4d, 4, 20>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixrateLikelihoodDervEigenSIMD<Vec4d, 4, 20>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixratePartialLikelihoodEigenSIMD<Vec4d, 4, 20>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixrateLikelihoodFromBufferEigenSIMD<Vec4d, 4, 20>;
-// cout << "Fast-AVX-semi-mixture" << endl;
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixtureLikelihoodBranchEigenSIMD<Vec4d, 4, 20>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixtureLikelihoodDervEigenSIMD<Vec4d, 4, 20>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixturePartialLikelihoodEigenSIMD<Vec4d, 4, 20>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixtureLikelihoodFromBufferEigenSIMD<Vec4d, 4, 20>;
-// cout << "Fast-AVX-mixture" << endl;
- }
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigenSIMD<Vec4d, 4, 20>;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigenSIMD<Vec4d, 4, 20>;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigenSIMD<Vec4d, 4, 20>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferEigenSIMD<Vec4d, 4, 20>;
-// cout << "Fast-AVX" << endl;
- }
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, NORM_LH, 20>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, NORM_LH, 20>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, NORM_LH, 20>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 20>;
break;
+ /*
case 64:
- if (model_factory && model_factory->model->isMixture()) {
- if (model_factory->fused_mix_rate) {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixrateLikelihoodBranchEigenSIMD<Vec4d, 4, 64>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixrateLikelihoodDervEigenSIMD<Vec4d, 4, 64>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixratePartialLikelihoodEigenSIMD<Vec4d, 4, 64>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixrateLikelihoodFromBufferEigenSIMD<Vec4d, 4, 64>;
-// cout << "Fast-AVX-semi-mixture" << endl;
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixtureLikelihoodBranchEigenSIMD<Vec4d, 4, 64>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixtureLikelihoodDervEigenSIMD<Vec4d, 4, 64>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixturePartialLikelihoodEigenSIMD<Vec4d, 4, 64>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixtureLikelihoodFromBufferEigenSIMD<Vec4d, 4, 64>;
-// cout << "Fast-AVX-mixture" << endl;
- }
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigenSIMD<Vec4d, 4, 64>;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigenSIMD<Vec4d, 4, 64>;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigenSIMD<Vec4d, 4, 64>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferEigenSIMD<Vec4d, 4, 64>;
-// cout << "Fast-AVX" << endl;
- }
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSIMD<Vec4d, NORM_LH, 64>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSIMD<Vec4d, NORM_LH, 64>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSIMD<Vec4d, NORM_LH, 64>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 64>;
break;
+ */
default:
- assert(0);
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec4d, NORM_LH>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec4d, NORM_LH>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec4d, NORM_LH>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, NORM_LH>;
break;
}
}
diff --git a/phylotreepars.cpp b/phylotreepars.cpp
index 57b3ae4..0cea097 100644
--- a/phylotreepars.cpp
+++ b/phylotreepars.cpp
@@ -8,9 +8,23 @@
*/
#include "phylotree.h"
-#include "vectorclass/vectorclass.h"
+//#include "vectorclass/vectorclass.h"
#include "phylosupertree.h"
+#if defined (__GNUC__) || defined(__clang__)
+#define vml_popcnt __builtin_popcount
+#else
+// taken from vectorclass library
+static inline uint32_t vml_popcnt (uint32_t a) {
+ // popcnt instruction not available
+ uint32_t b = a - ((a >> 1) & 0x55555555);
+ uint32_t c = (b & 0x33333333) + ((b >> 2) & 0x33333333);
+ uint32_t d = (c + (c >> 4)) & 0x0F0F0F0F;
+ uint32_t e = d * 0x01010101;
+ return e >> 24;
+}
+#endif
+
/***********************************************************/
/****** optimized version of parsimony kernel **************/
/***********************************************************/
@@ -19,132 +33,128 @@ void PhyloTree::computePartialParsimonyFast(PhyloNeighbor *dad_branch, PhyloNode
if (dad_branch->partial_lh_computed & 2)
return;
Node *node = dad_branch->node;
- int nstates = aln->num_states;
- int site;
+ int nstates = aln->getMaxNumStates();
+ int site = 0;
dad_branch->partial_lh_computed |= 2;
+ vector<Alignment*> *partitions = NULL;
+ if (aln->isSuperAlignment())
+ partitions = &((SuperAlignment*)aln)->partitions;
+ else {
+ partitions = new vector<Alignment*>;
+ partitions->push_back(aln);
+ }
+
if (node->isLeaf() && dad) {
// external node
- if (aln->ordered_pattern.empty())
- aln->orderPatternByNumChars();
int leafid = node->id;
- int pars_size = getBitsBlockSize();
- memset(dad_branch->partial_pars, 0, pars_size*sizeof(UINT));
-// int ptn;
-// int nptn = aln->size();
- int ambi_aa[] = {2, 3, 5, 6, 9, 10}; // {4+8, 32+64, 512+1024};
+ memset(dad_branch->partial_pars, 0, getBitsBlockSize()*sizeof(UINT));
int max_sites = ((aln->num_informative_sites+UINT_BITS-1)/UINT_BITS)*UINT_BITS;
- Alignment::iterator pat;
- switch (aln->seq_type) {
- case SEQ_DNA:
-// nptn = aln->ordered_pattern.size();
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
-// Pattern *pat = &aln->ordered_pattern[ptn];
-// if (!pat->is_informative)
-// continue;
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < 4) {
- for (int j = 0; j < freq; j++, site++) {
- dad_branch->partial_pars[(site/UINT_BITS)*4+state] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*4);
- UINT bit1 = (1 << (site%UINT_BITS));
- p[0] |= bit1;
- p[1] |= bit1;
- p[2] |= bit1;
- p[3] |= bit1;
- }
- } else {
- state -= 3;
- for (int j = 0; j < freq; j++, site++) {
- UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*4);
- UINT bit1 = (1 << (site%UINT_BITS));
- for (int i = 0; i < 4; i++)
- if (state & (1<<i))
- p[i] |= bit1;
+ int ambi_aa[] = {2, 3, 5, 6, 9, 10}; // {4+8, 32+64, 512+1024};
+ if (aln->ordered_pattern.empty())
+ aln->orderPatternByNumChars();
+ int start_pos = 0;
+ for (vector<Alignment*>::iterator alnit = partitions->begin(); alnit != partitions->end(); alnit++) {
+ int end_pos = start_pos + (*alnit)->ordered_pattern.size();
+ switch ((*alnit)->seq_type) {
+ case SEQ_DNA:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < 4) {
+ for (int j = 0; j < freq; j++, site++) {
+ dad_branch->partial_pars[(site/UINT_BITS)*nstates+state] |= (1 << (site % UINT_BITS));
+ }
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*nstates);
+ UINT bit1 = (1 << (site%UINT_BITS));
+ p[0] |= bit1;
+ p[1] |= bit1;
+ p[2] |= bit1;
+ p[3] |= bit1;
+ }
+ } else {
+ state -= 3;
+ assert(state < 15);
+ for (int j = 0; j < freq; j++, site++) {
+ UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*nstates);
+ UINT bit1 = (1 << (site%UINT_BITS));
+ for (int i = 0; i < 4; i++)
+ if (state & (1<<i))
+ p[i] |= bit1;
+ }
}
}
- }
- assert(site == aln->num_informative_sites);
- // add dummy states
- if (site < max_sites)
- dad_branch->partial_pars[(site/UINT_BITS)*4] |= ~((1<<(site%UINT_BITS)) - 1);
-// for (; site < max_sites; site++) {
-// dad_branch->partial_pars[(site/UINT_BITS)*4] |= (1 << (site%UINT_BITS));
-// }
- break;
- case SEQ_PROTEIN:
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
-// if (!aln->at(ptn).is_informative)
-// continue;
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < 20) {
- for (int j = 0; j < freq; j++, site++) {
- dad_branch->partial_pars[(site/UINT_BITS)*20+state] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*20);
- UINT bit1 = (1 << (site%UINT_BITS));
- for (int i = 0; i < 20; i++)
- p[i] |= bit1;
- }
- } else {
- assert(state < 23);
- state = (state-20)*2;
- for (int j = 0; j < freq; j++, site++) {
- UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*20);
- UINT bit1 = (1 << (site%UINT_BITS));
- p[ambi_aa[state]] |= bit1;
- p[ambi_aa[state+1]] |= bit1;
+ //assert(site == aln->num_informative_sites);
+ // add dummy states
+ //if (site < max_sites)
+ // dad_branch->partial_pars[(site/UINT_BITS)*4] |= ~((1<<(site%UINT_BITS)) - 1);
+ break;
+ case SEQ_PROTEIN:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < 20) {
+ for (int j = 0; j < freq; j++, site++) {
+ dad_branch->partial_pars[(site/UINT_BITS)*nstates+state] |= (1 << (site % UINT_BITS));
+ }
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*nstates);
+ UINT bit1 = (1 << (site%UINT_BITS));
+ for (int i = 0; i < 20; i++)
+ p[i] |= bit1;
+ }
+ } else {
+ assert(state < 23);
+ state = (state-20)*2;
+ for (int j = 0; j < freq; j++, site++) {
+ UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*nstates);
+ UINT bit1 = (1 << (site%UINT_BITS));
+ p[ambi_aa[state]] |= bit1;
+ p[ambi_aa[state+1]] |= bit1;
+ }
}
}
- }
- assert(site == aln->num_informative_sites);
- // add dummy states
- if (site < max_sites)
- dad_branch->partial_pars[(site/UINT_BITS)*20] |= ~((1<<(site%UINT_BITS)) - 1);
-// for (; site < max_sites; site++) {
-// dad_branch->partial_pars[(site/UINT_BITS)*20] |= (1 << (site%UINT_BITS));
-// }
- break;
- default:
-// for (ptn = 0, site = 0; ptn < nptn; ptn++) {
- for (pat = aln->ordered_pattern.begin(), site = 0; pat != aln->ordered_pattern.end(); pat++) {
-// if (!aln->at(ptn).is_informative)
-// continue;
- int state = pat->at(leafid);
- int freq = pat->frequency;
- if (state < nstates) {
- for (int j = 0; j < freq; j++, site++) {
- dad_branch->partial_pars[(site/UINT_BITS)*nstates+state] |= (1 << (site % UINT_BITS));
- }
- } else if (state == aln->STATE_UNKNOWN) {
- for (int j = 0; j < freq; j++, site++) {
- UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*nstates);
- UINT bit1 = (1 << (site%UINT_BITS));
- for (int i = 0; i < nstates; i++)
- p[i] |= bit1;
+ //assert(site == aln->num_informative_sites);
+ // add dummy states
+ //if (site < max_sites)
+ // dad_branch->partial_pars[(site/UINT_BITS)*20] |= ~((1<<(site%UINT_BITS)) - 1);
+ break;
+ default:
+ for (int patid = start_pos; patid != end_pos; patid++) {
+ Alignment::iterator pat = aln->ordered_pattern.begin()+ patid;
+ int state = pat->at(leafid);
+ int freq = pat->frequency;
+ if (state < (*alnit)->num_states) {
+ for (int j = 0; j < freq; j++, site++) {
+ dad_branch->partial_pars[(site/UINT_BITS)*nstates+state] |= (1 << (site % UINT_BITS));
+ }
+ } else if (state == (*alnit)->STATE_UNKNOWN) {
+ for (int j = 0; j < freq; j++, site++) {
+ UINT *p = dad_branch->partial_pars+((site/UINT_BITS)*nstates);
+ UINT bit1 = (1 << (site%UINT_BITS));
+ for (int i = 0; i < (*alnit)->num_states; i++)
+ p[i] |= bit1;
+ }
+ } else {
+ assert(0);
}
- } else {
- assert(0);
}
- }
- assert(site == aln->num_informative_sites);
- // add dummy states
- if (site < max_sites)
- dad_branch->partial_pars[(site/UINT_BITS)*nstates] |= ~((1<<(site%UINT_BITS)) - 1);
-// for (; site < max_sites; site++) {
-// dad_branch->partial_pars[(site/UINT_BITS)*nstates] |= (1 << (site%UINT_BITS));
-// }
- break;
- }
+ break;
+ } // end of switch
+
+ start_pos = end_pos;
+ } // FOR LOOP
+ assert(site == aln->num_informative_sites);
+ // add dummy states
+ if (site < max_sites)
+ dad_branch->partial_pars[(site/UINT_BITS)*nstates] |= ~((1<<(site%UINT_BITS)) - 1);
} else {
// internal node
assert(node->degree() == 3); // it works only for strictly bifurcating tree
@@ -168,7 +178,7 @@ void PhyloTree::computePartialParsimonyFast(PhyloNeighbor *dad_branch, PhyloNode
#endif
for (site = 0; site<nsites; site++) {
UINT w;
- size_t offset = 4*site;
+ size_t offset = nstates*site;
UINT *x = left->partial_pars + offset;
UINT *y = right->partial_pars + offset;
UINT *z = dad_branch->partial_pars + offset;
@@ -178,7 +188,7 @@ void PhyloTree::computePartialParsimonyFast(PhyloNeighbor *dad_branch, PhyloNode
z[3] = x[3] & y[3];
w = z[0] | z[1] | z[2] | z[3];
w = ~w;
- score += vml_popcnt(w);
+ score += __builtin_popcount(w);
z[0] |= w & (x[0] | y[0]);
z[1] |= w & (x[1] | y[1]);
z[2] |= w & (x[2] | y[2]);
@@ -212,6 +222,9 @@ void PhyloTree::computePartialParsimonyFast(PhyloNeighbor *dad_branch, PhyloNode
dad_branch->partial_pars[nstates*nsites] = score + left->partial_pars[nstates*nsites] + right->partial_pars[nstates*nsites];
// dad_branch->partial_pars[0] = score;
}
+
+ if (!aln->isSuperAlignment())
+ delete partitions;
}
@@ -227,7 +240,7 @@ int PhyloTree::computeParsimonyBranchFast(PhyloNeighbor *dad_branch, PhyloNode *
computePartialParsimonyFast(node_branch, node);
int site;
int nsites = (aln->num_informative_sites + UINT_BITS-1) / UINT_BITS;
- int nstates = aln->num_states;
+ int nstates = aln->getMaxNumStates();
int scoreid = ((aln->num_informative_sites+UINT_BITS-1)/UINT_BITS)*nstates;
UINT sum_end_node = (dad_branch->partial_pars[scoreid] + node_branch->partial_pars[scoreid]);
@@ -247,10 +260,10 @@ int PhyloTree::computeParsimonyBranchFast(PhyloNeighbor *dad_branch, PhyloNode *
UINT w = (x[0] & y[0]) | (x[1] & y[1]) | (x[2] & y[2]) | (x[3] & y[3]);
w = ~w;
score += vml_popcnt(w);
- #ifndef _OPENMP
- if (score >= lower_bound)
- break;
- #endif
+// #ifndef _OPENMP
+// if (score >= lower_bound)
+// break;
+// #endif
}
break;
default:
@@ -268,10 +281,10 @@ int PhyloTree::computeParsimonyBranchFast(PhyloNeighbor *dad_branch, PhyloNode *
}
w = ~w;
score += vml_popcnt(w);
- #ifndef _OPENMP
- if (score >= lower_bound)
- break;
- #endif
+// #ifndef _OPENMP
+// if (score >= lower_bound)
+// break;
+// #endif
}
break;
}
@@ -312,40 +325,78 @@ int PhyloTree::computeParsimonyTree(const char *out_prefix, Alignment *alignment
if (size < 3)
outError(ERR_FEW_TAXA);
- freeNode();
+ IntVector taxon_order;
+ taxon_order.reserve(size);
+
+ if (constraintTree.empty()) {
+ freeNode();
+ taxon_order.resize(size);
+ for (int i = 0; i < size; i++)
+ taxon_order[i] = i;
+ // randomize the addition order
+ my_random_shuffle(taxon_order.begin(), taxon_order.end());
+
+ root = newNode(size);
+
+ // create initial tree with 3 taxa
+ for (leafNum = 0; leafNum < 3; leafNum++) {
+ if (verbose_mode >= VB_MAX)
+ cout << "Add " << aln->getSeqName(taxon_order[leafNum]) << " to the tree" << endl;
+ Node *new_taxon = newNode(taxon_order[leafNum], aln->getSeqName(taxon_order[leafNum]).c_str());
+ root->addNeighbor(new_taxon, -1.0);
+ new_taxon->addNeighbor(root, -1.0);
+ }
+ } else {
+ // first copy the constraint tree
+ MTree::copyTree(&constraintTree);
+
+ // convert to birfucating tree if needed
+ extractBifurcatingSubTree();
+ assert(isBifurcating());
+
+ // assign proper taxon IDs
+ NodeVector nodes;
+ NodeVector::iterator it;
+ getTaxa(nodes);
+ leafNum = nodes.size();
+ vector<int> pushed;
+ pushed.resize(size, 0);
+ for (it = nodes.begin(); it != nodes.end(); it++) {
+ (*it)->id = aln->getSeqID((*it)->name);
+ assert((*it)->id >= 0);
+ taxon_order.push_back((*it)->id);
+ pushed[(*it)->id] = 1;
+ }
- root = newNode(size);
+ // start with constraint tree
+ int i;
+ for (i = 0; i < size; i++)
+ if (!pushed[i] && constraintTree.hasTaxon(aln->getSeqName(i))) {
+ taxon_order.push_back(i);
+ pushed[i] = 1;
+ }
+ assert(taxon_order.size() == constraintTree.leafNum);
+ for (int i = 0; i < size; i++)
+ if (!pushed[i]) {
+ taxon_order.push_back(i);
+ }
+ // randomize the addition order
+ my_random_shuffle(taxon_order.begin()+leafNum, taxon_order.begin()+constraintTree.leafNum);
+ my_random_shuffle(taxon_order.begin()+constraintTree.leafNum, taxon_order.end());
- IntVector taxon_order;
- taxon_order.resize(size);
- for (int i = 0; i < size; i++)
- taxon_order[i] = i;
- // randomize the addition order
- my_random_shuffle(taxon_order.begin(), taxon_order.end());
-
- // create initial tree with 3 taxa
- for (leafNum = 0; leafNum < 3; leafNum++) {
- if (verbose_mode >= VB_MAX)
- cout << "Add " << aln->getSeqName(taxon_order[leafNum]) << " to the tree" << endl;
- Node *new_taxon = newNode(taxon_order[leafNum], aln->getSeqName(taxon_order[leafNum]).c_str());
- root->addNeighbor(new_taxon, -1.0);
- new_taxon->addNeighbor(root, -1.0);
}
root = findNodeID(taxon_order[0]);
initializeAllPartialPars();
- size_t index = 6;
+ size_t index = (2*leafNum-3)*2;
size_t pars_block_size = getBitsBlockSize();
- if (isSuperTree())
- ((PhyloSuperTree*)this)->mapTrees();
-
UINT *tmp_partial_pars;
tmp_partial_pars = newBitsBlock();
- // stepwise adding the next taxon
- for (leafNum = 3; leafNum < size; leafNum++) {
+ // stepwise adding the next taxon for the remaining taxa
+ for (; leafNum < size; leafNum++) {
if (verbose_mode >= VB_MAX)
- cout << "Add " << aln->getSeqName(taxon_order[leafNum]) << " to the tree";
+ cout << "Adding " << aln->getSeqName(taxon_order[leafNum]) << " to the tree..." << endl;
NodeVector nodes1, nodes2;
getBranches(nodes1, nodes2);
PhyloNode *target_node = NULL;
@@ -367,6 +418,7 @@ int PhyloTree::computeParsimonyTree(const char *out_prefix, Alignment *alignment
added_node->addNeighbor((Node*) 2, -1.0);
for (int nodeid = 0; nodeid < nodes1.size(); nodeid++) {
+
int score = addTaxonMPFast(new_taxon, added_node, nodes1[nodeid], nodes2[nodeid]);
if (score < best_pars_score) {
best_pars_score = score;
@@ -422,6 +474,9 @@ int PhyloTree::computeParsimonyTree(const char *out_prefix, Alignment *alignment
file_name += ".parstree";
printTree(file_name.c_str(), WT_NEWLINE);
}
+// if (isSuperTree())
+// ((PhyloSuperTree*)this)->mapTrees();
+
return best_pars_score;
}
@@ -445,6 +500,11 @@ int PhyloTree::addTaxonMPFast(Node *added_taxon, Node* added_node, Node* node, N
// compute the likelihood
((PhyloNeighbor*) added_taxon->findNeighbor(added_node))->clearPartialLh();
int score = computeParsimonyBranch((PhyloNeighbor*) added_node->neighbors[0], (PhyloNode*) added_node);
+ if (leafNum < constraintTree.leafNum) {
+ // still during addition of taxa from constraint tree
+ if (!constraintTree.isCompatible(this))
+ score = INT_MAX;
+ }
// remove the added node
node->updateNeighbor(added_node, dad, len);
dad->updateNeighbor(added_node, node, len);
diff --git a/phylotreesse.cpp b/phylotreesse.cpp
index 7782612..b0f9c01 100644
--- a/phylotreesse.cpp
+++ b/phylotreesse.cpp
@@ -18,14 +18,23 @@
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include "phylotree.h"
-#include "phylokernel.h"
-#include "phylokernelmixture.h"
-#include "phylokernelmixrate.h"
-#include "phylokernelsitemodel.h"
+#include "vectorclass/instrset.h"
+
+#if INSTRSET < 2
+#include "phylokernelnew.h"
+#define KERNEL_FIX_STATES
+#include "phylokernelnew.h"
+#include "vectorf64.h"
+#endif
+
+//#include "phylokernel.h"
+//#include "phylokernelmixture.h"
+//#include "phylokernelmixrate.h"
+//#include "phylokernelsitemodel.h"
+
#include "model/modelgtr.h"
#include "model/modelset.h"
-
/* BQM: to ignore all-gapp subtree at an alignment site */
//#define IGNORE_GAP_LH
@@ -33,297 +42,118 @@
void PhyloTree::setParsimonyKernel(LikelihoodKernel lk) {
// set parsimony kernel
- switch (lk) {
-// case LK_SSE:
-// computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchNaive;
-// computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyNaive;
-// break;
- case LK_EIGEN:
+ if (lk == LK_EIGEN || instruction_set < 2) {
computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchFast;
computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyFast;
- break;
- case LK_EIGEN_SSE:
- if (instruction_set >= 7)
- setParsimonyKernelAVX();
- else {
- computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchFastSIMD<Vec4ui>;
- computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyFastSIMD<Vec4ui>;
- }
- break;
-// default:
-// computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchNaive;
-// computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyNaive;
-// break;
+ return;
+ }
+ if (instruction_set >= 7) {
+ setParsimonyKernelAVX();
+ return;
}
+ if (instruction_set >= 2) {
+ setParsimonyKernelSSE();
+ return;
+ }
+ assert(0);
}
-void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
+void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk, int num_threads) {
+
+ sse = lk;
+ vector_size = 1;
+ this->num_threads = num_threads;
+
+ //--- parsimony kernel ---
setParsimonyKernel(lk);
- if (instruction_set >= 7) {
+ bool has_fma = (hasFMA3()) && (instruction_set >= 7) && (Params::getInstance().lk_no_avx != 2);
+ //--- dot-product kernel ---
+ if (has_fma) {
+ setDotProductFMA();
+ } else if (instruction_set >= 7) {
setDotProductAVX();
+ } else if (instruction_set >= 2) {
+ setDotProductSSE();
} else {
+
+#if INSTRSET < 2
#ifdef BOOT_VAL_FLOAT
- dotProduct = &PhyloTree::dotProductSIMD<float, Vec4f, 4>;
+ // TODO naive dot-product for float
+ assert(0 && "Not supported, contact developer");
+// dotProduct = &PhyloTree::dotProductSIMD<float, Vec1f>;
#else
- dotProduct = &PhyloTree::dotProductSIMD<double, Vec2d, 2>;
+ dotProduct = &PhyloTree::dotProductSIMD<double, Vec1d>;
+#endif
+ dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec1d>;
#endif
- dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec2d, 2>;
}
- sse = lk;
+
+ //--- naive likelihood kernel, no alignment specified yet ---
if (!aln) {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigen;
+#if INSTRSET < 2
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH>;
+ sse = LK_EIGEN;
+#else
+ computeLikelihoodBranchPointer = NULL;
+ computeLikelihoodDervPointer = NULL;
+ computePartialLikelihoodPointer = NULL;
computeLikelihoodFromBufferPointer = NULL;
sse = LK_EIGEN;
+#endif
return;
}
-
- if (model_factory && model_factory->model->isSiteSpecificModel()) {
- if (sse == LK_EIGEN) {
- computeLikelihoodBranchPointer = &PhyloTree::computeSitemodelLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeSitemodelLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computeSitemodelPartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeSitemodelLikelihoodFromBufferEigen;
- return;
- }
- // LK_EIGEN_SSE
- if (instruction_set >= 7) {
+
+ //--- SIMD kernel ---
+ if (sse == LK_EIGEN_SSE && instruction_set >= 2) {
+#ifdef INCLUDE_AVX512
+ if (instruction_set >= 9) {
+ setLikelihoodKernelAVX512();
+ return;
+ }
+#endif
+ if (has_fma) {
+ // CPU supports AVX and FMA
+ setLikelihoodKernelFMA();
+ } else if (instruction_set >= 7) {
// CPU supports AVX
setLikelihoodKernelAVX();
- return;
- }
- switch (aln->num_states) {
- case 4:
- computeLikelihoodBranchPointer = &PhyloTree::computeSitemodelLikelihoodBranchEigenSIMD<Vec2d, 2, 4>;
- computeLikelihoodDervPointer = &PhyloTree::computeSitemodelLikelihoodDervEigenSIMD<Vec2d, 2, 4>;
- computePartialLikelihoodPointer = &PhyloTree::computeSitemodelPartialLikelihoodEigenSIMD<Vec2d, 2, 4>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeSitemodelLikelihoodFromBufferEigenSIMD<Vec2d, 2, 4>;
- break;
- case 20:
- computeLikelihoodBranchPointer = &PhyloTree::computeSitemodelLikelihoodBranchEigenSIMD<Vec2d, 2, 20>;
- computeLikelihoodDervPointer = &PhyloTree::computeSitemodelLikelihoodDervEigenSIMD<Vec2d, 2, 20>;
- computePartialLikelihoodPointer = &PhyloTree::computeSitemodelPartialLikelihoodEigenSIMD<Vec2d, 2, 20>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeSitemodelLikelihoodFromBufferEigenSIMD<Vec2d, 2, 20>;
- break;
- default:
- computeLikelihoodBranchPointer = &PhyloTree::computeSitemodelLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeSitemodelLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computeSitemodelPartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeSitemodelLikelihoodFromBufferEigen;
- break;
- }
- return;
- }
-
- if (sse == LK_EIGEN) {
- if (model_factory && model_factory->model->isMixture()) {
- if (model_factory->fused_mix_rate) {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixrateLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeMixrateLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computeMixratePartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = NULL;
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixtureLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeMixtureLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computeMixturePartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = NULL;
- }
} else {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = NULL;
+ // SSE kernel
+ setLikelihoodKernelSSE();
}
return;
}
-// cout << "Likelihood kernel: ";
-
- // set likelihood kernel
- switch(aln->num_states) {
- case 4:
- switch(sse) {
-// case LK_SSE:
-// computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<4>;
-// computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<4>;
-// computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<4>;
-// computeLikelihoodFromBufferPointer = NULL;
-// break;
- case LK_EIGEN_SSE:
- if (instruction_set >= 7) {
- // CPU supports AVX
- setLikelihoodKernelAVX();
- } else {
- // CPU does not support AVX
- if (model_factory && model_factory->model->isMixture()) {
- if (model_factory->fused_mix_rate) {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixrateLikelihoodBranchEigenSIMD<Vec2d, 2, 4>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixrateLikelihoodDervEigenSIMD<Vec2d, 2, 4>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixratePartialLikelihoodEigenSIMD<Vec2d, 2, 4>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixrateLikelihoodFromBufferEigenSIMD<Vec2d, 2, 4>;
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixtureLikelihoodBranchEigenSIMD<Vec2d, 2, 4>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixtureLikelihoodDervEigenSIMD<Vec2d, 2, 4>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixturePartialLikelihoodEigenSIMD<Vec2d, 2, 4>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixtureLikelihoodFromBufferEigenSIMD<Vec2d, 2, 4>;
- }
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigenSIMD<Vec2d, 2, 4>;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigenSIMD<Vec2d, 2, 4>;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigenSIMD<Vec2d, 2, 4>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferEigenSIMD<Vec2d, 2, 4>;
- }
- }
- break;
- default:
- break;
- }
- break;
- case 20:
- switch(sse) {
-// case LK_SSE:
-// computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<20>;
-// computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<20>;
-// computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<20>;
-// computeLikelihoodFromBufferPointer = NULL;
-// break;
- case LK_EIGEN_SSE:
- if (instruction_set >= 7) {
- setLikelihoodKernelAVX();
- } else {
- if (model_factory && model_factory->model->isMixture()) {
- if (model_factory->fused_mix_rate) {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixrateLikelihoodBranchEigenSIMD<Vec2d, 2, 20>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixrateLikelihoodDervEigenSIMD<Vec2d, 2, 20>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixratePartialLikelihoodEigenSIMD<Vec2d, 2, 20>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixrateLikelihoodFromBufferEigenSIMD<Vec2d, 2, 20>;
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixtureLikelihoodBranchEigenSIMD<Vec2d, 2, 20>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixtureLikelihoodDervEigenSIMD<Vec2d, 2, 20>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixturePartialLikelihoodEigenSIMD<Vec2d, 2, 20>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixtureLikelihoodFromBufferEigenSIMD<Vec2d, 2, 20>;
- }
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigenSIMD<Vec2d, 2, 20>;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigenSIMD<Vec2d, 2, 20>;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigenSIMD<Vec2d, 2, 20>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferEigenSIMD<Vec2d, 2, 20>;
- }
- }
- break;
- default:
- break;
- }
- break;
-
- case 64: // CODON
- switch(sse) {
-// case LK_SSE:
-// computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<64>;
-// computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<64>;
-// computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<64>;
-// computeLikelihoodFromBufferPointer = NULL;
-// break;
- case LK_EIGEN_SSE:
- if (instruction_set >= 7) {
- setLikelihoodKernelAVX();
- } else {
- if (model_factory && model_factory->model->isMixture()) {
- if (model_factory->fused_mix_rate) {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixrateLikelihoodBranchEigenSIMD<Vec2d, 2, 64>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixrateLikelihoodDervEigenSIMD<Vec2d, 2, 64>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixratePartialLikelihoodEigenSIMD<Vec2d, 2, 64>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixrateLikelihoodFromBufferEigenSIMD<Vec2d, 2, 64>;
-// cout << "Fast-SSE-semi-mixture" << endl;
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixtureLikelihoodBranchEigenSIMD<Vec2d, 2, 64>;
- computeLikelihoodDervPointer = &PhyloTree::computeMixtureLikelihoodDervEigenSIMD<Vec2d, 2, 64>;
- computePartialLikelihoodPointer = &PhyloTree::computeMixturePartialLikelihoodEigenSIMD<Vec2d, 2, 64>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeMixtureLikelihoodFromBufferEigenSIMD<Vec2d, 2, 64>;
-// cout << "Fast-SSE-mixture" << endl;
- }
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigenSIMD<Vec2d, 2, 64>;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigenSIMD<Vec2d, 2, 64>;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigenSIMD<Vec2d, 2, 64>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferEigenSIMD<Vec2d, 2, 64>;
-// cout << "Fast-SSE" << endl;
- }
- }
- break;
- default:
- break;
- }
- break;
-
-
- case 2:
- switch(sse) {
-// case LK_SSE:
-// computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<2>;
-// computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<2>;
-// computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<2>;
-// computeLikelihoodFromBufferPointer = NULL;
-// break;
- case LK_EIGEN_SSE:
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigenSIMD<Vec2d, 2, 2>;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigenSIMD<Vec2d, 2, 2>;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigenSIMD<Vec2d, 2, 2>;
- computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferEigenSIMD<Vec2d, 2, 2>;
- break;
- default:
- break;
- }
- break;
+#if INSTRSET < 2
+ //--- naive kernel for site-specific model ---
+ if (model_factory && model_factory->model->isSiteSpecificModel()) {
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH, false, true>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH, false, true>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH, false, true>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH, false, true>;
+ return;
+ }
- default:
- if (sse == LK_EIGEN_SSE) {
- if (model_factory && model_factory->model->isMixture()) {
- if (model_factory->fused_mix_rate) {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixrateLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeMixrateLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computeMixratePartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = NULL;
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeMixtureLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeMixtureLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computeMixturePartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = NULL;
- }
- } else {
- computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigen;
- computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigen;
- computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigen;
- computeLikelihoodFromBufferPointer = NULL;
- }
- sse = LK_EIGEN;
-// } else {
-// computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchNaive;
-// computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervNaive;
-// computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodNaive;
-// computeLikelihoodFromBufferPointer = NULL;
-// sse = LK_NORMAL;
- }
- break;
- }
+ //--- naive (no SIMD) kernel ---
+ computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec1d, SAFE_LH>;
+ computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH>;
+ computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH>;
+ computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH>;
+#else
+ computeLikelihoodBranchPointer = NULL;
+ computeLikelihoodDervPointer = NULL;
+ computePartialLikelihoodPointer = NULL;
+ computeLikelihoodFromBufferPointer = NULL;
+#endif
}
void PhyloTree::changeLikelihoodKernel(LikelihoodKernel lk) {
if (sse == lk) return;
-// if ((sse == LK_EIGEN || sse == LK_EIGEN_SSE) && (lk == LK_NORMAL || lk == LK_SSE)) {
-// // need to increase the memory usage when changing from new kernel to old kernel
-// if (params->lh_mem_save == LM_PER_NODE)
-// params->lh_mem_save = LM_ALL_BRANCH;
-// setLikelihoodKernel(lk);
-// deleteAllPartialLh();
-// initializeAllPartialLh();
-// clearAllPartialLH();
-// } else {
- // otherwise simply assign variable sse
- setLikelihoodKernel(lk);
-// }
+ setLikelihoodKernel(lk, num_threads);
}
/*******************************************************
@@ -332,8 +162,8 @@ void PhyloTree::changeLikelihoodKernel(LikelihoodKernel lk) {
*
******************************************************/
-void PhyloTree::computePartialLikelihood(PhyloNeighbor *dad_branch, PhyloNode *dad) {
- (this->*computePartialLikelihoodPointer)(dad_branch, dad);
+void PhyloTree::computePartialLikelihood(TraversalInfo &info, size_t ptn_left, size_t ptn_right, int thread_id) {
+ (this->*computePartialLikelihoodPointer)(info, ptn_left, ptn_right, thread_id);
}
double PhyloTree::computeLikelihoodBranch(PhyloNeighbor *dad_branch, PhyloNode *dad) {
@@ -349,13 +179,19 @@ void PhyloTree::computeLikelihoodDerv(PhyloNeighbor *dad_branch, PhyloNode *dad,
double PhyloTree::computeLikelihoodFromBuffer() {
assert(current_it && current_it_back);
- if (computeLikelihoodFromBufferPointer)
+ if (computeLikelihoodFromBufferPointer && optimize_by_newton)
return (this->*computeLikelihoodFromBufferPointer)();
else
return (this->*computeLikelihoodBranchPointer)(current_it, (PhyloNode*)current_it_back->node);
}
+double PhyloTree::dotProductDoubleCall(double *x, double *y, int size) {
+ return (this->*dotProductDouble)(x, y, size);
+}
+
+
+
void PhyloTree::computeTipPartialLikelihood() {
if (tip_partial_lh_computed)
return;
@@ -371,88 +207,103 @@ void PhyloTree::computeTipPartialLikelihood() {
computePtnInvar();
if (getModel()->isSiteSpecificModel()) {
- ModelSet *models = (ModelSet*)model;
- size_t nptn = aln->getNPattern(), max_nptn = get_safe_upper_limit(nptn), tip_block_size = max_nptn * aln->num_states;
+// ModelSet *models = (ModelSet*)model;
+ size_t nptn = aln->getNPattern(), max_nptn = ((nptn+vector_size-1)/vector_size)*vector_size, tip_block_size = max_nptn * aln->num_states;
int nstates = aln->num_states;
int nseq = aln->getNSeq();
+ assert(vector_size > 0);
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
for (int nodeid = 0; nodeid < nseq; nodeid++) {
- int i, x;
+ int i, x, v;
double *partial_lh = tip_partial_lh + tip_block_size*nodeid;
size_t ptn;
- for (ptn = 0; ptn < nptn; ptn++, partial_lh += nstates) {
- int state = aln->at(ptn)[nodeid];
-// double *partial_lh = node_partial_lh + ptn*nstates;
- double *inv_evec = models->at(ptn)->getInverseEigenvectors();
-
- if (state < nstates) {
- for (i = 0; i < nstates; i++)
- partial_lh[i] = inv_evec[i*nstates+state];
- } else if (state == aln->STATE_UNKNOWN) {
- // special treatment for unknown char
- for (i = 0; i < nstates; i++) {
- double lh_unknown = 0.0;
- double *this_inv_evec = inv_evec + i*nstates;
- for (x = 0; x < nstates; x++)
- lh_unknown += this_inv_evec[x];
- partial_lh[i] = lh_unknown;
- }
- } else {
- double lh_ambiguous;
- // ambiguous characters
- int ambi_aa[] = {
- 4+8, // B = N or D
- 32+64, // Z = Q or E
- 512+1024 // U = I or L
- };
- switch (aln->seq_type) {
- case SEQ_DNA:
- {
- int cstate = state-nstates+1;
- for (i = 0; i < nstates; i++) {
- lh_ambiguous = 0.0;
- for (x = 0; x < nstates; x++)
- if ((cstate) & (1 << x))
- lh_ambiguous += inv_evec[i*nstates+x];
- partial_lh[i] = lh_ambiguous;
- }
+ for (ptn = 0; ptn < nptn; ptn+=vector_size, partial_lh += nstates*vector_size) {
+// int state[vector_size];
+// for (v = 0; v < vector_size; v++) {
+// if (ptn+v < nptn)
+// state[v] = aln->at(ptn+v)[nodeid];
+// else
+// state[v] = aln->STATE_UNKNOWN;
+// }
+
+ double *inv_evec = &model->getInverseEigenvectors()[ptn*nstates*nstates];
+ for (v = 0; v < vector_size; v++) {
+ int state = aln->STATE_UNKNOWN;
+ if (ptn+v < nptn)
+ state = aln->at(ptn+v)[nodeid];
+ // double *partial_lh = node_partial_lh + ptn*nstates;
+// double *inv_evec = models->at(ptn)->getInverseEigenvectors();
+
+ if (state < nstates) {
+ for (i = 0; i < nstates; i++)
+ partial_lh[i*vector_size+v] = inv_evec[(i*nstates+state)*vector_size+v];
+ } else if (state == aln->STATE_UNKNOWN) {
+ // special treatment for unknown char
+ for (i = 0; i < nstates; i++) {
+ double lh_unknown = 0.0;
+// double *this_inv_evec = inv_evec + i*nstates;
+ for (x = 0; x < nstates; x++)
+ lh_unknown += inv_evec[(i*nstates+x)*vector_size+v];
+ partial_lh[i*vector_size+v] = lh_unknown;
}
- break;
- case SEQ_PROTEIN:
- //map[(unsigned char)'B'] = 4+8+19; // N or D
- //map[(unsigned char)'Z'] = 32+64+19; // Q or E
- {
- int cstate = state-nstates;
- for (i = 0; i < nstates; i++) {
- lh_ambiguous = 0.0;
- for (x = 0; x < 11; x++)
- if (ambi_aa[cstate] & (1 << x))
- lh_ambiguous += inv_evec[i*nstates+x];
- partial_lh[i] = lh_ambiguous;
+ } else {
+ double lh_ambiguous;
+ // ambiguous characters
+ int ambi_aa[] = {
+ 4+8, // B = N or D
+ 32+64, // Z = Q or E
+ 512+1024 // U = I or L
+ };
+ switch (aln->seq_type) {
+ case SEQ_DNA:
+ {
+ int cstate = state-nstates+1;
+ for (i = 0; i < nstates; i++) {
+ lh_ambiguous = 0.0;
+ for (x = 0; x < nstates; x++)
+ if ((cstate) & (1 << x))
+ lh_ambiguous += inv_evec[(i*nstates+x)*vector_size+v];
+ partial_lh[i*vector_size+v] = lh_ambiguous;
+ }
+ }
+ break;
+ case SEQ_PROTEIN:
+ //map[(unsigned char)'B'] = 4+8+19; // N or D
+ //map[(unsigned char)'Z'] = 32+64+19; // Q or E
+ {
+ int cstate = state-nstates;
+ for (i = 0; i < nstates; i++) {
+ lh_ambiguous = 0.0;
+ for (x = 0; x < 11; x++)
+ if (ambi_aa[cstate] & (1 << x))
+ lh_ambiguous += inv_evec[(i*nstates+x)*vector_size+v];
+ partial_lh[i*vector_size+v] = lh_ambiguous;
+ }
}
+ break;
+ default:
+ assert(0);
+ break;
}
- break;
- default:
- assert(0);
- break;
}
- }
- // sanity check
-// bool all_zero = true;
-// for (i = 0; i < nstates; i++)
-// if (partial_lh[i] != 0) {
-// all_zero = false;
-// break;
-// }
-// assert(!all_zero && "some tip_partial_lh are all zeros");
-
- }
+ // sanity check
+ // bool all_zero = true;
+ // for (i = 0; i < nstates; i++)
+ // if (partial_lh[i] != 0) {
+ // all_zero = false;
+ // break;
+ // }
+ // assert(!all_zero && "some tip_partial_lh are all zeros");
+
+ } // FOR v
+ } // FOR ptn
+ // No need to copy dummy values anymore
// dummy values
- for (ptn = nptn; ptn < max_nptn; ptn++, partial_lh += nstates)
- memcpy(partial_lh, partial_lh-nstates, nstates*sizeof(double));
- }
+// for (ptn = nptn; ptn < max_nptn; ptn++, partial_lh += nstates)
+// memcpy(partial_lh, partial_lh-nstates, nstates*sizeof(double));
+ } // FOR nodeid
return;
}
@@ -532,7 +383,7 @@ void PhyloTree::computePtnFreq() {
if (ptn_freq_computed) return;
ptn_freq_computed = true;
size_t nptn = aln->getNPattern();
- size_t maxptn = get_safe_upper_limit(nptn+model_factory->unobserved_ptns.size());
+ size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size());
int ptn;
for (ptn = 0; ptn < nptn; ptn++)
ptn_freq[ptn] = (*aln)[ptn].frequency;
@@ -542,7 +393,7 @@ void PhyloTree::computePtnFreq() {
void PhyloTree::computePtnInvar() {
size_t nptn = aln->getNPattern(), ptn;
- size_t maxptn = get_safe_upper_limit(nptn+model_factory->unobserved_ptns.size());
+ size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size());
int nstates = aln->num_states;
double *state_freq = aligned_alloc<double>(nstates);
@@ -557,13 +408,13 @@ void PhyloTree::computePtnInvar() {
ptn_invar[ptn] = p_invar * state_freq[(int) (*aln)[ptn].const_char];
}
}
- // ascertmain bias correction
- for (ptn = 0; ptn < model_factory->unobserved_ptns.size(); ptn++)
- ptn_invar[nptn+ptn] = p_invar * state_freq[(int)model_factory->unobserved_ptns[ptn]];
-
+// // ascertainment bias correction
+// for (ptn = 0; ptn < model_factory->unobserved_ptns.size(); ptn++)
+// ptn_invar[nptn+ptn] = p_invar * state_freq[(int)model_factory->unobserved_ptns[ptn]];
+//
// dummy values
- for (ptn = nptn+model_factory->unobserved_ptns.size(); ptn < maxptn; ptn++)
- ptn_invar[ptn] = ptn_invar[ptn-1];
+ for (ptn = nptn; ptn < maxptn; ptn++)
+ ptn_invar[ptn] = p_invar;
}
aligned_free(state_freq);
}
@@ -577,7 +428,7 @@ void PhyloTree::computePtnInvar() {
*
******************************************************/
-//template <const int nstates>
+/*
void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
// don't recompute the likelihood
@@ -602,10 +453,19 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
size_t ptn, c;
size_t orig_ntn = aln->size();
size_t ncat = site_rate->getNRate();
- const size_t nstatesqr=nstates*nstates;
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates;
+ }
size_t i, x;
- size_t block = nstates * ncat;
-
+ size_t block = nstates * ncat_mix;
+ size_t tip_block = nstates * model->getNMixtures();
+ size_t scale_size = nptn * ncat_mix;
+
double *evec = model->getEigenvectors();
double *inv_evec = model->getInverseEigenvectors();
assert(inv_evec && evec);
@@ -615,12 +475,15 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
// internal node
PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
+ int num_leaves = 0;
FOR_NEIGHBOR_IT(node, dad, it) {
PhyloNeighbor *nei = (PhyloNeighbor*)*it;
if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
if ((nei->partial_lh_computed & 1) == 0)
computePartialLikelihood(nei, node);
dad_branch->lh_scale_factor += nei->lh_scale_factor;
+ if (nei->node->isLeaf())
+ num_leaves ++;
}
if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
@@ -638,12 +501,17 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
break;
}
}
+ if (!done) {
+ printTree(cout, WT_BR_LEN + WT_NEWLINE);
+ }
assert(done && "partial_lh is not re-oriented");
}
// precompute buffer to save times
double *echildren = new double[block*nstates*(node->degree()-1)];
- double *partial_lh_leaves = new double[(aln->STATE_UNKNOWN+1)*block*(node->degree()-1)];
+ double *partial_lh_leaves = NULL;
+ if (num_leaves > 0)
+ partial_lh_leaves = new double[(aln->STATE_UNKNOWN+1)*block*num_leaves];
double *echild = echildren;
double *partial_lh_leaf = partial_lh_leaves;
@@ -651,15 +519,20 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
double expchild[nstates];
PhyloNeighbor *child = (PhyloNeighbor*)*it;
// precompute information buffer
- for (c = 0; c < ncat; c++) {
- double len_child = site_rate->getRate(c) * child->length;
+ double *echild_ptr = echild;
+ for (c = 0; c < ncat_mix; c++) {
+ double len_child = site_rate->getRate(c%ncat) * child->length;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ double *evec_ptr = evec + mix_addr[c];
for (i = 0; i < nstates; i++) {
- expchild[i] = exp(eval[i]*len_child);
+ expchild[i] = exp(eval_ptr[i]*len_child);
}
- for (x = 0; x < nstates; x++)
+ for (x = 0; x < nstates; x++) {
for (i = 0; i < nstates; i++) {
- echild[c*nstatesqr+x*nstates+i] = evec[x*nstates+i] * expchild[i];
+ echild_ptr[i] = evec_ptr[x*nstates+i] * expchild[i];
}
+ echild_ptr += nstates;
+ }
}
// pre compute information for tip
@@ -667,16 +540,23 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
vector<int>::iterator it;
for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
int state = (*it);
- for (x = 0; x < block; x++) {
- double vchild = 0.0;
- for (i = 0; i < nstates; i++) {
- vchild += echild[x*nstates+i] * tip_partial_lh[state*nstates+i];
+ double *this_partial_lh_leaf = partial_lh_leaf + state*block;
+ double *echild_ptr = echild;
+ for (c = 0; c < ncat_mix; c++) {
+ double *this_tip_partial_lh = tip_partial_lh + state*tip_block + mix_addr_nstates[c];
+ for (x = 0; x < nstates; x++) {
+ double vchild = 0.0;
+ for (i = 0; i < nstates; i++) {
+ vchild += echild_ptr[i] * this_tip_partial_lh[i];
+ }
+ this_partial_lh_leaf[x] = vchild;
+ echild_ptr += nstates;
}
- partial_lh_leaf[state*block+x] = vchild;
+ this_partial_lh_leaf += nstates;
}
}
+ size_t addr = aln->STATE_UNKNOWN * block;
for (x = 0; x < block; x++) {
- size_t addr = aln->STATE_UNKNOWN * block;
partial_lh_leaf[addr+x] = 1.0;
}
partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
@@ -685,9 +565,6 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
}
- double sum_scale = 0.0;
-
-
double *eleft = echildren, *eright = echildren + block*nstates;
if (!left->node->isLeaf() && right->node->isLeaf()) {
@@ -701,23 +578,25 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
if (node->degree() > 3) {
- /*--------------------- multifurcating node ------------------*/
+ //--------------------- multifurcating node ------------------//
// now for-loop computing partial_lh over all site-patterns
#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
+#pragma omp parallel for private(ptn, c, x, i) schedule(static)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double partial_lh_all[block];
for (i = 0; i < block; i++)
partial_lh_all[i] = 1.0;
- dad_branch->scale_num[ptn] = 0;
+ UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ memset(scale_dad, 0, sizeof(UBYTE)*ncat_mix);
double *partial_lh_leaf = partial_lh_leaves;
double *echild = echildren;
FOR_NEIGHBOR_IT(node, dad, it) {
PhyloNeighbor *child = (PhyloNeighbor*)*it;
+ UBYTE *scale_child = child->scale_num + ptn*ncat_mix;
if (child->node->isLeaf()) {
// external node
int state_child = (ptn < orig_ntn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
@@ -731,10 +610,10 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
// internal node
double *partial_lh = partial_lh_all;
double *partial_lh_child = child->partial_lh + ptn*block;
- dad_branch->scale_num[ptn] += child->scale_num[ptn];
double *echild_ptr = echild;
- for (c = 0; c < ncat; c++) {
+ for (c = 0; c < ncat_mix; c++) {
+ scale_dad[c] += scale_child[c];
// compute real partial likelihood vector
for (x = 0; x < nstates; x++) {
double vchild = 0.0;
@@ -754,11 +633,11 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
// compute dot-product with inv_eigenvector
- double lh_max = 0.0;
double *partial_lh_tmp = partial_lh_all;
double *partial_lh = dad_branch->partial_lh + ptn*block;
- for (c = 0; c < ncat; c++) {
- double *inv_evec_ptr = inv_evec;
+ for (c = 0; c < ncat_mix; c++) {
+ double lh_max = 0.0;
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
for (i = 0; i < nstates; i++) {
double res = 0.0;
for (x = 0; x < nstates; x++) {
@@ -768,102 +647,85 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
partial_lh[i] = res;
lh_max = max(lh_max, fabs(res));
}
- partial_lh += nstates;
- partial_lh_tmp += nstates;
- }
- // check if one should scale partial likelihoods
- if (lh_max < SCALING_THRESHOLD) {
- partial_lh = dad_branch->partial_lh + ptn*block;
- if (lh_max == 0.0) {
- // for very shitty data
- for (c = 0; c < ncat; c++)
- memcpy(&partial_lh[c*nstates], &tip_partial_lh[aln->STATE_UNKNOWN*nstates], nstates*sizeof(double));
- sum_scale += LOG_SCALING_THRESHOLD* 4 * ptn_freq[ptn];
- //sum_scale += log(lh_max) * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 4;
- int nsite = aln->getNSite();
- for (i = 0, x = 0; i < nsite && x < ptn_freq[ptn]; i++)
- if (aln->getPatternID(i) == ptn) {
- outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
- x++;
- }
- } else if (ptn_invar[ptn] == 0.0) {
- // now do the likelihood scaling
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- //partial_lh[i] /= lh_max;
+ // check if one should scale partial likelihoods
+ if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
+ //assert(lh_max != 0.0 && "Numerical underflow for multifurcation node");
+ if (ptn_invar[ptn] == 0.0) {
+ // now do the likelihood scaling
+ for (i = 0; i < nstates; i++)
+ partial_lh[i] *= SCALING_THRESHOLD_INVER;
+ scale_dad[c] += 1;
}
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- //sum_scale += log(lh_max) * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
}
+ partial_lh += nstates;
+ partial_lh_tmp += nstates;
}
} // for ptn
- dad_branch->lh_scale_factor += sum_scale;
+// dad_branch->lh_scale_factor += sum_scale;
// end multifurcating treatment
} else if (left->node->isLeaf() && right->node->isLeaf()) {
- /*--------------------- TIP-TIP (cherry) case ------------------*/
+ //--------------------- TIP-TIP (cherry) case ------------------//
double *partial_lh_left = partial_lh_leaves;
double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
// scale number must be ZERO
- memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
+ memset(dad_branch->scale_num, 0, scale_size * sizeof(UBYTE));
#ifdef _OPENMP
#pragma omp parallel for private(ptn, c, x, i) schedule(static)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double partial_lh_tmp[nstates];
double *partial_lh = dad_branch->partial_lh + ptn*block;
- int state_left = (ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- int state_right = (ptn < orig_ntn) ? (aln->at(ptn))[right->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- for (c = 0; c < ncat; c++) {
+ double *vleft = partial_lh_left + block*((ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
+ double *vright = partial_lh_right + block*((ptn < orig_ntn) ? (aln->at(ptn))[right->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
+ for (c = 0; c < ncat_mix; c++) {
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
// compute real partial likelihood vector
- double *left = partial_lh_left + (state_left*block+c*nstates);
- double *right = partial_lh_right + (state_right*block+c*nstates);
for (x = 0; x < nstates; x++) {
- partial_lh_tmp[x] = left[x] * right[x];
+ partial_lh_tmp[x] = vleft[x] * vright[x];
}
// compute dot-product with inv_eigenvector
- double *inv_evec_ptr = inv_evec;
for (i = 0; i < nstates; i++) {
double res = 0.0;
for (x = 0; x < nstates; x++) {
res += partial_lh_tmp[x]*inv_evec_ptr[x];
}
inv_evec_ptr += nstates;
- partial_lh[c*nstates+i] = res;
+ partial_lh[i] = res;
}
+ vleft += nstates;
+ vright += nstates;
+ partial_lh += nstates;
}
}
} else if (left->node->isLeaf() && !right->node->isLeaf()) {
- /*--------------------- TIP-INTERNAL NODE case ------------------*/
+ //--------------------- TIP-INTERNAL NODE case ------------------//
// only take scale_num from the right subtree
- memcpy(dad_branch->scale_num, right->scale_num, nptn * sizeof(UBYTE));
+ memcpy(dad_branch->scale_num, right->scale_num, scale_size * sizeof(UBYTE));
double *partial_lh_left = partial_lh_leaves;
#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
+#pragma omp parallel for private(ptn, c, x, i) schedule(static)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double partial_lh_tmp[nstates];
double *partial_lh = dad_branch->partial_lh + ptn*block;
double *partial_lh_right = right->partial_lh + ptn*block;
- int state_left = (ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- double *vleft = partial_lh_left + state_left*block;
- double lh_max = 0.0;
-
+ double *vleft = partial_lh_left + block*((ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn]);
+
double *eright_ptr = eright;
- for (c = 0; c < ncat; c++) {
+ for (c = 0; c < ncat_mix; c++) {
+ double lh_max = 0.0;
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
// compute real partial likelihood vector
for (x = 0; x < nstates; x++) {
double vright = 0.0;
@@ -875,73 +737,59 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
eright_ptr += nstates;
partial_lh_tmp[x] = vleft[x] * (vright);
}
- vleft += nstates;
- partial_lh_right += nstates;
// compute dot-product with inv_eigenvector
- double *inv_evec_ptr = inv_evec;
for (i = 0; i < nstates; i++) {
double res = 0.0;
for (x = 0; x < nstates; x++) {
res += partial_lh_tmp[x]*inv_evec_ptr[x];
}
inv_evec_ptr += nstates;
- partial_lh[c*nstates+i] = res;
+ partial_lh[i] = res;
lh_max = max(fabs(res), lh_max);
}
+ // check if one should scale partial likelihoods
+ if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
+ //assert(lh_max != 0.0 && "Numerical underflow for tip-inner node");
+ if (ptn_invar[ptn] == 0.0) {
+ // now do the likelihood scaling
+ for (i = 0; i < nstates; i++)
+ partial_lh[i] *= SCALING_THRESHOLD_INVER;
+ dad_branch->scale_num[ptn*ncat_mix+c] += 1;
+ }
+ }
+ vleft += nstates;
+ partial_lh_right += nstates;
+ partial_lh += nstates;
}
- // check if one should scale partial likelihoods
- if (lh_max < SCALING_THRESHOLD) {
- if (lh_max == 0.0) {
- // for very shitty data
- for (c = 0; c < ncat; c++)
- memcpy(&partial_lh[c*nstates], &tip_partial_lh[aln->STATE_UNKNOWN*nstates], nstates*sizeof(double));
- sum_scale += LOG_SCALING_THRESHOLD* 4 * ptn_freq[ptn];
- //sum_scale += log(lh_max) * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 4;
- int nsite = aln->getNSite();
- for (i = 0, x = 0; i < nsite && x < ptn_freq[ptn]; i++)
- if (aln->getPatternID(i) == ptn) {
- outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
- x++;
- }
- } else if (ptn_invar[ptn] == 0.0) {
- // now do the likelihood scaling
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- //partial_lh[i] /= lh_max;
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- //sum_scale += log(lh_max) * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- }
- }
-
}
- dad_branch->lh_scale_factor += sum_scale;
+// dad_branch->lh_scale_factor += sum_scale;
// delete [] partial_lh_left;
} else {
- /*--------------------- INTERNAL-INTERNAL NODE case ------------------*/
+ //--------------------- INTERNAL-INTERNAL NODE case ------------------//
#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
+#pragma omp parallel for private(ptn, c, x, i) schedule(static)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double partial_lh_tmp[nstates];
double *partial_lh = dad_branch->partial_lh + ptn*block;
double *partial_lh_left = left->partial_lh + ptn*block;
double *partial_lh_right = right->partial_lh + ptn*block;
- double lh_max = 0.0;
- dad_branch->scale_num[ptn] = left->scale_num[ptn] + right->scale_num[ptn];
+ UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ UBYTE *scale_left = left->scale_num + ptn*ncat_mix;
+ UBYTE *scale_right = right->scale_num + ptn*ncat_mix;
double *eleft_ptr = eleft;
double *eright_ptr = eright;
- for (c = 0; c < ncat; c++) {
+ for (c = 0; c < ncat_mix; c++) {
+ scale_dad[c] = scale_left[c] + scale_right[c];
+ double lh_max = 0.0;
+ double *inv_evec_ptr = inv_evec + mix_addr[c];
// compute real partial likelihood vector
for (x = 0; x < nstates; x++) {
double vleft = 0.0, vright = 0.0;
@@ -955,61 +803,43 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
partial_lh_tmp[x] = vleft*vright;
// assert(partial_lh_tmp[x] != 0.0);
}
- partial_lh_left += nstates;
- partial_lh_right += nstates;
// compute dot-product with inv_eigenvector
- double *inv_evec_ptr = inv_evec;
for (i = 0; i < nstates; i++) {
double res = 0.0;
for (x = 0; x < nstates; x++) {
res += partial_lh_tmp[x]*inv_evec_ptr[x];
}
inv_evec_ptr += nstates;
- partial_lh[c*nstates+i] = res;
+ partial_lh[i] = res;
lh_max = max(lh_max, fabs(res));
}
+ // check if one should scale partial likelihoods
+ if (lh_max < SCALING_THRESHOLD && lh_max != 0.0) {
+ //assert(lh_max != 0.0 && "Numerical underflow for inner-inner node");
+ if (ptn_invar[ptn] == 0.0) {
+ // BQM 2016-05-03: only scale for non-constant sites
+ // now do the likelihood scaling
+ for (i = 0; i < nstates; i++)
+ partial_lh[i] *= SCALING_THRESHOLD_INVER;
+ scale_dad[c] += 1;
+ }
+ }
+ partial_lh_left += nstates;
+ partial_lh_right += nstates;
+ partial_lh += nstates;
}
- // check if one should scale partial likelihoods
- if (lh_max < SCALING_THRESHOLD) {
- if (lh_max == 0.0) {
- // for very shitty data
- for (c = 0; c < ncat; c++)
- memcpy(&partial_lh[c*nstates], &tip_partial_lh[aln->STATE_UNKNOWN*nstates], nstates*sizeof(double));
- sum_scale += LOG_SCALING_THRESHOLD* 4 * ptn_freq[ptn];
- //sum_scale += log(lh_max) * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 4;
- int nsite = aln->getNSite();
- for (i = 0, x = 0; i < nsite && x < ptn_freq[ptn]; i++)
- if (aln->getPatternID(i) == ptn) {
- outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
- x++;
- }
- } else if (ptn_invar[ptn] == 0.0) {
- // BQM 2016-05-03: only scale for non-constant sites
- // now do the likelihood scaling
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- //partial_lh[i] /= lh_max;
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- //sum_scale += log(lh_max) * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- }
- }
-
}
- dad_branch->lh_scale_factor += sum_scale;
+// dad_branch->lh_scale_factor += sum_scale;
}
- delete [] partial_lh_leaves;
+ if (partial_lh_leaves)
+ delete [] partial_lh_leaves;
delete [] echildren;
}
-//template <const int nstates>
void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf) {
PhyloNode *node = (PhyloNode*) dad_branch->node;
PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
@@ -1030,12 +860,23 @@ void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode
size_t nstates = aln->num_states;
size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
- size_t block = ncat * nstates;
+ size_t block = ncat_mix * nstates;
+ size_t tip_block = nstates * model->getNMixtures();
size_t ptn; // for big data size > 4GB memory required
size_t c, i;
size_t orig_nptn = aln->size();
size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
+
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates;
+ }
+
double *eval = model->getEigenvalues();
assert(eval);
@@ -1050,11 +891,24 @@ void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
+ UBYTE *scale_dad = dad_branch->scale_num+ptn*ncat_mix;
double *theta = theta_all + ptn*block;
- double *lh_tip = tip_partial_lh + ((int)((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]))*nstates;
- for (c = 0; c < ncat; c++) {
- for (i = 0; i < nstates; i++) {
- theta[i] = lh_tip[i] * partial_lh_dad[i];
+ double *this_tip_partial_lh = tip_partial_lh + tip_block*((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]);
+ UBYTE min_scale = scale_dad[0];
+ for (c = 1; c < ncat_mix; c++)
+ min_scale = min(min_scale, scale_dad[c]);
+ for (c = 0; c < ncat_mix; c++) {
+ double *lh_tip = this_tip_partial_lh + mix_addr_nstates[c];
+ if (scale_dad[c] == min_scale) {
+ for (i = 0; i < nstates; i++) {
+ theta[i] = lh_tip[i] * partial_lh_dad[i];
+ }
+ } else if (scale_dad[c] == min_scale+1) {
+ for (i = 0; i < nstates; i++) {
+ theta[i] = lh_tip[i] * partial_lh_dad[i] * SCALING_THRESHOLD;
+ }
+ } else {
+ memset(theta, 0, sizeof(double)*nstates);
}
partial_lh_dad += nstates;
theta += nstates;
@@ -1067,15 +921,38 @@ void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode
// size_t all_entries = nptn*block;
#ifdef _OPENMP
-#pragma omp parallel for private(ptn, i) schedule(static)
+#pragma omp parallel for private(ptn, i, c) schedule(static)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double *theta = theta_all + ptn*block;
double *partial_lh_node = node_branch->partial_lh + ptn*block;
double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- for (i = 0; i < block; i++) {
- theta[i] = partial_lh_node[i] * partial_lh_dad[i];
- }
+
+ size_t ptn_ncat = ptn*ncat_mix;
+ UBYTE *scale_dad = dad_branch->scale_num + ptn_ncat;
+ UBYTE *scale_node = node_branch->scale_num + ptn_ncat;
+ UBYTE sum_scale[ncat_mix];
+ UBYTE min_scale = sum_scale[0] = scale_dad[0] + scale_node[0];
+ for (c = 1; c < ncat_mix; c++) {
+ sum_scale[c] = scale_dad[c] + scale_node[c];
+ min_scale = min(min_scale, sum_scale[c]);
+ }
+ for (c = 0; c < ncat_mix; c++) {
+ if (sum_scale[c] == min_scale) {
+ for (i = 0; i < nstates; i++) {
+ theta[i] = partial_lh_node[i] * partial_lh_dad[i];
+ }
+ } else if (sum_scale[c] == min_scale+1) {
+ for (i = 0; i < nstates; i++) {
+ theta[i] = partial_lh_node[i] * partial_lh_dad[i] * SCALING_THRESHOLD;
+ }
+ } else {
+ memset(theta, 0, sizeof(double)*nstates);
+ }
+ theta += nstates;
+ partial_lh_dad += nstates;
+ partial_lh_node += nstates;
+ }
}
}
theta_computed = true;
@@ -1084,15 +961,19 @@ void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode
double *val0 = new double[block];
double *val1 = new double[block];
double *val2 = new double[block];
- for (c = 0; c < ncat; c++) {
- double prop = site_rate->getProp(c);
+ for (c = 0; c < ncat_mix; c++) {
+ size_t m = c/denom;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ size_t mycat = c%ncat;
+ double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
+ size_t addr = c*nstates;
for (i = 0; i < nstates; i++) {
- double cof = eval[i]*site_rate->getRate(c);
+ double cof = eval_ptr[i]*site_rate->getRate(mycat);
double val = exp(cof*dad_branch->length) * prop;
double val1_ = cof*val;
- val0[c*nstates+i] = val;
- val1[c*nstates+i] = val1_;
- val2[c*nstates+i] = cof*val1_;
+ val0[addr+i] = val;
+ val1[addr+i] = val1_;
+ val2[addr+i] = cof*val1_;
}
}
@@ -1132,6 +1013,9 @@ void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode
}
df = my_df;
ddf = my_ddf;
+
+ assert(!isnan(df) && !isinf(df) && "Numerical underflow for lh-derivative");
+
if (isnan(df) || isinf(df)) {
df = 0.0;
ddf = 0.0;
@@ -1177,25 +1061,37 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
size_t nstates = aln->num_states;
size_t ncat = site_rate->getNRate();
+ size_t ncat_mix = (model_factory->fused_mix_rate) ? ncat : ncat*model->getNMixtures();
- size_t block = ncat * nstates;
+ size_t block = ncat_mix * nstates;
+ size_t tip_block = nstates * model->getNMixtures();
size_t ptn; // for big data size > 4GB memory required
size_t c, i;
size_t orig_nptn = aln->size();
size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
+
+ size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix];
+ size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat;
+
double *eval = model->getEigenvalues();
assert(eval);
double *val = new double[block];
- for (c = 0; c < ncat; c++) {
- double len = site_rate->getRate(c)*dad_branch->length;
- double prop = site_rate->getProp(c);
+ for (c = 0; c < ncat_mix; c++) {
+ size_t mycat = c%ncat;
+ size_t m = c/denom;
+ mix_addr_nstates[c] = m*nstates;
+ mix_addr[c] = mix_addr_nstates[c]*nstates;
+ double *eval_ptr = eval + mix_addr_nstates[c];
+ double len = site_rate->getRate(mycat)*dad_branch->length;
+ double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m);
+ double *this_val = val + c*nstates;
for (i = 0; i < nstates; i++)
- val[c*nstates+i] = exp(eval[i]*len) * prop;
+ this_val[i] = exp(eval_ptr[i]*len) * prop;
}
double prob_const = 0.0;
- memset(_pattern_lh_cat, 0, nptn*ncat*sizeof(double));
+ memset(_pattern_lh_cat, 0, sizeof(double)*nptn*ncat_mix);
if (dad->isLeaf()) {
// special treatment for TIP-INTERNAL NODE case
@@ -1205,9 +1101,10 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
// precompute information from one tip
for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
double *lh_node = partial_lh_node +(*it)*block;
- double *lh_tip = tip_partial_lh + (*it)*nstates;
double *val_tmp = val;
- for (c = 0; c < ncat; c++) {
+ double *this_tip_partial_lh = tip_partial_lh + (*it)*tip_block;
+ for (c = 0; c < ncat_mix; c++) {
+ double *lh_tip = this_tip_partial_lh + mix_addr_nstates[c];
for (i = 0; i < nstates; i++) {
lh_node[i] = val_tmp[i] * lh_tip[i];
}
@@ -1222,27 +1119,37 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double lh_ptn = ptn_invar[ptn];
- double *lh_cat = _pattern_lh_cat + ptn*ncat;
+ double *lh_cat = _pattern_lh_cat + ptn*ncat_mix;
double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- int state_dad = (ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
- double *lh_node = partial_lh_node + state_dad*block;
- for (c = 0; c < ncat; c++) {
- for (i = 0; i < nstates; i++) {
- *lh_cat += lh_node[i] * partial_lh_dad[i];
+ UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ double *lh_node = partial_lh_node + block*((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]);
+ // determine the min scaling
+ UBYTE min_scale = scale_dad[0];
+ for (c = 1; c < ncat_mix; c++)
+ min_scale = min(min_scale, scale_dad[c]);
+
+ for (c = 0; c < ncat_mix; c++) {
+ if (scale_dad[c] <= min_scale+1) {
+ // only compute for least scale category
+ for (i = 0; i < nstates; i++) {
+ *lh_cat += (lh_node[i] * partial_lh_dad[i]);
+ }
+ if (scale_dad[c] != min_scale)
+ *lh_cat *= SCALING_THRESHOLD;
+ lh_ptn += *lh_cat;
}
lh_node += nstates;
partial_lh_dad += nstates;
- lh_ptn += *lh_cat;
lh_cat++;
}
// assert(lh_ptn > -1e-10);
if (ptn < orig_nptn) {
- lh_ptn = log(fabs(lh_ptn));
+ lh_ptn = log(fabs(lh_ptn)) + LOG_SCALING_THRESHOLD*min_scale;
_pattern_lh[ptn] = lh_ptn;
tree_lh += lh_ptn * ptn_freq[ptn];
} else {
// bugfix 2016-01-21, prob_const can be rescaled
- if (dad_branch->scale_num[ptn] >= 1)
+ if (min_scale >= 1)
lh_ptn *= SCALING_THRESHOLD;
// _pattern_lh[ptn] = lh_ptn;
prob_const += lh_ptn;
@@ -1256,15 +1163,28 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
#endif
for (ptn = 0; ptn < nptn; ptn++) {
double lh_ptn = ptn_invar[ptn];
- double *lh_cat = _pattern_lh_cat + ptn*ncat;
+ double *lh_cat = _pattern_lh_cat + ptn*ncat_mix;
double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
double *partial_lh_node = node_branch->partial_lh + ptn*block;
double *val_tmp = val;
- for (c = 0; c < ncat; c++) {
- for (i = 0; i < nstates; i++) {
- *lh_cat += val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
+ UBYTE *scale_dad = dad_branch->scale_num + ptn*ncat_mix;
+ UBYTE *scale_node = node_branch->scale_num + ptn*ncat_mix;
+ UBYTE sum_scale[ncat_mix];
+ UBYTE min_scale = sum_scale[0] = scale_dad[0]+scale_node[0];
+ for (c = 1; c < ncat_mix; c++) {
+ sum_scale[c] = scale_dad[c] + scale_node[c];
+ min_scale = min(min_scale, sum_scale[c]);
+ }
+ for (c = 0; c < ncat_mix; c++) {
+ if (sum_scale[c] <= min_scale+1) {
+ // only compute for least scale category
+ for (i = 0; i < nstates; i++) {
+ *lh_cat += (val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i]);
+ }
+ if (sum_scale[c] != min_scale)
+ *lh_cat *= SCALING_THRESHOLD;
+ lh_ptn += *lh_cat;
}
- lh_ptn += *lh_cat;
partial_lh_node += nstates;
partial_lh_dad += nstates;
val_tmp += nstates;
@@ -1273,12 +1193,12 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
// assert(lh_ptn > 0.0);
if (ptn < orig_nptn) {
- lh_ptn = log(fabs(lh_ptn));
+ lh_ptn = log(fabs(lh_ptn)) + LOG_SCALING_THRESHOLD*min_scale;
_pattern_lh[ptn] = lh_ptn;
tree_lh += lh_ptn * ptn_freq[ptn];
} else {
// bugfix 2016-01-21, prob_const can be rescaled
- if (dad_branch->scale_num[ptn] + node_branch->scale_num[ptn] >= 1)
+ if (min_scale >= 1)
lh_ptn *= SCALING_THRESHOLD;
// _pattern_lh[ptn] = lh_ptn;
prob_const += lh_ptn;
@@ -1286,30 +1206,7 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
}
}
- if (isnan(tree_lh) || isinf(tree_lh)) {
- cout << "WARNING: Numerical underflow caused by alignment sites";
- i = aln->getNSite();
- int j;
- for (j = 0, c = 0; j < i; j++) {
- ptn = aln->getPatternID(j);
- if (isnan(_pattern_lh[ptn]) || isinf(_pattern_lh[ptn])) {
- cout << " " << j+1;
- c++;
- if (c >= 10) {
- cout << " ...";
- break;
- }
- }
- }
- cout << endl;
- tree_lh = current_it->lh_scale_factor + current_it_back->lh_scale_factor;
- for (ptn = 0; ptn < orig_nptn; ptn++) {
- if (isnan(_pattern_lh[ptn]) || isinf(_pattern_lh[ptn])) {
- _pattern_lh[ptn] = LOG_SCALING_THRESHOLD*4; // log(2^(-1024))
- }
- tree_lh += _pattern_lh[ptn] * ptn_freq[ptn];
- }
- }
+ assert(!isnan(tree_lh) && !isinf(tree_lh) && "Numerical underflow for lh-branch");
if (orig_nptn < nptn) {
// ascertainment bias correction
@@ -1337,1367 +1234,377 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
delete [] val;
return tree_lh;
}
+*/
-
-/************************************************************************************************
+/*******************************************************
*
- * non-vectorized fused mixture and rate likelihood functions
+ * ancestral sequence reconstruction
*
- *************************************************************************************************/
-
-//template <const int nstates>
-void PhyloTree::computeMixratePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
- // don't recompute the likelihood
- assert(dad);
- if (dad_branch->partial_lh_computed & 1)
- return;
- dad_branch->partial_lh_computed |= 1;
+ ******************************************************/
- size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
- PhyloNode *node = (PhyloNode*)(dad_branch->node);
- if (!tip_partial_lh_computed)
- computeTipPartialLikelihood();
+void PhyloTree::computeMarginalAncestralProbability(PhyloNeighbor *dad_branch, PhyloNode *dad, double *ptn_ancestral_prob) {
+ PhyloNode *node = (PhyloNode*) dad_branch->node;
+ PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
+ if (!central_partial_lh)
+ initializeAllPartialLh();
+ assert(!node->isLeaf());
- if (node->isLeaf()) {
- dad_branch->lh_scale_factor = 0.0;
- return;
- }
+ // TODO: not working yet
+// if ((dad_branch->partial_lh_computed & 1) == 0)
+// computePartialLikelihood(dad_branch, dad);
+// if ((node_branch->partial_lh_computed & 1) == 0)
+// computePartialLikelihood(node_branch, node);
size_t nstates = aln->num_states;
- size_t ptn, c;
- size_t orig_ntn = aln->size();
- size_t ncat = site_rate->getNRate();
- assert(ncat == model->getNMixtures());
const size_t nstatesqr=nstates*nstates;
- size_t i, x;
- size_t block = nstates * ncat;
+ size_t ncat = site_rate->getNRate();
+ size_t statecat = nstates * ncat;
+ size_t nmixture = model->getNMixtures();
- double *evec = model->getEigenvectors();
- double *inv_evec = model->getInverseEigenvectors();
- assert(inv_evec && evec);
- double *eval = model->getEigenvalues();
+ size_t block = ncat * nstates * nmixture;
+ size_t ptn; // for big data size > 4GB memory required
+ size_t c, i, m, x;
+ size_t nptn = aln->size();
+ double *eval = model->getEigenvalues();
+ double *evec = model->getEigenvectors();
+ double *inv_evec = model->getInverseEigenvectors();
+ assert(eval);
- dad_branch->lh_scale_factor = 0.0;
+ double echild[block*nstates];
- // internal node
-// assert(node->degree() == 3); // it works only for strictly bifurcating tree
- PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
- FOR_NEIGHBOR_IT(node, dad, it) {
- PhyloNeighbor *nei = (PhyloNeighbor*)*it;
- if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
- if ((nei->partial_lh_computed & 1) == 0)
- computePartialLikelihood(nei, node);
- dad_branch->lh_scale_factor += nei->lh_scale_factor;
- }
-
- if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
- // re-orient partial_lh
- bool done = false;
- FOR_NEIGHBOR_IT(node, dad, it2) {
- PhyloNeighbor *backnei = ((PhyloNeighbor*)(*it2)->node->findNeighbor(node));
- if (backnei->partial_lh) {
- dad_branch->partial_lh = backnei->partial_lh;
- dad_branch->scale_num = backnei->scale_num;
- backnei->partial_lh = NULL;
- backnei->scale_num = NULL;
- backnei->partial_lh_computed &= ~1; // clear bit
- done = true;
- break;
+ for (c = 0; c < ncat; c++) {
+ double expchild[nstates];
+ double len_child = site_rate->getRate(c) * dad_branch->length;
+ for (m = 0; m < nmixture; m++) {
+ for (i = 0; i < nstates; i++) {
+ expchild[i] = exp(eval[m*nstates+i]*len_child);
}
+ for (x = 0; x < nstates; x++)
+ for (i = 0; i < nstates; i++) {
+ echild[(m*ncat+c)*nstatesqr+x*nstates+i] = evec[m*nstatesqr+x*nstates+i] * expchild[i];
+ }
}
- assert(done && "partial_lh is not re-oriented");
- }
-
- // precompute buffer to save times
- double *echildren = new double[block*nstates*(node->degree()-1)];
- double *partial_lh_leaves = new double[(aln->STATE_UNKNOWN+1)*block*(node->degree()-1)];
- double *echild = echildren;
- double *partial_lh_leaf = partial_lh_leaves;
-
- FOR_NEIGHBOR_IT(node, dad, it) {
- double expchild[nstates];
- PhyloNeighbor *child = (PhyloNeighbor*)*it;
- // precompute information buffer
- for (c = 0; c < ncat; c++) {
- double len_child = site_rate->getRate(c) * child->length;
- for (i = 0; i < nstates; i++) {
- expchild[i] = exp(eval[c*nstates+i]*len_child);
- }
- for (x = 0; x < nstates; x++)
- for (i = 0; i < nstates; i++) {
- echild[c*nstatesqr+x*nstates+i] = evec[c*nstatesqr+x*nstates+i] * expchild[i];
- }
- }
- // pre compute information for tip
- if (child->node->isLeaf()) {
- vector<int>::iterator it;
- for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
- int state = (*it);
- for (c = 0; c < ncat; c++)
- for (x = 0; x < nstates; x++) {
- double vchild = 0.0;
- for (i = 0; i < nstates; i++) {
- vchild += echild[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
- }
- partial_lh_leaf[state*block+c*nstates+x] = vchild;
- }
- }
- size_t addr = aln->STATE_UNKNOWN * block;
- for (x = 0; x < block; x++) {
- partial_lh_leaf[addr+x] = 1.0;
- }
- partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
- }
- echild += block*nstates;
- }
-
- double *eleft = echildren, *eright = echildren + block*nstates;
-
- if (!left->node->isLeaf() && right->node->isLeaf()) {
- PhyloNeighbor *tmp = left;
- left = right;
- right = tmp;
- double *etmp = eleft;
- eleft = eright;
- eright = etmp;
- }
-
- if (node->degree() > 3) {
- /*--------------------- multifurcating node ------------------*/
- double sum_scale = 0.0;
-
- // now for-loop computing partial_lh over all site-patterns
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double partial_lh_all[block];
- for (i = 0; i < block; i++)
- partial_lh_all[i] = 1.0;
- dad_branch->scale_num[ptn] = 0;
-
- double *partial_lh_leaf = partial_lh_leaves;
- double *echild = echildren;
-
- FOR_NEIGHBOR_IT(node, dad, it) {
- PhyloNeighbor *child = (PhyloNeighbor*)*it;
- if (child->node->isLeaf()) {
- // external node
- int state_child = (ptn < orig_ntn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- double *child_lh = partial_lh_leaf + state_child*block;
- for (c = 0; c < block; c++) {
- partial_lh_all[c] *= child_lh[c];
- }
- partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
- } else {
- // internal node
- double *partial_lh = partial_lh_all;
- double *partial_lh_child = child->partial_lh + ptn*block;
- dad_branch->scale_num[ptn] += child->scale_num[ptn];
-
- double *echild_ptr = echild;
- for (c = 0; c < ncat; c++) {
- // compute real partial likelihood vector
- for (x = 0; x < nstates; x++) {
- double vchild = 0.0;
- for (i = 0; i < nstates; i++) {
- vchild += echild_ptr[i] * partial_lh_child[i];
- }
- echild_ptr += nstates;
- partial_lh[x] *= vchild;
- }
- partial_lh += nstates;
- partial_lh_child += nstates;
- }
- } // if
- echild += block*nstates;
- } // FOR_NEIGHBOR
-
-
- // compute dot-product with inv_eigenvector
- double lh_max = 0.0;
- double *partial_lh_tmp = partial_lh_all;
- double *partial_lh = dad_branch->partial_lh + ptn*block;
- double *inv_evec_ptr = inv_evec;
- for (c = 0; c < ncat; c++) {
- // compute dot-product with inv_eigenvector
- for (i = 0; i < nstates; i++) {
- double res = 0.0;
- for (x = 0; x < nstates; x++) {
- res += partial_lh_tmp[x]*inv_evec_ptr[x];
- }
- inv_evec_ptr += nstates;
- partial_lh[i] = res;
- lh_max = max(fabs(res), lh_max);
- }
- partial_lh += nstates;
- partial_lh_tmp += nstates;
- }
-
- if (lh_max < SCALING_THRESHOLD) {
- // now do the likelihood scaling
- partial_lh = dad_branch->partial_lh + ptn*block;
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- }
-
- } // for ptn
- dad_branch->lh_scale_factor += sum_scale;
-
- // end multifurcating treatment
-
- } else if (left->node->isLeaf() && right->node->isLeaf()) {
- // special treatment for TIP-TIP (cherry) case
-
- // pre compute information for both tips
- double *partial_lh_left = partial_lh_leaves;
- double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
-
-// vector<int>::iterator it;
-// for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
-// int state = (*it);
-// for (c = 0; c < ncat; c++)
-// for (x = 0; x < nstates; x++) {
-// double vleft = 0.0;
-// for (i = 0; i < nstates; i++) {
-// vleft += eleft[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
-// }
-// partial_lh_left[state*block+c*nstates+x] = vleft;
-// }
-// }
-//
-// for (it = aln->seq_states[right->node->id].begin(); it != aln->seq_states[right->node->id].end(); it++) {
-// int state = (*it);
-// for (c = 0; c < ncat; c++)
-// for (x = 0; x < nstates; x++) {
-// double vright = 0.0;
-// for (i = 0; i < nstates; i++) {
-// vright += eright[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
-// }
-// partial_lh_right[state*block+c*nstates+x] = vright;
-// }
-// }
-//
-// for (x = 0; x < block; x++) {
-// size_t addr = aln->STATE_UNKNOWN * block;
-// partial_lh_left[addr+x] = 1.0;
-// partial_lh_right[addr+x] = 1.0;
-// }
-//
-
- // scale number must be ZERO
- memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
-#ifdef _OPENMP
-//#pragma omp parallel for private(ptn, c, x, i, partial_lh_tmp)
-#pragma omp parallel for private(ptn, c, x, i)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double partial_lh_tmp[nstates];
- double *partial_lh = dad_branch->partial_lh + ptn*block;
- int state_left = (ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- int state_right = (ptn < orig_ntn) ? (aln->at(ptn))[right->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- for (c = 0; c < ncat; c++) {
- // compute real partial likelihood vector
- double *left = partial_lh_left + (state_left*block+c*nstates);
- double *right = partial_lh_right + (state_right*block+c*nstates);
- for (x = 0; x < nstates; x++) {
- partial_lh_tmp[x] = left[x] * right[x];
- }
-
- // compute dot-product with inv_eigenvector
- for (i = 0; i < nstates; i++) {
- double res = 0.0;
- for (x = 0; x < nstates; x++) {
- res += partial_lh_tmp[x]*inv_evec[c*nstatesqr+i*nstates+x];
- }
- partial_lh[c*nstates+i] = res;
- }
- }
- }
-// delete [] partial_lh_right;
-// delete [] partial_lh_left;
- } else if (left->node->isLeaf() && !right->node->isLeaf()) {
- // special treatment to TIP-INTERNAL NODE case
- // only take scale_num from the right subtree
- memcpy(dad_branch->scale_num, right->scale_num, nptn * sizeof(UBYTE));
-
- // pre compute information for left tip
- double *partial_lh_left = partial_lh_leaves;
-
-// vector<int>::iterator it;
-// for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
-// int state = (*it);
-// for (c = 0; c < ncat; c++)
-// for (x = 0; x < nstates; x++) {
-// double vleft = 0.0;
-// for (i = 0; i < nstates; i++) {
-// vleft += eleft[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
-// }
-// partial_lh_left[state*block+c*nstates+x] = vleft;
-// }
-// }
-// for (x = 0; x < block; x++) {
-// size_t addr = aln->STATE_UNKNOWN * block;
-// partial_lh_left[addr+x] = 1.0;
-// }
-//
-
- double sum_scale = 0.0;
-#ifdef _OPENMP
-//#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i, partial_lh_tmp)
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double partial_lh_tmp[nstates];
- double *partial_lh = dad_branch->partial_lh + ptn*block;
- double *partial_lh_right = right->partial_lh + ptn*block;
- int state_left = (ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- double lh_max = 0.0;
-
- for (c = 0; c < ncat; c++) {
- // compute real partial likelihood vector
- for (x = 0; x < nstates; x++) {
- double vleft = 0.0, vright = 0.0;
- size_t addr = c*nstatesqr+x*nstates;
- vleft = partial_lh_left[state_left*block+c*nstates+x];
- for (i = 0; i < nstates; i++) {
- vright += eright[addr+i] * partial_lh_right[c*nstates+i];
- }
- partial_lh_tmp[x] = vleft * (vright);
- }
- // compute dot-product with inv_eigenvector
- for (i = 0; i < nstates; i++) {
- double res = 0.0;
- for (x = 0; x < nstates; x++) {
- res += partial_lh_tmp[x]*inv_evec[c*nstatesqr+i*nstates+x];
- }
- partial_lh[c*nstates+i] = res;
- lh_max = max(fabs(res), lh_max);
- }
- }
- if (lh_max < SCALING_THRESHOLD) {
- // now do the likelihood scaling
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- }
-
-
- }
- dad_branch->lh_scale_factor += sum_scale;
-// delete [] partial_lh_left;
-
- } else {
- // both left and right are internal node
-
- double sum_scale = 0.0;
-#ifdef _OPENMP
-//#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i, partial_lh_tmp)
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double partial_lh_tmp[nstates];
- double *partial_lh = dad_branch->partial_lh + ptn*block;
- double *partial_lh_left = left->partial_lh + ptn*block;
- double *partial_lh_right = right->partial_lh + ptn*block;
- double lh_max = 0.0;
- dad_branch->scale_num[ptn] = left->scale_num[ptn] + right->scale_num[ptn];
-
- for (c = 0; c < ncat; c++) {
- // compute real partial likelihood vector
- for (x = 0; x < nstates; x++) {
- double vleft = 0.0, vright = 0.0;
- size_t addr = c*nstatesqr+x*nstates;
- for (i = 0; i < nstates; i++) {
- vleft += eleft[addr+i] * partial_lh_left[c*nstates+i];
- vright += eright[addr+i] * partial_lh_right[c*nstates+i];
- }
- partial_lh_tmp[x] = vleft*vright;
- }
- // compute dot-product with inv_eigenvector
- for (i = 0; i < nstates; i++) {
- double res = 0.0;
- for (x = 0; x < nstates; x++) {
- res += partial_lh_tmp[x]*inv_evec[c*nstatesqr+i*nstates+x];
- }
- partial_lh[c*nstates+i] = res;
- lh_max = max(lh_max, fabs(res));
- }
- }
- if (lh_max < SCALING_THRESHOLD) {
- // now do the likelihood scaling
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- }
-
- }
- dad_branch->lh_scale_factor += sum_scale;
-
- }
-
- delete [] partial_lh_leaves;
- delete [] echildren;
-}
-
-//template <const int nstates>
-void PhyloTree::computeMixrateLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf) {
- PhyloNode *node = (PhyloNode*) dad_branch->node;
- PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
- if (!central_partial_lh)
- initializeAllPartialLh();
- if (node->isLeaf()) {
- PhyloNode *tmp_node = dad;
- dad = node;
- node = tmp_node;
- PhyloNeighbor *tmp_nei = dad_branch;
- dad_branch = node_branch;
- node_branch = tmp_nei;
- }
- if ((dad_branch->partial_lh_computed & 1) == 0)
- computeMixratePartialLikelihoodEigen(dad_branch, dad);
- if ((node_branch->partial_lh_computed & 1) == 0)
- computeMixratePartialLikelihoodEigen(node_branch, node);
- size_t nstates = aln->num_states;
- size_t ncat = site_rate->getNRate();
-
- size_t block = ncat * nstates;
- size_t ptn; // for big data size > 4GB memory required
- size_t c, i;
- size_t orig_nptn = aln->size();
- size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
- double *eval = model->getEigenvalues();
- assert(eval);
-
- assert(theta_all);
- if (!theta_computed) {
- // precompute theta for fast branch length optimization
-
- if (dad->isLeaf()) {
- // special treatment for TIP-INTERNAL NODE case
-#ifdef _OPENMP
-#pragma omp parallel for private(ptn, i)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- double *theta = theta_all + ptn*block;
- double *lh_tip = tip_partial_lh + ((int)((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]))*nstates*ncat;
- for (i = 0; i < block; i++) {
- theta[i] = lh_tip[i] * partial_lh_dad[i];
- }
-
- }
- // ascertainment bias correction
- } else {
- // both dad and node are internal nodes
- double *partial_lh_node = node_branch->partial_lh;
- double *partial_lh_dad = dad_branch->partial_lh;
-
- size_t all_entries = nptn*block;
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
- for (i = 0; i < all_entries; i++) {
- theta_all[i] = partial_lh_node[i] * partial_lh_dad[i];
- }
- }
- theta_computed = true;
- }
-
- double *val0 = new double[block];
- double *val1 = new double[block];
- double *val2 = new double[block];
- for (c = 0; c < ncat; c++) {
- double prop = site_rate->getProp(c);
- for (i = 0; i < nstates; i++) {
- double cof = eval[c*nstates+i]*site_rate->getRate(c);
- double val = exp(cof*dad_branch->length) * prop;
- double val1_ = cof*val;
- val0[c*nstates+i] = val;
- val1[c*nstates+i] = val1_;
- val2[c*nstates+i] = cof*val1_;
- }
- }
-
-
- double my_df = 0.0, my_ddf = 0.0, prob_const = 0.0, df_const = 0.0, ddf_const = 0.0;
-
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: my_df, my_ddf, prob_const, df_const, ddf_const) private(ptn, i)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double lh_ptn = ptn_invar[ptn], df_ptn = 0.0, ddf_ptn = 0.0;
- double *theta = theta_all + ptn*block;
- for (i = 0; i < block; i++) {
- lh_ptn += val0[i] * theta[i];
- df_ptn += val1[i] * theta[i];
- ddf_ptn += val2[i] * theta[i];
- }
-
-// assert(lh_ptn > 0.0);
- lh_ptn = fabs(lh_ptn);
-
- if (ptn < orig_nptn) {
- double df_frac = df_ptn / lh_ptn;
- double ddf_frac = ddf_ptn / lh_ptn;
- double freq = ptn_freq[ptn];
- double tmp1 = df_frac * freq;
- double tmp2 = ddf_frac * freq;
- my_df += tmp1;
- my_ddf += tmp2 - tmp1 * df_frac;
- } else {
- // ascertainment bias correction
- prob_const += lh_ptn;
- df_const += df_ptn;
- ddf_const += ddf_ptn;
- }
- }
- df = my_df;
- ddf = my_ddf;
- if (isnan(df) || isinf(df)) {
- df = 0.0;
- ddf = 0.0;
-// outWarning("Numerical instability (some site-likelihood = 0)");
- }
-
- if (orig_nptn < nptn) {
- // ascertainment bias correction
- prob_const = 1.0 - prob_const;
- double df_frac = df_const / prob_const;
- double ddf_frac = ddf_const / prob_const;
- int nsites = aln->getNSite();
- df += nsites * df_frac;
- ddf += nsites *(ddf_frac + df_frac*df_frac);
- }
-
-
- delete [] val2;
- delete [] val1;
- delete [] val0;
-}
-
-//template <const int nstates>
-double PhyloTree::computeMixrateLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
- PhyloNode *node = (PhyloNode*) dad_branch->node;
- PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
- if (!central_partial_lh)
- initializeAllPartialLh();
- if (node->isLeaf()) {
- PhyloNode *tmp_node = dad;
- dad = node;
- node = tmp_node;
- PhyloNeighbor *tmp_nei = dad_branch;
- dad_branch = node_branch;
- node_branch = tmp_nei;
- }
- if ((dad_branch->partial_lh_computed & 1) == 0)
-// computeMixratePartialLikelihoodEigen(dad_branch, dad);
- computePartialLikelihood(dad_branch, dad);
- if ((node_branch->partial_lh_computed & 1) == 0)
-// computeMixratePartialLikelihoodEigen(node_branch, node);
- computePartialLikelihood(node_branch, node);
- double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
- size_t nstates = aln->num_states;
- size_t ncat = site_rate->getNRate();
-
- size_t block = ncat * nstates;
- size_t ptn; // for big data size > 4GB memory required
- size_t c, i;
- size_t orig_nptn = aln->size();
- size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
- double *eval = model->getEigenvalues();
- assert(eval);
-
- double *val = new double[block];
- for (c = 0; c < ncat; c++) {
- double len = site_rate->getRate(c)*dad_branch->length;
- double prop = site_rate->getProp(c);
- for (i = 0; i < nstates; i++)
- val[c*nstates+i] = exp(eval[c*nstates+i]*len) * prop;
- }
-
- double prob_const = 0.0;
- memset(_pattern_lh_cat, 0, nptn*ncat*sizeof(double));
-
- if (dad->isLeaf()) {
- // special treatment for TIP-INTERNAL NODE case
- double *partial_lh_node = new double[(aln->STATE_UNKNOWN+1)*block];
- IntVector states_dad = aln->seq_states[dad->id];
- states_dad.push_back(aln->STATE_UNKNOWN);
- // precompute information from one tip
- for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
- double *lh_node = partial_lh_node +(*it)*block;
- double *lh_tip = tip_partial_lh + (*it)*block;
- for (i = 0; i < block; i++)
- lh_node[i] = val[i]*lh_tip[i];
- }
-
- // now do the real computation
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, i, c)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double lh_ptn = ptn_invar[ptn];
- double *lh_cat = _pattern_lh_cat + ptn*ncat;
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- int state_dad = (ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
- double *lh_node = partial_lh_node + state_dad*block;
- for (c = 0; c < ncat; c++) {
- for (i = 0; i < nstates; i++) {
- *lh_cat += lh_node[i] * partial_lh_dad[i];
- }
- lh_node += nstates;
- partial_lh_dad += nstates;
- lh_ptn += *lh_cat;
- lh_cat++;
- }
-// assert(lh_ptn > 0.0);
- if (ptn < orig_nptn) {
- lh_ptn = log(fabs(lh_ptn));
- _pattern_lh[ptn] = lh_ptn;
- tree_lh += lh_ptn * ptn_freq[ptn];
- } else {
- // bugfix 2016-01-21, prob_const can be rescaled
- if (dad_branch->scale_num[ptn] >= 1)
- lh_ptn *= SCALING_THRESHOLD;
- prob_const += lh_ptn;
- }
- }
- delete [] partial_lh_node;
- } else {
- // both dad and node are internal nodes
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, i, c)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double lh_ptn = ptn_invar[ptn];
- double *lh_cat = _pattern_lh_cat + ptn*ncat;
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- double *partial_lh_node = node_branch->partial_lh + ptn*block;
- double *val_tmp = val;
- for (c = 0; c < ncat; c++) {
- for (i = 0; i < nstates; i++) {
- *lh_cat += val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
- }
- lh_ptn += *lh_cat;
- partial_lh_node += nstates;
- partial_lh_dad += nstates;
- val_tmp += nstates;
- lh_cat++;
- }
-
- assert(lh_ptn > 0.0);
- if (ptn < orig_nptn) {
- lh_ptn = log(lh_ptn);
- _pattern_lh[ptn] = lh_ptn;
- tree_lh += lh_ptn * ptn_freq[ptn];
- } else {
- // bugfix 2016-01-21, prob_const can be rescaled
- if (dad_branch->scale_num[ptn] + node_branch->scale_num[ptn] >= 1)
- lh_ptn *= SCALING_THRESHOLD;
- prob_const += lh_ptn;
- }
- }
- }
-
-
- if (orig_nptn < nptn) {
- // ascertainment bias correction
- prob_const = log(1.0 - prob_const);
- for (ptn = 0; ptn < orig_nptn; ptn++)
- _pattern_lh[ptn] -= prob_const;
- tree_lh -= aln->getNSite()*prob_const;
- assert(!isnan(tree_lh) && !isinf(tree_lh));
- }
-
- assert(!isnan(tree_lh) && !isinf(tree_lh));
-
- delete [] val;
- return tree_lh;
-}
-
-
-/*******************************************************
- *
- * non-vectorized likelihood functions for mixture models
- *
- ******************************************************/
-
-//template <const int nstates>
-void PhyloTree::computeMixturePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
- // don't recompute the likelihood
- assert(dad);
- if (dad_branch->partial_lh_computed & 1)
- return;
- dad_branch->partial_lh_computed |= 1;
-
- size_t nstates = aln->num_states;
- size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
- PhyloNode *node = (PhyloNode*)(dad_branch->node);
-
- if (!tip_partial_lh_computed)
- computeTipPartialLikelihood();
-
- if (node->isLeaf()) {
- dad_branch->lh_scale_factor = 0.0;
- return;
- }
-
- size_t ptn, c;
- size_t orig_ntn = aln->size();
- size_t ncat = site_rate->getNRate(), nmixture = model->getNMixtures();
- const size_t nstatesqr=nstates*nstates;
- size_t i, x, m;
- size_t statecat = nstates * ncat;
-// size_t statemix = nstates * nmixture;
- size_t block = nstates * ncat * nmixture;
-
- double *evec = model->getEigenvectors();
- double *inv_evec = model->getInverseEigenvectors();
- assert(inv_evec && evec);
- double *eval = model->getEigenvalues();
-
- dad_branch->lh_scale_factor = 0.0;
-
- // internal node
- PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
- FOR_NEIGHBOR_IT(node, dad, it) {
- PhyloNeighbor *nei = (PhyloNeighbor*)*it;
- if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
- if ((nei->partial_lh_computed & 1) == 0)
- computePartialLikelihood(nei, node);
- dad_branch->lh_scale_factor += nei->lh_scale_factor;
- }
-
- if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
- // re-orient partial_lh
- bool done = false;
- FOR_NEIGHBOR_IT(node, dad, it2) {
- PhyloNeighbor *backnei = ((PhyloNeighbor*)(*it2)->node->findNeighbor(node));
- if (backnei->partial_lh) {
- dad_branch->partial_lh = backnei->partial_lh;
- dad_branch->scale_num = backnei->scale_num;
- backnei->partial_lh = NULL;
- backnei->scale_num = NULL;
- backnei->partial_lh_computed &= ~1; // clear bit
- done = true;
- break;
- }
- }
- assert(done && "partial_lh is not re-oriented");
- }
-
-
- double *echildren = new double[block*nstates*(node->degree()-1)];
- double *partial_lh_leaves = new double[(aln->STATE_UNKNOWN+1)*block*(node->degree()-1)];
- double *echild = echildren;
- double *partial_lh_leaf = partial_lh_leaves;
-
- FOR_NEIGHBOR_IT(node, dad, it) {
- // precompute information buffer
- double expchild[nstates];
- PhyloNeighbor *child = (PhyloNeighbor*)*it;
- for (c = 0; c < ncat; c++) {
- double len_child = site_rate->getRate(c) * child->length;
- for (m = 0; m < nmixture; m++) {
- for (i = 0; i < nstates; i++) {
- expchild[i] = exp(eval[m*nstates+i]*len_child);
- }
- for (x = 0; x < nstates; x++)
- for (i = 0; i < nstates; i++) {
- echild[(m*ncat+c)*nstatesqr+x*nstates+i] = evec[m*nstatesqr+x*nstates+i] * expchild[i];
- }
- }
- }
- if (child->node->isLeaf()) {
- vector<int>::iterator it;
- for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
- int state = (*it);
- for (m = 0; m < nmixture; m++) {
- double *this_echild = &echild[m*nstatesqr*ncat];
- double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixture + m*nstates];
- double *this_partial_lh_leaf = &partial_lh_leaf[state*block+m*statecat];
- for (x = 0; x < statecat; x++) {
- double vchild = 0.0;
- for (i = 0; i < nstates; i++) {
- vchild += this_echild[x*nstates+i] * this_tip_partial_lh[i];
- }
- this_partial_lh_leaf[x] = vchild;
- }
- }
- }
- size_t addr = aln->STATE_UNKNOWN * block;
- for (x = 0; x < block; x++) {
- partial_lh_leaf[addr+x] = 1.0;
- }
- partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
- }
- echild += block*nstates;
- }
-
- double *eleft = echildren, *eright = echildren + block*nstates;
-
- if (!left->node->isLeaf() && right->node->isLeaf()) {
- PhyloNeighbor *tmp = left;
- left = right;
- right = tmp;
- double *etmp = eleft;
- eleft = eright;
- eright = etmp;
- }
-
- if (node->degree() > 3) {
- /*--------------------- multifurcating node ------------------*/
- // now for-loop computing partial_lh over all site-patterns
- double sum_scale = 0.0;
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i, m) schedule(static)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double partial_lh_all[block];
- for (i = 0; i < block; i++)
- partial_lh_all[i] = 1.0;
- dad_branch->scale_num[ptn] = 0;
-
- double *partial_lh_leaf = partial_lh_leaves;
- double *echild = echildren;
-
- FOR_NEIGHBOR_IT(node, dad, it) {
- PhyloNeighbor *child = (PhyloNeighbor*)*it;
- if (child->node->isLeaf()) {
- // external node
- int state_child = (ptn < orig_ntn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- double *child_lh = partial_lh_leaf + state_child*block;
- for (c = 0; c < block; c++) {
- // compute real partial likelihood vector
- partial_lh_all[c] *= child_lh[c];
- }
- partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
- } else {
- // internal node
- double *partial_lh = partial_lh_all;
- double *partial_lh_child = child->partial_lh + ptn*block;
- dad_branch->scale_num[ptn] += child->scale_num[ptn];
- double *echild_ptr = echild;
-
- for (m = 0; m < nmixture; m++) {
- for (c = 0; c < ncat; c++) {
- // compute real partial likelihood vector
- for (x = 0; x < nstates; x++) {
- double vchild = 0.0;
-// size_t addr = (m*ncat+c)*nstatesqr+x*nstates;
- for (i = 0; i < nstates; i++) {
- vchild += echild_ptr[i] * partial_lh_child[i];
- }
- echild_ptr += nstates;
- partial_lh[x] *= vchild;
- }
- partial_lh += nstates;
- partial_lh_child += nstates;
- }
- }
-
- } // if
- echild += block*nstates;
- } // FOR_NEIGHBOR
-
- // compute dot-product with inv_eigenvector
- double lh_max = 0.0;
- double *partial_lh_tmp = partial_lh_all;
- double *partial_lh = dad_branch->partial_lh+ptn*block;
- for (m = 0; m < nmixture; m++) {
- for (c = 0; c < ncat; c++) {
- double *inv_evec_ptr = inv_evec + m*nstatesqr;
- for (i = 0; i < nstates; i++) {
- double res = 0.0;
- for (x = 0; x < nstates; x++) {
- res += partial_lh_tmp[x]*inv_evec_ptr[x];
- }
- inv_evec_ptr += nstates;
- partial_lh[i] = res;
- lh_max = max(fabs(res), lh_max);
- }
- partial_lh += nstates;
- partial_lh_tmp += nstates;
- }
- }
- if (lh_max < SCALING_THRESHOLD) {
- // now do the likelihood scaling
- partial_lh = dad_branch->partial_lh + ptn*block;
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- }
-
- } // for ptn
- dad_branch->lh_scale_factor += sum_scale;
-
- // end multifurcating treatment
-
- } else if (left->node->isLeaf() && right->node->isLeaf()) {
- // special treatment for TIP-TIP (cherry) case
-
- // pre compute information for both tips
- double *partial_lh_left = partial_lh_leaves;
- double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
-
- // scale number must be ZERO
- memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
-#ifdef _OPENMP
-#pragma omp parallel for private(ptn, c, x, i, m)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double partial_lh_tmp[nstates];
- double *partial_lh = dad_branch->partial_lh + ptn*block;
- int state_left = (ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- int state_right = (ptn < orig_ntn) ? (aln->at(ptn))[right->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- for (m = 0; m < nmixture; m++) {
- for (c = 0; c < ncat; c++) {
- // compute real partial likelihood vector
- double *left = partial_lh_left + (state_left*block+m*statecat+c*nstates);
- double *right = partial_lh_right + (state_right*block+m*statecat+c*nstates);
- for (x = 0; x < nstates; x++) {
- partial_lh_tmp[x] = left[x] * right[x];
- }
-
- // compute dot-product with inv_eigenvector
- for (i = 0; i < nstates; i++) {
- double res = 0.0;
- for (x = 0; x < nstates; x++) {
- res += partial_lh_tmp[x]*inv_evec[m*nstatesqr+i*nstates+x];
- }
- partial_lh[m*statecat+c*nstates+i] = res;
- }
- }
- }
- }
- } else if (left->node->isLeaf() && !right->node->isLeaf()) {
- // special treatment to TIP-INTERNAL NODE case
- // only take scale_num from the right subtree
- memcpy(dad_branch->scale_num, right->scale_num, nptn * sizeof(UBYTE));
-
- // pre compute information for left tip
- double *partial_lh_left = partial_lh_leaves;
-
- double sum_scale = 0.0;
-#ifdef _OPENMP
-//#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i, m, partial_lh_tmp)
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i, m)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double partial_lh_tmp[nstates];
- double *partial_lh = dad_branch->partial_lh + ptn*block;
- double *partial_lh_right = right->partial_lh + ptn*block;
- int state_left = (ptn < orig_ntn) ? (aln->at(ptn))[left->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
- double lh_max = 0.0;
-
- for (m = 0; m < nmixture; m++) {
- for (c = 0; c < ncat; c++) {
- // compute real partial likelihood vector
- for (x = 0; x < nstates; x++) {
- double vleft = 0.0, vright = 0.0;
- size_t addr = (m*ncat+c)*nstatesqr+x*nstates;
- vleft = partial_lh_left[state_left*block+m*statecat+c*nstates+x];
- for (i = 0; i < nstates; i++) {
- vright += eright[addr+i] * partial_lh_right[m*statecat+c*nstates+i];
- }
- partial_lh_tmp[x] = vleft * (vright);
- }
- // compute dot-product with inv_eigenvector
- for (i = 0; i < nstates; i++) {
- double res = 0.0;
- for (x = 0; x < nstates; x++) {
- res += partial_lh_tmp[x]*inv_evec[m*nstatesqr+i*nstates+x];
- }
- partial_lh[m*statecat+c*nstates+i] = res;
- lh_max = max(fabs(res), lh_max);
- }
- }
- }
- if (lh_max < SCALING_THRESHOLD) {
- // now do the likelihood scaling
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- }
-
-
- }
- dad_branch->lh_scale_factor += sum_scale;
-
- } else {
- // both left and right are internal node
-
- double sum_scale = 0.0;
-#ifdef _OPENMP
-//#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i, m, partial_lh_tmp)
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i, m)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double partial_lh_tmp[nstates];
- double *partial_lh = dad_branch->partial_lh + ptn*block;
- double *partial_lh_left = left->partial_lh + ptn*block;
- double *partial_lh_right = right->partial_lh + ptn*block;
- double lh_max = 0.0;
- dad_branch->scale_num[ptn] = left->scale_num[ptn] + right->scale_num[ptn];
-
- for (m = 0; m < nmixture; m++) {
- for (c = 0; c < ncat; c++) {
- // compute real partial likelihood vector
- for (x = 0; x < nstates; x++) {
- double vleft = 0.0, vright = 0.0;
- size_t addr = (m*ncat+c)*nstatesqr+x*nstates;
- for (i = 0; i < nstates; i++) {
- vleft += eleft[addr+i] * partial_lh_left[m*statecat+c*nstates+i];
- vright += eright[addr+i] * partial_lh_right[m*statecat+c*nstates+i];
- }
- partial_lh_tmp[x] = vleft*vright;
- }
- // compute dot-product with inv_eigenvector
- for (i = 0; i < nstates; i++) {
- double res = 0.0;
- for (x = 0; x < nstates; x++) {
- res += partial_lh_tmp[x]*inv_evec[m*nstatesqr+i*nstates+x];
- }
- partial_lh[m*statecat+c*nstates+i] = res;
- lh_max = max(lh_max, fabs(res));
- }
- }
- }
- if (lh_max < SCALING_THRESHOLD) {
- // now do the likelihood scaling
- for (i = 0; i < block; i++) {
- partial_lh[i] *= SCALING_THRESHOLD_INVER;
- }
- // unobserved const pattern will never have underflow
- sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
- dad_branch->scale_num[ptn] += 1;
- }
-
- }
- dad_branch->lh_scale_factor += sum_scale;
-
- }
-
- delete [] partial_lh_leaves;
- delete [] echildren;
-}
-
-//template <const int nstates>
-void PhyloTree::computeMixtureLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf) {
- PhyloNode *node = (PhyloNode*) dad_branch->node;
- PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
- if (!central_partial_lh)
- initializeAllPartialLh();
- if (node->isLeaf()) {
- PhyloNode *tmp_node = dad;
- dad = node;
- node = tmp_node;
- PhyloNeighbor *tmp_nei = dad_branch;
- dad_branch = node_branch;
- node_branch = tmp_nei;
- }
- if ((dad_branch->partial_lh_computed & 1) == 0)
- computeMixturePartialLikelihoodEigen(dad_branch, dad);
- if ((node_branch->partial_lh_computed & 1) == 0)
- computeMixturePartialLikelihoodEigen(node_branch, node);
- size_t nstates = aln->num_states;
- size_t ncat = site_rate->getNRate();
- size_t nmixture = model->getNMixtures();
-
- size_t block = ncat * nstates * nmixture;
- size_t statemix = nstates * nmixture;
- size_t statecat = nstates * ncat;
- size_t ptn; // for big data size > 4GB memory required
- size_t c, i, m;
- size_t orig_nptn = aln->size();
- size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
- double *eval = model->getEigenvalues();
- assert(eval);
-
- assert(theta_all);
- if (!theta_computed) {
- // precompute theta for fast branch length optimization
-
- if (dad->isLeaf()) {
- // special treatment for TIP-INTERNAL NODE case
-#ifdef _OPENMP
-#pragma omp parallel for private(ptn, i, m)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- double *theta = theta_all + ptn*block;
- double *lh_tip = tip_partial_lh +
- ((int)((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn]))*statemix;
- for (m = 0; m < nmixture; m++) {
- for (i = 0; i < statecat; i++) {
- theta[m*statecat+i] = lh_tip[m*nstates + i%nstates] * partial_lh_dad[m*statecat+i];
- }
- }
-
- }
- // ascertainment bias correction
- } else {
- // both dad and node are internal nodes
- double *partial_lh_node = node_branch->partial_lh;
- double *partial_lh_dad = dad_branch->partial_lh;
-
- size_t all_entries = nptn*block;
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
- for (i = 0; i < all_entries; i++) {
- theta_all[i] = partial_lh_node[i] * partial_lh_dad[i];
- }
- }
- theta_computed = true;
- }
-
- double *val0 = new double[block];
- double *val1 = new double[block];
- double *val2 = new double[block];
- for (c = 0; c < ncat; c++) {
- double prop = site_rate->getProp(c);
- for (m = 0; m < nmixture; m++) {
- for (i = 0; i < nstates; i++) {
- double cof = eval[m*nstates+i]*site_rate->getRate(c);
- double val = exp(cof*dad_branch->length) * prop * ((ModelMixture*)model)->prop[m];
- double val1_ = cof*val;
- val0[(m*ncat+c)*nstates+i] = val;
- val1[(m*ncat+c)*nstates+i] = val1_;
- val2[(m*ncat+c)*nstates+i] = cof*val1_;
- }
- }
- }
-
-
- double my_df = 0.0, my_ddf = 0.0, prob_const = 0.0, df_const = 0.0, ddf_const = 0.0;
-
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: my_df, my_ddf, prob_const, df_const, ddf_const) private(ptn, i)
-#endif
- for (ptn = 0; ptn < nptn; ptn++) {
- double lh_ptn = ptn_invar[ptn], df_ptn = 0.0, ddf_ptn = 0.0;
- double *theta = theta_all + ptn*block;
- for (i = 0; i < block; i++) {
- lh_ptn += val0[i] * theta[i];
- df_ptn += val1[i] * theta[i];
- ddf_ptn += val2[i] * theta[i];
- }
-
-// assert(lh_ptn > 0.0);
- lh_ptn = fabs(lh_ptn);
-
- if (ptn < orig_nptn) {
- double df_frac = df_ptn / lh_ptn;
- double ddf_frac = ddf_ptn / lh_ptn;
- double freq = ptn_freq[ptn];
- double tmp1 = df_frac * freq;
- double tmp2 = ddf_frac * freq;
- my_df += tmp1;
- my_ddf += tmp2 - tmp1 * df_frac;
- } else {
- // ascertainment bias correction
- prob_const += lh_ptn;
- df_const += df_ptn;
- ddf_const += ddf_ptn;
- }
- }
- df = my_df;
- ddf = my_ddf;
- if (isnan(df) || isinf(df)) {
- df = 0.0;
- ddf = 0.0;
-// outWarning("Numerical instability (some site-likelihood = 0)");
- }
-
- if (orig_nptn < nptn) {
- // ascertainment bias correction
- prob_const = 1.0 - prob_const;
- double df_frac = df_const / prob_const;
- double ddf_frac = ddf_const / prob_const;
- int nsites = aln->getNSite();
- df += nsites * df_frac;
- ddf += nsites *(ddf_frac + df_frac*df_frac);
- }
-
-
- delete [] val2;
- delete [] val1;
- delete [] val0;
-}
-
-//template <const int nstates>
-double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloNode *dad) {
- PhyloNode *node = (PhyloNode*) dad_branch->node;
- PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
- if (!central_partial_lh)
- initializeAllPartialLh();
- if (node->isLeaf()) {
- PhyloNode *tmp_node = dad;
- dad = node;
- node = tmp_node;
- PhyloNeighbor *tmp_nei = dad_branch;
- dad_branch = node_branch;
- node_branch = tmp_nei;
}
- if ((dad_branch->partial_lh_computed & 1) == 0)
-// computeMixturePartialLikelihoodEigen(dad_branch, dad);
- computePartialLikelihood(dad_branch, dad);
- if ((node_branch->partial_lh_computed & 1) == 0)
- computePartialLikelihood(node_branch, node);
- double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
- size_t nstates = aln->num_states;
- size_t ncat = site_rate->getNRate();
- size_t nmixture = model->getNMixtures();
-
- size_t block = ncat * nstates * nmixture;
- size_t statemix = nstates * nmixture;
- size_t catmix = ncat * nmixture;
- size_t ptn; // for big data size > 4GB memory required
- size_t c, i, m;
- size_t orig_nptn = aln->size();
- size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
- double *eval = model->getEigenvalues();
- assert(eval);
- double *val = new double[block];
- for (c = 0; c < ncat; c++) {
- double len = site_rate->getRate(c)*dad_branch->length;
- double prop = site_rate->getProp(c);
- for (m = 0; m < nmixture; m++)
- for (i = 0; i < nstates; i++)
- val[(m*ncat+c)*nstates+i] = exp(eval[m*nstates+i]*len) * prop * ((ModelMixture*)model)->prop[m];
- }
- double prob_const = 0.0;
- // 2015-11-30: _pattern_lh_cat now stores mixture and cat likelihoods
- memset(_pattern_lh_cat, 0, nptn*catmix*sizeof(double));
+ memset(ptn_ancestral_prob, 0, sizeof(double)*nptn*nstates);
if (dad->isLeaf()) {
// special treatment for TIP-INTERNAL NODE case
- double *partial_lh_node = new double[(aln->STATE_UNKNOWN+1)*block];
- IntVector states_dad = aln->seq_states[dad->id];
- states_dad.push_back(aln->STATE_UNKNOWN);
- // precompute information from one tip
- for (IntVector::iterator it = states_dad.begin(); it != states_dad.end(); it++) {
- double *lh_node = partial_lh_node +(*it)*block;
- double *lh_tip = tip_partial_lh + (*it)*statemix;
- double *val_tmp = val;
- for (m = 0; m < nmixture; m++) {
- for (c = 0; c < ncat; c++) {
- for (i = 0; i < nstates; i++) {
- lh_node[i] = val_tmp[i] * lh_tip[m*nstates+i];
- }
- lh_node += nstates;
- val_tmp += nstates;
- }
- }
- }
+ double partial_lh_leaf[(aln->STATE_UNKNOWN+1)*block];
+
+ for (IntVector::iterator it = aln->seq_states[dad->id].begin(); it != aln->seq_states[dad->id].end(); it++) {
+ int state = (*it);
+ for (m = 0; m < nmixture; m++) {
+ double *this_echild = &echild[m*nstatesqr*ncat];
+ double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixture + m*nstates];
+ double *this_partial_lh_leaf = &partial_lh_leaf[state*block+m*statecat];
+ for (x = 0; x < statecat; x++) {
+ double vchild = 0.0;
+ for (i = 0; i < nstates; i++) {
+ vchild += this_echild[x*nstates+i] * this_tip_partial_lh[i];
+ }
+ this_partial_lh_leaf[x] = vchild;
+ }
+ }
+ }
+ size_t addr = aln->STATE_UNKNOWN * block;
+ for (x = 0; x < block; x++) {
+ partial_lh_leaf[addr+x] = 1.0;
+ }
+
// now do the real computation
#ifdef _OPENMP
-#pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, i, c, m)
+#pragma omp parallel for private(ptn, i, c, m, x)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
- double lh_ptn = ptn_invar[ptn];
- double *lh_cat = _pattern_lh_cat + ptn*catmix;
+ double *lh_state = ptn_ancestral_prob + ptn*nstates;
double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
- int state_dad = (ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
- double *lh_node = partial_lh_node + state_dad*block;
- for (m = 0; m < nmixture; m++) {
+ int state_dad = (aln->at(ptn))[dad->id];
+ double *lh_leaf = partial_lh_leaf + state_dad*block;
+ for (m = 0; m < nmixture; m++) {
+ double *this_inv_evec = inv_evec + (m*nstatesqr);
for (c = 0; c < ncat; c++) {
- for (i = 0; i < nstates; i++) {
- *lh_cat += lh_node[i] * partial_lh_dad[i];
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ double vnode = 0.0;
+ for (i = 0; i < nstates; i++) {
+ vnode += this_inv_evec[i*nstates+x] * partial_lh_dad[i];
+ }
+ lh_state[x] += lh_leaf[x] * vnode;
}
- lh_node += nstates;
- partial_lh_dad += nstates;
- lh_ptn += *lh_cat;
- lh_cat++;
- }
-
- }
-// assert(lh_ptn > 0.0);
- if (ptn < orig_nptn) {
- lh_ptn = log(fabs(lh_ptn));
- _pattern_lh[ptn] = lh_ptn;
- tree_lh += lh_ptn * ptn_freq[ptn];
- } else {
- // bugfix 2016-01-21, prob_const can be rescaled
- if (dad_branch->scale_num[ptn] >= 1)
- lh_ptn *= SCALING_THRESHOLD;
- prob_const += lh_ptn;
- }
+ lh_leaf += nstates;
+ partial_lh_dad += nstates;
+ }
+ }
+
+ double lh_sum = lh_state[0];
+ for (x = 1; x < nstates; x++)
+ lh_sum += lh_state[x];
+ lh_sum = 1.0/lh_sum;
+ for (x = 0; x < nstates; x++)
+ lh_state[x] *= lh_sum;
}
- delete [] partial_lh_node;
} else {
// both dad and node are internal nodes
#ifdef _OPENMP
-#pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, i, c, m)
+#pragma omp parallel for private(ptn, i, c, m, x)
#endif
for (ptn = 0; ptn < nptn; ptn++) {
- double lh_ptn = ptn_invar[ptn];
- double *lh_cat = _pattern_lh_cat + ptn*catmix;
+ double *lh_state = ptn_ancestral_prob + ptn*nstates;
double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
double *partial_lh_node = node_branch->partial_lh + ptn*block;
- double *val_tmp = val;
+
for (m = 0; m < nmixture; m++) {
+ double *this_inv_evec = inv_evec + (m*nstatesqr);
for (c = 0; c < ncat; c++) {
- for (i = 0; i < nstates; i++) {
- *lh_cat += val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
+ // compute real partial likelihood vector
+ for (x = 0; x < nstates; x++) {
+ double vdad = 0.0, vnode = 0.0;
+ size_t addr = (m*ncat+c)*nstatesqr+x*nstates;
+ for (i = 0; i < nstates; i++) {
+ vdad += echild[addr+i] * partial_lh_node[m*statecat+c*nstates+i];
+ vnode += this_inv_evec[i*nstates+x] * partial_lh_dad[m*statecat+c*nstates+i];
+ }
+ lh_state[x] += vnode*vdad;
}
- lh_ptn += *lh_cat;
- partial_lh_node += nstates;
- partial_lh_dad += nstates;
- val_tmp += nstates;
- lh_cat++;
}
}
- assert(lh_ptn > 0.0);
- if (ptn < orig_nptn) {
- lh_ptn = log(lh_ptn);
- _pattern_lh[ptn] = lh_ptn;
- tree_lh += lh_ptn * ptn_freq[ptn];
- } else {
- // bugfix 2016-01-21, prob_const can be rescaled
- if (dad_branch->scale_num[ptn] + node_branch->scale_num[ptn] >= 1)
- lh_ptn *= SCALING_THRESHOLD;
- prob_const += lh_ptn;
- }
+ double lh_sum = lh_state[0];
+ for (x = 1; x < nstates; x++)
+ lh_sum += lh_state[x];
+ lh_sum = 1.0/lh_sum;
+ for (x = 0; x < nstates; x++)
+ lh_state[x] *= lh_sum;
+
}
}
+}
+void PhyloTree::computeJointAncestralSequences(int *ancestral_seqs) { // joint ancestral reconstruction: bottom-up pass fills the traceback table C, top-down pass writes one state per pattern per internal node into ancestral_seqs
- if (orig_nptn < nptn) {
- // ascertainment bias correction
- prob_const = log(1.0 - prob_const);
- for (ptn = 0; ptn < orig_nptn; ptn++)
- _pattern_lh[ptn] -= prob_const;
- tree_lh -= aln->getNSite()*prob_const;
- assert(!isnan(tree_lh) && !isinf(tree_lh));
+ // step 1-3 of the dynamic programming algorithm of Pupko et al. 2000, MBE 17:890-896
+ assert(root->isLeaf()); // both passes start from root->neighbors[0], i.e. the single branch leaving a leaf root
+ int *C = new int[(size_t)getAlnNPattern()*model->num_states*leafNum]; // traceback table: leafNum slabs of (patterns x states) ints; callees index slabs by (node->id - leafNum)
+ computeAncestralLikelihood((PhyloNeighbor*)root->neighbors[0], NULL, C); // dad == NULL marks the top of the recursion
+
+ // step 4-5 of the dynamic programming algorithm of Pupko et al. 2000, MBE 17:890-896
+ computeAncestralState((PhyloNeighbor*)root->neighbors[0], NULL, C, ancestral_seqs);
+
+ clearAllPartialLH(); // computeAncestralLikelihood wrote its own values into the partial_lh buffers; presumably this invalidates them for later likelihood calls - TODO confirm
+
+ delete[] C;
+}
+
+void PhyloTree::computeAncestralLikelihood(PhyloNeighbor *dad_branch, PhyloNode *dad, int *C) { // bottom-up pass: for the node below dad_branch, stores the best log-likelihood per dad state in dad_branch->partial_lh and the maximising node state in C; dad == NULL means top of the recursion
+ PhyloNode *node = (PhyloNode*)dad_branch->node;
+ if (node->isLeaf()) // leaves are handled inline by their parent via lh_leaves
+ return;
+
+ int num_leaves = 0; // number of leaf children of node; sizes lh_leaves below
+
+ // recurse into internal children first (post-order); leaf children are only counted
+ FOR_NEIGHBOR_DECLARE(node, dad, it) {
+ if ((*it)->node->isLeaf()) {
+ num_leaves++;
+ } else {
+ computeAncestralLikelihood((PhyloNeighbor*)(*it), node, C);
+ }
}
- assert(!isnan(tree_lh) && !isinf(tree_lh));
+ // TODO mem save (LM_PER_NODE: dad_branch has no buffer yet, so take one from a neighbor that points back at this node)
+ if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
+ // re-orient partial_lh
+ bool done = false;
+ FOR_NEIGHBOR_IT(node, dad, it2) {
+ PhyloNeighbor *backnei = ((PhyloNeighbor*)(*it2)->node->findNeighbor(node)); // the child's neighbor entry that points back to node
+ if (backnei->partial_lh) {
+ dad_branch->partial_lh = backnei->partial_lh;
+ dad_branch->scale_num = backnei->scale_num;
+ backnei->partial_lh = NULL;
+ backnei->scale_num = NULL;
+ backnei->partial_lh_computed &= ~1; // clear bit
+ done = true;
+ break;
+ }
+ }
+ assert(done && "partial_lh is not re-oriented");
+ }
+
+ size_t nptn = aln->getNPattern();
+ size_t ptn;
+ size_t nstates = model->num_states;
+ size_t nstatesqr = nstates*nstates;
+ size_t parent, child;
+ double *trans_mat = new double[nstatesqr]; // log transition matrix along dad_branch, indexed [parent*nstates+child]; at the top every row holds the log state frequencies
+ double *lh_leaves = NULL;
+ if (num_leaves > 0) {
+ lh_leaves = new double[(aln->STATE_UNKNOWN+1)*nstates*num_leaves]; // per leaf child: (STATE_UNKNOWN+1) rows of nstates log-likelihoods, row = observed leaf state
+ }
+ if (dad) {
+ model->computeTransMatrix(dad_branch->length, trans_mat);
+ for (parent = 0; parent < nstatesqr; parent++) // 'parent' is just a flat index over the matrix here
+ trans_mat[parent] = log(trans_mat[parent]);
+ } else {
+ model->getStateFrequency(trans_mat);
+ for (parent = 0; parent < nstates; parent++)
+ trans_mat[parent] = log(trans_mat[parent]);
+ for (parent = 1; parent < nstates; parent++)
+ memcpy(trans_mat+parent*nstates, trans_mat, sizeof(double)*nstates); // replicate the log-frequency row into every other row
+ }
+
+ // compute information buffer for leaves
+ int ambi_aa[] = { // bitmasks over amino-acid state indices (bit k set = state k is compatible)
+ 4+8, // B = N or D
+ 32+64, // Z = Q or E
+ 512+1024 // U = I or L
+ };
+ int leafid = 0;
+ FOR_NEIGHBOR(node, dad, it) {
+ if ((*it)->node->isLeaf()) {
+ double trans_leaf[nstatesqr]; // NOTE(review): variable-length array, a compiler extension in C++
+ model->computeTransMatrix((*it)->length, trans_leaf);
+ double *lh_leaf = lh_leaves+leafid*nstates*(aln->STATE_UNKNOWN+1);
+
+ // assign lh_leaf for normal states
+ for (parent = 0; parent < nstates; parent++)
+ for (child = 0; child < nstates; child++)
+ lh_leaf[child*nstates+parent] = log(trans_leaf[parent*nstates+child]); // stored transposed: row = observed leaf state, column = state at this node
+
+ // for unknown state
+ double *this_lh_leaf = lh_leaf + (aln->STATE_UNKNOWN*nstates);
+ for (parent = 0; parent < nstates; parent++)
+ this_lh_leaf[parent] = 0.0; // log(1): an unknown character is compatible with every state
+
+ // special treatment for ambiguous characters
+ switch (aln->seq_type) {
+ case SEQ_DNA:
+ for (int state = 4; state < 18; state++) { // state codes 4..17 are treated as ambiguous nucleotides
+ this_lh_leaf = lh_leaf + (state*nstates);
+ int cstate = state-nstates+1; // bitmask of compatible states; assuming nstates == 4 this maps codes 4..17 to masks 1..14 - TODO confirm
+ for (parent = 0; parent < nstates; parent++) {
+ double sumlh = 0.0;
+ for (child = 0; child < nstates; child++) {
+ if ((cstate) & (1 << child))
+ sumlh += trans_leaf[parent*nstates+child];
+ }
+ this_lh_leaf[parent] = log(sumlh); // log of the summed probability over all compatible states
+ }
+ }
+ break;
+ case SEQ_PROTEIN:
+ for (int state = 0; state < sizeof(ambi_aa)/sizeof(int); state++) { // NOTE(review): compares int with size_t
+ this_lh_leaf = lh_leaf + ((state+20)*nstates); // the three ambiguity codes occupy rows 20, 21 and 22
+ for (parent = 0; parent < nstates; parent++) {
+ double sumlh = 0.0;
+ for (child = 0; child < nstates; child++) {
+ if (ambi_aa[state] & (1 << child))
+ sumlh += trans_leaf[parent*nstates+child];
+ }
+ this_lh_leaf[parent] = log(sumlh);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ leafid++;
+ }
+ }
- delete [] val;
- return tree_lh;
+ // initialize L_y(i) and C_y(i)
+// memset(dad_branch->partial_lh, 0, nptn*nstates*sizeof(double));
+
+ int *C_node = C + (node->id-leafNum)*nptn*nstates; // this node's slab of C; assumes internal node ids start at leafNum - TODO confirm
+
+ for (ptn = 0; ptn < nptn; ptn++) {
+ double *lh_dad = dad_branch->partial_lh+ptn*nstates; // output: nstates values per pattern
+ int *this_C_node = C_node + (ptn*nstates);
+ leafid = 0;
+ double sumlh[nstates]; // sumlh[s] = sum over all children of their log-likelihood given this node is in state s
+ memset(sumlh, 0, sizeof(double)*nstates);
+ FOR_NEIGHBOR(node, dad, it) {
+ PhyloNeighbor *childnei = (PhyloNeighbor*)(*it);
+ if ((*it)->node->isLeaf()) {
+ double *lh_leaf = lh_leaves+leafid*nstates*(aln->STATE_UNKNOWN+1);
+ // external node
+ int state_child;
+ state_child = (aln->at(ptn))[(*it)->node->id];
+ double *child_lh = lh_leaf + state_child*nstates; // row selected by the observed character of this leaf
+ for (child = 0; child < nstates; child++) // here 'child' indexes the state of the current node
+ sumlh[child] += child_lh[child];
+ leafid++;
+ } else {
+ double *child_lh = childnei->partial_lh + ptn*nstates; // values written by the recursive call above
+ for (child = 0; child < nstates; child++)
+ sumlh[child] += child_lh[child];
+ }
+ }
+
+
+ if (dad) {
+ // internal node
+ for (parent = 0; parent < nstates; parent++) { // for every state of dad, maximise over the state of this node
+ lh_dad[parent] = trans_mat[parent*nstates] + sumlh[0];
+ this_C_node[parent] = 0;
+ for (child = 1; child < nstates; child++) {
+ double lh = trans_mat[parent*nstates+child] + sumlh[child];
+ if (lh > lh_dad[parent]) { // strict '>' keeps the lowest-index state on ties
+ lh_dad[parent] = lh;
+ this_C_node[parent] = child; // remembered for the traceback in computeAncestralState
+ }
+ }
+ }
+ } else {
+ // at the root
+ lh_dad[0] = trans_mat[0] + sumlh[0]; // no dad state to condition on: log frequency + subtree score, result kept in slot 0
+ this_C_node[0] = 0;
+ for (parent = 1; parent < nstates; parent++) {
+ double lh = trans_mat[parent] + sumlh[parent];
+ if (lh > lh_dad[0]) {
+ lh_dad[0] = lh;
+ this_C_node[0] = parent;
+ }
+ }
+ }
+ }
+
+
+ if (lh_leaves) // NOTE(review): guard is redundant, delete[] on a null pointer is a no-op
+ delete[] lh_leaves;
+ delete[] trans_mat;
+}
+
+
+void PhyloTree::computeAncestralState(PhyloNeighbor *dad_branch, PhyloNode *dad, int *C, int *ancestral_seqs) { // top-down traceback: reads the table C and writes one state per pattern for every internal node below dad_branch into ancestral_seqs
+ PhyloNode *node = (PhyloNode*)dad_branch->node;
+ if (node->isLeaf()) // leaves get no entry in ancestral_seqs
+ return;
+
+ size_t nptn = aln->getNPattern();
+ size_t ptn;
+ size_t nstates = model->num_states;
+
+ int *C_node = C + (node->id-leafNum)*nptn*nstates; // this node's slab of the traceback table (nptn x nstates)
+ int *ancestral_seqs_node = ancestral_seqs + (node->id-leafNum)*nptn; // this node's output row; assumes internal node ids start at leafNum - TODO confirm
+ if (dad) {
+ // at an internal node
+ int *ancestral_seqs_dad = ancestral_seqs + (dad->id-leafNum)*nptn; // already filled, because dad is processed before its children
+ for (ptn = 0; ptn < nptn; ptn++)
+ ancestral_seqs_node[ptn] = C_node[ptn*nstates+ancestral_seqs_dad[ptn]]; // look up the entry stored for dad's chosen state
+
+ } else {
+ // at the root
+ for (ptn = 0; ptn < nptn; ptn++)
+ ancestral_seqs_node[ptn] = C_node[ptn*nstates]; // computeAncestralLikelihood stores the top-level choice in slot 0
+ }
+ FOR_NEIGHBOR_IT(node, dad, it)
+ computeAncestralState((PhyloNeighbor*)(*it), node, C, ancestral_seqs); // children are resolved after this node's states are fixed
}
+
+
+
diff --git a/pllnni.cpp b/pllnni.cpp
old mode 100755
new mode 100644
index 9c71a91..550a9c8
--- a/pllnni.cpp
+++ b/pllnni.cpp
@@ -1,3 +1,24 @@
+/***************************************************************************
+ * Copyright (C) 2014 by *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -21,7 +42,7 @@ extern VerboseMode verbose_mode;
int NNI_MAX_NR_STEP = 10;
/* program options */
-extern Params *globalParam;
+extern Params *globalParams;
extern Alignment *globalAlignment;
/**
@@ -244,8 +265,8 @@ set<int> getAffectedNodes(pllInstance* tr, nodeptr p) {
void pllEvalAllNNIs(pllInstance *tr, partitionList *pr, SearchInfo &searchinfo) {
/* DTH: mimic IQTREE::optimizeNNI 's first call to IQTREE::saveCurrentTree */
- if((globalParam->online_bootstrap == PLL_TRUE) &&
- (globalParam->gbo_replicates > 0)){
+ if((globalParams->online_bootstrap == PLL_TRUE) &&
+ (globalParams->gbo_replicates > 0)){
tr->fastScaling = PLL_FALSE;
pllEvaluateLikelihood(tr, pr, tr->start, PLL_FALSE, PLL_TRUE);
pllSaveCurrentTree(tr, pr, tr->start);
@@ -294,7 +315,7 @@ double pllDoNNISearch(pllInstance* tr, partitionList *pr, SearchInfo &searchinfo
/* evaluate NNIs */
pllEvalAllNNIs(tr, pr, searchinfo);
- if (searchinfo.speednni) {
+ if (globalParams->speednni) {
searchinfo.aBranches.clear();
}
@@ -322,7 +343,7 @@ double pllDoNNISearch(pllInstance* tr, partitionList *pr, SearchInfo &searchinfo
for (vector<pllNNIMove>::iterator it = selectedNNIs.begin(); it != selectedNNIs.end(); it++) {
/* do the topological change */
doOneNNI(tr, pr, (*it).p, (*it).nniType, TOPO_ONLY);
- if (searchinfo.speednni) {
+ if (globalParams->speednni) {
vector<string> aBranches = getAffectedBranches(tr, (*it).p);
searchinfo.aBranches.insert(aBranches.begin(), aBranches.end());
}
@@ -338,7 +359,7 @@ double pllDoNNISearch(pllInstance* tr, partitionList *pr, SearchInfo &searchinfo
if (selectedNNIs.size() != 0) {
//pllEvaluateLikelihood(tr, pr, tr->start, PLL_FALSE, PLL_FALSE);
pllOptimizeBranchLengths(tr, pr, 1);
- if (globalParam->count_trees) {
+ if (globalParams->count_trees) {
countDistinctTrees(tr, pr);
}
int numNNI = selectedNNIs.size();
@@ -504,7 +525,7 @@ double doOneNNI(pllInstance *tr, partitionList *pr, nodeptr p, int swap, NNI_Typ
}
// Optimize the central branch
pllOptimizeOneBranch(tr, pr, p);
- if((globalParam->online_bootstrap == PLL_TRUE) && (globalParam->gbo_replicates > 0)){
+ if((globalParams->online_bootstrap == PLL_TRUE) && (globalParams->gbo_replicates > 0)){
tr->fastScaling = PLL_FALSE;
pllEvaluateLikelihood(tr, pr, p, PLL_FALSE, PLL_TRUE); // DTH: modified the last arg
pllSaveCurrentTree(tr, pr, p);
@@ -571,8 +592,8 @@ double doOneNNI(pllInstance *tr, partitionList *pr, nodeptr p, int swap, NNI_Typ
else
pllUpdatePartials(tr, pr, r, PLL_FALSE);
pllOptimizeOneBranch(tr, pr, r);
- if((globalParam->online_bootstrap == PLL_TRUE) &&
- (globalParam->gbo_replicates > 0)){
+ if((globalParams->online_bootstrap == PLL_TRUE) &&
+ (globalParams->gbo_replicates > 0)){
tr->fastScaling = PLL_FALSE;
pllEvaluateLikelihood(tr, pr, r, PLL_FALSE, PLL_TRUE); // DTH: modified the last arg
pllSaveCurrentTree(tr, pr, r);
@@ -674,7 +695,7 @@ int evalNNIForBran(pllInstance* tr, partitionList *pr, nodeptr p, SearchInfo &se
/* do an NNI move of type 1 */
double lh1 = doOneNNI(tr, pr, p, 0, searchinfo.nni_type, &searchinfo);
- if (globalParam->count_trees)
+ if (globalParams->count_trees)
countDistinctTrees(tr, pr);
pllNNIMove nni1;
nni1.p = p;
@@ -709,7 +730,7 @@ int evalNNIForBran(pllInstance* tr, partitionList *pr, nodeptr p, SearchInfo &se
/* do an NNI move of type 2 */
double lh2 = doOneNNI(tr, pr, p, 1, searchinfo.nni_type, &searchinfo);
- if (globalParam->count_trees)
+ if (globalParams->count_trees)
countDistinctTrees(tr, pr);
// Create the nniMove struct to store this move
@@ -789,7 +810,7 @@ bool isAffectedBranch(nodeptr p, SearchInfo &searchinfo) {
void evalNNIForSubtree(pllInstance* tr, partitionList *pr, nodeptr p, SearchInfo &searchinfo) {
if (!isTip(p->number, tr->mxtips) && !isTip(p->back->number, tr->mxtips)) {
- if (searchinfo.speednni && searchinfo.curNumNNISteps != 1) {
+ if (globalParams->speednni && searchinfo.curNumNNISteps != 1) {
if (isAffectedBranch(p, searchinfo)) {
evalNNIForBran(tr, pr, p, searchinfo);
}
@@ -906,14 +927,14 @@ void pllSaveCurrentTree(pllInstance* tr, partitionList *pr, nodeptr p){
// online bootstrap
int nptn = pllUFBootDataPtr->n_patterns;
int updated = 0;
- int nsamples = globalParam->gbo_replicates;
+ int nsamples = globalParams->gbo_replicates;
for (int sample = 0; sample < nsamples; sample++) {
double rell = 0.0;
for (int ptn = 0; ptn < nptn; ptn++)
rell += pattern_lh[ptn] * pllUFBootDataPtr->boot_samples[sample][ptn];
- if (rell > pllUFBootDataPtr->boot_logl[sample] + globalParam->ufboot_epsilon ||
- (rell > pllUFBootDataPtr->boot_logl[sample] - globalParam->ufboot_epsilon &&
+ if (rell > pllUFBootDataPtr->boot_logl[sample] + globalParams->ufboot_epsilon ||
+ (rell > pllUFBootDataPtr->boot_logl[sample] - globalParams->ufboot_epsilon &&
random_double() <= 1.0/(pllUFBootDataPtr->boot_counts[sample]+1))) {
// if (!globalParam->store_candidate_trees)
{
@@ -928,7 +949,7 @@ void pllSaveCurrentTree(pllInstance* tr, partitionList *pr, nodeptr p){
}
}
if (rell <= pllUFBootDataPtr->boot_logl[sample] +
- globalParam->ufboot_epsilon) {
+ globalParams->ufboot_epsilon) {
pllUFBootDataPtr->boot_counts[sample]++;
} else {
pllUFBootDataPtr->boot_counts[sample] = 1;
diff --git a/pllnni.h b/pllnni.h
index a7a0f62..cd660a3 100644
--- a/pllnni.h
+++ b/pllnni.h
@@ -1,3 +1,24 @@
+/***************************************************************************
+ * Copyright (C) 2014 by *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
#ifndef NNISEARCH_H
#define NNISEARCH_H
@@ -31,7 +52,6 @@ typedef struct {
typedef struct {
// FOR GENERAL TREE SEARCH
- bool speednni;
vector<pllNNIMove> posNNIList; // positive NNIs
unordered_set<string> aBranches; // Set of branches that are affected by the previous NNIs
double curLogl; // Current tree log-likelihood
diff --git a/quartet.cpp b/quartet.cpp
index ca2dc5d..e21007d 100644
--- a/quartet.cpp
+++ b/quartet.cpp
@@ -921,7 +921,7 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info
// set up parameters
quartet_tree->setParams(params);
quartet_tree->optimize_by_newton = params->optimize_by_newton;
- quartet_tree->setLikelihoodKernel(params->SSE);
+ quartet_tree->setLikelihoodKernel(params->SSE, num_threads);
// set up partition model
if (isSuperTree()) {
diff --git a/split.cpp b/split.cpp
index 74a1506..62495d3 100644
--- a/split.cpp
+++ b/split.cpp
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
diff --git a/split.h b/split.h
index 640ac82..4a4343b 100644
--- a/split.h
+++ b/split.h
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -272,6 +274,7 @@ public:
Split *extractSubSplit(Split &taxa_mask);
string &getName() { return name; }
+
protected:
/**
number of taxa
diff --git a/splitgraph.cpp b/splitgraph.cpp
index 3479c14..4737268 100644
--- a/splitgraph.cpp
+++ b/splitgraph.cpp
@@ -639,7 +639,7 @@ void SplitGraph::scaleWeight(double norm, bool make_int, int precision) {
else
(*itg)->setWeight( round((*itg)->getWeight()*norm*pow((double)10.0,precision))/pow((double)10.0,precision));
}
-
+// TODO Implement a more efficient function using Hash Table
bool SplitGraph::containSplit(Split &sp) {
Split invert_sp(sp);
invert_sp.invert();
diff --git a/splitgraph.h b/splitgraph.h
index f7e5dd8..1960fd2 100644
--- a/splitgraph.h
+++ b/splitgraph.h
@@ -383,7 +383,7 @@ public:
* @return number of trivial splits removed
*/
int removeTrivialSplits();
-
+
protected:
/**
diff --git a/splitset.cpp b/splitset.cpp
index 555b1a1..5e4a8e0 100644
--- a/splitset.cpp
+++ b/splitset.cpp
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
diff --git a/splitset.h b/splitset.h
index 890337d..bab520f 100644
--- a/splitset.h
+++ b/splitset.h
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
diff --git a/stoprule.cpp b/stoprule.cpp
index a5018bf..8a5e4a1 100644
--- a/stoprule.cpp
+++ b/stoprule.cpp
@@ -19,6 +19,7 @@
***************************************************************************/
#include "stoprule.h"
#include "timeutil.h"
+#include "MPIHelper.h"
StopRule::StopRule() : CheckpointFactory()
{
@@ -35,6 +36,7 @@ StopRule::StopRule() : CheckpointFactory()
start_real_time = -1.0;
max_run_time = -1.0;
curIteration = 0;
+ should_stop = false;
}
void StopRule::initialize(Params &params) {
@@ -49,6 +51,14 @@ void StopRule::initialize(Params &params) {
max_run_time = params.maxtime * 60; // maxtime is in minutes
}
+void StopRule::getUFBootCountCheck(int &ufboot_count, int &ufboot_count_check) { // outputs: the next multiples of step/2 and of step above curIteration
+ int step = step_iteration; // start from the configured step size
+ while (step*2 < MPIHelper::getInstance().getNumProcesses()) // keep doubling while 2*step is still below the process count reported by MPIHelper
+ step *= 2;
+ ufboot_count = (curIteration/(step/2)+1)*(step/2); // smallest multiple of step/2 above a non-negative curIteration; NOTE(review): step/2 is 0 if step_iteration is 1 - presumably validated elsewhere, confirm
+ ufboot_count_check = (curIteration/step+1)*step; // smallest multiple of step above a non-negative curIteration (integer division)
+}
+
StopRule::~StopRule()
{
}
@@ -96,32 +106,34 @@ void StopRule::restoreCheckpoint() {
// return ((cur_iteration+step_iteration-1)/step_iteration)*step_iteration;
// case SC_REAL_TIME:
//// return ((max_run_time - realtime_secs)/max_run_time);
-// assert(0); // TODO
+// assert(0);
// return 0;
// }
//}
bool StopRule::meetStopCondition(int cur_iteration, double cur_correlation) {
+ if (should_stop)
+ return true;
switch (stop_condition) {
- case SC_FIXED_ITERATION:
- return cur_iteration > min_iteration;
- case SC_WEIBULL:
- if (predicted_iteration == 0)
- return cur_iteration > min_iteration;
- else
- return cur_iteration > predicted_iteration;
- case SC_UNSUCCESS_ITERATION:
- return cur_iteration >= getLastImprovedIteration() + unsuccess_iteration;
- case SC_BOOTSTRAP_CORRELATION:
- return ((cur_correlation >= min_correlation) && (cur_iteration >= getLastImprovedIteration() + unsuccess_iteration))
- || cur_iteration > max_iteration;
- case SC_REAL_TIME:
- return (getRealTime() - start_real_time >= max_run_time);
+ case SC_FIXED_ITERATION:
+ return cur_iteration >= min_iteration;
+ case SC_WEIBULL:
+ if (predicted_iteration == 0)
+ return cur_iteration > min_iteration;
+ else
+ return cur_iteration > predicted_iteration;
+ case SC_UNSUCCESS_ITERATION:
+ return cur_iteration > getLastImprovedIteration() + unsuccess_iteration;
+ case SC_BOOTSTRAP_CORRELATION:
+ return ((cur_correlation >= min_correlation) && (cur_iteration > getLastImprovedIteration() + unsuccess_iteration))
+ || cur_iteration > max_iteration;
+ case SC_REAL_TIME:
+ return (getRealTime() - start_real_time >= max_run_time);
}
return false;
}
-double StopRule::getRemainingTime(int cur_iteration, double cur_correlation) {
+double StopRule::getRemainingTime(int cur_iteration) {
double realtime_secs = getRealTime() - start_real_time;
int niterations;
switch (stop_condition) {
diff --git a/stoprule.h b/stoprule.h
index 972a714..7cb9ea5 100644
--- a/stoprule.h
+++ b/stoprule.h
@@ -42,6 +42,8 @@ public:
*/
~StopRule();
+ void getUFBootCountCheck(int &ufboot_count, int &ufboot_count_check);
+
/**
save object into the checkpoint
*/
@@ -77,9 +79,17 @@ public:
@return TRUE if stop condition is met, FALSE otherwise
*/
bool meetStopCondition(int cur_iteration, double cur_correlation);
+
+ /**
+ return TRUE if cur_correlation is high enough
+ @param cur_correlation correlation coefficient
+ */
+ bool meetCorrelation(double cur_correlation) {
+ return cur_correlation >= min_correlation;
+ }
/** get the remaining time to converge, in seconds */
- double getRemainingTime(int cur_iteration, double cur_correlation);
+ double getRemainingTime(int cur_iteration);
/**
@return the number of iterations required to stop the search
@@ -100,6 +110,10 @@ public:
StopRule::curIteration = curIteration;
}
+ void shouldStop() { // despite the query-like name this is a setter: it requests that the search stop
+ should_stop = true; // meetStopCondition() checks this flag first and returns true without consulting stop_condition
+ }
+
private:
/**
@@ -149,6 +163,9 @@ private:
/** starting real time of the program */
double start_real_time;
+ /** TRUE to override stop condition */
+ bool should_stop;
+
/* FOLLOWING CODES ARE FROM IQPNNI version 3 */
// int nTime_;
diff --git a/superalignment.cpp b/superalignment.cpp
index 7311455..8f56c40 100644
--- a/superalignment.cpp
+++ b/superalignment.cpp
@@ -23,15 +23,26 @@
#include "superalignment.h"
#include "phylosupertree.h"
-SuperAlignment::SuperAlignment()
- : Alignment() {}
+SuperAlignment::SuperAlignment() : Alignment() {
+ max_num_states = 0;
+}
-SuperAlignment::SuperAlignment(PhyloSuperTree *super_tree)
- : Alignment()
+SuperAlignment::SuperAlignment(PhyloSuperTree *super_tree) : Alignment()
{
+ max_num_states = 0;
// first build taxa_index and partitions
int site, seq, nsite = super_tree->size();
PhyloSuperTree::iterator it;
+
+ // BUG FIX 2016-11-29: when merging partitions with -m TESTMERGE, sequence order is changed
+ // get the taxa names from existing tree
+ if (super_tree->root) {
+ super_tree->getTaxaName(seq_names);
+ taxa_index.resize(seq_names.size());
+ for (auto i = taxa_index.begin(); i != taxa_index.end(); i++)
+ i->resize(nsite, -1);
+ }
+
for (site = 0, it = super_tree->begin(); it != super_tree->end(); it++, site++) {
partitions.push_back((*it)->aln);
int nseq = (*it)->aln->getNSeq();
@@ -599,3 +610,50 @@ Alignment *SuperAlignment::concatenateAlignments(IntVector &ids) {
return aln;
}
+
+void SuperAlignment::countConstSite() { // recompute the aggregate site statistics of the super-alignment from its partitions
+ num_informative_sites = 0;
+ max_num_states = 0;
+ frac_const_sites = 0;
+ frac_invariant_sites = 0;
+ size_t nsites = 0; // total number of sites summed over all partitions
+ for (vector<Alignment*>::iterator it = partitions.begin(); it != partitions.end(); it++) {
+ (*it)->countConstSite(); // refresh the partition's own statistics before reading them
+ num_informative_sites += (*it)->num_informative_sites;
+ if ((*it)->num_states > max_num_states) // keep the largest num_states seen among the partitions
+ max_num_states = (*it)->num_states;
+ nsites += (*it)->getNSite();
+ frac_const_sites += (*it)->frac_const_sites * (*it)->getNSite(); // accumulate fraction * site count; normalised after the loop
+ frac_invariant_sites += (*it)->frac_invariant_sites * (*it)->getNSite();
+ }
+ frac_const_sites /= nsites; // site-weighted mean over partitions; NOTE(review): nsites is 0 when there are no partitions or sites - confirm callers never reach that
+ frac_invariant_sites /= nsites;
+}
+
+void SuperAlignment::orderPatternByNumChars() { // rebuild ordered_pattern from the partitions' ordered patterns, each expanded to all nseq sequences
+ const int UINT_BITS = sizeof(UINT)*8;
+ int maxi = (num_informative_sites+UINT_BITS-1)/UINT_BITS; // ceil(num_informative_sites / UINT_BITS)
+ pars_lower_bound = new UINT[maxi+1]; // NOTE(review): no release of a previously assigned buffer is visible here - confirm ownership if this can run twice
+ memset(pars_lower_bound, 0, (maxi+1)*sizeof(UINT));
+ int part, nseq = getNSeq(), npart = partitions.size();
+
+ // compute ordered_pattern
+ ordered_pattern.clear();
+ UINT sum_scores[npart]; // variable-length array (a compiler extension in C++); written below but not read yet, see the TODO at the end
+ for (part = 0; part != partitions.size(); part++) {
+ partitions[part]->orderPatternByNumChars(); // let the partition order its own patterns first
+ // copy every ordered pattern of this partition, widened to the sequences of the super-alignment
+ for (vector<Pattern>::iterator pit = partitions[part]->ordered_pattern.begin(); pit != partitions[part]->ordered_pattern.end(); pit++) {
+ Pattern pattern(*pit);
+ pattern.resize(nseq); // maximal unknown states
+ for (int j = 0; j < nseq; j++)
+ if (taxa_index[j][part] >= 0) // presumably a negative index marks a sequence absent from this partition - confirm
+ pattern[j] = (*pit)[taxa_index[j][part]];
+ else
+ pattern[j] = partitions[part]->STATE_UNKNOWN;
+ ordered_pattern.push_back(pattern);
+ }
+ sum_scores[part] = partitions[part]->pars_lower_bound[0]; // remember the first pars_lower_bound entry of this partition
+ }
+ // TODO compute pars_lower_bound (lower bound of pars score for remaining patterns)
+}
diff --git a/superalignment.h b/superalignment.h
index 583dd0a..ba47436 100644
--- a/superalignment.h
+++ b/superalignment.h
@@ -215,6 +215,22 @@ public:
*/
void buildPattern();
+ /**
+ count the fraction of constant sites over all partitions; updates frac_const_sites, frac_invariant_sites, num_informative_sites and max_num_states
+ */
+ virtual void countConstSite();
+
+ /**
+ * @return number of states, if it is a partition model, return max num_states across all partitions
+ */
+ virtual int getMaxNumStates() {
+ return max_num_states;
+ }
+
+ /** order patterns by number of character states; the result is stored in ordered_pattern
+ */
+ virtual void orderPatternByNumChars();
+
/**
actual partition alignments
*/
@@ -225,6 +241,9 @@ public:
*/
vector<IntVector> taxa_index;
+ /** maximum number of states across all partitions */
+ int max_num_states;
+
/**
* concatenate subset of alignments
* @param ids IDs of sub-alignments
diff --git a/test_scripts/README b/test_scripts/README
index f896292..890d622 100644
--- a/test_scripts/README
+++ b/test_scripts/README
@@ -1,19 +1,21 @@
-1. Complile your local branch:
- ./compile.sh <your_branch>
+1. Compile your local branch:
+ ./compile.sh <branch_name> [<iqtree_flags>]
EXAMPLE: ./compile.sh master
-You might also want to 'pull' code from the remote server to update your branch before performing the compilation. The binary of your branch will be stored in 'iqtree_binaries' directory. A binary of the most recent IQ-TREE release will also be compiled and stored in the folder.
+You might also want to 'pull' code from the remote server to update your branch before performing the compilation.
+The binary of your branch will be stored in 'iqtree_binaries' directory.
-2. If you want to run the standard tests, use the gen_test_standrd.py script (running the script without any option output the help menu) as follows:
+2. Prepare the config_file (see test_configs.txt for an example)
+
+3. If you want to run the standard tests, use the gen_test_standard.py script (running the script without any options prints the help menu) as follows:
./gen_test_standard.py -b <path_to_your_iqtree_binary> -c <config_file>
EXAMPLE: ./gen_test_standard.py -b iqtree_binaries/iqtree_master -c test_configs.txt
-A text file named '<your_binary_name>_test_standard_cmds.txt' containing all the test commnds will be generated. Copy all the content of the test_script folder to libby. Submit the job with the following commands:
+A text file named '<your_binary_name>_test_standard_cmds.txt' containing all the test commands will be generated. Copy all the content of the test_script folder to libby. Submit the job with the following commands:
./submit_jobs.sh <number_of_threads> <cmd_file> <aln_dir> <out_dir> <binary_dir>
- EXAMPLE: ./submit_jobs.sh 16 iqtree_master_test_standard_cmds.txt test_alignments iqtree_master_test_standard iqtree_binaries
-The LOG FILE containing the status of all jobs are writen in <out_dir>/<cmd_file>[0-9]*.log. Look into the file to see whether all jobs have run successfully. Grep for "ERROR" to see which job contains BUG.
+ EXAMPLE: ./submit_jobs.sh 16 iqtree_master_test_standard_cmds.txt test_alignments iqtree_master_test_standard iqtree_binaries
+The LOG FILE containing the status of all jobs is written to <out_dir>/<cmd_file>[0-9]*.log. Look into the file to see whether all jobs have run successfully. Grep for "ERROR" to find the jobs that hit a bug.
-3. If you want to test all the commands by users of the web server that caused bugs: (./gen_test_standard.py -h for help)
+4. If you want to test all the commands by users of the web server that caused bugs: (./gen_test_standard.py -h for help)
./gen_test_standard.py -b <path_to_iqtree_binary>
EXAMPLE: ./gen_test_standard.py -b iqtree_binaries/iqtree_master
-The above command creates a folder called 'webserver_alignments' that contains all the user alignments. The next steps are the same as described in 2.
+The above command creates a folder called 'webserver_alignments' that contains all the user alignments. The next steps are the same as described in 3.
EXAMPLE: ./submit_jobs.sh 40 iqtree_master_test_webserver_cmds.txt webserver_alignments iqtree_master_test_webserver iqtree_binaries
-
diff --git a/test_scripts/compile.sh b/test_scripts/compile.sh
index b532b0a..6f9287a 100755
--- a/test_scripts/compile.sh
+++ b/test_scripts/compile.sh
@@ -1,19 +1,18 @@
-#!/bin/bash -
+#!/bin/bash -
#===============================================================================
#
-# FILE: compile_binary.sh
-#
-# USAGE: ./compile_binary.sh
-#
-# DESCRIPTION: This script checkouts the last release version of IQ-TREE and the HEAD of
-# the current branch. Then it complile both version
-#
+# FILE: compile.sh
+#
+# USAGE: ./compile.sh <branch_name> [<iqtree_flags>]
+#
+# DESCRIPTION: This script checks out the specified branch of IQ-TREE and compiles its sequential and OpenMP versions
+#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
-# AUTHOR: Tung Nguyen (nltung at gmail.com)
-# ORGANIZATION:
+# AUTHOR: Tung Nguyen (nltung at gmail.com)
+# ORGANIZATION:
# CREATED: 2015-01-26 13:02:57 CET
# REVISION: ---
#===============================================================================
@@ -51,70 +50,78 @@ require_clean_work_tree () {
#Check whether the git work tree is clean
#require_clean_work_tree
-if [ "$#" != 1 ]
+if [ "$#" -lt 1 ]
then
- echo "Please enter the name of the local branch you want to compile"
- echo "USAGE: $0 <branch_name>" >&2
- exit 1
+ echo "Please enter the name of the local branch you want to compile"
+ echo "USAGE: $0 <branch_name> [<iqtree_flags>]" >&2
+ exit 1
fi
#Determine hash code of current branch
#branch=`git status | grep "On branch" | awk '{print $3}'`
branch=$1
+flags=$2
+flagOMP="${flags} omp" # flags used to compile OpenMP version of IQ-TREE
+echo "COMPILING BRANCH ${branch} USING FLAGS ${flags}"
#Take the first 6 characters of the current head commit
commit_cur=`git log | head -n1 | awk '{print $2}' | cut -c 1-6`
-#Dictionary and binary names
-cur_build="build_${branch}"
-release_build="build_release"
-release_binary_prefix="iqtree_release"
-#cur_binary="iqtree_${commit_cur}"
-cur_binary="iqtree_${branch}"
-bin_dir="iqtree_binaries"
+#Assign names to build and binary directories
+flagSuffix=`echo ${flags} | sed 's/ /-/g'`
+buildDir="build-${branch}-${flagSuffix}"
+buildDirOMP="build-${branch}-${flagSuffix}-omp"
+binaryName="iqtree-${branch}"
+binaryNameOMP="${binaryName}-omp"
+binDir="iqtree-${branch}-bin"
-#Clean up
-if [ -e $cur_build ]
+#Create the build directory
+if [[ ! -e $buildDir ]]
then
- rm -rf $cur_build
+ mkdir $buildDir
fi
-if [ -e $release_build ]
+if [[ ! -e $buildDirOMP ]]
then
- rm -rf $release_build
+ mkdir $buildDirOMP
fi
-if [ -e $bin_dir ]
+
+#Create binary directory
+if [[ ! -e $binDir ]]
then
- rm -rf $bin_dir
+ mkdir $binDir
fi
-mkdir $bin_dir
-mkdir $cur_build
+
#Fetch changes from server
git fetch
curBranch=`git status | grep 'On branch' | awk '{print $3}'`
-if [ ${curBranch} != ${branch} ]
+if [[ ${curBranch} != ${branch} ]]
then
- git stash
- git checkout $branch
- git pull
- git submodule update
+ echo "Switch to branch ${branch} and pull code from the server ... "
+ git stash
+ echo "Current changes stashed."
+ git checkout $branch
+ git pull
+ #git submodule update
fi
-cmake -B${cur_build} -H..
-make -C ${cur_build} -j4
-cp ${cur_build}/iqtree ${bin_dir}/${cur_binary}
-#rm -rf ${cur_build}
-mkdir $release_build
-#Find the hash code of the most recent release in master
-commit=`git log origin/master | grep -m 1 -B 4 "release version" | grep "commit" | awk '{print $2}'`
-version=`git log origin/master | grep -m 1 "release version [0-9]*" | awk '{print $3}'`
-git checkout ${commit}
-git submodule update
-cmake -B${release_build} -H..
-make -C ${release_build} -j4
-cp ${release_build}/iqtree ${bin_dir}/${release_binary_prefix}_${version}
-git checkout ${curBranch}
-git stash apply
-git submodule update
+
+#Build the selected branch
+
+echo -e "\nGENERATING MAKEFILE FOR SEQUENTIAL VERSION OF IQ-TREE FOR BRANCH ${branch}\n"
+cmake -B${buildDir} -H.. -DIQTREE_FLAGS="${flags}"
+echo -e "\nBUILDING SEQUENTIAL VERSION OF IQ-TREE FOR BRANCH ${branch}\n"
+make -C ${buildDir} -j4
+
+echo -e "\nGENERATING MAKEFILE FOR OPENMP VERSION OF IQ-TREE FOR BRANCH ${branch}\n"
+echo ${flagOMP}
+echo ${buildDirOMP}
+cmake -B${buildDirOMP} -H.. -DIQTREE_FLAGS="${flagOMP}"
+echo -e "\nBUILDING OPENMP VERSION OF IQ-TREE FOR BRANCH ${branch}\n"
+make -C ${buildDirOMP} -j4
+
+#cp ${buildDir}/iqtree- ${binDir}/${binaryName}
#Clean up
-rm -rf $cur_build
-rm -rf $release_build
+#rm -rf $buildDir
+
+#echo -e "Binaries of IQ-TREE for branch ${branch} are stored in $binDir"
+#rm -rf $release_build
diff --git a/test_scripts/gen_test_standard.py b/test_scripts/gen_test_standard.py
index dfe7f1e..a14bc02 100755
--- a/test_scripts/gen_test_standard.py
+++ b/test_scripts/gen_test_standard.py
@@ -2,9 +2,9 @@
'''
Created on Jan. 26, 2015
- at author: tung
+ at author: Tung Nguyen <nltung at gmail.com>
'''
-import sys, os, time, multiprocessing, optparse
+import sys, os, time, multiprocessing, optparse
import subprocess, logging, datetime
def parse_config(config_file):
@@ -22,15 +22,15 @@ def parse_config(config_file):
readSingleAln = True
continue
if line == 'END_SINGLE_ALN':
- readSingleAln = False
+ readSingleAln = False
continue
if readSingleAln:
- singleAln.append(line)
+ singleAln.append(line)
if line == 'START_PARTITION_ALN':
readPartAln = True
continue
if line == 'END_PARTITION_ALN':
- readPartAln = False
+ readPartAln = False
continue
if readPartAln:
partitionAln.append(line.split())
@@ -51,13 +51,15 @@ def parse_config(config_file):
if genericOpt:
genericOpts.append(line)
return (singleAln, partitionAln, genericOpts, partOpts)
-
+
if __name__ == '__main__':
usage = "USAGE: %prog [options]"
parser = optparse.OptionParser(usage=usage)
parser.add_option('-b','--binary', dest="iqtree_bin", help='Path to your IQ-TREE binary')
parser.add_option('-c','--config', dest="config_file", help='Path to test configuration file')
+ parser.add_option('-o', '--output', dest="outFile", help='Output file for test cases')
+ parser.add_option('-f', '--flags', dest="flags", help='Additional flags for IQ-TREE')
(options, args) = parser.parse_args()
if not options.iqtree_bin or not options.config_file:
parser.print_help()
@@ -67,26 +69,28 @@ if __name__ == '__main__':
# Generate test commands for single model
for aln in singleAln:
for opt in genericOpts:
- cmd = '-s ' + aln + ' ' + opt
- testCmds.append(cmd)
+ cmd = '-s ' + aln + ' -redo ' + opt
+ if options.flags:
+ cmd = cmd + ' ' + options.flags
+ testCmds.append(cmd)
# Generate test commands for partition model
for aln in partitionAln:
for opt in genericOpts:
for partOpt in partOpts:
- cmd = '-s ' + aln[0] + ' ' + opt + ' ' + partOpt + ' ' + aln[1]
+ cmd = '-s ' + aln[0] + ' -redo ' + opt + ' ' + partOpt + ' ' + aln[1]
+ if options.flags:
+ cmd = cmd + ' ' + options.flags
testCmds.append(cmd)
+
testNr = 1
jobs = []
for cmd in testCmds:
testIDRel = os.path.basename(options.iqtree_bin) + "_TEST_" + str(testNr)
- testCMD = testIDRel + " " + options.iqtree_bin + " -pre " + testIDRel + " " + cmd
- testNr = testNr + 1
+ testCMD = testIDRel + " " + os.path.abspath(options.iqtree_bin) + " -pre " + testIDRel + " " + cmd
+ testNr = testNr + 1
jobs.append(testCMD)
# print "\n".join(jobs)
- outfile = open(os.path.basename(options.iqtree_bin) + '_test_standard_cmds.txt', "wb")
+ outfile = open(options.outFile, "wb")
for job in jobs:
print >> outfile, job
outfile.close()
-
-
-
diff --git a/test_scripts/generate_test_cmds.py b/test_scripts/generate_test_cmds.py
deleted file mode 100755
index 637eca9..0000000
--- a/test_scripts/generate_test_cmds.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python
-'''
-Created on Jan. 26, 2015
-
- at author: tung
-'''
-import sys, os, time, multiprocessing, optparse
-import subprocess, logging, datetime
-
-def parse_config(config_file):
- singleAln, partitionAln, partOpts, genericOpts = [], [], [], []
- with open(config_file) as f:
- #lines = f.readlines()
- lines = [line.strip() for line in f if line.strip()]
- readSingleAln = False
- readPartAln = False
- partOpt = False
- genericOpt = False
- for line in lines:
- #print line
- if line == 'START_SINGLE_ALN':
- readSingleAln = True
- continue
- if line == 'END_SINGLE_ALN':
- readSingleAln = False
- continue
- if readSingleAln:
- singleAln.append(line)
- if line == 'START_PARTITION_ALN':
- readPartAln = True
- continue
- if line == 'END_PARTITION_ALN':
- readPartAln = False
- continue
- if readPartAln:
- partitionAln.append(line.split())
- if line == 'START_PARTITION_OPTIONS':
- partOpt = True
- continue
- if line == 'END_PARTITION_OPTIONS':
- partOpt = False
- continue
- if line == 'START_GENERIC_OPTIONS':
- genericOpt = True
- continue
- if line == 'END_GENERIC_OPTIONS':
- genericOpt = False
- continue
- if partOpt:
- partOpts.append(line)
- if genericOpt:
- genericOpts.append(line)
- return (singleAln, partitionAln, genericOpts, partOpts)
-
-
-if __name__ == '__main__':
- usage = "USAGE: %prog [options]"
- parser = optparse.OptionParser(usage=usage)
- parser.add_option('-r','--release', dest="release_bin", help='Path to release binary', default="iqtree_release")
- parser.add_option('-t','--test', dest="test_bin", help='Path to test binary', default="iqtree_test")
- parser.add_option('-c','--config', dest="config_file", help='Path to test configuration file')
- parser.add_option('-o','--out_file', dest="out_file", help='Name of the output file', default="iqtree_test_cmds.txt")
- (options, args) = parser.parse_args()
- if len(sys.argv) == 1:
- parser.print_help()
- exit(0)
- (singleAln, partitionAln, genericOpts, partOpts) = parse_config(options.config_file)
- testCmds = []
- # Generate test commands for single model
- for aln in singleAln:
- for opt in genericOpts:
- cmd = '-s ' + aln + ' ' + opt
- testCmds.append(cmd)
- # Generate test commands for partition model
- for aln in partitionAln:
- for opt in genericOpts:
- for partOpt in partOpts:
- cmd = '-s ' + aln[0] + ' ' + opt + ' ' + partOpt + ' ' + aln[1]
- testCmds.append(cmd)
- testNr = 1
- jobs = []
- for cmd in testCmds:
- testIDRel = options.release_bin + "_TEST_" + str(testNr)
- release = testIDRel + " " + options.release_bin + " -pre " + testIDRel + " " + cmd
- testIDTest = options.test_bin + "_TEST_" + str(testNr)
- test = testIDTest + " " + options.test_bin + " -pre " + testIDTest + " " + cmd
- testNr = testNr + 1
- jobs.append(release)
- jobs.append(test)
-# print "\n".join(jobs)
- outfile = open(options.out_file, "wb")
- for job in jobs:
- print >> outfile, job
- outfile.close()
-
-
-
diff --git a/test_scripts/run_tests.sh b/test_scripts/run_tests.sh
new file mode 100755
index 0000000..5db3c3c
--- /dev/null
+++ b/test_scripts/run_tests.sh
@@ -0,0 +1,50 @@
+#!/bin/bash -
+#===============================================================================
+#
+# FILE: run_tests.sh
+#
+# USAGE: ./run_tests.sh <branch_name> [<iqtree_flags_in_quotes>]
+#
+# DESCRIPTION: Compiles the given branch via compile.sh, then generates and submits the sequential and OpenMP test cases
+#
+# OPTIONS: ---
+# REQUIREMENTS: ---
+# BUGS: ---
+# NOTES: ---
+# AUTHOR: Tung Nguyen (nltung at gmail.com),
+# ORGANIZATION:
+# CREATED: 2016-08-12 16:43:54 CEST
+# REVISION: ---
+#===============================================================================
+
+set -o nounset # Treat unset variables as an error
+
+if [ "$#" -lt 1 ]
+then
+ echo "Please enter the name of the local branch you want to compile"
+ echo "USAGE: $0 <branch_name> [<iqtree_flags_in_quotes>]" >&2
+ exit 1
+fi
+
+branchName=$1
+flags=${2:-} # the flags are optional; a bare $2 would abort with "unbound variable" under nounset
+
+if [ "$flags" == "" ]; then
+ flags="static"
+fi
+
+#Compile the specified branch (compile.sh is sourced so that buildDir and buildDirOMP are available below)
+source compile.sh "${branchName}" "$flags"
+
+#Generate test cases
+echo -e "\nGENERATE TEST CASES FOR THE SEQUENTIAL VERSION\n"
+testCasesSeq="${branchName}-seq-test-cases.txt"
+./gen_test_standard.py -b ${buildDir}/iqtree -c test_configs.txt -o "${testCasesSeq}"
+echo "Test cases were writen to ${testCasesSeq}"
+./submit_jobs.sh 24 ${testCasesSeq} test_data test-results-${branchName}-seq
+
+echo -e "\nGENERATE TEST CASES FOR THE OMP VERSION\n"
+testCasesOMP="${branchName}-omp-test-cases.txt"
+./gen_test_standard.py -b ${buildDirOMP}/iqtree-omp -c test_configs.txt -o "${testCasesOMP}" -f "-nt 2"
+echo "Test cases were writen to ${testCasesOMP}"
+./submit_jobs.sh 48 ${testCasesOMP} test_data test-results-${branchName}-omp
diff --git a/test_scripts/submit_jobs.sh b/test_scripts/submit_jobs.sh
index 7b6f60c..62b1e7a 100755
--- a/test_scripts/submit_jobs.sh
+++ b/test_scripts/submit_jobs.sh
@@ -19,16 +19,15 @@
set -o nounset # Treat unset variables as an error
-if [ $# -ne 5 ]
+if [ $# -ne 4 ]
then
- echo "USAGE: $0 <number_of_threads> <cmd_file> <aln_dir> <out_dir> <binary_dir>"
+ echo "USAGE: $0 <number_of_threads> <cmd_file> <aln_dir> <out_dir>"
exit 1
fi
numThreads=$1
cmd_file=$2
aln_dir=$3
out_dir=$4
-binary_dir=$5
if [ -d $out_dir ]
then
@@ -37,9 +36,8 @@ fi
mkdir $out_dir
cp ${aln_dir}/* $out_dir
cp $cmd_file $out_dir
-cp ${binary_dir}/* ${out_dir}/
cd $out_dir
-submitCMD="submit2sge -N iqtree_system_test -q cluster -r zuseX -s $numThreads \"../jobmanager.py -f $cmd_file -c $numThreads\""
+submitCMD="submit2sge -N iqtree_system_test -s $numThreads \"../jobmanager.py -f $cmd_file -c $numThreads\""
#echo "../jobmanager.py -f $cmd_file -c $numThreads" | qsub -V -S /bin/bash -cwd -j y -r y -N iqtree_system_test -l zuseX -l cluster -pe threads 16 -q q.norm at zuse02
$submitCMD
cd ..
diff --git a/test_scripts/submitjob.sh b/test_scripts/submitjob.sh
deleted file mode 100755
index 660a4eb..0000000
--- a/test_scripts/submitjob.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-cd test_data
-echo "../jobmanager.py -f ../iqtree_test_cmds.txt -c 16" | qsub -V -S /bin/bash -cwd -j y -r y -N iqtree_system_test -l zuseX -l cluster -pe threads 16 -q q.norm at zuse02
diff --git a/test_scripts/test_configs.txt b/test_scripts/test_configs.txt
index fbaa35e..b2bca5a 100644
--- a/test_scripts/test_configs.txt
+++ b/test_scripts/test_configs.txt
@@ -1,9 +1,12 @@
START_PARTITION_ALN
example.phy example.nex
d59_8.phy d59_8.nex
+d69_31.phy d69_31.nex
END_PARTITION_ALN
START_SINGLE_ALN
+d59_8.phy
+d69_31.phy
example.phy
prot_M126_27_269.phy
END_SINGLE_ALN
@@ -15,13 +18,10 @@ START_PARTITION_OPTIONS
END_PARTITION_OPTIONS
START_GENERIC_OPTIONS
--m TEST -n 1000
--m TEST -bb 1000 -n 1000
--m TEST -alrt 1000 -n 1000
--m TEST -lbp 1000 -n 1000
--m TEST -bb 1000 -alrt 1000 -lbp 1000 -n 1000
--m TEST -b 10 -n 1000
+-m TEST
+-m TESTNEW
+-m TEST -bb 10000 -alrt 1000 -lbp 1000
+-m TESTNEW -bb 10000 -alrt 1000 -lbp 1000
+-m TEST -b 100
+-m TESTNEW -b 100
END_GENERIC_OPTIONS
-
-
-
diff --git a/test_scripts/test_data/d59_8.nex b/test_scripts/test_data/d59_8.nex
index 80342c0..b0c9a2b 100644
--- a/test_scripts/test_data/d59_8.nex
+++ b/test_scripts/test_data/d59_8.nex
@@ -8,6 +8,4 @@ charset phyb3rd = 4572-5753;
charset set5_8S = 5754-5913;
charset its2 = 5914-6177;
charset gbss13rd = 6178-6951;
-
-
end;
diff --git a/tools.cpp b/tools.cpp
index 4613cc1..4e5af85 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -20,14 +22,15 @@
-#if (defined(__GNUC__) || defined(__clang__)) && !defined(WIN32) && !defined(__CYGWIN__)
+#include "tools.h"
+#include "timeutil.h"
+#include "MPIHelper.h"
+
+#if defined(Backtrace_FOUND)
#include <execinfo.h>
#include <cxxabi.h>
#endif
-#include "tools.h"
-#include "timeutil.h"
-
VerboseMode verbose_mode;
/*
@@ -640,6 +643,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
verbose_mode = VB_MIN;
params.tree_gen = NONE;
params.user_file = NULL;
+ params.constraint_tree_file = NULL;
params.opt_gammai = true;
params.opt_gammai_fast = false;
params.opt_gammai_keep_bran = false;
@@ -750,7 +754,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.manuel_analytic_approx = false;
params.leastSquareNNI = false;
params.ls_var_type = OLS;
- params.maxCandidates = 1000;
+ params.maxCandidates = 20;
params.popSize = 5;
params.p_delete = -1;
params.min_iterations = -1;
@@ -777,6 +781,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.num_rate_cats = 4;
params.max_rate_cats = 10;
params.gamma_shape = -1.0;
+ params.min_gamma_shape = MIN_GAMMA_SHAPE;
params.gamma_median = false;
params.p_invar_sites = -1.0;
params.optimize_model_rate_joint = false;
@@ -790,6 +795,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.iqp = false;
params.write_intermediate_trees = 0;
// params.avoid_duplicated_trees = false;
+ params.writeDistImdTrees = false;
params.rf_dist_mode = 0;
params.mvh_site_rate = false;
params.rate_mh_type = true;
@@ -801,12 +807,17 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.aBayes_test = false;
params.localbp_replicates = 0;
params.SSE = LK_EIGEN_SSE;
- params.lk_no_avx = false;
+ params.lk_no_avx = 0;
+ params.lk_safe_scaling = false;
+ params.numseq_safe_scaling = 2000;
params.print_site_lh = WSL_NONE;
+ params.print_partition_lh = false;
params.print_site_prob = WSL_NONE;
params.print_site_state_freq = WSF_NONE;
params.print_site_rate = false;
params.print_trees_site_posterior = 0;
+ params.print_ancestral_sequence = AST_NONE;
+ params.min_ancestral_prob = 0.95;
params.print_tree_lh = false;
params.lambda = 1;
params.speed_conf = 1.0;
@@ -854,6 +865,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.step_iterations = 100;
// params.store_candidate_trees = false;
params.print_ufboot_trees = 0;
+ params.contree_rfdist = -1;
//const double INF_NNI_CUTOFF = -1000000.0;
params.nni_cutoff = -1000000.0;
params.estimate_nni_cutoff = false;
@@ -871,7 +883,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
#else
params.pll = false;
#endif
- params.modeps = 0.01;
+ params.modelEps = 0.01;
params.parbran = false;
params.binary_aln_file = NULL;
params.maxtime = 1000000;
@@ -881,9 +893,13 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
// params.autostop = true; // turn on auto stopping rule by default now
params.unsuccess_iteration = 100;
params.speednni = true; // turn on reduced hill-climbing NNI by default now
- params.reduction = false;
params.numInitTrees = 100;
- params.fix_stable_splits = false;
+ params.fixStableSplits = false;
+ params.stableSplitThreshold = 0.9;
+ params.five_plus_five = false;
+ params.memCheck = false;
+ params.tabu = false;
+ params.adaptPertubation = false;
params.numSupportTrees = 20;
// params.sprDist = 20;
params.sprDist = 6;
@@ -894,7 +910,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.site_freq_file = NULL;
params.tree_freq_file = NULL;
#ifdef _OPENMP
- params.num_threads = 0;
+ params.num_threads = -1;
#else
params.num_threads = 1;
#endif
@@ -913,7 +929,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.print_splits_file = false;
params.ignore_identical_seqs = true;
params.write_init_tree = false;
- params.write_local_optimal_trees = false;
+ params.write_candidate_trees = false;
params.freq_const_patterns = NULL;
params.no_rescale_gamma_invar = false;
params.compute_seq_identity_along_tree = false;
@@ -1055,8 +1071,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.find_all = true;
continue;
}
- if (strcmp(argv[cnt], "-g") == 0
- || strcmp(argv[cnt], "--greedy") == 0) {
+ if (strcmp(argv[cnt], "--greedy") == 0) {
params.run_mode = GREEDY;
continue;
}
@@ -1867,7 +1882,9 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
}
if (strcmp(argv[cnt], "-lmd") == 0) {
cnt++;
- params.lambda = convert_double(argv[cnt]);
+ if (cnt >= argc)
+ throw "Use -lmd <lambda>";
+ params.lambda = convert_double(argv[cnt]);
if (params.lambda > 1.0)
throw "Lambda must be in (0,1]";
continue;
@@ -1890,9 +1907,30 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
continue;
}
if (strcmp(argv[cnt], "-noavx") == 0) {
- params.lk_no_avx = true;
+ params.lk_no_avx = 1;
+ continue;
+ }
+ if (strcmp(argv[cnt], "-nofma") == 0) {
+ params.lk_no_avx = 2;
continue;
}
+
+ if (strcmp(argv[cnt], "-safe") == 0) {
+ params.lk_safe_scaling = true;
+ continue;
+ }
+
+ if (strcmp(argv[cnt], "-safe-seq") == 0) {
+ cnt++;
+ if (cnt >= argc)
+ throw "-safe-seq <number of sequences>";
+ params.numseq_safe_scaling = convert_int(argv[cnt]);
+ if (params.numseq_safe_scaling < 10)
+ throw "Too small -safe-seq";
+ continue;
+ }
+
+
if (strcmp(argv[cnt], "-f") == 0) {
cnt++;
if (cnt >= argc)
@@ -1929,7 +1967,8 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
if (cnt >= argc)
throw "Use -ft <treefile_to_infer_site_frequency_model>";
params.tree_freq_file = argv[cnt];
- params.print_site_state_freq = WSF_POSTERIOR_MEAN;
+ if (params.print_site_state_freq == WSF_NONE)
+ params.print_site_state_freq = WSF_POSTERIOR_MEAN;
continue;
}
@@ -1972,10 +2011,21 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
if (cnt >= argc)
throw "Use -a <gamma_shape>";
params.gamma_shape = convert_double(argv[cnt]);
-// if (params.gamma_shape < 0)
-// throw "Wrong number of gamma shape parameter (alpha)";
+ if (params.gamma_shape <= 0)
+ throw "Wrong gamma shape parameter (alpha)";
continue;
}
+
+ if (strcmp(argv[cnt], "-amin") == 0) {
+ cnt++;
+ if (cnt >= argc)
+ throw "Use -amin <min_gamma_shape>";
+ params.min_gamma_shape = convert_double(argv[cnt]);
+ if (params.min_gamma_shape <= 0)
+ throw "Wrong minimum gamma shape parameter (alpha)";
+ continue;
+ }
+
if (strcmp(argv[cnt], "-gmean") == 0) {
params.gamma_median = false;
continue;
@@ -2133,15 +2183,27 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.iqp = true;
continue;
}
- if (strcmp(argv[cnt], "-wlt") == 0) {
- // write all candidate trees
- params.write_local_optimal_trees = true;
+ if (strcmp(argv[cnt], "-wct") == 0) {
+ params.write_candidate_trees = true;
continue;
}
+
if (strcmp(argv[cnt], "-wt") == 0) {
params.write_intermediate_trees = 1;
continue;
}
+
+ if (strcmp(argv[cnt], "-wdt") == 0) {
+ params.writeDistImdTrees = true;
+ continue;
+ }
+
+ if (strcmp(argv[cnt], "-wtc") == 0) {
+ params.write_intermediate_trees = 1;
+ params.print_tree_lh = true;
+ continue;
+ }
+
if (strcmp(argv[cnt], "-wt2") == 0) {
params.write_intermediate_trees = 2;
// params.avoid_duplicated_trees = true;
@@ -2206,6 +2268,8 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
}
if (strcmp(argv[cnt], "-alrt") == 0) {
cnt++;
+ if (cnt >= argc)
+ throw "Use -alrt <#replicates | 0>";
int reps = convert_int(argv[cnt]);
if (reps == 0)
params.aLRT_test = true;
@@ -2222,6 +2286,8 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
}
if (strcmp(argv[cnt], "-lbp") == 0) {
cnt++;
+ if (cnt >= argc)
+ throw "Use -lbp <#replicates>";
params.localbp_replicates = convert_int(argv[cnt]);
if (params.localbp_replicates < 1000
&& params.localbp_replicates != 0)
@@ -2232,6 +2298,12 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.print_site_lh = WSL_SITE;
continue;
}
+
+ if (strcmp(argv[cnt], "-wpl") == 0) {
+ params.print_partition_lh = true;
+ continue;
+ }
+
if (strcmp(argv[cnt], "-wslg") == 0 || strcmp(argv[cnt], "-wslr") == 0) {
params.print_site_lh = WSL_RATECAT;
continue;
@@ -2260,6 +2332,28 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
continue;
}
+ if (strcmp(argv[cnt], "-asr") == 0) {
+ params.print_ancestral_sequence = AST_MARGINAL;
+ params.ignore_identical_seqs = false;
+ continue;
+ }
+
+ if (strcmp(argv[cnt], "-asr-min") == 0) {
+ cnt++;
+ if (cnt >= argc)
+ throw "Use -asr-min <probability>";
+
+ params.min_ancestral_prob = convert_double(argv[cnt]);
+ if (params.min_ancestral_prob < 0.5 || params.min_ancestral_prob > 1)
+ throw "Minimum ancestral probability [-asr-min] must be between 0.5 and 1.0";
+ continue;
+ }
+
+ if (strcmp(argv[cnt], "-asr-joint") == 0) {
+ params.print_ancestral_sequence = AST_JOINT;
+ params.ignore_identical_seqs = false;
+ continue;
+ }
if (strcmp(argv[cnt], "-wsr") == 0) {
params.print_site_rate = true;
@@ -2273,7 +2367,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.print_site_state_freq = WSF_POSTERIOR_MEAN;
continue;
}
- if (strcmp(argv[cnt], "-wsfm") == 0) {
+ if (strcmp(argv[cnt], "-wsfm") == 0 || strcmp(argv[cnt], "-fmax") == 0) {
params.print_site_state_freq = WSF_POSTERIOR_MAX;
continue;
}
@@ -2524,12 +2618,28 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
// params.store_candidate_trees = false;
// continue;
// }
- if (strcmp(argv[cnt], "-lhmemsave") == 0) {
- params.lh_mem_save = LM_PER_NODE;
- continue;
- }
- if (strcmp(argv[cnt], "-nolhmemsave") == 0) {
- params.lh_mem_save = LM_ALL_BRANCH;
+ if (strcmp(argv[cnt], "-mem") == 0) {
+ cnt++;
+ if (cnt >= argc)
+ throw "Use -mem max_mem_size";
+ params.lh_mem_save = LM_MEM_SAVE;
+ int end_pos;
+ double mem = convert_double(argv[cnt], end_pos);
+ if (mem < 0)
+ throw "-mem must be non-negative";
+ if (argv[cnt][end_pos] == 'G') {
+ params.max_mem_size = mem * 1073741824.0;
+ } else if (argv[cnt][end_pos] == 'M') {
+ params.max_mem_size = mem * 1048576.0;
+ } else if (argv[cnt][end_pos] == '%'){
+ params.max_mem_size = mem * 0.01;
+ if (params.max_mem_size > 1)
+ throw "-mem percentage must be between 0 and 100";
+ } else {
+ if (mem > 1)
+ throw "Invalid -mem option. Example: -mem 200M, -mem 10G";
+ params.max_mem_size = mem;
+ }
continue;
}
// if (strcmp(argv[cnt], "-storetrees") == 0) {
@@ -2586,30 +2696,66 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.stop_condition = SC_REAL_TIME;
continue;
}
- if (strcmp(argv[cnt], "-numpars") == 0) {
+ if (strcmp(argv[cnt], "-numpars") == 0 || strcmp(argv[cnt], "-ninit") == 0) {
cnt++;
if (cnt >= argc)
- throw "Use -numpars <number_of_parsimony_trees>";
+ throw "Use -ninit <number_of_parsimony_trees>";
params.numInitTrees = convert_int(argv[cnt]);
+ if (params.numInitTrees < 0)
+ throw "-ninit must be non-negative";
if (params.numInitTrees < params.numNNITrees)
params.numNNITrees = params.numInitTrees;
continue;
}
if (strcmp(argv[cnt], "-fss") == 0) {
- params.fix_stable_splits = true;
+ params.fixStableSplits = true;
+// params.five_plus_five = true;
+ continue;
+ }
+ if (strcmp(argv[cnt], "--stable-thres") == 0) {
+ cnt++;
+ if (cnt >= argc)
+ throw "Use --stable-thres <support_value_threshold>";
+ params.stableSplitThreshold = convert_double(argv[cnt]);
+ continue;
+ }
+ if (strcmp(argv[cnt], "-ff") == 0) {
+ params.five_plus_five = true;
continue;
}
- if (strcmp(argv[cnt], "-toppars") == 0) {
+
+ if (strcmp(argv[cnt], "-tabu") == 0) {
+ params.fixStableSplits = true;
+ params.tabu = true;
+ params.maxCandidates = params.numSupportTrees;
+ continue;
+ }
+
+ if (strcmp(argv[cnt], "--adt-pert") == 0) {
+ if (params.tabu == true) {
+ outError("option -tabu and --adt-pert cannot be combined");
+ }
+ params.adaptPertubation = true;
+ params.stableSplitThreshold = 1.0;
+ continue;
+ }
+
+ if (strcmp(argv[cnt], "-memcheck") == 0) {
+ params.memCheck = true;
+ continue;
+ }
+
+ if (strcmp(argv[cnt], "-toppars") == 0 || strcmp(argv[cnt], "-ntop") == 0) {
cnt++;
if (cnt >= argc)
- throw "Use -toppars <number_of_top_parsimony_trees>";
+ throw "Use -ntop <number_of_top_parsimony_trees>";
params.numNNITrees = convert_int(argv[cnt]);
continue;
}
- if (strcmp(argv[cnt], "-nsp") == 0) {
+ if (strcmp(argv[cnt], "--num-sup-trees") == 0) {
cnt++;
if (cnt >= argc)
- throw "Use -nsp <number_of_support_trees>";
+ throw "Use --num-sup-trees <number_of_support_trees>";
params.numSupportTrees = convert_int(argv[cnt]);
continue;
}
@@ -2664,10 +2810,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
continue;
}
if (strcmp(argv[cnt], "-popsize") == 0
- || strcmp(argv[cnt], "-numcand") == 0) {
+ || strcmp(argv[cnt], "-numcand") == 0 || strcmp(argv[cnt], "-nbest") == 0) {
cnt++;
if (cnt >= argc)
- throw "Use -numcand <number_of_candidate_trees>";
+ throw "Use -nbest <number_of_candidate_trees>";
params.popSize = convert_int(argv[cnt]);
assert(params.popSize < params.numInitTrees);
continue;
@@ -2689,10 +2835,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
cnt++;
if (cnt >= argc)
throw "Use -me <model_epsilon>";
- params.modeps = convert_double(argv[cnt]);
- if (params.modeps <= 0.0)
+ params.modelEps = convert_double(argv[cnt]);
+ if (params.modelEps <= 0.0)
throw "Model epsilon must be positive";
- if (params.modeps > 0.1)
+ if (params.modelEps > 0.1)
throw "Model epsilon must not be larger than 0.1";
continue;
}
@@ -2704,10 +2850,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
params.speednni = false;
continue;
}
- if (strcmp(argv[cnt], "-reduction") == 0) {
- params.reduction = true;
- continue;
- }
+
if (strcmp(argv[cnt], "-snni") == 0) {
params.snni = true;
// dont need to turn this on here
@@ -2720,17 +2863,21 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
if (strcmp(argv[cnt], "-iqpnni") == 0) {
params.snni = false;
params.start_tree = STT_BIONJ;
- params.reduction = false;
params.numNNITrees = 1;
// continue; } if (strcmp(argv[cnt], "-auto") == 0) {
// params.autostop = true;
continue;
}
- if (strcmp(argv[cnt], "-stop_cond") == 0 || strcmp(argv[cnt], "-numstop") == 0) {
+ if (strcmp(argv[cnt], "-stop_cond") == 0 || strcmp(argv[cnt], "-numstop") == 0
+ || strcmp(argv[cnt], "-nstop") == 0) {
if (params.stop_condition != SC_BOOTSTRAP_CORRELATION)
params.stop_condition = SC_UNSUCCESS_ITERATION;
cnt++;
+ if (cnt >= argc)
+ throw "Use -nstop <#iterations>";
params.unsuccess_iteration = convert_int(argv[cnt]);
+ if (params.unsuccess_iteration <= 0)
+ throw "-nstop iterations must be positive";
continue;
}
if (strcmp(argv[cnt], "-lsbran") == 0) {
@@ -2881,10 +3028,14 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
if (strcmp(argv[cnt], "-omp") == 0 || strcmp(argv[cnt], "-nt") == 0) {
cnt++;
if (cnt >= argc)
- throw "Use -nt <num_threads>";
- params.num_threads = convert_int(argv[cnt]);
- if (params.num_threads < 1)
- throw "At least 1 thread please";
+ throw "Use -nt <num_threads|AUTO>";
+ if (strcmp(argv[cnt], "AUTO") == 0)
+ params.num_threads = 0;
+ else {
+ params.num_threads = convert_int(argv[cnt]);
+ if (params.num_threads < 1)
+ throw "At least 1 thread please";
+ }
continue;
}
// if (strcmp(argv[cnt], "-rootstate") == 0) {
@@ -2939,6 +3090,14 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
continue;
}
+ if (strcmp(argv[cnt], "-g") == 0) {
+ cnt++;
+ if (cnt >= argc)
+ throw "Use -g <constraint_tree>";
+ params.constraint_tree_file = argv[cnt];
+ continue;
+ }
+
if (strcmp(argv[cnt], "-lmap") == 0) {
cnt++;
if (cnt >= argc)
@@ -3012,6 +3171,10 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
continue;
}
+ if (strcmp(argv[cnt], "--no-uniqueseq") == 0) {
+ params.suppress_output_flags |= OUT_UNIQUESEQ;
+ continue;
+ }
if (argv[cnt][0] == '-') {
string err = "Invalid \"";
@@ -3051,6 +3214,13 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
usage(argv, false);
#endif
}
+
+ if (params.do_au_test && params.topotest_replicates == 0)
+ outError("For AU test please please specify number of bootstrap replicates via -zb option");
+
+ if (params.lh_mem_save == LM_MEM_SAVE && params.partition_file)
+ outError("-mem option does not work with partition models yet");
+
if (!params.out_prefix) {
if (params.eco_dag_file)
params.out_prefix = params.eco_dag_file;
@@ -3065,6 +3235,12 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
else
params.out_prefix = params.user_file;
}
+// if (MPIHelper::getInstance().isWorker()) {
+ // BUG: setting out_prefix this way leaves it pointing into the local string newPrefix, which is destroyed when this function returns
+// string newPrefix = string(params.out_prefix) + "." + NumberToString(MPIHelper::getInstance().getProcessID()) ;
+// params.out_prefix = (char *) newPrefix.c_str();
+// }
+
}
extern void printCopyright(ostream &out);
@@ -3161,6 +3337,8 @@ void usage_iqtree(char* argv[], bool full_command) {
<< " -v, -vv, -vvv Verbose mode, printing more messages to screen" << endl
<< " -quiet Silent mode, suppress printing to screen (stdout)" << endl
<< " -keep-ident Keep identical sequences (default: remove & finally add)" << endl
+ << " -safe Safe likelihood kernel to avoid numerical underflow" << endl
+ << " -mem RAM Maximal RAM usage for memory saving mode" << endl
<< endl << "CHECKPOINTING TO RESUME STOPPED RUN:" << endl
<< " -redo Redo analysis even for successful runs (default: resume)" << endl
<< " -cptime <seconds> Minimum checkpoint time interval (default: 20)" << endl
@@ -3170,16 +3348,17 @@ void usage_iqtree(char* argv[], bool full_command) {
<< " -wql Print quartet log-likelihoods to .quartetlh file" << endl
<< endl << "NEW STOCHASTIC TREE SEARCH ALGORITHM:" << endl
// << " -pll Use phylogenetic likelihood library (PLL) (default: off)" << endl
- << " -numpars <number> Number of initial parsimony trees (default: 100)" << endl
- << " -toppars <number> Number of best parsimony trees (default: 20)" << endl
- << " -sprrad <number> Radius for parsimony SPR search (default: 6)" << endl
- << " -numcand <number> Size of the candidate tree set (defaut: 5)" << endl
+ << " -ninit <number> Number of initial parsimony trees (default: 100)" << endl
+ << " -ntop <number> Number of top initial trees (default: 20)" << endl
+ << " -nbest <number> Number of best trees retained during search (defaut: 5)" << endl
+ << " -n <#iterations> Fix number of iterations to <#iterations> (default: auto)" << endl
+ << " -nstop <number> Number of unsuccessful iterations to stop (default: 100)" << endl
<< " -pers <proportion> Perturbation strength for randomized NNI (default: 0.5)" << endl
+ << " -sprrad <number> Radius for parsimony SPR search (default: 6)" << endl
<< " -allnni Perform more thorough NNI search (default: off)" << endl
- << " -numstop <number> Number of unsuccessful iterations to stop (default: 100)" << endl
- << " -n <#iterations> Fix number of iterations to <#iterations> (default: auto)" << endl
- << " -iqp Use the IQP tree perturbation (default: randomized NNI)" << endl
- << " -iqpnni Switch back to the old IQPNNI tree search algorithm" << endl
+ << " -g <constraint_tree> (Multifurcating) topological constraint tree file" << endl
+// << " -iqp Use the IQP tree perturbation (default: randomized NNI)" << endl
+// << " -iqpnni Switch back to the old IQPNNI tree search algorithm" << endl
<< endl << "ULTRAFAST BOOTSTRAP:" << endl
<< " -bb <#replicates> Ultrafast bootstrap (>=1000)" << endl
<< " -wbt Write bootstrap trees to .ufboot file (default: none)" << endl
@@ -3259,6 +3438,7 @@ void usage_iqtree(char* argv[], bool full_command) {
<< " Invar, Gamma, Invar+Gamma, or FreeRate model where 'n' is" << endl
<< " number of categories (default: n=4)" << endl
<< " -a <Gamma_shape> Gamma shape parameter for site rates (default: estimate)" << endl
+ << " -amin <min_shape> Min Gamma shape parameter for site rates (default: 0.02)" << endl
<< " -gmedian Median approximation for +G site rates (default: mean)" << endl
<< " --opt-gamma-inv More thorough estimation for +I+G model parameters" << endl
<< " -i <p_invar> Proportion of invariable sites (default: estimate)" << endl
@@ -3268,7 +3448,8 @@ void usage_iqtree(char* argv[], bool full_command) {
<< endl << "SITE-SPECIFIC FREQUENCY MODEL:" << endl
<< " -ft <tree_file> Input tree to infer site frequency model" << endl
<< " -fs <in_freq_file> Input site frequency model file" << endl
- << " -wsf Write site frequency model to .sitefreq file" << endl
+ << " -fmax Posterior maximum instead of posterior mean approximation" << endl
+ //<< " -wsf Write site frequency model to .sitefreq file" << endl
//<< " -c <#categories> Number of Gamma rate categories (default: 4)" << endl
// << endl << "TEST OF MODEL HOMOGENEITY:" << endl
// << " -m WHTEST Testing model (GTR+G) homogeneity assumption using" << endl
@@ -3308,6 +3489,12 @@ void usage_iqtree(char* argv[], bool full_command) {
<< " -zb <#replicates> Performing BP,KH,SH,ELW tests for trees passed via -z" << endl
<< " -zw Also performing weighted-KH and weighted-SH tests" << endl
<< " -au Also performing approximately unbiased (AU) test" << endl
+// << endl << "ANCESTRAL SEQUENCE RECONSTRUCTION:" << endl
+// << " -asr Compute ancestral states by marginal reconstruction" << endl
+// << " -asr-min <prob> Min probability to assign ancestral sequence (default: 0.95)" << endl
+// << " -wja Write ancestral sequences by joint reconstruction" << endl
+
+
<< endl;
cout << "GENERATING RANDOM TREES:" << endl;
@@ -3333,6 +3520,7 @@ void usage_iqtree(char* argv[], bool full_command) {
<< " -wspr Write site probabilities per rate category" << endl
<< " -wspm Write site probabilities per mixture class" << endl
<< " -wspmr Write site probabilities per mixture+rate class" << endl
+ << " -wpl Write partition log-likelihoods to .partlh file" << endl
<< " -fconst f1,...,fN Add constant patterns into alignment (N=#nstates)" << endl
<< " -me <epsilon> Logl epsilon for model parameter optimization (default 0.01)" << endl
<< " --no-outfiles Suppress printing output files" << endl;
@@ -3372,7 +3560,7 @@ void quickStartGuide() {
#endif
<< "To show all available options: run 'iqtree -h'" << endl << endl
<< "Have a look at the tutorial and manual for more information:" << endl
- << " http://www.cibiv.at/software/iqtree" << endl << endl;
+ << " http://www.iqtree.org" << endl << endl;
exit(0);
}
@@ -3659,10 +3847,12 @@ int random_int(int n, int *rstream) {
return (int) floor(random_double(rstream) * n);
} /* randominteger */
-//int randint(int a, int b) {
-// return a + (RAND_MAX * rand() + rand()) % (b + 1 - a);
-//}
-//
+/* returns a random integer in the range [a; b-1]: random_int(b - a) yields 0..b-a-1 (assuming random_double() < 1), so b itself is excluded */
+int random_int(int a, int b) {
+ assert(b > a);
+ //return a + (RAND_MAX * rand() + rand()) % (b + 1 - a);
+ return a + random_int(b - a);
+}
double random_double(int *rstream) {
#ifndef FIXEDINTRAND
@@ -3802,6 +3992,8 @@ void trimString(string &str) {
str.erase(str.find_last_not_of(" \n\r\t")+1);
}
+
+
Params& Params::getInstance() {
static Params instance;
return instance;
@@ -3819,6 +4011,7 @@ int countPhysicalCPUCores() {
#else
logicalcpucount = sysconf( _SC_NPROCESSORS_ONLN );
#endif
+ if (logicalcpucount < 1) logicalcpucount = 1;
return logicalcpucount;
if (logicalcpucount % 2 != 0)
@@ -3837,6 +4030,7 @@ int countPhysicalCPUCores() {
} else {
physicalcpucount = logicalcpucount;
}
+ if (physicalcpucount < 1) physicalcpucount = 1;
return physicalcpucount;
}
@@ -3845,7 +4039,7 @@ int countPhysicalCPUCores() {
/** Print a demangled stack backtrace of the caller function to FILE* out. */
-#if defined(WIN32) || defined(__CYGWIN__)
+#if !defined(Backtrace_FOUND)
// donothing for WIN32
void print_stacktrace(ostream &out, unsigned int max_frames) {}
@@ -3976,10 +4170,19 @@ void print_stacktrace(ostream &out, unsigned int max_frames)
}
-#endif // WIN32
+#endif // Backtrace_FOUND
bool memcmpcpy(void * destination, const void * source, size_t num) {
bool diff = (memcmp(destination, source, num) != 0);
memcpy(destination, source, num);
return diff;
}
+
+// Pairing function: see https://en.wikipedia.org/wiki/Pairing_function
+int pairInteger(int int1, int int2) {
+ if (int1 <= int2) {
+ return ((int1 + int2)*(int1 + int2 + 1)/2 + int2);
+ } else {
+ return ((int1 + int2)*(int1 + int2 + 1)/2 + int1);
+ }
+}
diff --git a/tools.h b/tools.h
index 480648d..2e2f380 100644
--- a/tools.h
+++ b/tools.h
@@ -1,6 +1,8 @@
/***************************************************************************
- * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
- * minh.bui at univie.ac.at *
+ * Copyright (C) 2009-2015 by *
+ * BUI Quang Minh <minh.bui at univie.ac.at> *
+ * Lam-Tung Nguyen <nltung at gmail.com> *
+ * *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
@@ -32,6 +34,8 @@
#include <stdlib.h>
#include <math.h>
#include <stdint.h>
+#include <string.h>
+#include <sstream>
//#include <sys/time.h>
//#include <time.h>
@@ -61,10 +65,10 @@ inline void _my_assert(const char* expression, const char *func, const char* fil
#define USE_HASH_MAP
-#ifdef __GNUC__
+#if defined(__GNUC__) && !defined(GCC_VERSION)
#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-#else
-#define GCC_VERSION 0
+//#else
+//#define GCC_VERSION 0
#endif
// for MSVC
@@ -73,13 +77,23 @@ inline void _my_assert(const char* expression, const char *func, const char* fil
#endif
#if defined(USE_HASH_MAP)
+// #include <unordered_map>
+// #include <unordered_set>
+
#if defined(_MSC_VER)
#include <unordered_map>
#include <unordered_set>
#elif defined(__clang__)
- #include <tr1/unordered_map>
- #include <tr1/unordered_set>
- using namespace std::tr1;
+ // libc++ detected: _LIBCPP_VERSION
+ // libstdc++ detected: __GLIBCXX__
+ #if __has_include(<unordered_map>) // defines _LIBCPP_VERSION
+ #include <unordered_map>
+ #include <unordered_set>
+ #else
+ #include <tr1/unordered_map>
+ #include <tr1/unordered_set>
+ using namespace std::tr1;
+ #endif
#elif !defined(__GNUC__)
#include <hash_map>
#include <hash_set>
@@ -95,6 +109,7 @@ inline void _my_assert(const char* expression, const char *func, const char* fil
#include <tr1/unordered_set>
using namespace std::tr1;
#endif
+
#else
#include <map>
#include <set>
@@ -222,7 +237,7 @@ typedef vector<string> StrVector;
/**
matrix of double number
*/
-#define matrix(T) vector<vector<T> >
+#define matrix(T) vector< vector<T> >
/**
matrix of double
@@ -287,6 +302,7 @@ const int WT_NEWLINE = 128;
const int WT_BR_LEN_FIXED_WIDTH = 256;
const int WT_BR_ID = 512;
const int WT_BR_LEN_ROUNDING = 1024;
+const int WT_BR_LEN_SHORT = 2048; // store only 6 digits after the comma for branch lengths
const int TRUE = 1;
const int FALSE = 0;
@@ -414,7 +430,7 @@ enum LikelihoodKernel {
};
enum LhMemSave {
- LM_DETECT, LM_ALL_BRANCH, LM_PER_NODE
+ LM_PER_NODE, LM_MEM_SAVE
};
enum SiteLoglType {
@@ -425,6 +441,10 @@ enum SiteFreqType {
WSF_NONE, WSF_POSTERIOR_MEAN, WSF_POSTERIOR_MAX
};
+enum AncestralSeqType {
+ AST_NONE, AST_MARGINAL, AST_JOINT
+};
+
const int BRLEN_OPTIMIZE = 0; // optimize branch lengths
const int BRLEN_FIX = 1; // fix branch lengths
const int BRLEN_SCALE = 2; // scale branch lengths
@@ -432,6 +452,15 @@ const int BRLEN_SCALE = 2; // scale branch lengths
const int OUT_LOG = 1; // .log file written or not
const int OUT_TREEFILE = 2; // .treefile file written or not
const int OUT_IQTREE = 4; // .iqtree file written or not
+const int OUT_UNIQUESEQ = 8; // .uniqueseq file written or not
+
+
+const double MIN_GAMMA_RATE = 1e-6;
+// change from 0.01 to 0.02 as 0.01 causes numerical problems
+const double MIN_GAMMA_SHAPE = 0.02;
+const double MAX_GAMMA_SHAPE = 1000.0;
+const double TOL_GAMMA_SHAPE = 0.001;
+
/** maximum number of newton-raphson steps for NNI branch evaluation */
extern int NNI_MAX_NR_STEP;
@@ -456,6 +485,32 @@ private:
public:
/**
+ * Fast and accurate optimization for alpha and p_invar
+ */
+ bool fai;
+
+ /**
+ * Option to check memory consumption only
+ */
+ bool memCheck;
+
+ /**
+ * The support threshold for stable splits (Default = 0.9)
+ */
+ double stableSplitThreshold;
+
+ /**
+ * Option for adaptive perturbation.
+ * Branches that are shared among all candidate trees will be perturbed
+ */
+ bool adaptPertubation;
+
+ /**
+ * Option to do multiple starts for estimating alpha and p_invar
+ */
+ bool testAlpha;
+
+ /**
* Restart the optimization of alpha and pinvar from different starting
* pinv values (supercedes the option testAlpha
*/
@@ -491,17 +546,30 @@ public:
bool exh_ai;
/**
- * User file contains the alpha and invar parameters
+ * Text file containing all pairs of alpha and p_invar to
+ * evaluate.
+ * TODO Remove this option and implement the exhaustive search
+ * directly into IQ-TREE
*/
char* alpha_invar_file;
/**
+ * Enable tabu search for NNI
+ */
+ bool tabu;
+
+ /**
+ * Use (5+5)-ES strategy
+ */
+ bool five_plus_five;
+
+ /**
* Turn on feature to identify stable splits and fix them during tree search
*/
- bool fix_stable_splits;
+ bool fixStableSplits;
/**
- * Number of distinct locally optimal trees
+ * Number of best trees used to compute stable splits
*/
int numSupportTrees;
@@ -523,11 +591,14 @@ public:
/**
* Number of best trees in the candidate set used to generate perturbed trees
+ * In terms of evolutionary algorithms, this is the population size
*/
int popSize;
/**
* Maximum number of trees stored in the candidate tree set
+ * This is just a technical constraint to ensure that the candidate tree set
+ * does not have to store a lot of trees
*/
int maxCandidates;
@@ -536,10 +607,6 @@ public:
*/
bool speednni;
- /**
- * use reduction technique to constraint tree space
- */
- bool reduction;
/**
* portion of NNI used for perturbing the tree
@@ -549,7 +616,7 @@ public:
/**
* logl epsilon for model parameter optimization
*/
- double modeps;
+ double modelEps;
/**
* New search heuristics (DEFAULT: ON)
@@ -672,6 +739,9 @@ public:
/* type of starting tree */
START_TREE_TYPE start_tree;
+ /** name of constraint tree file in NEWICK format */
+ char *constraint_tree_file;
+
/**
prefix of the output file, default is the same as input file
*/
@@ -1227,6 +1297,11 @@ public:
double gamma_shape;
/**
+ minimum shape parameter (alpha) of the Gamma distribution for site rates
+ */
+ double min_gamma_shape;
+
+ /**
TRUE to use median rate for discrete categories, FALSE to use mean rate instead
*/
bool gamma_median;
@@ -1299,15 +1374,23 @@ public:
char *bootstrap_spec;
/**
- 1 if output all intermediate trees from every IQPNNI iteration
+ 1 if output all intermediate trees (initial trees, NNI-optimal trees and trees after each NNI step)
2 if output all intermediate trees + 1-NNI-away trees
*/
int write_intermediate_trees;
/**
- * Write out all candidate trees (the locally optimal trees)
+ * Write all distinct intermediate trees and their likelihoods
+ * Note: intermediate trees are trees that have been visited by the search. These include trees created by
+ * NNI-steps within each NNI iteration.
*/
- int write_local_optimal_trees;
+ bool writeDistImdTrees;
+
+ /**
+ * Write trees obtained at the end of each NNI search
+ */
+ bool write_candidate_trees;
+
/**
TRUE to avoid duplicated trees while writing intermediate trees
@@ -1368,7 +1451,13 @@ public:
LikelihoodKernel SSE;
/** TRUE to not use AVX even available in CPU, default: FALSE */
- bool lk_no_avx;
+ int lk_no_avx;
+
+ /** TRUE for safe numerical scaling (per category; used for large trees), default: FALSE */
+ bool lk_safe_scaling;
+
+ /** minimum number of sequences to always use safe scaling, default: 2000 */
+ int numseq_safe_scaling;
/**
WSL_NONE: do not print anything
@@ -1376,9 +1465,13 @@ public:
WSL_RATECAT: print site log-likelihood per rate category
WSL_MIXTURE: print site log-likelihood per mixture class
WSL_MIXTURE_RATECAT: print site log-likelihood per mixture class per rate category
+ WSL_STATE: print site log-likelihood per state
*/
SiteLoglType print_site_lh;
+ /** TRUE to print partition log-likelihood, default: FALSE */
+ bool print_partition_lh;
+
/**
control printing posterior probability of each site belonging to a rate/mixture categories
same meaning as print_site_lh, but results are printed to .siteprob file
@@ -1389,6 +1482,16 @@ public:
SiteLoglType print_site_prob;
/**
+ AST_NONE: do not print ancestral sequences (default)
+ AST_MARGINAL: print ancestral sequences by marginal reconstruction
+ AST_JOINT: print ancestral sequences by joint reconstruction
+ */
+ AncestralSeqType print_ancestral_sequence;
+
+ /** minimum probability to assign an ancestral state */
+ double min_ancestral_prob;
+
+ /**
0: print nothing
1: print site state frequency vectors
*/
@@ -1620,6 +1723,8 @@ public:
/** true to print all UFBoot trees to a file */
int print_ufboot_trees;
+ int contree_rfdist;
+
/****** variables for NNI cutoff heuristics ******/
/**
@@ -1722,6 +1827,9 @@ public:
* 1: only store 1 partial likelihood vector per node */
LhMemSave lh_mem_save;
+ /** maximum amount of memory the program is allowed to use */
+ double max_mem_size;
+
/* TRUE to print .splits file in star-dot format */
bool print_splits_file;
@@ -1733,7 +1841,6 @@ public:
/** frequencies of const patterns to be inserted into alignment */
char *freq_const_patterns;
-
/** BQM 2015-02-25: true to NOT rescale Gamma+Invar rates by (1-p_invar) */
bool no_rescale_gamma_invar;
@@ -1979,7 +2086,7 @@ int64_t convert_int64(const char *str) throw (string);
@param end_pos end position
@return the number
*/
-int64_t convert_int64(const char *str, int64_t &end_pos) throw (string);
+int64_t convert_int64(const char *str, int &end_pos) throw (string);
/**
convert string to double, with error checking
@@ -2400,4 +2507,29 @@ inline uint32_t popcount_lauradoux(unsigned *buf, int n) {
*/
bool memcmpcpy(void * destination, const void * source, size_t num);
+/**
+ * Generate a unique integer from a pair of integers.
+ * This method is called the Cantor pairing function (see Wikipedia).
+ * @param int1 the first integer
+ * @param int2 the second integer
+ * @return the integer encoding the pair (int1, int2)
+ */
+int pairInteger(int int1, int int2);
+
+template <typename T>
+string NumberToString ( T Number )
+{
+ ostringstream ss;
+ ss << Number;
+ return ss.str();
+}
+
+template <typename T>
+T StringToNumber ( const string &Text )
+{
+ istringstream ss(Text);
+ T result;
+ return ss >> result ? result : 0;
+}
+
#endif
diff --git a/vectorclass/changelog.txt b/vectorclass/changelog.txt
index a27dad3..4018a1b 100755
--- a/vectorclass/changelog.txt
+++ b/vectorclass/changelog.txt
@@ -1,18 +1,31 @@
change log for vectorclass.zip
------------------------------
-version 1.20
- * round functions: suppress precision exception under SSE4.1 and higher
+2016-09-27 version 1.23
+ * temporary fix of a problem in Clang version 3.9 inserted in vectorf128.h
+
+2016-05-03 version 1.22
+ * added optional namespace
+ * fixed problem with decimal.h
+
+2016-04-24 version 1.21
+ * fix problems with XOP option in gcc
+ * improved horizontal_and/or for sse2
+ * improved Vec2q and Vec4q constructor on Microsoft Visual Studio 2015
+ * removed warnings by gcc option -Wcast-qual
+
+2015-12-04 version 1.20
+ * round functions: suppress precision exception under SSE4.1 and higher
+ * fix compiler problems with AVX512 multiplication in gcc version 5.1
+ * fix compiler problems with pow function in Microsoft Visual Studio 2015
2015-11-14 version 1.19
* fix various problems with Clang compiler
-
2015-09-25 version 1.18
* fix compiler error for Vec8s divide_by_i(Vec8s const & x) under Clang compiler
* fix error in Vec4d::size() in vectorf256e.h
-
2015-07-31 version 1.17
* improved operator > for Vec4uq
* more special cases in blend4q
@@ -21,7 +34,6 @@ version 1.20
with macro named BTYPE in winnt.h
* fixed bug in Vec4db constructor
-
2014-10-24 version 1.16
* workaround for problem in Clang compiler extended to version 3.09 because not fixed yet by Clang
(vectorf128.h line 134)
@@ -31,7 +43,6 @@ version 1.20
* manual discusses dynamic allocation of arrays of vectors
* various minor changes
-
2014-10-17 version 1.15
* added files ranvec1.h and ranvec1.cpp for random number generator
* constructors to make boolean vectors from their elements
@@ -44,7 +55,6 @@ version 1.20
* explicit fused multiply-and-add used in math functions to improve performance
on compilers that don't automatically insert FMA
-
2014-07-24 version 1.14
* support for AVX-512f instruction set and 512-bit vectors:
Vec16i, Vec16ui, Vec8q, Vec8uq, Vec16f, Vec8d, and corresponding boolean vectors
@@ -56,14 +66,12 @@ version 1.20
* improved precision in exp2 and exp10 functions
* various bug fixes
-
2014-05-11 version 1.13
* pow function improved
* mul_add, mul_sub, mul_sub_x functions
* propagation of error codes through nan_code function
* "denormal" renamed to "subnormal" everywhere, in accordance with IEEE 754-2008 standard
-
2014-04-20 version 1.12
* inline implementation of mathematical functions added (vectormath_exp.h vectormath_trig.h vectormath_common.h)
* vectormath.h renamed to vectormath_lib.h because a new alternative is added
@@ -84,11 +92,9 @@ version 1.20
* removed signalling nan function
* minor improvements in various blend and lookup functions
-
2014-03-01 version 1.11
* fixed missing unsigned operators >>= in vectori256.h
-
2013-10-04 version 1.10
* clear distinction between boolean vectors and integer vectors for the sake of
compatibility with mask registers in forthcoming AVX512 instruction set
@@ -101,11 +107,9 @@ version 1.20
* workaround problem in MS Visual Studio 11.0. Bug report 735861 and 804274
* minor bug fixes
-
2013-03-31 version 1.03 beta
* bug fix for Vec2d cos (Vec2d const & x), VECTORMATH = 1
-
2012-08-01 version 1.02 beta
* added file vector3d.h for 3-dimensional vectors
* added file complexvec.h for complex numbers and complex vectors
@@ -113,7 +117,6 @@ version 1.20
* added function change_sign for floating point vectors
* added operators +, -, *, / between floating point vectors and scalars to remove overloading ambiguity
-
2012-07-08 version 1.01 beta
* added file decimal.h with Number <-> string conversion functions:
bin2bcd, bin2ascii, bin2hex_ascii, ascii2bin
@@ -126,6 +129,5 @@ version 1.20
* minor improvement in abs function
* added version number to VECTORCLASS_H
-
2012-05-30 version 1.00 beta
* first public release
diff --git a/vectorclass/dispatch_example.cpp b/vectorclass/dispatch_example.cpp
index 640a683..4fd05e3 100755
--- a/vectorclass/dispatch_example.cpp
+++ b/vectorclass/dispatch_example.cpp
@@ -1,8 +1,8 @@
/************************* dispatch_example.cpp ****************************
| Author: Agner Fog
| Date created: 2012-05-30
-| Last modified: 2014-07-23
-| Version: 1.14
+* Last modified: 2016-04-26
+* Version: 1.22
| Project: vector classes
| Description:
| Example of CPU dispatching.
@@ -17,7 +17,7 @@
| g++ -O3 -msse2 -otest instrset_detect.cpp d2.o d5.o d7.o d8.o d9.o
| ./test
|
-| (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
+| (c) Copyright 2012-2016 GNU General Public License http://www.gnu.org/licenses
\*****************************************************************************/
#include <stdio.h>
@@ -25,6 +25,9 @@
#define MAX_VECTOR_SIZE 512
#include "vectorclass.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
// define function type (change this to fit your purpose. Should not contain vector types)
typedef float MyFuncType(float*);
@@ -97,3 +100,6 @@ int main(int argc, char* argv[])
#endif // INSTRSET == 2
+#ifdef VCL_NAMESPACE
+}
+#endif
diff --git a/vectorclass/instrset.h b/vectorclass/instrset.h
index 4fb83e2..fb3ebf6 100755
--- a/vectorclass/instrset.h
+++ b/vectorclass/instrset.h
@@ -1,8 +1,8 @@
/**************************** instrset.h **********************************
* Author: Agner Fog
* Date created: 2012-05-30
-* Last modified: 2014-10-22
-* Version: 1.16
+* Last modified: 2016-05-02
+* Version: 1.22
* Project: vector classes
* Description:
* Header file for various compiler-specific tasks and other common tasks to
@@ -14,11 +14,11 @@
* > defines template class to represent compile-time integer constant
* > defines template for compile-time error messages
*
-* (c) Copyright 2012 - 2014 GNU General Public License www.gnu.org/licenses
+* (c) Copyright 2012 - 2016 GNU General Public License www.gnu.org/licenses
******************************************************************************/
#ifndef INSTRSET_H
-#define INSTRSET_H 116
+#define INSTRSET_H 122
// Detect 64 bit mode
#if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) ) && ! defined(__x86_64__)
@@ -26,7 +26,7 @@
#endif
// Find instruction set from compiler macros if INSTRSET not defined
-// Note: Microsoft compilers do not define these macros automatically
+// Note: Most of these macros are not defined in Microsoft compilers
#ifndef INSTRSET
#if defined ( __AVX512F__ ) || defined ( __AVX512__ ) // || defined ( __AVX512ER__ )
#define INSTRSET 9
@@ -107,7 +107,7 @@
// FMA4 instruction set
#if defined (__FMA4__) && (defined(__GNUC__) || defined(__clang__))
#include <fma4intrin.h> // must have both x86intrin.h and fma4intrin.h, don't know why
-#endif // __FMA4__
+#endif // __FMA4__
// Define integer types with known size
@@ -156,10 +156,16 @@
#endif // _MSC_VER
// functions in instrset_detect.cpp
-int instrset_detect(void); // tells which instruction sets are supported
-bool hasFMA3(void); // true if FMA3 instructions supported
-bool hasFMA4(void); // true if FMA4 instructions supported
-bool hasXOP (void); // true if XOP instructions supported
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
+ int instrset_detect(void); // tells which instruction sets are supported
+ bool hasFMA3(void); // true if FMA3 instructions supported
+ bool hasFMA4(void); // true if FMA4 instructions supported
+ bool hasXOP(void); // true if XOP instructions supported
+#ifdef VCL_NAMESPACE
+}
+#endif
// GCC version
#if defined(__GNUC__) && !defined (GCC_VERSION) && !defined (__clang__)
@@ -174,7 +180,7 @@ bool hasXOP (void); // true if XOP instructions su
// Apple bug 18746972
#endif
-// Fix problem with macros named min and max in WinDef.h
+// Fix problem with non-overloadable macros named min and max in WinDef.h
#ifdef _MSC_VER
#if defined (_WINDEF_) && defined(min) && defined(max)
#undef min
@@ -185,19 +191,25 @@ bool hasXOP (void); // true if XOP instructions su
#endif
#endif
-// Template class to represent compile-time integer constant
-template <int32_t n> class Const_int_t {}; // represent compile-time signed integer constant
-template <uint32_t n> class Const_uint_t {}; // represent compile-time unsigned integer constant
-#define const_int(n) (Const_int_t <n>()) // n must be compile-time integer constant
-#define const_uint(n) (Const_uint_t<n>()) // n must be compile-time unsigned integer constant
-
-// Template for compile-time error messages
-template <bool> class Static_error_check {
- public: Static_error_check(){};
-};
-template <> class Static_error_check<false> { // generate compile-time error if false
- private: Static_error_check(){};
-};
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
+ // Template class to represent compile-time integer constant
+ template <int32_t n> class Const_int_t {}; // represent compile-time signed integer constant
+ template <uint32_t n> class Const_uint_t {}; // represent compile-time unsigned integer constant
+ #define const_int(n) (Const_int_t <n>()) // n must be compile-time integer constant
+ #define const_uint(n) (Const_uint_t<n>()) // n must be compile-time unsigned integer constant
+
+ // Template for compile-time error messages
+ template <bool> class Static_error_check {
+ public: Static_error_check() {};
+ };
+ template <> class Static_error_check<false> { // generate compile-time error if false
+ private: Static_error_check() {};
+ };
+#ifdef VCL_NAMESPACE
+}
+#endif
#endif // INSTRSET_H
diff --git a/vectorclass/instrset_detect.cpp b/vectorclass/instrset_detect.cpp
index 03c5777..5023d08 100755
--- a/vectorclass/instrset_detect.cpp
+++ b/vectorclass/instrset_detect.cpp
@@ -1,26 +1,30 @@
/************************** instrset_detect.cpp ****************************
| Author: Agner Fog
| Date created: 2012-05-30
-| Last modified: 2014-07-23
-| Version: 1.14
+* Last modified: 2016-04-26
+* Version: 1.22
| Project: vector classes
| Description:
| Functions for checking which instruction sets are supported.
|
-| (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
+| (c) Copyright 2012-2016 GNU General Public License http://www.gnu.org/licenses
\*****************************************************************************/
#include "instrset.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
+
// Define interface to cpuid instruction.
// input: eax = functionnumber, ecx = 0
// output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3]
static inline void cpuid (int output[4], int functionnumber) {
-#if defined (_MSC_VER) || defined (__INTEL_COMPILER) // Microsoft or Intel compiler, intrin.h included
+#if defined (_MSC_VER) //|| defined (__INTEL_COMPILER) // Microsoft or Intel compiler, intrin.h included
__cpuidex(output, functionnumber, 0); // intrinsic function for CPUID
-#elif defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax
+#elif defined(__GNUC__) || defined(__clang__) || defined (__INTEL_COMPILER) // use inline assembly, Gnu/AT&T syntax
int a, b, c, d;
__asm("cpuid" : "=a"(a),"=b"(b),"=c"(c),"=d"(d) : "a"(functionnumber),"c"(0) : );
@@ -151,3 +155,7 @@ bool hasXOP(void) {
cpuid(abcd, 0x80000001); // call cpuid function 0x80000001
return ((abcd[2] & (1 << 11)) != 0); // ecx bit 11 indicates XOP
}
+
+#ifdef VCL_NAMESPACE
+}
+#endif
diff --git a/vectorclass/special.zip b/vectorclass/special.zip
index 3f3ce57..47abdbe 100755
Binary files a/vectorclass/special.zip and b/vectorclass/special.zip differ
diff --git a/vectorclass/vectorclass.h b/vectorclass/vectorclass.h
index 0368ef8..426f6b9 100755
--- a/vectorclass/vectorclass.h
+++ b/vectorclass/vectorclass.h
@@ -1,8 +1,8 @@
/**************************** vectorclass.h ********************************
* Author: Agner Fog
* Date created: 2012-05-30
-* Last modified: 2015-11-07
-* Version: 1.19
+* Last modified: 2016-09-27
+* Version: 1.23
* Project: vector classes
* Description:
* Header file defining vector classes as interface to intrinsic functions
@@ -22,10 +22,10 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2012 - 2015 GNU General Public License www.gnu.org/licenses
+* (c) Copyright 2012-2016 GNU General Public License www.gnu.org/licenses
******************************************************************************/
#ifndef VECTORCLASS_H
-#define VECTORCLASS_H 116
+#define VECTORCLASS_H 123
// Maximum vector size, bits. Allowed values are 128, 256, 512
#ifndef MAX_VECTOR_SIZE
@@ -64,6 +64,6 @@
#endif // INSTRSET >= 9
#endif // MAX_VECTOR_SIZE >= 512
-#endif // INSTRSET < 2
+#endif // INSTRSET < 2
#endif // VECTORCLASS_H
diff --git a/vectorclass/vectorclass.pdf b/vectorclass/vectorclass.pdf
index 91e66c8..498385a 100755
Binary files a/vectorclass/vectorclass.pdf and b/vectorclass/vectorclass.pdf differ
diff --git a/vectorclass/vectorf128.h b/vectorclass/vectorf128.h
index 86fca47..defefce 100755
--- a/vectorclass/vectorf128.h
+++ b/vectorclass/vectorf128.h
@@ -1,8 +1,8 @@
/**************************** vectorf128.h *******************************
* Author: Agner Fog
* Date created: 2012-05-30
-* Last modified: 2015-11-27
-* Version: 1.20
+* Last modified: 2016-09-27
+* Version: 1.23
* Project: vector classes
* Description:
* Header file defining floating point vector classes as interface to
@@ -30,14 +30,22 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
#ifndef VECTORF128_H
#define VECTORF128_H
-#include "vectori128.h" // Define integer vectors
+#if defined _MSC_VER && _MSC_VER >= 1800
+// solve problem with ambiguous overloading of pow function in Microsoft math.h:
+// make sure math.h is included first rather than last
+#include <math.h>
+#endif
+#include "vectori128.h" // Define integer vectors
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/*****************************************************************************
*
@@ -131,11 +139,11 @@ public:
operator __m128() const {
return xmm;
}
-#if defined (__clang__) && CLANG_VERSION < 30900 || defined(__apple_build_version__)
-#define FIX_CLANG_VECTOR_ALIAS_AMBIGUITY // clang 3.3 - 3.5 has silent conversion between intrinsic vector types.
- // I expected this to be fixed in version 3.4 but it still exists!
+#if defined (__clang__) /* && CLANG_VERSION < xxxxx */ || defined(__apple_build_version__)
+#define FIX_CLANG_VECTOR_ALIAS_AMBIGUITY // clang 3.3 has silent conversion between intrinsic vector types.
+ // I expected this to be fixed in version 3.4 but it still exists in version 3.9!
// http://llvm.org/bugs/show_bug.cgi?id=17164
- // Problem: The version number is not consistent across platforms
+ // Additional problem: The version number is not consistent across platforms
// The Apple build has different version numbers. Too bad!
// http://llvm.org/bugs/show_bug.cgi?id=12643
@@ -246,12 +254,14 @@ static inline Vec4fb andnot(Vec4fb const & a, Vec4fb const & b) {
// horizontal_and. Returns true if all bits are 1
static inline bool horizontal_and (Vec4fb const & a) {
- return horizontal_and(Vec128b(_mm_castps_si128(a)));
+ return _mm_movemask_ps(a) == 0x0F;
+ //return horizontal_and(Vec128b(_mm_castps_si128(a)));
}
// horizontal_or. Returns true if at least one bit is 1
static inline bool horizontal_or (Vec4fb const & a) {
- return horizontal_or(Vec128b(_mm_castps_si128(a)));
+ return _mm_movemask_ps(a) != 0;
+ //return horizontal_or(Vec128b(_mm_castps_si128(a)));
}
@@ -414,12 +424,14 @@ static inline Vec2db andnot(Vec2db const & a, Vec2db const & b) {
// horizontal_and. Returns true if all bits are 1
static inline bool horizontal_and (Vec2db const & a) {
- return horizontal_and(Vec128b(_mm_castpd_si128(a)));
+ return _mm_movemask_pd(a) == 3;
+ //return horizontal_and(Vec128b(_mm_castpd_si128(a)));
}
// horizontal_or. Returns true if at least one bit is 1
static inline bool horizontal_or (Vec2db const & a) {
- return horizontal_or(Vec128b(_mm_castpd_si128(a)));
+ return _mm_movemask_pd(a) != 0;
+ //return horizontal_or(Vec128b(_mm_castpd_si128(a)));
}
@@ -491,9 +503,9 @@ public:
case 1:
xmm = _mm_load_ss(p); break;
case 2:
- xmm = _mm_castpd_ps(_mm_load_sd((double*)p)); break;
+ xmm = _mm_castpd_ps(_mm_load_sd((double const*)p)); break;
case 3:
- t1 = _mm_castpd_ps(_mm_load_sd((double*)p));
+ t1 = _mm_castpd_ps(_mm_load_sd((double const*)p));
t2 = _mm_load_ss(p + 2);
xmm = _mm_movelh_ps(t1, t2); break;
case 4:
@@ -1894,7 +1906,7 @@ static inline Vec2d pow(Vec2d const & a, Const_int_t<n>) {
// avoid unsafe optimization in function round
#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__) && INSTRSET < 5
-static inline Vec4f round(Vec4f const & a) __attribute__ ((optimize("-fno-unsafe-math-optimizations")));
+static inline Vec2d round(Vec2d const & a) __attribute__ ((optimize("-fno-unsafe-math-optimizations")));
#elif defined (FLOAT_CONTROL_PRECISE_FOR_ROUND)
#pragma float_control(push)
#pragma float_control(precise,on)
@@ -2618,4 +2630,8 @@ static inline Vec2db to_Vec2db(uint8_t x) {
return Vec2db(to_Vec2qb(x));
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORF128_H
diff --git a/vectorclass/vectorf256.h b/vectorclass/vectorf256.h
index dc126a0..148442a 100755
--- a/vectorclass/vectorf256.h
+++ b/vectorclass/vectorf256.h
@@ -1,8 +1,8 @@
/**************************** vectorf256.h *******************************
* Author: Agner Fog
* Date created: 2012-05-30
-* Last modified: 2015-11-27
-* Version: 1.20
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining 256-bit floating point vector classes as interface
@@ -27,7 +27,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
// check combination of header files
@@ -44,7 +44,9 @@
#include "vectorf128.h" // Define 128-bit vectors
-
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/*****************************************************************************
*
@@ -3163,4 +3165,8 @@ static inline Vec4db to_Vec4db(uint8_t x) {
return Vec4db(to_Vec4qb(x));
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORF256_H
diff --git a/vectorclass/vectorf256e.h b/vectorclass/vectorf256e.h
index 39c4410..386da1b 100755
--- a/vectorclass/vectorf256e.h
+++ b/vectorclass/vectorf256e.h
@@ -1,8 +1,8 @@
/**************************** vectorf256e.h *******************************
* Author: Agner Fog
* Date created: 2012-05-30
-* Last modified: 2015-08-25
-* Version: 1.18
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining 256-bit floating point vector classes as interface
@@ -16,7 +16,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
// check combination of header files
@@ -34,6 +34,9 @@
#include "vectorf128.h" // Define 128-bit vectors
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/*****************************************************************************
*
@@ -2066,4 +2069,8 @@ static inline Vec4db to_Vec4db(uint8_t x) {
return Vec4db(to_Vec4qb(x));
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORF256_H
diff --git a/vectorclass/vectorf512.h b/vectorclass/vectorf512.h
index 5fab837..cfd16f3 100755
--- a/vectorclass/vectorf512.h
+++ b/vectorclass/vectorf512.h
@@ -1,8 +1,8 @@
/**************************** vectorf512.h *******************************
* Author: Agner Fog
* Date created: 2014-07-23
-* Last modified: 2015-11-27
-* Version: 1.20
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining floating point vector classes as interface to intrinsic
@@ -23,7 +23,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2015 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
// check combination of header files
@@ -36,6 +36,10 @@
#include "vectori512.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
+
// Define missing intrinsic functions
#if defined (GCC_VERSION) && GCC_VERSION < 41102 && !defined(__INTEL_COMPILER) && !defined(__clang__)
@@ -787,12 +791,12 @@ static inline Vec16f ceil(Vec16f const & a) {
// function round_to_int: round to nearest integer (even). (result as integer vector)
static inline Vec16i round_to_int(Vec16f const & a) {
- return _mm512_cvt_roundps_epi32(a, _MM_FROUND_NO_EXC);
+ return _mm512_cvt_roundps_epi32(a, 0+8 /*_MM_FROUND_NO_EXC*/);
}
// function truncate_to_int: round towards zero. (result as integer vector)
static inline Vec16i truncate_to_int(Vec16f const & a) {
- return _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC);
+ return _mm512_cvtt_roundps_epi32(a, 0+8 /*_MM_FROUND_NO_EXC*/);
}
// function to_float: convert integer vector to float vector
@@ -1438,7 +1442,7 @@ static inline Vec8d ceil(Vec8d const & a) {
// function round_to_int: round to nearest integer (even). (result as integer vector)
static inline Vec8i round_to_int(Vec8d const & a) {
//return _mm512_cvtpd_epi32(a);
- return _mm512_cvt_roundpd_epi32(a, __MM_FROUND_NO_EXC);
+ return _mm512_cvt_roundpd_epi32(a, 0+8);
}
// function truncate_to_int: round towards zero. (result as integer vector)
@@ -1473,7 +1477,7 @@ static inline Vec8q round_to_int64(Vec8d const & a) {
// result as 64-bit integer vector, but with limited range
static inline Vec8q round_to_int64_limited(Vec8d const & a) {
//Vec4q b = _mm512_cvtpd_epi32(a); // round to 32-bit integers
- Vec4q b = _mm512_cvt_roundpd_epi32(a, __MM_FROUND_NO_EXC); // round to 32-bit integers
+ Vec4q b = _mm512_cvt_roundpd_epi32(a, 0+8); // round to 32-bit integers
__m512i c = permute8q<0,-256,1,-256,2,-256,3,-256>(Vec8q(b,b)); // get bits 64-127 to position 128-191, etc.
__m512i s = _mm512_srai_epi32(c, 31); // sign extension bits
return _mm512_unpacklo_epi32(c, s); // interleave with sign extensions
@@ -2362,4 +2366,8 @@ static inline Vec8db to_Vec8db(uint8_t x) {
return Vec8db(to_Vec8qb(x));
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORF512_H
diff --git a/vectorclass/vectorf512e.h b/vectorclass/vectorf512e.h
index a0077b3..77ad9ca 100755
--- a/vectorclass/vectorf512e.h
+++ b/vectorclass/vectorf512e.h
@@ -1,8 +1,8 @@
/**************************** vectorf512.h *******************************
* Author: Agner Fog
* Date created: 2014-07-23
-* Last modified: 2014-10-22
-* Version: 1.16
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining floating point vector classes as interface to intrinsic
@@ -23,7 +23,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
// check combination of header files
@@ -36,6 +36,9 @@
#include "vectori512e.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/*****************************************************************************
*
@@ -2124,4 +2127,8 @@ static inline Vec8db to_Vec8db(uint8_t x) {
return Vec8db(to_Vec8qb(x));
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORF512_H
diff --git a/vectorclass/vectori128.h b/vectorclass/vectori128.h
index 73f53d5..76d29fb 100755
--- a/vectorclass/vectori128.h
+++ b/vectorclass/vectori128.h
@@ -1,8 +1,8 @@
/**************************** vectori128.h *******************************
* Author: Agner Fog
* Date created: 2012-05-30
-* Last modified: 2015-11-07
-* Version: 1.19
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining integer vector classes as interface to intrinsic
@@ -39,7 +39,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
#ifndef VECTORI128_H
#define VECTORI128_H
@@ -50,7 +50,9 @@
#error Please compile for the SSE2 instruction set or higher
#endif
-
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/*****************************************************************************
*
@@ -344,7 +346,7 @@ public:
else {
// worst case. read 1 byte at a time and suffer store forwarding penalty
char x[16];
- for (int i = 0; i < n; i++) x[i] = ((char *)p)[i];
+ for (int i = 0; i < n; i++) x[i] = ((char const *)p)[i];
load(x);
}
cutoff(n);
@@ -526,6 +528,22 @@ static inline Vec16cb andnot (Vec16cb const & a, Vec16cb const & b) {
return Vec16cb(andnot(Vec128b(a), Vec128b(b)));
}
+// Horizontal Boolean functions for Vec16cb
+
+// horizontal_and. Returns true if all elements are true
+static inline bool horizontal_and(Vec16cb const & a) {
+ return _mm_movemask_epi8(a) == 0xFFFF;
+}
+
+// horizontal_or. Returns true if at least one element is true
+static inline bool horizontal_or(Vec16cb const & a) {
+#if INSTRSET >= 5 // SSE4.1 supported. Use PTEST
+ return !_mm_testz_si128(a, a);
+#else
+ return _mm_movemask_epi8(a) != 0;
+#endif
+}
+
/*****************************************************************************
*
@@ -643,7 +661,7 @@ static inline Vec16cb operator == (Vec16c const & a, Vec16c const & b) {
// vector operator != : returns true for elements for which a != b
static inline Vec16cb operator != (Vec16c const & a, Vec16c const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comneq_epi8(a,b);
+ return (Vec16cb)_mm_comneq_epi8(a,b);
#else // SSE2 instruction set
return Vec16cb(Vec16c(~(a == b)));
#endif
@@ -662,7 +680,7 @@ static inline Vec16cb operator < (Vec16c const & a, Vec16c const & b) {
// vector operator >= : returns true for elements for which a >= b (signed)
static inline Vec16cb operator >= (Vec16c const & a, Vec16c const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comge_epi8(a,b);
+ return (Vec16cb)_mm_comge_epi8(a,b);
#else // SSE2 instruction set
return Vec16cb(Vec16c(~(b > a)));
#endif
@@ -947,9 +965,9 @@ static inline Vec16uc & operator >>= (Vec16uc & a, int b) {
// vector operator >= : returns true for elements for which a >= b (unsigned)
static inline Vec16cb operator >= (Vec16uc const & a, Vec16uc const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comge_epu8(a,b);
+ return (Vec16cb)_mm_comge_epu8(a,b);
#else // SSE2 instruction set
- return _mm_cmpeq_epi8(_mm_max_epu8(a,b),a); // a == max(a,b)
+ return (Vec16cb)_mm_cmpeq_epi8(_mm_max_epu8(a,b),a); // a == max(a,b)
#endif
}
@@ -961,7 +979,7 @@ static inline Vec16cb operator <= (Vec16uc const & a, Vec16uc const & b) {
// vector operator > : returns true for elements for which a > b (unsigned)
static inline Vec16cb operator > (Vec16uc const & a, Vec16uc const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comgt_epu8(a,b);
+ return (Vec16cb)_mm_comgt_epu8(a,b);
#else // SSE2 instruction set
return Vec16cb(Vec16c(~(b >= a)));
#endif
@@ -1123,7 +1141,7 @@ public:
else {
// worst case. read 1 byte at a time and suffer store forwarding penalty
int16_t x[8];
- for (int i = 0; i < n; i++) x[i] = ((int16_t *)p)[i];
+ for (int i = 0; i < n; i++) x[i] = ((int16_t const *)p)[i];
load(x);
}
cutoff(n);
@@ -1328,6 +1346,22 @@ static inline Vec8sb andnot (Vec8sb const & a, Vec8sb const & b) {
return Vec8sb(andnot(Vec128b(a), Vec128b(b)));
}
+// Horizontal Boolean functions for Vec8sb
+
+// horizontal_and. Returns true if all elements are true
+static inline bool horizontal_and(Vec8sb const & a) {
+ return _mm_movemask_epi8(a) == 0xFFFF;
+}
+
+// horizontal_or. Returns true if at least one element is true
+static inline bool horizontal_or(Vec8sb const & a) {
+#if INSTRSET >= 5 // SSE4.1 supported. Use PTEST
+ return !_mm_testz_si128(a, a);
+#else
+ return _mm_movemask_epi8(a) != 0;
+#endif
+}
+
/*****************************************************************************
*
@@ -1433,7 +1467,7 @@ static inline Vec8sb operator == (Vec8s const & a, Vec8s const & b) {
// vector operator != : returns true for elements for which a != b
static inline Vec8sb operator != (Vec8s const & a, Vec8s const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comneq_epi16(a,b);
+ return (Vec8sb)_mm_comneq_epi16(a,b);
#else // SSE2 instruction set
return Vec8sb (~(a == b));
#endif
@@ -1452,7 +1486,7 @@ static inline Vec8sb operator < (Vec8s const & a, Vec8s const & b) {
// vector operator >= : returns true for elements for which a >= b (signed)
static inline Vec8sb operator >= (Vec8s const & a, Vec8s const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comge_epi16(a,b);
+ return (Vec8sb)_mm_comge_epi16(a,b);
#else // SSE2 instruction set
return Vec8sb (~(b > a));
#endif
@@ -1753,7 +1787,7 @@ static inline Vec8s operator <= (Vec8us const & a, Vec8us const & b) {
// vector operator > : returns true for elements for which a > b (unsigned)
static inline Vec8s operator > (Vec8us const & a, Vec8us const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comgt_epu16(a,b);
+ return (Vec8s)_mm_comgt_epu16(a,b);
#else // SSE2 instruction set
return Vec8s (~(b >= a));
#endif
@@ -1947,12 +1981,12 @@ public:
case 0:
*this = 0; break;
case 1:
- xmm = _mm_cvtsi32_si128(*(int32_t*)p); break;
+ xmm = _mm_cvtsi32_si128(*(int32_t const*)p); break;
case 2:
// intrinsic for movq is missing!
- xmm = _mm_setr_epi32(((int32_t*)p)[0], ((int32_t*)p)[1], 0, 0); break;
+ xmm = _mm_setr_epi32(((int32_t const*)p)[0], ((int32_t const*)p)[1], 0, 0); break;
case 3:
- xmm = _mm_setr_epi32(((int32_t*)p)[0], ((int32_t*)p)[1], ((int32_t*)p)[2], 0); break;
+ xmm = _mm_setr_epi32(((int32_t const*)p)[0], ((int32_t const*)p)[1], ((int32_t const*)p)[2], 0); break;
case 4:
load(p); break;
default:
@@ -2122,6 +2156,22 @@ static inline Vec4ib andnot (Vec4ib const & a, Vec4ib const & b) {
return Vec4ib(andnot(Vec128b(a), Vec128b(b)));
}
+// Horizontal Boolean functions for Vec4ib
+
+// horizontal_and. Returns true if all elements are true
+static inline bool horizontal_and(Vec4ib const & a) {
+ return _mm_movemask_epi8(a) == 0xFFFF;
+}
+
+// horizontal_or. Returns true if at least one element is true
+static inline bool horizontal_or(Vec4ib const & a) {
+#if INSTRSET >= 5 // SSE4.1 supported. Use PTEST
+ return !_mm_testz_si128(a, a);
+#else
+ return _mm_movemask_epi8(a) != 0;
+#endif
+}
+
/*****************************************************************************
*
@@ -2237,7 +2287,7 @@ static inline Vec4ib operator == (Vec4i const & a, Vec4i const & b) {
// vector operator != : returns true for elements for which a != b
static inline Vec4ib operator != (Vec4i const & a, Vec4i const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comneq_epi32(a,b);
+ return (Vec4ib)_mm_comneq_epi32(a,b);
#else // SSE2 instruction set
return Vec4ib(Vec4i (~(a == b)));
#endif
@@ -2256,7 +2306,7 @@ static inline Vec4ib operator < (Vec4i const & a, Vec4i const & b) {
// vector operator >= : returns true for elements for which a >= b (signed)
static inline Vec4ib operator >= (Vec4i const & a, Vec4i const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comge_epi32(a,b);
+ return (Vec4ib)_mm_comge_epi32(a,b);
#else // SSE2 instruction set
return Vec4ib(Vec4i (~(b > a)));
#endif
@@ -2556,12 +2606,12 @@ static inline Vec4ui operator << (Vec4ui const & a, int32_t b) {
// vector operator > : returns true for elements for which a > b (unsigned)
static inline Vec4ib operator > (Vec4ui const & a, Vec4ui const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comgt_epu32(a,b);
+ return (Vec4ib)_mm_comgt_epu32(a,b);
#else // SSE2 instruction set
__m128i signbit = _mm_set1_epi32(0x80000000);
__m128i a1 = _mm_xor_si128(a,signbit);
__m128i b1 = _mm_xor_si128(b,signbit);
- return _mm_cmpgt_epi32(a1,b1); // signed compare
+ return (Vec4ib)_mm_cmpgt_epi32(a1,b1); // signed compare
#endif
}
@@ -2573,10 +2623,10 @@ static inline Vec4ib operator < (Vec4ui const & a, Vec4ui const & b) {
// vector operator >= : returns true for elements for which a >= b (unsigned)
static inline Vec4ib operator >= (Vec4ui const & a, Vec4ui const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_comge_epu32(a,b);
+ return (Vec4ib)_mm_comge_epu32(a,b);
#elif INSTRSET >= 5 // SSE4.1
__m128i max_ab = _mm_max_epu32(a,b); // max(a,b), unsigned
- return _mm_cmpeq_epi32(a,max_ab); // a == max(a,b)
+ return (Vec4ib)_mm_cmpeq_epi32(a,max_ab); // a == max(a,b)
#else // SSE2 instruction set
return Vec4ib(Vec4i (~(b > a)));
#endif
@@ -2706,7 +2756,7 @@ public:
}
// Constructor to broadcast the same value into all elements:
Vec2q(int64_t i) {
-#if defined (_MSC_VER) && ! defined(__INTEL_COMPILER)
+#if defined (_MSC_VER) && _MSC_VER < 1900 && ! defined(__INTEL_COMPILER)
// MS compiler has no _mm_set1_epi64x in 32 bit mode
#if defined(__x86_64__) // 64 bit mode
#if _MSC_VER < 1700
@@ -2733,12 +2783,12 @@ public:
#endif // __x86_64__
#else // Other compilers
- xmm = _mm_set1_epi64x(i); // emmintrin.h
+ xmm = _mm_set1_epi64x(i);
#endif
}
// Constructor to build from all elements:
Vec2q(int64_t i0, int64_t i1) {
-#if defined (_MSC_VER) && ! defined(__INTEL_COMPILER)
+#if defined (_MSC_VER) && _MSC_VER < 1900 && ! defined(__INTEL_COMPILER)
// MS compiler has no _mm_set_epi64x in 32 bit mode
#if defined(__x86_64__) // 64 bit mode
#if _MSC_VER < 1700
@@ -2791,7 +2841,7 @@ public:
*this = 0; break;
case 1:
// intrinsic for movq is missing!
- *this = Vec2q(*(int64_t*)p, 0); break;
+ *this = Vec2q(*(int64_t const*)p, 0); break;
case 2:
load(p); break;
default:
@@ -2974,6 +3024,22 @@ static inline Vec2qb andnot (Vec2qb const & a, Vec2qb const & b) {
return Vec2qb(andnot(Vec128b(a), Vec128b(b)));
}
+// Horizontal Boolean functions for Vec2qb
+
+// horizontal_and. Returns true if all elements are true
+static inline bool horizontal_and(Vec2qb const & a) {
+ return _mm_movemask_epi8(a) == 0xFFFF;
+}
+
+// horizontal_or. Returns true if at least one element is true
+static inline bool horizontal_or(Vec2qb const & a) {
+#if INSTRSET >= 5 // SSE4.1 supported. Use PTEST
+ return !_mm_testz_si128(a, a);
+#else
+ return _mm_movemask_epi8(a) != 0;
+#endif
+}
+
/*****************************************************************************
*
@@ -3115,7 +3181,7 @@ static inline Vec2qb operator == (Vec2q const & a, Vec2q const & b) {
// vector operator != : returns true for elements for which a != b
static inline Vec2qb operator != (Vec2q const & a, Vec2q const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return Vec2q(_mm_comneq_epi64(a,b));
+ return Vec2qb(_mm_comneq_epi64(a,b));
#else // SSE2 instruction set
return Vec2qb(Vec2q(~(a == b)));
#endif
@@ -3148,7 +3214,7 @@ static inline Vec2qb operator > (Vec2q const & a, Vec2q const & b) {
// vector operator >= : returns true for elements for which a >= b (signed)
static inline Vec2qb operator >= (Vec2q const & a, Vec2q const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return Vec2q(_mm_comge_epi64(a,b));
+ return Vec2qb(_mm_comge_epi64(a,b));
#else // SSE2 instruction set
return Vec2qb(Vec2q(~(a < b)));
#endif
@@ -3275,12 +3341,12 @@ static inline Vec2q abs_saturated(Vec2q const & a) {
// Use negative count to rotate right
static inline Vec2q rotate_left(Vec2q const & a, int b) {
#ifdef __XOP__ // AMD XOP instruction set
- return _mm_rot_epi64(a,Vec2q(b));
+ return (Vec2q)_mm_rot_epi64(a,Vec2q(b));
#else // SSE2 instruction set
__m128i left = _mm_sll_epi64(a,_mm_cvtsi32_si128(b & 0x3F)); // a << b
__m128i right = _mm_srl_epi64(a,_mm_cvtsi32_si128((64-b) & 0x3F)); // a >> (64 - b)
__m128i rot = _mm_or_si128(left,right); // or
- return rot;
+ return (Vec2q)rot;
#endif
}
@@ -3386,7 +3452,7 @@ static inline Vec2uq operator << (Vec2uq const & a, int32_t b) {
// vector operator > : returns true for elements for which a > b (unsigned)
static inline Vec2qb operator > (Vec2uq const & a, Vec2uq const & b) {
#if defined ( __XOP__ ) // AMD XOP instruction set
- return Vec2q(_mm_comgt_epu64(a,b));
+ return Vec2qb(_mm_comgt_epu64(a,b));
#elif INSTRSET >= 6 // SSE4.2
__m128i sign64 = constant4i<0,(int32_t)0x80000000,0,(int32_t)0x80000000>();
__m128i aflip = _mm_xor_si128(a, sign64);
@@ -3415,7 +3481,7 @@ static inline Vec2qb operator < (Vec2uq const & a, Vec2uq const & b) {
// vector operator >= : returns true for elements for which a >= b (unsigned)
static inline Vec2qb operator >= (Vec2uq const & a, Vec2uq const & b) {
#ifdef __XOP__ // AMD XOP instruction set
- return Vec2q(_mm_comge_epu64(a,b));
+ return Vec2qb(_mm_comge_epu64(a,b));
#else // SSE2 instruction set
return Vec2qb(Vec2q(~(b > a)));
#endif
@@ -5374,7 +5440,7 @@ public:
sign = _mm_set1_epi32(sgn);
}
void set(int32_t d) { // Set or change divisor, calculate parameters
- const int32_t d1 = abs(d);
+ const int32_t d1 = ::abs(d);
int32_t sh, m;
if (d1 > 1) {
sh = bit_scan_reverse(d1-1); // shift count = ceil(log2(d1))-1 = (bit_scan_reverse(d1-1)+1)-1
@@ -5471,7 +5537,7 @@ public:
sign = _mm_set1_epi32(sgn);
}
void set(int16_t d) { // Set or change divisor, calculate parameters
- const int32_t d1 = abs(d);
+ const int32_t d1 = ::abs(d);
int32_t sh, m;
if (d1 > 1) {
sh = bit_scan_reverse(d1-1); // shift count = ceil(log2(d1))-1 = (bit_scan_reverse(d1-1)+1)-1
@@ -6148,4 +6214,8 @@ static inline Vec2qb to_Vec2qb(uint8_t x);
#endif // INSTRSET < 9 || MAX_VECTOR_SIZE < 512
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORI128_H
diff --git a/vectorclass/vectori256.h b/vectorclass/vectori256.h
index 47da1ea..890212c 100755
--- a/vectorclass/vectori256.h
+++ b/vectorclass/vectori256.h
@@ -1,8 +1,8 @@
/**************************** vectori256.h *******************************
* Author: Agner Fog
* Date created: 2012-05-30
-* Last modified: 2015-11-08
-* Version: 1.19
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining integer vector classes as interface to intrinsic
@@ -36,7 +36,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
// check combination of header files
@@ -58,6 +58,9 @@
#include "vectori128.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/*****************************************************************************
*
@@ -334,7 +337,7 @@ public:
*this = Vec32c(Vec16c().load_partial(n, p), 0);
}
else if (n < 32) {
- *this = Vec32c(Vec16c().load(p), Vec16c().load_partial(n-16, (char*)p+16));
+ *this = Vec32c(Vec16c().load(p), Vec16c().load_partial(n-16, (char const*)p+16));
}
else {
load(p);
@@ -1104,7 +1107,7 @@ public:
*this = Vec16s(Vec8s().load_partial(n, p), 0);
}
else if (n < 16) {
- *this = Vec16s(Vec8s().load(p), Vec8s().load_partial(n-8, (int16_t*)p+8));
+ *this = Vec16s(Vec8s().load(p), Vec8s().load_partial(n-8, (int16_t const*)p+8));
}
else {
load(p);
@@ -1829,7 +1832,7 @@ public:
*this = Vec8i(Vec4i().load_partial(n, p), 0);
}
else if (n < 8) {
- *this = Vec8i(Vec4i().load(p), Vec4i().load_partial(n-4, (int32_t*)p+4));
+ *this = Vec8i(Vec4i().load(p), Vec4i().load_partial(n-4, (int32_t const*)p+4));
}
else {
load(p);
@@ -2495,7 +2498,7 @@ public:
}
// Constructor to broadcast the same value into all elements:
Vec4q(int64_t i) {
-#if defined (_MSC_VER) && ! defined (__x86_64__) && ! defined(__INTEL_COMPILER)
+#if defined (_MSC_VER) && _MSC_VER < 1900 && ! defined (__x86_64__) && ! defined(__INTEL_COMPILER)
// MS compiler cannot use _mm256_set1_epi64x in 32 bit mode, and
// cannot put 64-bit values into xmm register without using
// mmx registers, and it makes no emms
@@ -2511,7 +2514,7 @@ public:
}
// Constructor to build from all elements:
Vec4q(int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
-#if defined (_MSC_VER) && ! defined (__x86_64__) && ! defined(__INTEL_COMPILER)
+#if defined (_MSC_VER) && _MSC_VER < 1900 && ! defined (__x86_64__) && ! defined(__INTEL_COMPILER)
// MS compiler cannot put 64-bit values into xmm register without using
// mmx registers, and it makes no emms
union {
@@ -2560,7 +2563,7 @@ public:
*this = Vec4q(Vec2q().load_partial(n, p), 0);
}
else if (n < 4) {
- *this = Vec4q(Vec2q().load(p), Vec2q().load_partial(n-2, (int64_t*)p+2));
+ *this = Vec4q(Vec2q().load(p), Vec2q().load_partial(n-2, (int64_t const*)p+2));
}
else {
load(p);
@@ -3688,7 +3691,7 @@ static inline Vec32c permute32c(Vec32c const & a) {
&& i8 ==((i0+8 )&31) && i9 ==((i0+9 )&31) && i10==((i0+10)&31) && i11==((i0+11)&31) && i12==((i0+12)&31) && i13==((i0+13)&31) && i14==((i0+14)&31) && i15==((i0+15)&31)
&& i16==((i0+16)&31) && i17==((i0+17)&31) && i18==((i0+18)&31) && i19==((i0+19)&31) && i20==((i0+20)&31) && i21==((i0+21)&31) && i22==((i0+22)&31) && i23==((i0+23)&31)
&& i24==((i0+24)&31) && i25==((i0+25)&31) && i26==((i0+26)&31) && i27==((i0+27)&31) && i28==((i0+28)&31) && i29==((i0+29)&31) && i30==((i0+30)&31) && i31==((i0+31)&31)) {
- __m256i t1 = _mm256_permute4x64_epi64(a, 0x4E);
+ t1 = _mm256_permute4x64_epi64(a, 0x4E);
return _mm256_alignr_epi8(a, t1, i0 & 15);
}
@@ -4581,7 +4584,7 @@ static inline Vec8i lookup(Vec8i const & index, void const * table) {
}
if (n <= 16) {
Vec8i table1 = Vec8i().load(table);
- Vec8i table2 = Vec8i().load((int32_t*)table + 8);
+ Vec8i table2 = Vec8i().load((int32_t const*)table + 8);
Vec8i y1 = lookup8(index, table1);
Vec8i y2 = lookup8(index, table2);
Vec8ib s = index > 7;
@@ -5508,5 +5511,8 @@ static inline Vec4qb to_Vec4qb(uint8_t x);
#endif // INSTRSET < 9 || MAX_VECTOR_SIZE < 512
+#ifdef VCL_NAMESPACE
+}
+#endif
#endif // VECTORI256_H
diff --git a/vectorclass/vectori256e.h b/vectorclass/vectori256e.h
index 71d0ffb..507b886 100755
--- a/vectorclass/vectori256e.h
+++ b/vectorclass/vectori256e.h
@@ -1,8 +1,8 @@
/**************************** vectori256e.h *******************************
* Author: Agner Fog
* Date created: 2012-05-30
-* Last modified: 2014-10-16
-* Version: 1.16
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining 256-bit integer point vector classes as interface
@@ -25,7 +25,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
// check combination of header files
@@ -43,6 +43,9 @@
#include "vectori128.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/*****************************************************************************
*
@@ -325,7 +328,7 @@ public:
*this = Vec32c(Vec16c().load_partial(n, p), 0);
}
else if (n < 32) {
- *this = Vec32c(Vec16c().load(p), Vec16c().load_partial(n-16, (char*)p+16));
+ *this = Vec32c(Vec16c().load(p), Vec16c().load_partial(n-16, (char const*)p+16));
}
else {
load(p);
@@ -1040,7 +1043,7 @@ public:
*this = Vec16s(Vec8s().load_partial(n, p), 0);
}
else if (n < 16) {
- *this = Vec16s(Vec8s().load(p), Vec8s().load_partial(n-8, (int16_t*)p+8));
+ *this = Vec16s(Vec8s().load(p), Vec8s().load_partial(n-8, (int16_t const*)p+8));
}
else {
load(p);
@@ -1742,7 +1745,7 @@ public:
*this = Vec8i(Vec4i().load_partial(n, p), 0);
}
else if (n < 8) {
- *this = Vec8i(Vec4i().load(p), Vec4i().load_partial(n-4, (int32_t*)p+4));
+ *this = Vec8i(Vec4i().load(p), Vec4i().load_partial(n-4, (int32_t const*)p+4));
}
else {
load(p);
@@ -2447,7 +2450,7 @@ public:
*this = Vec4q(Vec2q().load_partial(n, p), 0);
}
else if (n < 4) {
- *this = Vec4q(Vec2q().load(p), Vec2q().load_partial(n-2, (int64_t*)p+2));
+ *this = Vec4q(Vec2q().load(p), Vec2q().load_partial(n-2, (int64_t const*)p+2));
}
else {
load(p);
@@ -4329,4 +4332,8 @@ static inline Vec4qb to_Vec4qb(uint8_t x) {
return Vec4q(to_Vec2qb(x), to_Vec2qb(x>>2));
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORI256_H
diff --git a/vectorclass/vectori512.h b/vectorclass/vectori512.h
index dac51c3..156dfe7 100755
--- a/vectorclass/vectori512.h
+++ b/vectorclass/vectori512.h
@@ -1,8 +1,8 @@
/**************************** vectori512.h *******************************
* Author: Agner Fog
* Date created: 2014-07-23
-* Last modified: 2014-10-16
-* Version: 1.16
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining integer vector classes as interface to intrinsic
@@ -25,7 +25,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
// check combination of header files
@@ -47,6 +47,9 @@
#include "vectori256.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
// Bug fix for missing intrinsics:
// _mm512_cmpgt_epu32_mask, _mm512_cmpgt_epu64_mask
@@ -1360,10 +1363,22 @@ static inline Vec8q & operator -- (Vec8q & a) {
// vector operator * : multiply element by element
static inline Vec8q operator * (Vec8q const & a, Vec8q const & b) {
-#if defined (GCC_VERSION) && GCC_VERSION < 41100 && !defined(__INTEL_COMPILER) && !defined(__clang__)
- return Vec8q(a.get_low() * b.get_low(), a.get_high() * b.get_high()); // _mm512_mullox_epi64 missing in gcc 4.10.
+#if defined (__INTEL_COMPILER)
+ return _mm512_mullox_epi64(a, b); // _mm512_mullox_epi64 missing in gcc
#else
- return _mm512_mullox_epi64(a, b);
+ // return Vec8q(a.get_low() * b.get_low(), a.get_high() * b.get_high());
+
+ // instruction does not exist. Split into 32-bit multiplies
+ //__m512i ahigh = _mm512_shuffle_epi32(a, 0xB1); // swap H<->L
+ __m512i ahigh = _mm512_srli_epi64(a, 32); // high 32 bits of each a
+ __m512i bhigh = _mm512_srli_epi64(b, 32); // high 32 bits of each b
+ __m512i prodahb = _mm512_mul_epu32(ahigh, b); // ahigh*b
+ __m512i prodbha = _mm512_mul_epu32(bhigh, a); // bhigh*a
+ __m512i prodhl = _mm512_add_epi64(prodahb, prodbha); // sum of high*low products
+ __m512i prodhi = _mm512_slli_epi64(prodhl, 32); // same, shifted high
+ __m512i prodll = _mm512_mul_epu32(a, b); // alow*blow = 64 bit unsigned products
+ __m512i prod = _mm512_add_epi64(prodll, prodhi); // low*low+(high*low)<<32
+ return prod;
#endif
}
@@ -2730,4 +2745,8 @@ static inline Vec8qb to_Vec8qb(uint8_t x) {
return (__mmask8)x;
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORI512_H
diff --git a/vectorclass/vectori512e.h b/vectorclass/vectori512e.h
index de7dac6..1acacad 100755
--- a/vectorclass/vectori512e.h
+++ b/vectorclass/vectori512e.h
@@ -1,8 +1,8 @@
/**************************** vectori512e.h *******************************
* Author: Agner Fog
* Date created: 2014-07-23
-* Last modified: 2014-10-16
-* Version: 1.16
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file defining integer vector classes as interface to intrinsic
@@ -25,7 +25,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2014 - 2016 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
// check combination of header files
@@ -36,6 +36,9 @@
#else
#define VECTORI512_H 1
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/*****************************************************************************
*
@@ -87,13 +90,13 @@ public:
// Member function to load from array (unaligned)
Vec512b & load(void const * p) {
z0 = Vec8i().load(p);
- z1 = Vec8i().load((int32_t*)p+8);
+ z1 = Vec8i().load((int32_t const*)p+8);
return *this;
}
// Member function to load from array, aligned by 64
Vec512b & load_a(void const * p) {
z0 = Vec8i().load_a(p);
- z1 = Vec8i().load_a((int32_t*)p+8);
+ z1 = Vec8i().load_a((int32_t const*)p+8);
return *this;
}
// Member function to store into array (unaligned)
@@ -729,7 +732,7 @@ public:
}
else {
z0 = Vec8i().load(p);
- z1 = Vec8i().load_partial(n - 8, (int32_t *)p + 8);
+ z1 = Vec8i().load_partial(n - 8, (int32_t const*)p + 8);
}
return *this;
}
@@ -1250,13 +1253,13 @@ public:
// Member function to load from array (unaligned)
Vec8q & load(void const * p) {
z0 = Vec4q().load(p);
- z1 = Vec4q().load((int64_t*)p+4);
+ z1 = Vec4q().load((int64_t const*)p+4);
return *this;
}
// Member function to load from array, aligned by 64
Vec8q & load_a(void const * p) {
z0 = Vec4q().load_a(p);
- z1 = Vec4q().load_a((int64_t*)p+4);
+ z1 = Vec4q().load_a((int64_t const*)p+4);
return *this;
}
// Partial load. Load n elements and set the rest to 0
@@ -1267,7 +1270,7 @@ public:
}
else {
z0 = Vec4q().load(p);
- z1 = Vec4q().load_partial(n - 4, (int64_t *)p + 4);
+ z1 = Vec4q().load_partial(n - 4, (int64_t const*)p + 4);
}
return *this;
}
@@ -2542,4 +2545,8 @@ static inline Vec8qb to_Vec8qb(uint8_t x) {
return Vec8q(to_Vec4qb(x), to_Vec4qb(x>>4));
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORI512_H
diff --git a/vectorclass/vectormath_common.h b/vectorclass/vectormath_common.h
index edcbd13..ee29469 100755
--- a/vectorclass/vectormath_common.h
+++ b/vectorclass/vectormath_common.h
@@ -1,8 +1,8 @@
/*************************** vectormath_common.h ****************************
* Author: Agner Fog
* Date created: 2014-04-18
-* Last modified: 2014-10-16
-* Version: 1.16
+* Last modified: 2016-05-02
+* Version: 1.22
* Project: vector classes
* Description:
* Header file containing common code for inline version of mathematical functions.
@@ -21,7 +21,7 @@
*
* For detailed instructions, see VectorClass.pdf
*
-* (c) Copyright 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
******************************************************************************/
#ifndef VECTORMATH_COMMON_H
@@ -50,6 +50,9 @@
#define VM_SMALLEST_NORMAL 2.2250738585072014E-308 // smallest normal number, double
#define VM_SMALLEST_NORMALF 1.17549435E-38f // smallest normal number, float
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/******************************************************************************
templates for producing infinite and nan in desired vector type
@@ -151,7 +154,7 @@ longest dependency chains first.
******************************************************************************/
// template <typedef VECTYPE, typedef CTYPE>
-template <class VTYPE, class CTYPE>
+template <class VTYPE, class CTYPE>
static inline VTYPE polynomial_2(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2) {
// calculates polynomial c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
@@ -160,86 +163,86 @@ static inline VTYPE polynomial_2(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2)
return mul_add(x2, c2, mul_add(x, c1, c0));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_3(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3) {
// calculates polynomial c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
//return (c2 + c3*x)*x2 + (c1*x + c0);
- return mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0));
+ return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_4(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4) {
// calculates polynomial c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
VTYPE x4 = x2 * x2;
//return (c2+c3*x)*x2 + ((c0+c1*x) + c4*x4);
- return mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0) + c4*x4);
+ return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + c4*x4);
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_4n(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3) {
// calculates polynomial 1*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
VTYPE x4 = x2 * x2;
//return (c2+c3*x)*x2 + ((c0+c1*x) + x4);
- return mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0) + x4);
+ return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + x4);
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_5(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5) {
// calculates polynomial c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
VTYPE x4 = x2 * x2;
//return (c2+c3*x)*x2 + ((c4+c5*x)*x4 + (c0+c1*x));
- return mul_add(mul_add(c3,x,c2), x2, mul_add(mul_add(c5,x,c4), x4, mul_add(c1,x,c0)));
+ return mul_add(mul_add(c3, x, c2), x2, mul_add(mul_add(c5, x, c4), x4, mul_add(c1, x, c0)));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_5n(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4) {
// calculates polynomial 1*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
VTYPE x4 = x2 * x2;
//return (c2+c3*x)*x2 + ((c4+x)*x4 + (c0+c1*x));
- return mul_add( mul_add(c3,x,c2), x2, mul_add(c4+x,x4,mul_add(c1,x,c0)) );
+ return mul_add(mul_add(c3, x, c2), x2, mul_add(c4 + x, x4, mul_add(c1, x, c0)));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_6(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6) {
// calculates polynomial c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
VTYPE x4 = x2 * x2;
//return (c4+c5*x+c6*x2)*x4 + ((c2+c3*x)*x2 + (c0+c1*x));
- return mul_add(mul_add(c6,x2,mul_add(c5,x,c4)), x4, mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0)));
+ return mul_add(mul_add(c6, x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_6n(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5) {
// calculates polynomial 1*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
VTYPE x4 = x2 * x2;
//return (c4+c5*x+x2)*x4 + ((c2+c3*x)*x2 + (c0+c1*x));
- return mul_add(mul_add(c5,x,c4+x2), x4, mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0)));
+ return mul_add(mul_add(c5, x, c4 + x2), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_7(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7) {
// calculates polynomial c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
VTYPE x4 = x2 * x2;
//return ((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + (c0+c1*x));
- return mul_add(mul_add(mul_add(c7,x,c6), x2, mul_add(c5,x,c4)), x4, mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0)));
+ return mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_8(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8) {
// calculates polynomial c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
@@ -247,11 +250,11 @@ static inline VTYPE polynomial_8(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2,
VTYPE x4 = x2 * x2;
VTYPE x8 = x4 * x4;
//return ((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8*x8 + (c2+c3*x)*x2 + (c0+c1*x));
- return mul_add(mul_add(mul_add(c7,x,c6), x2, mul_add(c5,x,c4)), x4,
- mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0)+c8*x8));
+ return mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4,
+ mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + c8*x8));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_9(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9) {
// calculates polynomial c9*x^9 + c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
@@ -259,12 +262,12 @@ static inline VTYPE polynomial_9(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2,
VTYPE x4 = x2 * x2;
VTYPE x8 = x4 * x4;
//return (((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8+c9*x)*x8) + ((c2+c3*x)*x2 + (c0+c1*x));
- return mul_add(mul_add(c9,x,c8), x8, mul_add(
- mul_add(mul_add(c7,x,c6), x2, mul_add(c5,x,c4)), x4,
- mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0))));
+ return mul_add(mul_add(c9, x, c8), x8, mul_add(
+ mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4,
+ mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_10(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10) {
// calculates polynomial c10*x^10 + c9*x^9 + c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
@@ -272,29 +275,29 @@ static inline VTYPE polynomial_10(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2,
VTYPE x4 = x2 * x2;
VTYPE x8 = x4 * x4;
//return (((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8+c9*x+c10*x2)*x8) + ((c2+c3*x)*x2 + (c0+c1*x));
- return mul_add(mul_add(x2,c10,mul_add(c9,x,c8)), x8,
- mul_add(mul_add(mul_add(c7,x,c6),x2,mul_add(c5,x,c4)), x4,
- mul_add(mul_add(c3,x,c2),x2,mul_add(c1,x,c0))));
-}
+ return mul_add(mul_add(x2, c10, mul_add(c9, x, c8)), x8,
+ mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4,
+ mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))));
+}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_13(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10, CTYPE c11, CTYPE c12, CTYPE c13) {
// calculates polynomial c13*x^13 + c12*x^12 + ... + c1*x + c0
// VTYPE may be a vector type, CTYPE is a scalar type
VTYPE x2 = x * x;
VTYPE x4 = x2 * x2;
VTYPE x8 = x4 * x4;
- return mul_add(
- mul_add(
- mul_add(c13,x,c12), x4,
- mul_add(mul_add(c11,x,c10), x2, mul_add(c9,x,c8))), x8,
- mul_add(
- mul_add(mul_add(c7,x,c6), x2, mul_add(c5,x,c4)), x4,
- mul_add(mul_add(c3,x,c2), x2, mul_add(c1,x,c0))));
+ return mul_add(
+ mul_add(
+ mul_add(c13, x, c12), x4,
+ mul_add(mul_add(c11, x, c10), x2, mul_add(c9, x, c8))), x8,
+ mul_add(
+ mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4,
+ mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))));
}
-template<class VTYPE, class CTYPE>
+template<class VTYPE, class CTYPE>
static inline VTYPE polynomial_13m(VTYPE const & x, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10, CTYPE c11, CTYPE c12, CTYPE c13) {
// calculates polynomial c13*x^13 + c12*x^12 + ... + x + 0
// VTYPE may be a vector type, CTYPE is a scalar type
@@ -303,8 +306,12 @@ static inline VTYPE polynomial_13m(VTYPE const & x, CTYPE c2, CTYPE c3, CTYPE c4
VTYPE x8 = x4 * x4;
// return ((c8+c9*x) + (c10+c11*x)*x2 + (c12+c13*x)*x4)*x8 + (((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + x));
return mul_add(
- mul_add(mul_add(c13,x,c12), x4, mul_add(mul_add(c11,x,c10), x2, mul_add(c9,x,c8))), x8,
- mul_add( mul_add(mul_add(c7,x,c6), x2, mul_add(c5,x,c4)), x4, mul_add(mul_add(c3,x,c2),x2,x)));
+ mul_add(mul_add(c13, x, c12), x4, mul_add(mul_add(c11, x, c10), x2, mul_add(c9, x, c8))), x8,
+ mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, x)));
}
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif
diff --git a/vectorclass/vectormath_exp.h b/vectorclass/vectormath_exp.h
index 465ada8..66a90ea 100755
--- a/vectorclass/vectormath_exp.h
+++ b/vectorclass/vectormath_exp.h
@@ -1,8 +1,8 @@
/**************************** vectormath_exp.h ******************************
* Author: Agner Fog
* Date created: 2014-04-18
-* Last modified: 2015-02-10
-* Version: 1.16
+* Last modified: 2016-04-26
+* Version: 1.22
* Project: vector classes
* Description:
* Header file containing inline vector functions of logarithms, exponential
@@ -25,7 +25,7 @@
*
* For detailed instructions, see vectormath_common.h and VectorClass.pdf
*
-* (c) Copyright 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
******************************************************************************/
#ifndef VECTORMATH_EXP_H
@@ -33,6 +33,9 @@
#include "vectormath_common.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/******************************************************************************
* Exponential functions
@@ -1329,7 +1332,7 @@ static inline VTYPE pow_template_d(VTYPE const & x0, VTYPE const & y) {
z = select(xfinite, z, select(y == 0., VTYPE(1.), select(y < 0., VTYPE(0.), infinite_vec<VTYPE>() | ( VTYPE(reinterpret_d(yodd)) & x0))));
z = select(is_nan(x0), select(is_nan(y), x0 | y, x0), select(is_nan(y), y, z));
return z;
-};
+}
//This template is in vectorf128.h to prevent implicit conversion of float y to int when float version is not defined:
@@ -1615,7 +1618,7 @@ public:
// but we can check a even anyway at no cost to be sure)
if (a == 0) return 1.f;
if ((b | ~a) & 1) y = abs(y);
- y = ::pow(y, float(double(a)/double(b)));
+ y = pow(y, float(double(a)/double(b)));
if (a & b & 1) y = sign_combine(y, x); // apply sign if a and b both odd
if ((a ^ b) >= 0) y = select(x == 0.f, 0.f, y); // zero allowed for positive a and b
return y;
@@ -1624,7 +1627,7 @@ public:
Vec2d y = x;
if (a == 0) return 1.;
if ((b | ~a) & 1) y = abs(y);
- y = ::pow(y, double((long double)a/(long double)b));
+ y = pow(y, double((long double)a/(long double)b));
if (a & b & 1) y = sign_combine(y, x);
if ((a ^ b) >= 0) y = select(x == 0., 0., y);
return y;
@@ -1634,7 +1637,7 @@ public:
Vec8f y = x;
if (a == 0) return 1.f;
if ((b | ~a) & 1) y = abs(y);
- y = ::pow(y, float(double(a)/double(b)));
+ y = pow(y, float(double(a)/double(b)));
if (a & b & 1) y = sign_combine(y, x);
if ((a ^ b) >= 0) y = select(x == 0.f, 0.f, y);
return y;
@@ -1643,7 +1646,7 @@ public:
Vec4d y = x;
if (a == 0) return 1.;
if ((b | ~a) & 1) y = abs(y);
- y = ::pow(y, double((long double)a/(long double)b));
+ y = pow(y, double((long double)a/(long double)b));
if (a & b & 1) y = sign_combine(y, x);
if ((a ^ b) >= 0) y = select(x == 0., 0., y);
return y;
@@ -1654,7 +1657,7 @@ public:
Vec16f y = x;
if (a == 0) return 1.f;
if ((b | ~a) & 1) y = abs(y);
- y = ::pow(y, float(double(a)/double(b)));
+ y = pow(y, float(double(a)/double(b)));
if (a & b & 1) y = sign_combine(y, x);
if ((a ^ b) >= 0) y = select(x == 0.f, 0.f, y);
return y;
@@ -1663,7 +1666,7 @@ public:
Vec8d y = x;
if (a == 0) return 1.;
if ((b | ~a) & 1) y = abs(y);
- y = ::pow(y, double((long double)a/(long double)b));
+ y = pow(y, double((long double)a/(long double)b));
if (a & b & 1) y = sign_combine(y, x);
if ((a ^ b) >= 0) y = select(x == 0., 0., y);
return y;
@@ -1992,4 +1995,8 @@ static inline Vec8q nan_code(Vec8d const & x) {
#endif // MAX_VECTOR_SIZE >= 512
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORMATH_EXP_H
diff --git a/vectorclass/vectormath_hyp.h b/vectorclass/vectormath_hyp.h
index 948269b..adc8306 100755
--- a/vectorclass/vectormath_hyp.h
+++ b/vectorclass/vectormath_hyp.h
@@ -24,7 +24,7 @@
*
* For detailed instructions, see vectormath_common.h and VectorClass.pdf
*
-* (c) Copyright 2015 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
******************************************************************************/
#ifndef VECTORMATH_HYP_H
@@ -32,6 +32,9 @@
#include "vectormath_exp.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
/******************************************************************************
* Hyperbolic functions
@@ -733,4 +736,8 @@ static inline Vec16f atanh(Vec16f const & x) {
}
#endif // MAX_VECTOR_SIZE >= 512
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif
diff --git a/vectorclass/vectormath_lib.h b/vectorclass/vectormath_lib.h
index edea799..675769b 100755
--- a/vectorclass/vectormath_lib.h
+++ b/vectorclass/vectormath_lib.h
@@ -1,23 +1,23 @@
/**************************** vectormath_lib.h *****************************
-| Author: Agner Fog
-| Date created: 2012-05-30
-* Last modified: 2014-04-23
-| Version: 1.16
-| Project: vector classes
-| Description:
-| Header file defining mathematical functions on floating point vectors
-| May use Intel SVML library or AMD LIBM library
-|
-| Instructions:
-| Define VECTORMATH to one of the following values:
-| 0: Use ordinary math library (slow)
-| 1: Use AMD LIBM library
-| 2: Use Intel SVML library with any compiler
-| 3: Use Intel SVML library with Intel compiler
-|
-| For detailed instructions, see VectorClass.pdf
-|
-| (c) Copyright 2012-2014 GNU General Public License http://www.gnu.org/licenses
+* Author: Agner Fog
+* Date created: 2012-05-30
+* Last modified: 2016-04-26
+* Version: 1.22
+* Project: vector classes
+* Description:
+* Header file defining mathematical functions on floating point vectors
+* May use Intel SVML library or AMD LIBM library
+*
+* Instructions:
+* Define VECTORMATH to one of the following values:
+* 0: Use ordinary math library (slow)
+* 1: Use AMD LIBM library
+* 2: Use Intel SVML library with any compiler
+* 3: Use Intel SVML library with Intel compiler
+*
+* For detailed instructions, see VectorClass.pdf
+*
+* (c) Copyright 2012-2016 GNU General Public License http://www.gnu.org/licenses
\*****************************************************************************/
// check combination of header files
@@ -34,13 +34,18 @@
#endif // __INTEL_COMPILER
#endif // VECTORMATH
+#include <math.h>
+
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
+
/*****************************************************************************
*
* VECTORMATH = 0. Use ordinary library (scalar)
*
*****************************************************************************/
#if VECTORMATH == 0
-#include <math.h>
#ifndef VECTORMATH_COMMON_H
// exponential and power functions
@@ -52,7 +57,7 @@ static inline Vec4f exp (Vec4f const & x) {
static inline Vec2d exp (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(exp(xx[0]), exp(xx[1]));
+ return Vec2d(::exp(xx[0]), ::exp(xx[1]));
}
// There is no certain way to know which functions are available, but at least some (Gnu)
@@ -105,30 +110,30 @@ static inline Vec4f pow (Vec4f const & a, Vec4f const & b) {
static inline Vec2d pow (Vec2d const & a, Vec2d const & b) {
double aa[4], bb[4];
a.store(aa); b.store(bb);
- return Vec2d(pow(aa[0],bb[0]), pow(aa[1],bb[1]));
+ return Vec2d(::pow(aa[0],bb[0]), ::pow(aa[1],bb[1]));
}
static inline Vec4f log (Vec4f const & x) {
float xx[4];
x.store(xx);
- return Vec4f(log(xx[0]), log(xx[1]), log(xx[2]), log(xx[3]));
+ return Vec4f(logf(xx[0]), logf(xx[1]), logf(xx[2]), logf(xx[3]));
}
static inline Vec2d log (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(log(xx[0]), log(xx[1]));
+ return Vec2d(::log(xx[0]), ::log(xx[1]));
}
#ifdef HAVE_LOG1P
static inline Vec4f log1p (Vec4f const & x) {
float xx[4];
x.store(xx);
- return Vec4f(log1p(xx[0]), log1p(xx[1]), log1p(xx[2]), log1p(xx[3]));
+ return Vec4f(::log1p(xx[0]), ::log1p(xx[1]), ::log1p(xx[2]), ::log1p(xx[3]));
}
static inline Vec2d log1p (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(log1p(xx[0]), log1p(xx[1]));
+ return Vec2d(::log1p(xx[0]), ::log1p(xx[1]));
}
#endif
@@ -147,7 +152,7 @@ static inline Vec4f log10 (Vec4f const & x) { // logarithm base 10
static inline Vec2d log10 (Vec2d const & x) { // logarithm base 10
double xx[4];
x.store(xx);
- return Vec2d(log10(xx[0]), log10(xx[1]));
+ return Vec2d(::log10(xx[0]), ::log10(xx[1]));
}
// trigonometric functions
@@ -159,7 +164,7 @@ static inline Vec4f sin(Vec4f const & x) {
static inline Vec2d sin (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(sin(xx[0]), sin(xx[1]));
+ return Vec2d(::sin(xx[0]), ::sin(xx[1]));
}
static inline Vec4f cos(Vec4f const & x) {
@@ -170,7 +175,7 @@ static inline Vec4f cos(Vec4f const & x) {
static inline Vec2d cos (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(cos(xx[0]), cos(xx[1]));
+ return Vec2d(::cos(xx[0]), ::cos(xx[1]));
}
static inline Vec4f sincos (Vec4f * pcos, Vec4f const & x) { // sine and cosine. sin(x) returned, cos(x) in pcos
@@ -190,7 +195,7 @@ static inline Vec4f tan(Vec4f const & x) {
static inline Vec2d tan (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(tan(xx[0]), tan(xx[1]));
+ return Vec2d(::tan(xx[0]), ::tan(xx[1]));
}
// inverse trigonometric functions
@@ -202,7 +207,7 @@ static inline Vec4f asin(Vec4f const & x) {
static inline Vec2d asin (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(asin(xx[0]), asin(xx[1]));
+ return Vec2d(::asin(xx[0]), ::asin(xx[1]));
}
static inline Vec4f acos(Vec4f const & x) {
@@ -213,7 +218,7 @@ static inline Vec4f acos(Vec4f const & x) {
static inline Vec2d acos (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(acos(xx[0]), acos(xx[1]));
+ return Vec2d(::acos(xx[0]), ::acos(xx[1]));
}
static inline Vec4f atan(Vec4f const & x) {
@@ -224,7 +229,7 @@ static inline Vec4f atan(Vec4f const & x) {
static inline Vec2d atan (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(atan(xx[0]), atan(xx[1]));
+ return Vec2d(::atan(xx[0]), ::atan(xx[1]));
}
static inline Vec4f atan2 (Vec4f const & a, Vec4f const & b) { // inverse tangent of a/b
@@ -235,7 +240,7 @@ static inline Vec4f atan2 (Vec4f const & a, Vec4f const & b) { // inverse tang
static inline Vec2d atan2 (Vec2d const & a, Vec2d const & b) { // inverse tangent of a/b
double aa[4], bb[4];
a.store(aa); b.store(bb);
- return Vec2d(atan2(aa[0],bb[0]), atan2(aa[1],bb[1]));
+ return Vec2d(::atan2(aa[0],bb[0]), ::atan2(aa[1],bb[1]));
}
#endif // VECTORMATH_COMMON_H
@@ -248,7 +253,7 @@ static inline Vec4f sinh(Vec4f const & x) { // hyperbolic sine
static inline Vec2d sinh (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(sinh(xx[0]), sinh(xx[1]));
+ return Vec2d(::sinh(xx[0]), ::sinh(xx[1]));
}
static inline Vec4f cosh(Vec4f const & x) { // hyperbolic cosine
@@ -259,7 +264,7 @@ static inline Vec4f cosh(Vec4f const & x) { // hyperbolic cosine
static inline Vec2d cosh (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(cosh(xx[0]), cosh(xx[1]));
+ return Vec2d(::cosh(xx[0]), ::cosh(xx[1]));
}
static inline Vec4f tanh(Vec4f const & x) { // hyperbolic tangent
@@ -270,7 +275,7 @@ static inline Vec4f tanh(Vec4f const & x) { // hyperbolic tangent
static inline Vec2d tanh (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(tanh(xx[0]), tanh(xx[1]));
+ return Vec2d(::tanh(xx[0]), ::tanh(xx[1]));
}
// error function
@@ -278,12 +283,12 @@ static inline Vec2d tanh (Vec2d const & x) {
static inline Vec4f erf(Vec4f const & x) {
float xx[4];
x.store(xx);
- return Vec4f(erf(xx[0]), erf(xx[1]), erf(xx[2]), erf(xx[3]));
+ return Vec4f(::erf(xx[0]), ::erf(xx[1]), ::erf(xx[2]), ::erf(xx[3]));
}
static inline Vec2d erf (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(erf(xx[0]), erf(xx[1]));
+ return Vec2d(::erf(xx[0]), ::erf(xx[1]));
}
#endif
@@ -291,12 +296,12 @@ static inline Vec2d erf (Vec2d const & x) {
static inline Vec4f erfc(Vec4f const & x) {
float xx[4];
x.store(xx);
- return Vec4f(erfc(xx[0]), erfc(xx[1]), erfc(xx[2]), erfc(xx[3]));
+ return Vec4f(::erfc(xx[0]), ::erfc(xx[1]), ::erfc(xx[2]), ::erfc(xx[3]));
}
static inline Vec2d erfc (Vec2d const & x) {
double xx[4];
x.store(xx);
- return Vec2d(erfc(xx[0]), erfc(xx[1]));
+ return Vec2d(::erfc(xx[0]), ::erfc(xx[1]));
}
#endif
@@ -312,8 +317,8 @@ static inline Vec4f cexp (Vec4f const & x) { // complex exponential function
static inline Vec2d cexp (Vec2d const & x) { // complex exponential function
double xx[2];
x.store(xx);
- Vec2d z(cos(xx[1]), sin(xx[1]));
- return z * exp(xx[0]);
+ Vec2d z(::cos(xx[1]), ::sin(xx[1]));
+ return z * ::exp(xx[0]);
}
#if defined (VECTORF256_H) // 256 bit vectors defined
@@ -440,11 +445,11 @@ static inline Vec4d atan (Vec4d const & x) { // inverse tangent
return Vec4d(atan(x.get_low()), atan(x.get_high()));
}
-static inline Vec8f atan (Vec8f const & a, Vec8f const & b) { // inverse tangent of a/b
- return Vec8f(atan(a.get_low(),b.get_low()), atan(a.get_high(),b.get_high()));
+static inline Vec8f atan2 (Vec8f const & a, Vec8f const & b) { // inverse tangent of a/b
+ return Vec8f(atan2(a.get_low(),b.get_low()), atan2(a.get_high(),b.get_high()));
}
-static inline Vec4d atan (Vec4d const & a, Vec4d const & b) { // inverse tangent of a/b
- return Vec4d(atan(a.get_low(),b.get_low()), atan(a.get_high(),b.get_high()));
+static inline Vec4d atan2 (Vec4d const & a, Vec4d const & b) { // inverse tangent of a/b
+ return Vec4d(atan2(a.get_low(),b.get_low()), atan2(a.get_high(),b.get_high()));
}
#endif // VECTORMATH_COMMON_H
@@ -2104,4 +2109,8 @@ static inline Vec4d cexp (Vec4d const & x) { // complex exponential function
#endif // VECTORF256_H == 1
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif // VECTORMATH_LIB_H
diff --git a/vectorclass/vectormath_trig.h b/vectorclass/vectormath_trig.h
index 986d2e4..11ab7e3 100755
--- a/vectorclass/vectormath_trig.h
+++ b/vectorclass/vectormath_trig.h
@@ -1,8 +1,8 @@
/**************************** vectormath_trig.h ******************************
* Author: Agner Fog
* Date created: 2014-04-18
-* Last modified: 2015-02-10
-* Version: 1.16
+* Last modified: 2016-05-02
+* Version: 1.22
* Project: vector classes
* Description:
* Header file containing inline version of trigonometric functions
@@ -20,7 +20,7 @@
*
* For detailed instructions, see vectormath_common.h and VectorClass.pdf
*
-* (c) Copyright 2015 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
******************************************************************************/
#ifndef VECTORMATH_TRIG_H
@@ -28,6 +28,10 @@
#include "vectormath_common.h"
+#ifdef VCL_NAMESPACE
+namespace VCL_NAMESPACE {
+#endif
+
// Different overloaded functions for template resolution.
// These are used to fix the problem that the quadrant index uses
// a vector of 32-bit integers which doesn't fit the size of the
@@ -40,7 +44,7 @@
// define overloaded truncate functions
static inline Vec4i vm_truncate_low_to_int(Vec2d const & x) {
- return truncate_to_int(x,x);
+ return truncate_to_int(x, x);
}
#if MAX_VECTOR_SIZE >= 256
@@ -85,21 +89,21 @@ template<class ITYPE, class ITYPEH>
static inline ITYPE vm_half_int_vector_to_full(ITYPEH const & x);
template<>
-inline Vec2q vm_half_int_vector_to_full<Vec2q,Vec4i>(Vec4i const & x) {
+inline Vec2q vm_half_int_vector_to_full<Vec2q, Vec4i>(Vec4i const & x) {
return extend_low(x);
}
#if MAX_VECTOR_SIZE >= 256
template<>
-inline Vec4q vm_half_int_vector_to_full<Vec4q,Vec4i>(Vec4i const & x) {
- return extend_low(Vec8i(x,x));
+inline Vec4q vm_half_int_vector_to_full<Vec4q, Vec4i>(Vec4i const & x) {
+ return extend_low(Vec8i(x, x));
}
#endif // MAX_VECTOR_SIZE >= 256
#if MAX_VECTOR_SIZE >= 512
template<>
-inline Vec8q vm_half_int_vector_to_full<Vec8q,Vec8i>(Vec8i const & x) {
- return extend_low(Vec16i(x,x));
+inline Vec8q vm_half_int_vector_to_full<Vec8q, Vec8i>(Vec8i const & x) {
+ return extend_low(Vec16i(x, x));
}
#endif // MAX_VECTOR_SIZE >= 512
@@ -117,25 +121,25 @@ inline Vec8q vm_half_int_vector_to_full<Vec8q,Vec8i>(Vec8i const & x) {
// Parameters:
// xx = input x (radians)
// cosret = return pointer (only if SC = 3)
-template<class VTYPE, class ITYPE, class ITYPEH, class BVTYPE, int SC>
+template<class VTYPE, class ITYPE, class ITYPEH, class BVTYPE, int SC>
static inline VTYPE sincos_d(VTYPE * cosret, VTYPE const & xx) {
// define constants
- const double ONEOPIO4 = 4./VM_PI;
+ const double ONEOPIO4 = 4. / VM_PI;
- const double P0sin =-1.66666666666666307295E-1;
+ const double P0sin = -1.66666666666666307295E-1;
const double P1sin = 8.33333333332211858878E-3;
- const double P2sin =-1.98412698295895385996E-4;
+ const double P2sin = -1.98412698295895385996E-4;
const double P3sin = 2.75573136213857245213E-6;
- const double P4sin =-2.50507477628578072866E-8;
+ const double P4sin = -2.50507477628578072866E-8;
const double P5sin = 1.58962301576546568060E-10;
const double P0cos = 4.16666666666665929218E-2;
- const double P1cos =-1.38888888888730564116E-3;
+ const double P1cos = -1.38888888888730564116E-3;
const double P2cos = 2.48015872888517045348E-5;
- const double P3cos =-2.75573141792967388112E-7;
+ const double P3cos = -2.75573141792967388112E-7;
const double P4cos = 2.08757008419747316778E-9;
- const double P5cos =-1.13585365213876817300E-11;
+ const double P5cos = -1.13585365213876817300E-11;
const double DP1 = 7.853981554508209228515625E-1;
const double DP2 = 7.94662735614792836714E-9;
@@ -176,7 +180,7 @@ static inline VTYPE sincos_d(VTYPE * cosret, VTYPE const & xx) {
c = mul_add(x2 * x2, c, nmul_add(x2, 0.5, 1.0)); // c = 1.0 - x2 * 0.5 + (x2 * x2) * c;
// correct for quadrant
- qq = vm_half_int_vector_to_full<ITYPE,ITYPEH>(q);
+ qq = vm_half_int_vector_to_full<ITYPE, ITYPEH>(q);
swap = BVTYPE((qq & 2) != 0);
// check for overflow
@@ -256,23 +260,23 @@ static inline Vec8d sincos(Vec8d * cosret, Vec8d const & x) {
// Parameters:
// xx = input x (radians)
// cosret = return pointer (only if SC = 3)
-template<class VTYPE, class ITYPE, class BVTYPE, int SC>
+template<class VTYPE, class ITYPE, class BVTYPE, int SC>
static inline VTYPE sincos_f(VTYPE * cosret, VTYPE const & xx) {
// define constants
- const float ONEOPIO4f = (float)(4./VM_PI);
+ const float ONEOPIO4f = (float)(4. / VM_PI);
const float DP1F = 0.78515625f;
const float DP2F = 2.4187564849853515625E-4f;
- const float DP3F = 3.77489497744594108E-8f;
+ const float DP3F = 3.77489497744594108E-8f;
const float P0sinf = -1.6666654611E-1f;
- const float P1sinf = 8.3321608736E-3f;
+ const float P1sinf = 8.3321608736E-3f;
const float P2sinf = -1.9515295891E-4f;
- const float P0cosf = 4.166664568298827E-2f;
+ const float P0cosf = 4.166664568298827E-2f;
const float P1cosf = -1.388731625493765E-3f;
- const float P2cosf = 2.443315711809948E-5f;
+ const float P2cosf = 2.443315711809948E-5f;
VTYPE xa, x, y, x2, s, c, sin1, cos1; // data vectors
ITYPE q, signsin, signcos; // integer vectors
@@ -299,7 +303,7 @@ static inline VTYPE sincos_f(VTYPE * cosret, VTYPE const & xx) {
// Taylor expansion of sin and cos, valid for -pi/4 <= x <= pi/4
x2 = x * x;
- s = polynomial_2(x2, P0sinf, P1sinf, P2sinf) * (x*x2) + x;
+ s = polynomial_2(x2, P0sinf, P1sinf, P2sinf) * (x*x2) + x;
c = polynomial_2(x2, P0cosf, P1cosf, P2cosf) * (x2*x2) + nmul_add(0.5f, x2, 1.0f);
// correct for quadrant
@@ -322,7 +326,7 @@ static inline VTYPE sincos_f(VTYPE * cosret, VTYPE const & xx) {
signcos = ((q + 2) << 29) & (1 << 31);
cos1 ^= reinterpret_f(signcos);
}
- if (SC == 1) return sin1;
+ if (SC == 1) return sin1;
else if (SC == 2) return cos1;
else if (SC == 3) { // calculate both. cos returned through pointer
*cosret = cos1;
@@ -396,19 +400,19 @@ static inline Vec16f tan(Vec16f const & x) {
// BVTYPE: boolean vector type
// Parameters:
// x = input x (radians)
-template<class VTYPE, class ITYPE, class ITYPEH, class BVTYPE>
+template<class VTYPE, class ITYPE, class ITYPEH, class BVTYPE>
static inline VTYPE tan_d(VTYPE const & x) {
// define constants
- const double ONEOPIO4 = 4./VM_PI;
+ const double ONEOPIO4 = 4. / VM_PI;
const double DP1 = 7.853981554508209228515625E-1;
const double DP2 = 7.94662735614792836714E-9;
const double DP3 = 3.06161699786838294307E-17;
- const double P2tan=-1.30936939181383777646E4;
- const double P1tan=1.15351664838587416140E6;
- const double P0tan=-1.79565251976484877988E7;
+ const double P2tan = -1.30936939181383777646E4;
+ const double P1tan = 1.15351664838587416140E6;
+ const double P0tan = -1.79565251976484877988E7;
const double Q3tan = 1.36812963470692954678E4;
const double Q2tan = -1.32089234440210967447E6;
@@ -439,14 +443,14 @@ static inline VTYPE tan_d(VTYPE const & x) {
// Pade expansion of tan, valid for -pi/4 <= x <= pi/4
zz = z * z;
- px = polynomial_2 (zz, P0tan, P1tan, P2tan);
+ px = polynomial_2(zz, P0tan, P1tan, P2tan);
qx = polynomial_4n(zz, Q0tan, Q1tan, Q2tan, Q3tan);
// qx cannot be 0 for x <= pi/4
tn = mul_add(px / qx, z * zz, z); // tn = z + z * zz * px / qx;
// if (q&2) tn = -1/tn
- qq = vm_half_int_vector_to_full<ITYPE,ITYPEH>(q);
+ qq = vm_half_int_vector_to_full<ITYPE, ITYPEH>(q);
doinvert = BVTYPE((qq & 2) != 0);
xzero = (xa == 0.);
// avoid division by 0. We will not be using recip anyway if xa == 0.
@@ -472,20 +476,20 @@ static inline Vec2d tan(Vec2d const & x) {
}
#if MAX_VECTOR_SIZE >= 256
-static inline Vec4d tan(Vec4d const & x) {
+static inline Vec4d tan(Vec4d const & x) {
return tan_d<Vec4d, Vec4q, Vec4i, Vec4db>(x);
}
#endif // MAX_VECTOR_SIZE >= 256
#if MAX_VECTOR_SIZE >= 512
-static inline Vec8d tan(Vec8d const & x) {
+static inline Vec8d tan(Vec8d const & x) {
return tan_d<Vec8d, Vec8q, Vec8i, Vec8db>(x);
}
#endif // MAX_VECTOR_SIZE >= 512
/*
-This is removed for the single precision version.
+This is removed for the single precision version.
It is faster to use tan(x) = sin(x)/cos(x)
// *************************************************************
@@ -498,7 +502,7 @@ It is faster to use tan(x) = sin(x)/cos(x)
// Parameters:
// x = input x (radians)
// cosret = return pointer (only if SC = 3)
-template<class VTYPE, class ITYPE, class BVTYPE>
+template<class VTYPE, class ITYPE, class BVTYPE>
static inline VTYPE tan_f(VTYPE const & x) {
// define constants
@@ -544,7 +548,7 @@ static inline VTYPE tan_f(VTYPE const & x) {
doinvert = (q & 2) != 0;
xzero = (xa == 0.f);
// avoid division by 0. We will not be using recip anyway if xa == 0.
- // tn never becomes exactly 0 when x = pi/2 so we only have to make
+ // tn never becomes exactly 0 when x = pi/2 so we only have to make
// a special case for x == 0.
recip = (-1.f) / select(xzero, VTYPE(-1.f), tn);
tn = select(doinvert, recip, tn);
@@ -557,7 +561,7 @@ static inline VTYPE tan_f(VTYPE const & x) {
static inline Vec4f tan(Vec4f const & x) {
return tan_f<Vec4f, Vec4i, Vec4fb>(x);
-}
+}
static inline Vec8f tan(Vec8f const & x) {
return tan_f<Vec8f, Vec8i, Vec8fb>(x);
@@ -573,39 +577,39 @@ static inline Vec8f tan(Vec8f const & x) {
// AC: 0 = asin, 1 = acos
// Parameters:
// x = input x
-template<class VTYPE, class BVTYPE, int AC>
+template<class VTYPE, class BVTYPE, int AC>
static inline VTYPE asin_d(VTYPE const & x) {
// define constants
- const double R4asin = 2.967721961301243206100E-3;
+ const double R4asin = 2.967721961301243206100E-3;
const double R3asin = -5.634242780008963776856E-1;
- const double R2asin = 6.968710824104713396794E0;
+ const double R2asin = 6.968710824104713396794E0;
const double R1asin = -2.556901049652824852289E1;
- const double R0asin = 2.853665548261061424989E1;
+ const double R0asin = 2.853665548261061424989E1;
const double S3asin = -2.194779531642920639778E1;
- const double S2asin = 1.470656354026814941758E2;
+ const double S2asin = 1.470656354026814941758E2;
const double S1asin = -3.838770957603691357202E2;
- const double S0asin = 3.424398657913078477438E2;
+ const double S0asin = 3.424398657913078477438E2;
- const double P5asin = 4.253011369004428248960E-3;
+ const double P5asin = 4.253011369004428248960E-3;
const double P4asin = -6.019598008014123785661E-1;
- const double P3asin = 5.444622390564711410273E0;
+ const double P3asin = 5.444622390564711410273E0;
const double P2asin = -1.626247967210700244449E1;
- const double P1asin = 1.956261983317594739197E1;
+ const double P1asin = 1.956261983317594739197E1;
const double P0asin = -8.198089802484824371615E0;
const double Q4asin = -1.474091372988853791896E1;
- const double Q3asin = 7.049610280856842141659E1;
+ const double Q3asin = 7.049610280856842141659E1;
const double Q2asin = -1.471791292232726029859E2;
- const double Q1asin = 1.395105614657485689735E2;
+ const double Q1asin = 1.395105614657485689735E2;
const double Q0asin = -4.918853881490881290097E1;
VTYPE xa, xb, x1, x2, x3, x4, x5, px, qx, rx, sx, vx, wx, y1, yb, z, z1, z2;
BVTYPE big;
bool dobig, dosmall;
- xa = abs(x);
+ xa = abs(x);
big = xa >= 0.625;
/*
@@ -614,7 +618,7 @@ static inline VTYPE asin_d(VTYPE const & x) {
x = xa * xa;
px = PX(x);
qx = QX(x);
- y1 = x*px/qx;
+ y1 = x*px/qx;
y1 = xa * y1 + xa;
Big: xa >= 0.625
@@ -630,7 +634,7 @@ static inline VTYPE asin_d(VTYPE const & x) {
// select a common x for all polynomials
// This allows sharing of powers of x through common subexpression elimination
- x1 = select(big, 1.0 - xa, xa * xa);
+ x1 = select(big, 1.0 - xa, xa * xa);
// calculate powers of x1 outside branches to make sure they are only calculated once
x2 = x1 * x1;
@@ -639,30 +643,30 @@ static inline VTYPE asin_d(VTYPE const & x) {
x3 = x2 * x1;
dosmall = !horizontal_and(big); // at least one element is small
- dobig = horizontal_or(big) ; // at least one element is big
+ dobig = horizontal_or(big); // at least one element is big
// calculate polynomials (reuse powers of x)
if (dosmall) {
// px = polynomial_5 (x1, P0asin, P1asin, P2asin, P3asin, P4asin, P5asin);
// qx = polynomial_5n(x1, Q0asin, Q1asin, Q2asin, Q3asin, Q4asin);
- px = mul_add(x3,P3asin,P0asin) + mul_add(x4,P4asin,x1*P1asin) + mul_add(x5,P5asin,x2*P2asin);
- qx = mul_add(x4,Q4asin,x5) + mul_add(x3,Q3asin,x1*Q1asin) + mul_add(x2,Q2asin,Q0asin);
+ px = mul_add(x3, P3asin, P0asin) + mul_add(x4, P4asin, x1*P1asin) + mul_add(x5, P5asin, x2*P2asin);
+ qx = mul_add(x4, Q4asin, x5) + mul_add(x3, Q3asin, x1*Q1asin) + mul_add(x2, Q2asin, Q0asin);
}
if (dobig) {
// rx = polynomial_4 (x1, R0asin, R1asin, R2asin, R3asin, R4asin);
// sx = polynomial_4n(x1, S0asin, S1asin, S2asin, S3asin);
- rx = mul_add(x3,R3asin,x2*R2asin) + mul_add(x4,R4asin,mul_add(x1,R1asin,R0asin));
- sx = mul_add(x3,S3asin,x4) + mul_add(x2,S2asin,mul_add(x1,S1asin,S0asin));
+ rx = mul_add(x3, R3asin, x2*R2asin) + mul_add(x4, R4asin, mul_add(x1, R1asin, R0asin));
+ sx = mul_add(x3, S3asin, x4) + mul_add(x2, S2asin, mul_add(x1, S1asin, S0asin));
}
// select and divide outside branches to avoid dividing twice
vx = select(big, rx, px);
wx = select(big, sx, qx);
- y1 = vx/wx * x1;
+ y1 = vx / wx * x1;
// results for big
if (dobig) { // avoid square root if all are small
- xb = sqrt(x1+x1); // this produces NAN if xa > 1 so we don't need a special case for xa > 1
+ xb = sqrt(x1 + x1); // this produces NAN if xa > 1 so we don't need a special case for xa > 1
z1 = mul_add(xb, y1, xb); // yb = xb * y1; z1 = xb + yb;
}
@@ -694,21 +698,21 @@ static inline Vec2d acos(Vec2d const & x) {
}
#if MAX_VECTOR_SIZE >= 256
-static inline Vec4d asin(Vec4d const & x) {
+static inline Vec4d asin(Vec4d const & x) {
return asin_d<Vec4d, Vec4db, 0>(x);
}
-static inline Vec4d acos(Vec4d const & x) {
+static inline Vec4d acos(Vec4d const & x) {
return asin_d<Vec4d, Vec4db, 1>(x);
}
#endif // MAX_VECTOR_SIZE >= 256
#if MAX_VECTOR_SIZE >= 512
-static inline Vec8d asin(Vec8d const & x) {
+static inline Vec8d asin(Vec8d const & x) {
return asin_d<Vec8d, Vec8db, 0>(x);
}
-static inline Vec8d acos(Vec8d const & x) {
+static inline Vec8d acos(Vec8d const & x) {
return asin_d<Vec8d, Vec8db, 1>(x);
}
#endif // MAX_VECTOR_SIZE >= 512
@@ -723,7 +727,7 @@ static inline Vec8d acos(Vec8d const & x) {
// AC: 0 = asin, 1 = acos
// Parameters:
// x = input x
-template<class VTYPE, class BVTYPE, int AC>
+template<class VTYPE, class BVTYPE, int AC>
static inline VTYPE asin_f(VTYPE const & x) {
// define constants
@@ -736,11 +740,11 @@ static inline VTYPE asin_f(VTYPE const & x) {
VTYPE xa, x1, x2, x3, x4, xb, z, z1, z2;
BVTYPE big;
- xa = abs(x);
+ xa = abs(x);
big = xa > 0.5f;
x1 = 0.5f * (1.0f - xa);
- x2 = xa * xa;
+ x2 = xa * xa;
x3 = select(big, x1, x2);
//if (horizontal_or(big))
@@ -757,12 +761,12 @@ static inline VTYPE asin_f(VTYPE const & x) {
if (AC) { // acos
z1 = select(x < 0., float(VM_PI) - z1, z1);
z2 = float(VM_PI_2) - sign_combine(z, x);
- z = select(big, z1, z2);
+ z = select(big, z1, z2);
}
else { // asin
z1 = float(VM_PI_2) - z1;
- z = select(big, z1, z);
- z = sign_combine(z, x);
+ z = select(big, z1, z);
+ z = sign_combine(z, x);
}
return z;
@@ -779,19 +783,19 @@ static inline Vec4f acos(Vec4f const & x) {
}
#if MAX_VECTOR_SIZE >= 256
-static inline Vec8f asin(Vec8f const & x) {
+static inline Vec8f asin(Vec8f const & x) {
return asin_f<Vec8f, Vec8fb, 0>(x);
}
-static inline Vec8f acos(Vec8f const & x) {
+static inline Vec8f acos(Vec8f const & x) {
return asin_f<Vec8f, Vec8fb, 1>(x);
}
#endif // MAX_VECTOR_SIZE >= 256
#if MAX_VECTOR_SIZE >= 512
-static inline Vec16f asin(Vec16f const & x) {
+static inline Vec16f asin(Vec16f const & x) {
return asin_f<Vec16f, Vec16fb, 0>(x);
}
-static inline Vec16f acos(Vec16f const & x) {
+static inline Vec16f acos(Vec16f const & x) {
return asin_f<Vec16f, Vec16fb, 1>(x);
}
#endif // MAX_VECTOR_SIZE >= 512
@@ -809,7 +813,7 @@ static inline Vec16f acos(Vec16f const & x) {
// result is between -pi/2 and +pi/2 when x > 0
// result is between -pi and -pi/2 or between pi/2 and pi when x < 0 for atan2
// atan2(0,0) returns 0 by convention (the intermediate y/x division yields NAN, which is overridden before returning)
-template<class VTYPE, class BVTYPE, int T2>
+template<class VTYPE, class BVTYPE, int T2>
static inline VTYPE atan_d(VTYPE const & y, VTYPE const & x) {
// define constants
@@ -818,17 +822,17 @@ static inline VTYPE atan_d(VTYPE const & y, VTYPE const & x) {
const double MOREBITSO2 = MOREBITS * 0.5;
const double T3PO8 = VM_SQRT2 + 1.; // 2.41421356237309504880;
- const double P4atan = -8.750608600031904122785E-1;
- const double P3atan = -1.615753718733365076637E1;
- const double P2atan = -7.500855792314704667340E1;
- const double P1atan = -1.228866684490136173410E2;
- const double P0atan = -6.485021904942025371773E1;
+ const double P4atan = -8.750608600031904122785E-1;
+ const double P3atan = -1.615753718733365076637E1;
+ const double P2atan = -7.500855792314704667340E1;
+ const double P1atan = -1.228866684490136173410E2;
+ const double P0atan = -6.485021904942025371773E1;
- const double Q4atan = 2.485846490142306297962E1;
- const double Q3atan = 1.650270098316988542046E2;
- const double Q2atan = 4.328810604912902668951E2;
- const double Q1atan = 4.853903996359136964868E2;
- const double Q0atan = 1.945506571482613964425E2;
+ const double Q4atan = 2.485846490142306297962E1;
+ const double Q3atan = 1.650270098316988542046E2;
+ const double Q2atan = 4.328810604912902668951E2;
+ const double Q1atan = 4.853903996359136964868E2;
+ const double Q0atan = 1.945506571482613964425E2;
VTYPE t, x1, x2, y1, y2, s, fac, a, b, z, zz, px, qx, re; // data vectors
BVTYPE swapxy, notbig, notsmal; // boolean vectors
@@ -840,8 +844,8 @@ static inline VTYPE atan_d(VTYPE const & y, VTYPE const & x) {
swapxy = (y1 > x1);
// swap x and y if y1 > x1
x2 = select(swapxy, y1, x1);
- y2 = select(swapxy, x1, y1);
- t = y2 / x2; // x = y = 0 gives NAN here
+ y2 = select(swapxy, x1, y1);
+ t = y2 / x2; // x = y = 0 gives NAN here
}
else { // atan(y)
t = abs(y);
@@ -850,7 +854,7 @@ static inline VTYPE atan_d(VTYPE const & y, VTYPE const & x) {
// small: t < 0.66
// medium: 0.66 <= t <= 2.4142 (1+sqrt(2))
// big: t > 2.4142
- notbig = t <= T3PO8; // t <= 2.4142
+ notbig = t <= T3PO8; // t <= 2.4142
notsmal = t >= 0.66; // t >= 0.66
s = select(notbig, VTYPE(VM_PI_4), VTYPE(VM_PI_2));
@@ -869,7 +873,7 @@ static inline VTYPE atan_d(VTYPE const & y, VTYPE const & x) {
zz = z * z;
- px = polynomial_4 (zz, P0atan, P1atan, P2atan, P3atan, P4atan);
+ px = polynomial_4(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
qx = polynomial_5n(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
re = mul_add(px / qx, z * zz, z); // re = (px / qx) * (z * zz) + z;
@@ -878,7 +882,7 @@ static inline VTYPE atan_d(VTYPE const & y, VTYPE const & x) {
if (T2) { // atan2(y,x)
// move back in place
re = select(swapxy, VM_PI_2 - re, re);
- re = select(x < 0., VM_PI - re, re);
+ re = select(x < 0., VM_PI - re, re);
re = select((x | y) == 0., 0., re); // atan2(0,0) = 0 by convention
}
// get sign bit
@@ -931,13 +935,13 @@ static inline Vec8d atan(Vec8d const & y) {
// result is between -pi/2 and +pi/2 when x > 0
// result is between -pi and -pi/2 or between pi/2 and pi when x < 0 for atan2
// atan2(0,0) gives NAN. Future versions may give 0
-template<class VTYPE, class BVTYPE, int T2>
+template<class VTYPE, class BVTYPE, int T2>
static inline VTYPE atan_f(VTYPE const & y, VTYPE const & x) {
// define constants
- const float P3atanf = 8.05374449538E-2f;
+ const float P3atanf = 8.05374449538E-2f;
const float P2atanf = -1.38776856032E-1f;
- const float P1atanf = 1.99777106478E-1f;
+ const float P1atanf = 1.99777106478E-1f;
const float P0atanf = -3.33329491539E-1f;
VTYPE t, x1, x2, y1, y2, s, a, b, z, zz, re; // data vectors
@@ -953,7 +957,7 @@ static inline VTYPE atan_f(VTYPE const & y, VTYPE const & x) {
y2 = select(swapxy, x1, y1);
// do we need to protect against x = y = 0? It will just produce NAN, probably without delay
- t = y2 / x2;
+ t = y2 / x2;
}
else { // atan(y)
t = abs(y);
@@ -963,8 +967,8 @@ static inline VTYPE atan_f(VTYPE const & y, VTYPE const & x) {
// medium: 0.4142 <= t <= 2.4142
// big: t > 2.4142 (not for atan2)
if (!T2) { // atan(y)
- notsmal = t >= float(VM_SQRT2-1.); // t >= tan pi/8
- notbig = t <= float(VM_SQRT2+1.); // t <= tan 3pi/8
+ notsmal = t >= float(VM_SQRT2 - 1.); // t >= tan pi/8
+ notbig = t <= float(VM_SQRT2 + 1.); // t <= tan 3pi/8
s = select(notbig, VTYPE(float(VM_PI_4)), VTYPE(float(VM_PI_2)));
s = notsmal & s; // select(notsmal, s, 0.);
@@ -981,7 +985,7 @@ static inline VTYPE atan_f(VTYPE const & y, VTYPE const & x) {
else { // atan2(y,x)
// small: z = t / 1.0;
// medium: z = (t-1.0) / (t+1.0);
- notsmal = t >= float(VM_SQRT2-1.);
+ notsmal = t >= float(VM_SQRT2 - 1.);
a = if_add(notsmal, t, -1.f);
b = if_add(notsmal, 1.f, t);
s = notsmal & VTYPE(float(VM_PI_4));
@@ -997,7 +1001,7 @@ static inline VTYPE atan_f(VTYPE const & y, VTYPE const & x) {
if (T2) { // atan2(y,x)
// move back in place
re = select(swapxy, float(VM_PI_2) - re, re);
- re = select(x < 0., float(VM_PI) - re, re);
+ re = select(x < 0., float(VM_PI) - re, re);
re = select((x | y) == 0.f, 0.f, re); // atan2(0,0) = 0 by convention
}
// get sign bit
@@ -1038,4 +1042,8 @@ static inline Vec16f atan(Vec16f const & y) {
#endif // MAX_VECTOR_SIZE >= 512
+#ifdef VCL_NAMESPACE
+}
+#endif
+
#endif
diff --git a/vectorf64.h b/vectorf64.h
new file mode 100644
index 0000000..ed8f73e
--- /dev/null
+++ b/vectorf64.h
@@ -0,0 +1,377 @@
+/*
+
+ Scalar (single-element) vector class for 64-bit floating point numbers
+ Based on vectorclass (VCL), mainly for templating with double
+
+ @author: minh
+ @date: 2016-09-24
+
+
+*/
+
+#ifndef VECTORF64_H
+#define VECTORF64_H
+
+
+//typedef int64_t Vec1db;
+//typedef bool Vec1db;
+
+
+/*****************************************************************************
+*
+* Vec1db: Vector of 1 Boolean for use with Vec1d
+*
+*****************************************************************************/
+
+class Vec1db {
+public:
+ bool xmm; // The single Boolean element held by this 1-element vector
+ // Default constructor: leaves xmm uninitialized
+ Vec1db() {
+ }
+ // Constructor storing the scalar value b as the only element:
+ Vec1db(bool b) {
+ xmm = b;
+ }
+ // Assignment from a scalar bool: replaces the only element with b
+ Vec1db & operator = (bool b) {
+ *this = Vec1db(b); // builds a temporary from b and copy-assigns it
+ return *this;
+ }
+private: // Declared private and not defined here: prevents constructing or assigning from int, etc.
+ Vec1db(int b);
+ Vec1db & operator = (int x);
+public:
+ // Member function to set the element; index is ignored because there is only one element
+ // Returns a const reference to this object
+ Vec1db const & insert(uint32_t index, bool value) {
+ xmm = value;
+ return *this;
+ }
+ // Member function to read the element; index is ignored
+ bool extract(uint32_t index) const {
+ return xmm;
+ }
+ // Read-only element access via []; forwards to extract(), so index is ignored as well
+ bool operator [] (uint32_t index) const {
+ return extract(index);
+ }
+ static int size() { // number of elements in the vector: always 1
+ return 1;
+ }
+};
+
+
+/*****************************************************************************
+*
+* Operators for Vec1db
+*
+*****************************************************************************/
+
+// vector operators & and && : logical AND of the two Boolean elements (both overloads compute a.xmm && b.xmm)
+static inline Vec1db operator & (Vec1db const & a, Vec1db const & b) {
+ return Vec1db(a.xmm && b.xmm);
+}
+static inline Vec1db operator && (Vec1db const & a, Vec1db const & b) { // overloaded &&: both operands are evaluated before the call
+ return Vec1db(a.xmm && b.xmm);
+}
+
+// vector operator &= : AND b into a in place (via operator &) and return a reference to a
+static inline Vec1db & operator &= (Vec1db & a, Vec1db const & b) {
+ a = a & b;
+ return a;
+}
+
+
+
+
+/*****************************************************************************
+*
+* Vec1d: Vector of 1 double precision floating point values
+*
+*****************************************************************************/
+
+class Vec1d {
+public:
+ double xmm; // The single double-precision element held by this 1-element vector
+ // Default constructor: leaves xmm uninitialized
+ Vec1d() {
+ }
+ // Constructor storing d as the only element (not explicit, so a double converts to Vec1d implicitly):
+ Vec1d(double d) {
+ xmm = d;
+ }
+
+ // Member function to load the element from *p; returns a reference to this object
+ Vec1d & load(double const * p) {
+ xmm = *p;
+ return *this;
+ }
+ // Member function to load the element from *p; same body as load, but returns a const reference
+ Vec1d const & load_a(double const * p) {
+ xmm = *p;
+ return *this;
+ }
+ // Partial load: n == 1 loads *p, every other n sets the element to 0 without reading p
+ Vec1d & load_partial(int n, double const * p) {
+ switch (n) {
+ case 1:
+ xmm = *p; break;
+ default:
+ xmm = 0.0;
+ }
+ return *this;
+ }
+ // Member function to store the element into *p
+ void store(double * p) const {
+ *p = xmm;
+ }
+ // Member function to store the element into *p; same body as store
+ void store_a(double * p) const {
+ *p = xmm;
+ }
+
+ // cut off vector to n elements: with a single element only n == 0 has an effect (the element is set to zero)
+ Vec1d & cutoff(int n) {
+ if (n == 0)
+ xmm = 0.0;
+ return *this;
+ }
+
+ // Member function to set the element; index is ignored because there is only one element
+ // Returns a const reference to this object
+ Vec1d const & insert(uint32_t index, double value) {
+ xmm = value;
+ return *this;
+ };
+ // Member function to read the element; index is ignored
+ double extract(uint32_t index) const {
+ return xmm;
+ }
+ // Read-only element access via []; forwards to extract(), so index is ignored as well.
+ // Operator [] can only read an element, not write.
+ double operator [] (uint32_t index) const {
+ return extract(index);
+ }
+
+ static int size() { // number of elements in the vector: always 1
+ return 1;
+ }
+};
+
+/*****************************************************************************
+*
+* Operators for Vec1d
+*
+*****************************************************************************/
+
+// vector operator + : add the two elements
+static inline Vec1d operator + (Vec1d const & a, Vec1d const & b) {
+ return Vec1d(a.xmm + b.xmm);
+}
+
+// vector operator + : add vector and scalar (the double is wrapped in a Vec1d, then the Vec1d + Vec1d overload is used)
+static inline Vec1d operator + (Vec1d const & a, double b) {
+ return a + Vec1d(b);
+}
+static inline Vec1d operator + (double a, Vec1d const & b) {
+ return Vec1d(a) + b;
+}
+
+// vector operator += : add b to a in place and return a reference to a
+static inline Vec1d & operator += (Vec1d & a, Vec1d const & b) {
+ a = a + b;
+ return a;
+}
+
+// postfix operator ++ : add 1.0 to a and return the value a held before the increment
+static inline Vec1d operator ++ (Vec1d & a, int) {
+ Vec1d a0 = a; // copy of the old value, returned to the caller
+ a = a + 1.0;
+ return a0;
+}
+
+// prefix operator ++ : add 1.0 to a and return a reference to the updated a
+static inline Vec1d & operator ++ (Vec1d & a) {
+ a = a + 1.0;
+ return a;
+}
+
+// vector operator - : subtract the element of b from the element of a
+static inline Vec1d operator - (Vec1d const & a, Vec1d const & b) {
+ return Vec1d(a.xmm - b.xmm);
+}
+
+// vector operator - : subtract vector and scalar (the double is wrapped in a Vec1d, then the Vec1d - Vec1d overload is used)
+static inline Vec1d operator - (Vec1d const & a, double b) {
+ return a - Vec1d(b);
+}
+static inline Vec1d operator - (double a, Vec1d const & b) {
+ return Vec1d(a) - b;
+}
+
+// vector operator - : unary minus
+// Negates the element with the built-in unary minus on double
+static inline Vec1d operator - (Vec1d const & a) {
+ return Vec1d(-a.xmm);
+}
+
+// vector operator -= : subtract b from a in place and return a reference to a
+static inline Vec1d & operator -= (Vec1d & a, Vec1d const & b) {
+ a = a - b;
+ return a;
+}
+
+// postfix operator -- : subtract 1.0 from a and return the value a held before the decrement
+static inline Vec1d operator -- (Vec1d & a, int) {
+ Vec1d a0 = a; // copy of the old value, returned to the caller
+ a = a - 1.0;
+ return a0;
+}
+
+// prefix operator -- : subtract 1.0 from a and return a reference to the updated a
+static inline Vec1d & operator -- (Vec1d & a) {
+ a = a - 1.0;
+ return a;
+}
+
+// vector operator * : multiply the two elements
+static inline Vec1d operator * (Vec1d const & a, Vec1d const & b) {
+ return Vec1d(a.xmm * b.xmm);
+}
+
+// vector operator * : multiply vector and scalar (the double is wrapped in a Vec1d, then the Vec1d * Vec1d overload is used)
+static inline Vec1d operator * (Vec1d const & a, double b) {
+ return a * Vec1d(b);
+}
+static inline Vec1d operator * (double a, Vec1d const & b) {
+ return Vec1d(a) * b;
+}
+
+// vector operator *= : multiply a by b in place and return a reference to a
+static inline Vec1d & operator *= (Vec1d & a, Vec1d const & b) {
+ a = a * b;
+ return a;
+}
+
+// vector operator / : divide the element of a by the element of b (floating point division)
+static inline Vec1d operator / (Vec1d const & a, Vec1d const & b) {
+ return Vec1d(a.xmm/b.xmm);
+}
+
+// vector operator / : divide vector and scalar (the double is wrapped in a Vec1d, then the Vec1d / Vec1d overload is used)
+static inline Vec1d operator / (Vec1d const & a, double b) {
+ return a / Vec1d(b);
+}
+static inline Vec1d operator / (double a, Vec1d const & b) {
+ return Vec1d(a) / b;
+}
+
+// vector operator /= : divide a by b in place and return a reference to a
+static inline Vec1d & operator /= (Vec1d & a, Vec1d const & b) {
+ a = a / b;
+ return a;
+}
+
+// vector operator == : returns a Vec1db that is true when the elements compare equal
+static inline Vec1db operator == (Vec1d const & a, Vec1d const & b) {
+ return Vec1db(a.xmm == b.xmm);
+}
+
+// vector operator != : returns a Vec1db that is true when the elements compare unequal
+static inline Vec1db operator != (Vec1d const & a, Vec1d const & b) {
+ return Vec1db(a.xmm != b.xmm);
+}
+
+// vector operator < : returns a Vec1db that is true when a's element is less than b's
+static inline Vec1db operator < (Vec1d const & a, Vec1d const & b) {
+ return Vec1db(a.xmm < b.xmm);
+}
+
+// vector operator <= : returns a Vec1db that is true when a's element is less than or equal to b's
+static inline Vec1db operator <= (Vec1d const & a, Vec1d const & b) {
+ return Vec1db(a.xmm <= b.xmm);
+}
+
+// vector operator > : returns a Vec1db that is true when a's element is greater than b's
+static inline Vec1db operator > (Vec1d const & a, Vec1d const & b) {
+ return b < a; // a > b is evaluated as b < a with the operands swapped
+}
+
+// vector operator >= : returns a Vec1db that is true when a's element is greater than or equal to b's
+static inline Vec1db operator >= (Vec1d const & a, Vec1d const & b) {
+ return b <= a; // a >= b is evaluated as b <= a with the operands swapped
+}
+
+// General arithmetic functions, etc.
+
+// Horizontal add: sum of all vector elements; with a single element this is the element itself.
+static inline double horizontal_add (Vec1d const & a) {
+ return a.xmm;
+}
+
+// function max: a < b ? b : a (same selection rule as std::max), compared on the raw doubles
+static inline Vec1d max(Vec1d const & a, Vec1d const & b) {
+ return Vec1d(a.xmm < b.xmm ? b.xmm : a.xmm); // no unqualified max(double,double) call: without std::max in scope that would convert back to Vec1d and call this overload again
+}
+
+// function min: b < a ? b : a (same selection rule as std::min), compared on the raw doubles
+static inline Vec1d min(Vec1d const & a, Vec1d const & b) {
+ return Vec1d(b.xmm < a.xmm ? b.xmm : a.xmm); // no unqualified min(double,double) call: without std::min in scope that would convert back to Vec1d and call this overload again
+}
+
+
+// function abs: absolute value of the single element
+// Computed by calling fabs on the element; this header does not include the math header itself
+static inline Vec1d abs(Vec1d const & a) {
+ return Vec1d(fabs(a.xmm));
+}
+
+// function log: logarithm of the single element
+// Calls log on the double element; relies on a log(double) overload declared by a math header included elsewhere
+static inline Vec1d log(Vec1d const & a) {
+ return Vec1d(log(a.xmm));
+}
+
+// Multiply-add helper functions (each is written as a separate multiply followed by an add or subtract via the Vec1d operators)
+
+// Multiply and add: returns a * b + c
+static inline Vec1d mul_add(Vec1d const & a, Vec1d const & b, Vec1d const & c) {
+ return a * b + c;
+}
+
+// Multiply and subtract: returns a * b - c
+static inline Vec1d mul_sub(Vec1d const & a, Vec1d const & b, Vec1d const & c) {
+ return a * b - c;
+}
+
+// Multiply and inverse subtract: returns c - a * b
+static inline Vec1d nmul_add(Vec1d const & a, Vec1d const & b, Vec1d const & c) {
+ return c - a * b;
+}
+
+
+/*****************************************************************************
+*
+* Horizontal Boolean functions
+*
+*****************************************************************************/
+
+// horizontal_and. Returns true if all elements are true; with a single element this is the element itself
+static inline bool horizontal_and (Vec1db const & a) {
+ return a.xmm;
+}
+
+// horizontal_or. Returns true if at least one element is true; with a single element this is the element itself
+static inline bool horizontal_or (Vec1db const & a) {
+ return a.xmm;
+}
+
+// function exp: exponential of the single element, computed by calling exp on the double (relies on an exp(double) overload declared elsewhere)
+static inline Vec1d exp(Vec1d const & x) {
+ return Vec1d(exp(x.xmm));
+}
+
+
+
+#endif //VECTORF64_H
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git
More information about the debian-med-commit
mailing list