[med-svn] [raxml-ng] 01/02: New upstream version 0.1.0

Mon Mar 20 12:38:06 UTC 2017

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository raxml-ng.

commit 4e776aae51363f416ba1eaf8f13c83d0dd8e0412
Author: Andreas Tille <tille at debian.org>
Date:   Mon Mar 20 13:37:20 2017 +0100

    New upstream version 0.1.0
---
 .gitignore                           |  38 ++
 .gitmodules                          |   3 +
 CMakeLists.txt                       | 101 +++++
 README.md                            |   2 +
 libs/CMakeLists.txt                  |  13 +
 src/CMakeLists.txt                   |  47 +++
 src/Checkpoint.cpp                   | 260 ++++++++++++
 src/Checkpoint.hpp                   | 113 +++++
 src/CommandLineParser.cpp            | 487 +++++++++++++++++++++
 src/CommandLineParser.hpp            |  39 ++
 src/LoadBalancer.cpp                 | 187 ++++++++
 src/LoadBalancer.hpp                 |  38 ++
 src/MSA.cpp                          | 289 +++++++++++++
 src/MSA.hpp                          |  91 ++++
 src/Model.cpp                        | 553 ++++++++++++++++++++++++
 src/Model.hpp                        | 135 ++++++
 src/Optimizer.cpp                    | 245 +++++++++++
 src/Optimizer.hpp                    |  23 +
 src/Options.cpp                      | 152 +++++++
 src/Options.hpp                      |  94 +++++
 src/ParallelContext.cpp              | 298 +++++++++++++
 src/ParallelContext.hpp              |  87 ++++
 src/PartitionAssignment.cpp          |  32 ++
 src/PartitionAssignment.hpp          |  80 ++++
 src/PartitionInfo.cpp                | 128 ++++++
 src/PartitionInfo.hpp                | 119 ++++++
 src/PartitionedMSA.cpp               | 128 ++++++
 src/PartitionedMSA.hpp               |  58 +++
 src/Tree.cpp                         | 265 ++++++++++++
 src/Tree.hpp                         | 104 +++++
 src/TreeInfo.cpp                     | 471 +++++++++++++++++++++
 src/TreeInfo.hpp                     |  74 ++++
 src/bootstrap/BootstrapGenerator.cpp |  60 +++
 src/bootstrap/BootstrapGenerator.hpp |  31 ++
 src/bootstrap/BootstrapTree.cpp      | 106 +++++
 src/bootstrap/BootstrapTree.hpp      |  27 ++
 src/common.h                         |  75 ++++
 src/constants.hpp                    |  27 ++
 src/io/NewickStream.cpp              |  35 ++
 src/io/binary_io.cpp                 | 102 +++++
 src/io/binary_io.hpp                 | 162 +++++++
 src/io/file_io.hpp                   |  88 ++++
 src/io/msa_streams.cpp               | 278 ++++++++++++
 src/io/part_info.cpp                 |  79 ++++
 src/log.cpp                          |  88 ++++
 src/log.hpp                          | 109 +++++
 src/main.cpp                         | 798 +++++++++++++++++++++++++++++++++++
 src/sysutil.cpp                      | 274 ++++++++++++
 src/types.hpp                        | 104 +++++
 src/version.h                        |   2 +
 test/src/CMakeLists.txt              |  49 +++
 test/src/CommandLineParserTest.cpp   | 158 +++++++
 test/src/ModelTest.cpp               |  81 ++++
 test/src/RaxmlTest.hpp               |  39 ++
 test/src/mainTest.cpp                |  34 ++
 55 files changed, 7560 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d7931fd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,38 @@
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
+
+# binaries
+build/
+bin/
+
+# Eclipse project files
+.cproject
+.project
+.settings/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..5bef3cc
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "libs/pll-modules"]
+	path = libs/pll-modules
+	url = https://github.com/ddarriba/pll-modules.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..7bb0caa
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,101 @@
+cmake_minimum_required (VERSION 2.8.10 FATAL_ERROR)
+
+set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
+set(CMAKE_DISABLE_SOURCE_CHANGES  ON)
+
+set (ENABLE_PTHREADS ON CACHE BOOL "Enable multi-threading support (PTHREADS)")
+set (ENABLE_MPI ON CACHE BOOL "Enable MPI support")
+set (ENABLE_VCF OFF)
+
+# leave this disabled to build a portable binary 
+# (don't worry, libpll will still have full SIMD support!)
+set (ENABLE_RAXML_SIMD OFF)
+
+# build a portable static binary
+set(STATIC_BUILD ON)
+
+project (raxml-ng C CXX)
+
+#check for minimum compiler versions
+message(STATUS "Compier: ${CMAKE_CXX_COMPILER_ID}")
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+  set(RAXML_COMPILER_TARGET_VERSION "4.8")
+  if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS RAXML_COMPILER_TARGET_VERSION)
+    message (FATAL_ERROR "${CMAKE_CXX_COMPILER_ID} compiler too old! Minimum required: ${RAXML_COMPILER_TARGET_VERSION}")
+  endif()
+elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  set(RAXML_COMPILER_TARGET_VERSION "3.8")
+  if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS RAXML_COMPILER_TARGET_VERSION)
+    message (FATAL_ERROR "${CMAKE_CXX_COMPILER_ID} compiler too old! Minimum required: ${RAXML_COMPILER_TARGET_VERSION}")
+  endif()
+endif()
+
+set (raxml-ng_VERSION_MAJOR 0)
+set (raxml-ng_VERSION_MINOR 1)
+
+set (CMAKE_BUILD_TYPE DEBUG)
+# set (CMAKE_BUILD_TYPE RELEASE)
+# set (CMAKE_VERBOSE_MAKEFILE ON)
+
+message (STATUS "Building ${CMAKE_BUILD_TYPE}")
+
+set (WARN_FLAGS               "-Wall -Wextra")
+set (CMAKE_CXX_FLAGS          "-std=c++11 ${WARN_FLAGS}")
+
+set (CMAKE_CXX_FLAGS_DEBUG    "-g")
+set (CMAKE_CXX_FLAGS_RELEASE  "-O3 -DNDEBUG")
+
+if (ENABLE_RAXML_SIMD)
+  include(CheckCXXCompilerFlag)
+  CHECK_CXX_COMPILER_FLAG(-mavx HAS_AVX)
+  CHECK_CXX_COMPILER_FLAG(-msse3 HAS_SSE3)
+  if(HAS_AVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -D__AVX")
+  elseif(HAS_SSE3)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3 -D__SSE3")
+  endif()
+endif()
+
+if(ENABLE_PTHREADS)
+  find_package(Threads REQUIRED)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_RAXML_PTHREADS -pthread")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_RAXML_PTHREADS -pthread")
+endif()
+
+if(ENABLE_MPI)
+  find_package(MPI)
+  if(MPI_CXX_FOUND)
+    # set( ENV{OMPI_CXX}            "clang++" PARENT_SCOPE )
+    set(CMAKE_CXX_COMPILER       "mpicxx")
+    include_directories(${MPI_CXX_INCLUDE_PATH})
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_CXX_COMPILER_FLAGS} -D_RAXML_MPI")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MPI_CXX_LINK_FLAGS}")
+
+    # disable static build for MPI
+    set(STATIC_BUILD OFF)
+  endif()
+endif()
+
+message(STATUS "Using flags: ${CMAKE_CXX_FLAGS}")
+
+#include_directories(${PROJECT_SOURCE_DIR}/libs/cereal/include)
+#include_directories(${PROJECT_SOURCE_DIR}/libs/cxxopts/src)
+#include_directories(${PROJECT_SOURCE_DIR}/libs/include/libpll)
+
+
+set(pll_dir ${PROJECT_SOURCE_DIR}/libs/pll-modules/libs/libpll/src)
+set(pll_binary_dir ${PROJECT_SOURCE_DIR}/libs/pll-modules/src/binary)
+set(pll_optimize_dir ${PROJECT_SOURCE_DIR}/libs/pll-modules/src/optimize)
+set(pll_msa_dir ${PROJECT_SOURCE_DIR}/libs/pll-modules/src/msa)
+
+# build dependencies
+set(RAXML_LOCALDEPS_DIR ${PROJECT_BINARY_DIR}/localdeps)
+add_subdirectory(${PROJECT_SOURCE_DIR}/libs)
+
+include_directories(${RAXML_LOCALDEPS_DIR}/include)
+
+add_subdirectory(${PROJECT_SOURCE_DIR}/src)
+
+enable_testing()
+add_subdirectory(${PROJECT_SOURCE_DIR}/test/src)
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2d7524b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# raxml-ng
+RAxML Next Generation: faster, easier-to-use and more flexible
diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt
new file mode 100644
index 0000000..b548427
--- /dev/null
+++ b/libs/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+message(STATUS "Building dependencies in: ${RAXML_LOCALDEPS_DIR}")
+
+# build LIBPLL and LIBPLL-MODULES
+add_custom_command(
+    OUTPUT ${RAXML_LOCALDEPS_DIR}/lib/libpll.a
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/pll-modules
+    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/pll-modules/install-with-libpll.sh ${RAXML_LOCALDEPS_DIR}
+)
+
+add_custom_target(libpll ALL
+    DEPENDS ${RAXML_LOCALDEPS_DIR}/lib/libpll.a
+)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..4b8ebf1
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,47 @@
+file (GLOB_RECURSE raxml_sources ${PROJECT_SOURCE_DIR}/src/*.cpp)
+
+set (EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
+
+# enable static linking
+if (STATIC_BUILD)
+  SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
+  SET(BUILD_SHARED_LIBRARIES OFF)
+  SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
+
+  if(ENABLE_PTHREADS)
+    SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--whole-archive -lpthread -Wl,--no-whole-archive")
+  endif()
+endif()
+
+add_executable        (raxml_module ${raxml_sources})
+
+target_link_libraries (raxml_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_algorithm.a)
+target_link_libraries (raxml_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_binary.a)
+target_link_libraries (raxml_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_optimize.a)
+target_link_libraries (raxml_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_msa.a)
+target_link_libraries (raxml_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_tree.a)
+target_link_libraries (raxml_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_util.a)
+target_link_libraries (raxml_module ${RAXML_LOCALDEPS_DIR}/lib/libpll.a)
+target_link_libraries (raxml_module m)
+
+if(MPI_FOUND)
+target_link_libraries (raxml_module ${MPI_CXX_LIBRARIES})
+endif()
+
+if(MPI_COMPILE_FLAGS)
+  set_target_properties(raxml_module PROPERTIES
+  COMPILE_FLAGS "${MPI_COMPILE_FLAGS}")
+endif()
+
+if(MPI_LINK_FLAGS)
+  set_target_properties(raxml_module PROPERTIES
+    LINK_FLAGS "${MPI_LINK_FLAGS}")
+endif()
+
+if(MPI_FOUND)
+  set_target_properties (raxml_module PROPERTIES OUTPUT_NAME raxml-ng-mpi)
+else()
+  set_target_properties (raxml_module PROPERTIES OUTPUT_NAME raxml-ng)
+endif()
+
+set_target_properties (raxml_module PROPERTIES PREFIX "")
diff --git a/src/Checkpoint.cpp b/src/Checkpoint.cpp
new file mode 100644
index 0000000..eb1e170
--- /dev/null
+++ b/src/Checkpoint.cpp
@@ -0,0 +1,260 @@
+#include <stdio.h>
+
+#include "Checkpoint.hpp"
+#include "io/binary_io.hpp"
+#include "io/file_io.hpp"
+
+using namespace std;
+
+void CheckpointManager::write(const std::string& ckp_fname) const
+{
+  backup();
+
+  BinaryFileStream fs(ckp_fname, std::ios::out);
+
+  fs << _checkp;
+
+  remove_backup();
+}
+
+bool CheckpointManager::read(const std::string& ckp_fname)
+{
+  if (sysutil_file_exists(ckp_fname))
+  {
+    try
+    {
+      BinaryFileStream fs(ckp_fname, std::ios::in);
+
+      fs >> _checkp;
+
+      return true;
+    }
+    catch (runtime_error& e)
+    {
+      _checkp.search_state = SearchState();
+      LOG_DEBUG << "Error reading checkpoint: " << e.what() << endl;
+      return false;
+    }
+  }
+  else
+    return false;
+}
+
+void CheckpointManager::remove()
+{
+  if (sysutil_file_exists(_ckp_fname))
+    std::remove(_ckp_fname.c_str());
+}
+
+void CheckpointManager::backup() const
+{
+  if (sysutil_file_exists(_ckp_fname))
+    std::rename(_ckp_fname.c_str(), backup_fname().c_str());
+}
+
+void CheckpointManager::remove_backup() const
+{
+  if (sysutil_file_exists(backup_fname()))
+    std::remove(backup_fname().c_str());
+}
+
+SearchState& CheckpointManager::search_state()
+{
+  if (_active)
+    return _checkp.search_state;
+  else
+  {
+    _empty_search_state = SearchState();
+    return _empty_search_state;
+  }
+};
+
+void CheckpointManager::reset_search_state()
+{
+  ParallelContext::thread_barrier();
+
+  if (ParallelContext::master_thread())
+    _checkp.search_state = SearchState();
+
+  ParallelContext::thread_barrier();
+};
+
+void CheckpointManager::save_ml_tree()
+{
+  if (ParallelContext::master_thread())
+  {
+    _checkp.save_ml_tree();
+    if (_active)
+      write();
+  }
+}
+
+void CheckpointManager::save_bs_tree()
+{
+  if (ParallelContext::master_thread())
+  {
+    _checkp.save_bs_tree();
+    if (_active)
+      write();
+  }
+}
+
+void CheckpointManager::update_and_write(const TreeInfo& treeinfo)
+{
+  if (!_active)
+    return;
+
+  if (ParallelContext::master_thread())
+    _updated_models.clear();
+
+  ParallelContext::barrier();
+
+  for (auto p: treeinfo.parts_master())
+  {
+    /* we will modify a global map -> define critical section */
+    ParallelContext::UniqueLock lock;
+
+    assign(_checkp.models.at(p), treeinfo, p);
+
+    /* remember which models were updated but this rank ->
+     * will be used later to collect them at the master */
+    _updated_models.insert(p);
+  }
+
+  ParallelContext::barrier();
+
+  if (ParallelContext::num_ranks() > 1)
+    gather_model_params();
+
+  if (ParallelContext::master())
+  {
+    assign_tree(_checkp, treeinfo);
+    write();
+  }
+}
+
+void CheckpointManager::gather_model_params()
+{
+  /* send callback -> worker ranks */
+  auto worker_cb = [this](void * buf, size_t buf_size) -> int
+      {
+        BinaryStream bs((char*) buf, buf_size);
+        bs << _updated_models.size();
+        for (auto p: _updated_models)
+        {
+          bs << p << _checkp.models.at(p);
+        }
+        return (int) bs.pos();
+      };
+
+  /* receive callback -> master rank */
+  auto master_cb = [this](void * buf, size_t buf_size)
+     {
+       BinaryStream bs((char*) buf, buf_size);
+       auto model_count = bs.get<size_t>();
+       for (size_t m = 0; m < model_count; ++m)
+       {
+         size_t part_id;
+         bs >> part_id;
+
+         // read parameter estimates from binary stream
+         bs >> _checkp.models[part_id];
+       }
+     };
+
+  ParallelContext::mpi_gather_custom(worker_cb, master_cb);
+}
+
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const Checkpoint& ckp)
+{
+  stream << ckp.version;
+
+  // NB: accumulated runtime from past runs + current elapsed time
+  stream << ckp.elapsed_seconds + sysutil_elapsed_seconds();
+
+  stream << ckp.search_state;
+
+  stream << ckp.tree.topology();
+
+  stream << ckp.models.size();
+  for (const auto& m: ckp.models)
+    stream << m.first << m.second;
+
+  stream << ckp.ml_trees;
+
+  stream << ckp.bs_trees;
+
+  return stream;
+}
+
+BasicBinaryStream& operator>>(BasicBinaryStream& stream, Checkpoint& ckp)
+{
+  stream >> ckp.version;
+
+  if (ckp.version < CKP_MIN_SUPPORTED_VERSION)
+  {
+    throw runtime_error("Unsupported checkpoint file version!");
+  }
+
+  stream >> ckp.elapsed_seconds;
+
+  stream >> ckp.search_state;
+
+//  auto topol = fs.get<TreeTopology>();
+//
+//  printf("READ topology size: %u\n", topol.size());
+
+  ckp.tree.topology(stream.get<TreeTopology>());
+
+  size_t num_models, part_id;
+  stream >> num_models;
+  assert(num_models == ckp.models.size());
+  for (size_t m = 0; m < num_models; ++m)
+  {
+    stream >> part_id;
+    stream >> ckp.models[part_id];
+  }
+
+  stream >> ckp.ml_trees;
+
+  stream >> ckp.bs_trees;
+
+  return stream;
+}
+
+void assign_tree(Checkpoint& ckp, const TreeInfo& treeinfo)
+{
+  ckp.tree = treeinfo.tree();
+}
+
+void assign_model(Checkpoint& ckp, const TreeInfo& treeinfo, size_t index)
+{
+  assign(ckp.models.at(index), treeinfo, index);
+}
+
+void assign_models(Checkpoint& ckp, const TreeInfo& treeinfo)
+{
+  for (auto p: treeinfo.parts_master())
+    assign_model(ckp, treeinfo, p);
+}
+
+void assign(Checkpoint& ckp, const TreeInfo& treeinfo)
+{
+  assign_tree(ckp, treeinfo);
+  assign_models(ckp, treeinfo);
+}
+
+void assign(TreeInfo& treeinfo, const Checkpoint& ckp)
+{
+  treeinfo.tree(ckp.tree);
+
+  const pllmod_treeinfo_t& pll_treeinfo = treeinfo.pll_treeinfo();
+  for (auto& m: ckp.models)
+  {
+    if (!pll_treeinfo.partitions[m.first])
+      continue;
+
+    treeinfo.model(m.first, m.second);
+  }
+}
+
diff --git a/src/Checkpoint.hpp b/src/Checkpoint.hpp
new file mode 100644
index 0000000..7e5718e
--- /dev/null
+++ b/src/Checkpoint.hpp
@@ -0,0 +1,113 @@
+#ifndef RAXML_CHECKPOINT_HPP_
+#define RAXML_CHECKPOINT_HPP_
+
+#include "common.h"
+#include "TreeInfo.hpp"
+#include "io/binary_io.hpp"
+
+constexpr int CKP_VERSION = 1;
+constexpr int CKP_MIN_SUPPORTED_VERSION = 1;
+
+enum class CheckpointStep
+{
+  start,
+  brlenOpt,
+  modOpt1,
+  radiusDetect,
+  modOpt2,
+  fastSPR,
+  modOpt3,
+  slowSPR,
+  modOpt4,
+  finish
+};
+
+struct SearchState
+{
+  SearchState() : step(CheckpointStep::start), loglh(0.), iteration(0), fast_spr_radius(0) {}
+
+  CheckpointStep step;
+  double loglh;
+
+  int iteration;
+  spr_round_params spr_params;
+  int fast_spr_radius;
+};
+
+struct Checkpoint
+{
+  Checkpoint() : version(CKP_VERSION), elapsed_seconds(0.), search_state(), tree(), models() {}
+
+  Checkpoint(const Checkpoint& other) = delete;
+  Checkpoint& operator=(const Checkpoint& other) = delete;
+  Checkpoint(Checkpoint&& other) = default;
+  Checkpoint& operator=(Checkpoint&& other) = default;
+
+  int version;
+
+  double elapsed_seconds;
+
+  SearchState search_state;
+
+  Tree tree;
+  std::unordered_map<size_t, Model> models;
+
+  TreeCollection ml_trees;
+  TreeCollection bs_trees;
+
+  double loglh() const { return search_state.loglh; }
+
+  void save_ml_tree() { ml_trees.push_back(loglh(), tree); }
+  void save_bs_tree() { bs_trees.push_back(loglh(), tree); }
+};
+
+class CheckpointManager
+{
+public:
+  CheckpointManager(const std::string& ckp_fname) : _active(true), _ckp_fname(ckp_fname) {}
+
+  const Checkpoint& checkpoint() { return _checkp; }
+  void checkpoint(Checkpoint&& ckp) { _checkp = std::move(ckp); }
+
+  //TODO: this is not very elegant, but should do the job for now
+  SearchState& search_state();
+  void reset_search_state();
+
+  void enable() { _active = true; }
+  void disable() { _active = false; }
+
+  void update_and_write(const TreeInfo& treeinfo);
+
+  void save_ml_tree();
+  void save_bs_tree();
+
+  bool read() { return read(_ckp_fname); }
+  bool read(const std::string& ckp_fname);
+  void write() const { write(_ckp_fname); }
+  void write(const std::string& ckp_fname) const;
+
+  void remove();
+  void backup() const;
+  void remove_backup() const;
+
+private:
+  bool _active;
+  std::string _ckp_fname;
+  Checkpoint _checkp;
+  IDSet _updated_models;
+  SearchState _empty_search_state;
+
+  void gather_model_params();
+  std::string backup_fname() const { return _ckp_fname + ".bk"; }
+};
+
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const Checkpoint& ckp);
+BasicBinaryStream& operator>>(BasicBinaryStream& stream, Checkpoint& ckp);
+
+void assign_tree(Checkpoint& ckp, const TreeInfo& treeinfo);
+void assign_models(Checkpoint& ckp, const TreeInfo& treeinfo);
+
+void assign(Checkpoint& ckp, const TreeInfo& treeinfo);
+void assign(TreeInfo& treeinfo, const Checkpoint& ckp);
+
+#endif /* RAXML_CHECKPOINT_HPP_ */
diff --git a/src/CommandLineParser.cpp b/src/CommandLineParser.cpp
new file mode 100644
index 0000000..8a52a79
--- /dev/null
+++ b/src/CommandLineParser.cpp
@@ -0,0 +1,487 @@
+#include "CommandLineParser.hpp"
+
+#include <getopt.h>
+
+#ifdef _RAXML_PTHREADS
+#include <thread>
+#endif
+
+using namespace std;
+
+static struct option long_options[] =
+{
+  {"help",               no_argument,       0, 0 },  /*  0 */
+  {"version",            no_argument,       0, 0 },  /*  1 */
+  {"evaluate",           no_argument,       0, 0 },  /*  2 */
+  {"search",             no_argument,       0, 0 },  /*  3 */
+
+  {"msa",                required_argument, 0, 0 },  /*  4 */
+  {"tree",               required_argument, 0, 0 },  /*  5 */
+  {"prefix",             required_argument, 0, 0 },  /*  6 */
+  {"model",              required_argument, 0, 0 },  /*  7 */
+  {"data-type",          required_argument, 0, 0 },  /*  8 */
+
+  {"opt-model",          required_argument, 0, 0 },  /*  9 */
+  {"opt-branches",       required_argument, 0, 0 },  /*  10 */
+  {"prob-msa",           required_argument, 0, 0 },  /*  11 */
+  {"pat-comp",           required_argument, 0, 0 },  /*  12 */
+  {"tip-inner",          required_argument, 0, 0 },  /*  13 */
+  {"brlen",              required_argument, 0, 0 },  /*  14 */
+  {"spr-radius",         required_argument, 0, 0 },  /*  15 */
+  {"spr-cutoff",         required_argument, 0, 0 },  /*  16 */
+  {"lh-epsilon",         required_argument, 0, 0 },  /*  17 */
+
+  {"seed",               required_argument, 0, 0 },  /*  18 */
+  {"threads",            required_argument, 0, 0 },  /*  19 */
+  {"simd",               required_argument, 0, 0 },  /*  20 */
+
+  {"msa-format",         required_argument, 0, 0 },  /*  21 */
+  {"rate-scalers",       required_argument, 0, 0 },  /*  22 */
+  {"log",                required_argument, 0, 0 },  /*  23 */
+
+  {"bootstrap",          no_argument,       0, 0 },  /*  24 */
+  {"all",                no_argument,       0, 0 },  /*  25 */
+  {"bs-trees",           required_argument, 0, 0 },  /*  26 */
+  {"redo",               no_argument,       0, 0 },  /*  27 */
+
+  { 0, 0, 0, 0 }
+};
+
+static std::string get_cmdline(int argc, char** argv)
+{
+  ostringstream s;
+  for (int i = 0; i < argc; ++i)
+    s << argv[i] << (i < argc-1 ? " " : "");
+  return s.str();
+}
+
+void CommandLineParser::parse_options(int argc, char** argv, Options &opts)
+{
+  opts.cmdline = get_cmdline(argc, argv);
+
+  /* if no command specified, default to --search (or --help if no args were given) */
+  opts.command = (argc > 1) ? Command::search : Command::help;
+  opts.start_tree = StartingTree::random;
+  opts.random_seed = (long)time(NULL);
+
+  /* compress alignment patterns by default */
+  opts.use_pattern_compression = true;
+  /* use tip-inner case optimization by default */
+  opts.use_tip_inner = true;
+
+  /* do not use per-rate-category CLV scalers */
+  opts.use_rate_scalers = false;
+
+  /* use probabilistic MSA _if available_ (e.g. CATG file was provided) */
+  opts.use_prob_msa = true;
+
+  /* optimize model and branch lengths */
+  opts.optimize_model = true;
+  opts.optimize_brlen = true;
+
+  /* data type: default autodetect */
+//  useropt->data_type = RAXML_DATATYPE_AUTO;
+
+  /* initialize LH epsilon with default value */
+  opts.lh_epsilon = DEF_LH_EPSILON;
+
+  /* default: autodetect best SPR radius */
+  opts.spr_radius = -1;
+  opts.spr_cutoff = 1.0;
+
+  /* default: scaled branch lengths */
+  opts.brlen_linkage = PLLMOD_TREE_BRLEN_LINKED;
+
+  /* use all available cores per default */
+#if defined(_RAXML_PTHREADS) && !defined(_RAXML_MPI)
+  opts.num_threads = std::max(1u, std::thread::hardware_concurrency());
+#else
+  opts.num_threads = 1;
+#endif
+
+//  opts.model_file = "GTR+G+F";
+  opts.model_file = "";
+
+  // autodetect CPU instruction set and use respective SIMD kernels
+  opts.simd_arch = sysutil_simd_autodetect();
+
+  opts.num_searches = 0;
+
+  opts.redo_mode = false;
+
+  bool log_level_set = false;
+
+  int option_index = 0;
+  int c;
+  int num_commands = 0;
+
+  /* getopt_long_only() uses this global variable to track progress;
+   * need this re-initialization to make function re-enterable... */
+  optind = 0;
+
+  while ((c = getopt_long_only(argc, argv, "", long_options, &option_index)) == 0)
+  {
+    switch (option_index)
+    {
+      case 0:
+        opts.command = Command::help;
+        num_commands++;
+        break;
+
+      case 1:
+        opts.command = Command::version;
+        num_commands++;
+        break;
+
+      case 2:
+        opts.command = Command::evaluate;
+        num_commands++;
+        break;
+
+      case 3:
+        opts.command = Command::search;
+        num_commands++;
+        break;
+
+      case 4: /* alignment file */
+        opts.msa_file = optarg;
+        break;
+
+      case 5: /* starting tree */
+        if (strcasecmp(optarg, "rand") == 0 || strcasecmp(optarg, "random") == 0 ||
+            sscanf(optarg, "rand{%u}", &opts.num_searches) == 1 ||
+            sscanf(optarg, "random{%u}", &opts.num_searches) == 1)
+        {
+          opts.start_tree = StartingTree::random;
+        }
+        else if (strcasecmp(optarg, "pars") == 0 || strcasecmp(optarg, "parsimony") == 0 ||
+            sscanf(optarg, "pars{%u}", &opts.num_searches) == 1 ||
+            sscanf(optarg, "parsimony{%u}", &opts.num_searches) == 1)
+        {
+          opts.start_tree = StartingTree::parsimony;
+        }
+        else
+        {
+          opts.start_tree = StartingTree::user;
+          opts.tree_file = optarg;
+        }
+        break;
+
+      case 6: /* set prefix for output files */
+        opts.outfile_prefix = optarg;
+        break;
+
+      case 7:  /* model */
+        opts.model_file = optarg;
+        break;
+
+      case 8:  /* data-type */
+        if (strcasecmp(optarg, "dna") == 0)
+          opts.data_type = DataType::dna;
+        else if (strcasecmp(optarg, "aa") == 0)
+          opts.data_type = DataType::protein;
+        else if (strcasecmp(optarg, "binary") == 0)
+          opts.data_type = DataType::binary;
+        else if (strcasecmp(optarg, "diploid10") == 0)
+          opts.data_type = DataType::diploid10;
+        else if (strcasecmp(optarg, "multi") == 0)
+          opts.data_type = DataType::multistate;
+        else if (strcasecmp(optarg, "auto") == 0)
+          opts.data_type = DataType::autodetect;
+        else
+          throw InvalidOptionValueException("Unknown data type: " + string(optarg));
+        break;
+
+      case 9: /* optimize model */
+        opts.optimize_model = !optarg || (strcasecmp(optarg, "off") != 0);
+        break;
+
+      case 10: /* optimize branches */
+        opts.optimize_brlen = !optarg || (strcasecmp(optarg, "off") != 0);
+        break;
+
+      case 11:  /* prob-msa = use probabilitic MSA */
+        if (!optarg || (strcasecmp(optarg, "off") != 0))
+        {
+          opts.use_prob_msa = true;
+          opts.use_pattern_compression = false;
+          opts.use_tip_inner = false;
+        }
+        else
+          opts.use_prob_msa = false;
+        break;
+
+      case 12: /* disable pattern compression */
+        opts.use_pattern_compression = !optarg || (strcasecmp(optarg, "off") != 0);
+        break;
+
+      case 13: /* disable tip-inner optimization */
+        opts.use_tip_inner = !optarg || (strcasecmp(optarg, "off") != 0);
+        break;
+
+      case 14: /* branch length linkage mode */
+        if (strcasecmp(optarg, "scaled") == 0)
+          opts.brlen_linkage = PLLMOD_TREE_BRLEN_SCALED;
+        else if (strcasecmp(optarg, "linked") == 0)
+          opts.brlen_linkage = PLLMOD_TREE_BRLEN_LINKED;
+        else if (strcasecmp(optarg, "unlinked") == 0)
+        {
+          opts.brlen_linkage = PLLMOD_TREE_BRLEN_UNLINKED;
+          throw OptionException("Unlinked branch lengths not supported yet!");
+        }
+        else
+          throw InvalidOptionValueException("Unknown branch linkage mode: " + string(optarg));
+        break;
+
+      case 15:  /* spr-radius = maximum radius for fast SPRs */
+        if (sscanf(optarg, "%u", &opts.spr_radius) != 1)
+        {
+          throw InvalidOptionValueException("Invalid SPR radius: " + string(optarg) +
+                                            ", please provide a positive integer!");
+        }
+        break;
+      case 16:  /* spr-cutoff = relative LH cutoff to discard subtrees */
+        if (strcasecmp(optarg, "off") == 0)
+        {
+          opts.spr_cutoff = 0.;
+        }
+        else if (sscanf(optarg, "%lf", &opts.spr_cutoff) != 1)
+        {
+          throw InvalidOptionValueException("Invalid SPR cutoff: " + string(optarg) +
+                                            ", please provide a real number!");
+        }
+        break;
+
+      case 17: /* LH epsilon */
+        if(sscanf(optarg, "%lf", &opts.lh_epsilon) != 1 || opts.lh_epsilon < 0.)
+          throw InvalidOptionValueException("Invalid LH epsilon parameter value: " +
+                                            string(optarg) +
+                                            ", please provide a positive real number.");
+        break;
+
+      case 18: /* random seed */
+        opts.random_seed = atol(optarg);
+        break;
+
+      case 19:  /* number of threads */
+        if (sscanf(optarg, "%u", &opts.num_threads) != 1 || opts.num_threads == 0)
+        {
+          throw InvalidOptionValueException("Invalid number of threads: %s " + string(optarg) +
+                                            ", please provide a positive integer number!");
+        }
+        break;
+      case 20: /* SIMD instruction set */
+        if (strcasecmp(optarg, "none") == 0 || strcasecmp(optarg, "scalar") == 0)
+        {
+          opts.simd_arch = PLL_ATTRIB_ARCH_CPU;
+        }
+        else if (strcasecmp(optarg, "sse3") == 0 || strcasecmp(optarg, "sse") == 0)
+        {
+          opts.simd_arch = PLL_ATTRIB_ARCH_SSE;
+        }
+        else if (strcasecmp(optarg, "avx") == 0)
+        {
+          opts.simd_arch = PLL_ATTRIB_ARCH_AVX;
+        }
+        else if (strcasecmp(optarg, "avx2") == 0)
+        {
+          opts.simd_arch = PLL_ATTRIB_ARCH_AVX2;
+        }
+        else
+        {
+          throw InvalidOptionValueException("Unknown SIMD instruction set: " + string(optarg));
+        }
+        break;
+      case 21: /* MSA file format */
+        if (strcasecmp(optarg, "auto") == 0 )
+        {
+          opts.msa_format = FileFormat::autodetect;
+        }
+        else if (strcasecmp(optarg, "fasta") == 0)
+        {
+          opts.msa_format = FileFormat::fasta;
+        }
+        else if (strcasecmp(optarg, "phylip") == 0)
+        {
+          opts.msa_format = FileFormat::phylip;
+        }
+        else if (strcasecmp(optarg, "vcf") == 0)
+        {
+          opts.msa_format = FileFormat::vcf;
+        }
+        else if (strcasecmp(optarg, "catg") == 0)
+        {
+          opts.msa_format = FileFormat::catg;
+        }
+        else if (strcasecmp(optarg, "binary") == 0)
+        {
+          opts.msa_format = FileFormat::binary;
+        }
+        else
+        {
+          throw InvalidOptionValueException("Unknown MSA file format: " + string(optarg));
+        }
+        break;
+      case 22: /* enable per-rate scalers */
+        opts.use_rate_scalers = !optarg || (strcasecmp(optarg, "off") != 0);
+        break;
+      case 23: /* log level */
+        log_level_set = true;
+        if (strcasecmp(optarg, "error") == 0 )
+          opts.log_level = LogLevel::error;
+        else if (strcasecmp(optarg, "warning") == 0)
+          opts.log_level = LogLevel::warning;
+        else if (strcasecmp(optarg, "info") == 0)
+          opts.log_level = LogLevel::info;
+        else if (strcasecmp(optarg, "progress") == 0)
+          opts.log_level = LogLevel::progress;
+        else if (strcasecmp(optarg, "verbose") == 0)
+          opts.log_level = LogLevel::verbose;
+        else if (strcasecmp(optarg, "debug") == 0)
+          opts.log_level = LogLevel::debug;
+        else
+          throw InvalidOptionValueException("Unknown log level: " + string(optarg));
+        break;
+      case 24:
+        opts.command = Command::bootstrap;
+        num_commands++;
+        break;
+      case 25:
+        opts.command = Command::all;
+        num_commands++;
+        break;
+      case 26:  /* number of bootstrap replicates */
+        if (sscanf(optarg, "%u", &opts.num_bootstraps) != 1 || opts.num_bootstraps == 0)
+        {
+          throw InvalidOptionValueException("Invalid number of num_bootstraps: " + string(optarg) +
+              ", please provide a positive integer number!");
+        }
+        break;
+      case 27:
+        opts.redo_mode = true;
+        break;
+      default:
+        throw  OptionException("Internal error in option parsing");
+    }
+  }
+
+  if (c != -1)
+    exit(EXIT_FAILURE);
+
+  /* if more than one independent command, fail */
+  if (num_commands > 1)
+    throw OptionException("More than one command specified");
+
+  /* check for mandatory options for each command */
+  if (opts.command == Command::evaluate || opts.command == Command::search ||
+      opts.command == Command::bootstrap || opts.command == Command::all)
+  {
+    if (opts.msa_file.empty())
+      throw OptionException("You must specify a multiple alignment file with --msa switch");
+
+    if (opts.model_file.empty())
+      throw OptionException("You must specify an evolutionary model with --model switch");
+  }
+
+  if (opts.command == Command::evaluate)
+  {
+    if (opts.tree_file.empty())
+      throw OptionException("Mandatory switch --tree");
+  }
+
+  if (opts.command != Command::bootstrap && opts.command != Command::all)
+  {
+    opts.num_bootstraps = 0;
+  }
+
+  if (opts.num_searches == 0)
+  {
+    opts.num_searches = (opts.command == Command::all) ? 20 : 1;
+    if (opts.start_tree == StartingTree::user)
+      opts.num_searches = 1;
+  }
+
+  /* set default log output level  */
+  if (!log_level_set)
+  {
+    opts.log_level = (opts.num_searches > 1 || opts.num_bootstraps > 1) ?
+        LogLevel::info : LogLevel::progress;
+  }
+
+  /* set default  names for output files */
+  opts. set_default_outfiles();
+}
+
+void CommandLineParser::print_help()
+{
+  cout << "Usage: raxml-ng [OPTIONS]\n";
+
+  cout << "\n"
+            "Commands (mutually exclusive):\n"
+            "  --help                                     display help information.\n"
+            "  --version                                  display version information.\n"
+            "  --evaluate                                 evaluate the likelihood of a tree.\n"
+            "  --search                                   ML tree search.\n"
+            "  --bootstrap                                bootstrapping.\n"
+            "  --all                                      All-in-one (ML search + bootstrapping).\n"
+            "\n"
+            "Input and output options:\n"
+            "  --tree         FILE | rand{N} | pars{N}    starting tree: rand(om), pars(imony) or user-specified (newick file)\n"
+            "                                             N = number of trees (default: 20 in 'all-in-one' mode, 1 otherwise)\n"
+            "  --msa          FILE                        alignment file\n"
+            "  --msa-format   VALUE                       alignment file type: FASTA, PHYLIP, VCF, CATG or AUTO-detect (default)\n"
+            "  --data-type    VALUE                       data type: DNA, AA, MULTI-state or AUTO-detect (default)\n"
+            "  --prefix       STRING                      prefix for output files (default: MSA file name)\n"
+            "  --log          VALUE                       log verbosity: ERROR,WARNING,INFO,PROGRESS,DEBUG (default: PROGRESS)\n"
+            "  --redo                                     overwrite existing result files and ignore checkpoints (default: OFF)\n"
+            "\n"
+            "General options:\n"
+            "  --seed         VALUE                       seed for pseudo-random number generator (default: current time)\n"
+            "  --pat-comp     on | off                    alignment pattern compression (default: ON)\n"
+            "  --tip-inner    on | off                    tip-inner case optimization (default: ON)\n"
+            "  --threads      VALUE                       number of parallel threads to use (default: 2).\n"
+            "  --simd         none | sse3 | avx | avx2    vector instruction set to use (default: auto-detect).\n"
+            "  --rate-scalers on | off                    use individual CLV scalers for each rate category (default: OFF).\n"
+            "\n"
+            "Model options:\n"
+            "  --model        <name>+G[n]+<Freqs> | FILE  model specification OR partition file (default: GTR+G4)\n"
+            "  --brlen        linked | scaled | unlinked  branch length linkage between partitions (default: scaled)\n"
+            "  --opt-model    on | off                    ML optimization of all model parameters (default: ON)\n"
+            "  --opt-branches on | off                    ML optimization of all branch lengths (default: ON)\n"
+            "  --prob-msa     on | off                    use probabilistic alignment (works with CATG and VCF)\n"
+            "  --lh-epsilon   VALUE                       log-likelihood epsilon for optimization/tree search (default: 0.1)\n"
+            "\n"
+            "Topology search options:\n"
+            "  --spr-radius   VALUE                       SPR re-insertion radius for fast iterations (default: AUTO)\n"
+            "  --spr-cutoff   VALUE | off                 Relative LH cutoff for descending into subtrees (default: 1.0)\n"
+            "\n"
+            "Bootstrapping options:\n"
+            "  --bs-trees     VALUE                       Number of bootstraps replicates (default: 100)\n";
+
+  cout << "\n"
+            "EXAMPLES:\n"
+            "  1. Perform single tree inference on DNA alignment \n"
+            "     (random starting tree, general time-reversible model, ML estimate of substitution rates and\n"
+            "      nucleotide frequencies, discrete GAMMA model of rate heterogeneity with 4 categories):\n"
+            "\n"
+            "     ./raxml-ng --msa testDNA.fa --model GTR+G\n"
+            "\n";
+
+  cout << "\n"
+            "  2. Perform an all-in-one analysis (ML tree search + non-parametric bootstrap) \n"
+            "     (10 randomized parsimony starting trees, fixed empirical substitution matrix (LG),\n"
+            "      empirical aminoacid frequencies from alignment, 8 discrete GAMMA categories,\n"
+            "      200 bootstrap replicates):\n"
+            "\n"
+            "     ./raxml-ng --all --msa testAA.fa --model LG+G8+F --tree pars{10} --bs-trees 200\n"
+            "\n";
+
+  cout << "\n"
+            "  3. Optimize branch lengths and free model parameters on a fixed topology\n"
+            "     (using multiple partitions with proportional branch lengths)\n"
+            "\n"
+            "     ./raxml-ng --evaluate --msa testAA.fa --model partitions.txt --tree test.tree --brlen scaled\n"
+            "\n";
+
+}
+
diff --git a/src/CommandLineParser.hpp b/src/CommandLineParser.hpp
new file mode 100644
index 0000000..81b1c91
--- /dev/null
+++ b/src/CommandLineParser.hpp
@@ -0,0 +1,39 @@
+#ifndef SRC_COMMANDLINEPARSER_HPP_
+#define SRC_COMMANDLINEPARSER_HPP_
+
+#include "Options.hpp"
+
+class OptionException : public RaxmlException
+{
+public:
+  OptionException(const std::string& message)
+  : RaxmlException(message)
+  {
+  }
+};
+
+
+class InvalidOptionValueException : public OptionException
+{
+public:
+  InvalidOptionValueException(const std::string& message) : OptionException(message) {}
+
+  InvalidOptionValueException(const std::string& format, const std::string& value) :
+    OptionException("")
+  {
+    _message.reserve(format.size() + value.size());
+    sprintf(&_message[0], format.c_str(), value.c_str());
+  }
+};
+
+class CommandLineParser
+{
+public:
+  CommandLineParser() {};
+  ~CommandLineParser() = default;
+
+  void parse_options(int argc, char** argv, Options &opts);
+  void print_help();
+};
+
+#endif /* SRC_COMMANDLINEPARSER_HPP_ */
diff --git a/src/LoadBalancer.cpp b/src/LoadBalancer.cpp
new file mode 100644
index 0000000..6daecd8
--- /dev/null
+++ b/src/LoadBalancer.cpp
@@ -0,0 +1,187 @@
+#include <assert.h>
+#include <stack>
+#include <algorithm>
+#include <stdexcept>
+
+#include "LoadBalancer.hpp"
+
+using namespace std;
+
+LoadBalancer::LoadBalancer ()
+{
+  // TODO Auto-generated constructor stub
+
+}
+
+LoadBalancer::~LoadBalancer ()
+{
+  // TODO Auto-generated destructor stub
+}
+
+PartitionAssignmentList LoadBalancer::get_all_assignments(const PartitionAssignment& part_sizes,
+                                                          size_t num_procs)
+{
+  if (num_procs == 1)
+    return PartitionAssignmentList(1, part_sizes);
+  else
+    return compute_assignments(part_sizes, num_procs);
+}
+
+PartitionAssignment LoadBalancer::get_proc_assignments(const PartitionAssignment& part_sizes,
+                                                       size_t num_procs, size_t proc_id)
+{
+  if (proc_id >= num_procs)
+    throw std::out_of_range("Process ID out of range");
+
+  if (num_procs == 1)
+    return part_sizes;
+  else
+    return compute_assignments(part_sizes, num_procs).at(proc_id);
+}
+
+PartitionAssignmentList SimpleLoadBalancer::compute_assignments(const PartitionAssignment& part_sizes,
+                                                                    size_t num_procs)
+{
+  PartitionAssignmentList part_assign(num_procs);
+
+  size_t proc_id = 0;
+  for (auto& proc_assign: part_assign)
+  {
+    for (auto const& full_range: part_sizes)
+    {
+      const size_t total_sites = full_range.length;
+      const size_t proc_sites = total_sites / num_procs;
+      auto part_id = full_range.part_id;
+      auto start = full_range.start + proc_id * proc_sites;
+      auto length = (proc_id == num_procs-1) ? total_sites - start : proc_sites;
+      proc_assign.assign_sites(part_id, start, length);
+    }
+    ++proc_id;
+  }
+
+  assert(proc_id == part_assign.size());
+
+  return part_assign;
+}
+
+PartitionAssignmentList KassianLoadBalancer::compute_assignments(const PartitionAssignment& part_sizes,
+                                                                 size_t num_procs)
+{
+  PartitionAssignmentList bins(num_procs);
+
+  // Sort the partitions by size in ascending order
+  vector<const PartitionRange*> sorted_partitions;
+  for (auto const& range: part_sizes)
+    sorted_partitions.push_back(&range);
+  sort(sorted_partitions.begin(), sorted_partitions.end(),
+       [](const PartitionRange *r1, const PartitionRange *r2) { return (r1->length < r2->length);} );
+
+  // Compute the maximum number of sites per Bin
+  size_t total_sites = 0;
+  for (auto const& range: part_sizes)
+  {
+    total_sites += range.length;
+  }
+
+  unsigned int max_sites = (total_sites - 1) / bins.size() + 1;
+  unsigned int r = (max_sites * bins.size()) - total_sites;
+  unsigned int curr_part = 0; // index in sorted_partitons (AND NOT IN _partitions)
+  unsigned int full_bins = 0;
+  unsigned int current_bin = 0;
+
+//  vector<size_t> weights(num_procs, 0);
+  vector<bool> full(num_procs, false);
+
+  // Assign partitions in a cyclic manner to bins until one is too big
+  for (; curr_part < sorted_partitions.size(); ++curr_part)
+  {
+    const PartitionRange *partition = sorted_partitions[curr_part];
+    current_bin = curr_part % bins.size();
+    if (partition->length + bins[current_bin].weight() > max_sites)
+    {
+      // the partition exceeds the current bin's size, go to the next step of the algo
+      break;
+    }
+    // add the partition !
+    bins[current_bin].assign_sites(partition->part_id, 0, partition->length);
+    if (bins[current_bin].weight() == max_sites)
+    {
+      // one more bin is exactly full
+      if (++full_bins == (bins.size() - r))
+      {
+        // border case : the remaining bins should not exceed max_sites - 1
+        max_sites--;
+      }
+      // flag it as full (its harder to rely on max_sites because its value changes)
+      full[current_bin] = true;
+    }
+  }
+
+  stack<PartitionAssignment *> qlow_;
+  stack<PartitionAssignment *> *qlow = &qlow_; // hack to assign qhigh to qlow when qlow is empty
+  stack<PartitionAssignment *> qhigh;
+  for (unsigned int i = 0; i < current_bin; ++i)
+  {
+    if (!full[current_bin])
+    {
+      qhigh.push(&bins[i]);
+    }
+  }
+  for (unsigned int i = current_bin; i < bins.size(); ++i)
+  {
+    if (!full[current_bin])
+    {
+      qlow->push(&bins[i]);
+    }
+  }
+  unsigned int remaining = sorted_partitions[curr_part]->length;
+  while (curr_part < sorted_partitions.size() && (qlow->size() || qhigh.size()))
+  {
+    const PartitionRange *partition = sorted_partitions[curr_part];
+    // try to dequeue a process from Qhigh and to fill it
+    if (qhigh.size() && (qhigh.top()->weight() + remaining >= max_sites))
+    {
+      PartitionAssignment * bin = qhigh.top();
+      qhigh.pop();
+      unsigned int toassign = max_sites - bin->weight();
+      bin->assign_sites(partition->part_id, partition->length - remaining, toassign);
+      remaining -= toassign;
+      if (++full_bins == (bins.size() - r)) {
+        max_sites--;
+      }
+    }
+    else if ((qlow->top()->weight() + remaining >= max_sites))
+    { // same with qlow
+      PartitionAssignment * bin = qlow->top();
+      qlow->pop();
+      unsigned int toassign = max_sites - bin->weight();
+      bin->assign_sites(partition->part_id, partition->length - remaining, toassign);
+      remaining -= toassign;
+      if (++full_bins == (bins.size() - r)) {
+        max_sites--;
+      }
+    }
+    else
+    {
+      PartitionAssignment * bin = qlow->top();
+      qlow->pop();
+      bin->assign_sites(partition->part_id, partition->length - remaining, remaining);
+      remaining = 0;
+      qhigh.push(bin);
+    }
+
+    if (!qlow->size())
+    {
+      qlow = &qhigh;
+    }
+    if (!remaining)
+    {
+      if (++curr_part < sorted_partitions.size())
+      {
+        remaining = sorted_partitions[curr_part]->length;
+      }
+    }
+  }
+
+  return bins;
+}
diff --git a/src/LoadBalancer.hpp b/src/LoadBalancer.hpp
new file mode 100644
index 0000000..7ac1af0
--- /dev/null
+++ b/src/LoadBalancer.hpp
@@ -0,0 +1,38 @@
+#ifndef RAXML_LOADBALANCER_HPP_
+#define RAXML_LOADBALANCER_HPP_
+
+#include "PartitionAssignment.hpp"
+
+class LoadBalancer
+{
+public:
+  LoadBalancer ();
+  virtual
+  ~LoadBalancer ();
+
+  PartitionAssignmentList get_all_assignments(const PartitionAssignment& part_sizes,
+                                              size_t num_procs);
+
+  PartitionAssignment get_proc_assignments(const PartitionAssignment& part_sizes,
+                                           size_t num_procs, size_t proc_id);
+
+protected:
+  virtual PartitionAssignmentList compute_assignments(const PartitionAssignment& part_sizes,
+                                                      size_t num_procs) = 0;
+};
+
+class SimpleLoadBalancer : public LoadBalancer
+{
+protected:
+  virtual PartitionAssignmentList compute_assignments(const PartitionAssignment& part_sizes,
+                                                      size_t num_procs);
+};
+
+class KassianLoadBalancer : public LoadBalancer
+{
+protected:
+  virtual PartitionAssignmentList compute_assignments(const PartitionAssignment& part_sizes,
+                                                      size_t num_procs);
+};
+
+#endif /* RAXML_LOADBALANCER_HPP_ */
diff --git a/src/MSA.cpp b/src/MSA.cpp
new file mode 100644
index 0000000..597dc0e
--- /dev/null
+++ b/src/MSA.cpp
@@ -0,0 +1,289 @@
+#include <stdexcept>
+#include <algorithm>
+
+#include "MSA.hpp"
+
+using namespace std;
+
+MSA::MSA(const pll_msa_t *pll_msa) :
+    _length(0), _num_sites(pll_msa->length), _states(0), _pll_msa(nullptr)
+{
+  for (auto i = 0; i < pll_msa->count; ++i)
+  {
+    append(string(pll_msa->sequence[i], pll_msa->length), pll_msa->label ? pll_msa->label[i] : "");
+  }
+
+  update_pll_msa();
+}
+
+MSA::MSA(MSA&& other) : _length(other._length), _num_sites(other._num_sites),
+    _sequences(move(other._sequences)), _labels(move(other._labels)),
+    _label_id_map(move(other._label_id_map)), _weights(move(other._weights)),
+    _probs(move(other._probs)), _states(other._states), _pll_msa(other._pll_msa),
+    _dirty(other._dirty)
+{
+  other._length = other._num_sites = 0;
+  other._pll_msa = nullptr;
+  other._dirty = false;
+};
+
+MSA::~MSA()
+{
+  free_pll_msa();
+}
+
+MSA& MSA::operator=(MSA&& other)
+{
+  if (this != &other)
+  {
+    // release the current object’s resources
+    free_pll_msa();
+    _weights.clear();
+    _sequences.clear();
+    _labels.clear();
+    _label_id_map.clear();
+
+    // steal other’s resource
+    _length = other._length;
+    _num_sites = other._num_sites;
+    _pll_msa = other._pll_msa;
+    _weights = std::move(other._weights);
+    _sequences = std::move(other._sequences);
+    _labels = std::move(other._labels);
+    _label_id_map = std::move(other._label_id_map);
+    _probs = std::move(other._probs);
+    _states = other._states;
+    _dirty = other._dirty;
+
+    // reset other
+    other._length = other._num_sites = other._states = 0;
+    other._pll_msa = nullptr;
+    other._dirty = false;
+  }
+  return *this;
+}
+
+void MSA::free_pll_msa() noexcept
+{
+  if (_pll_msa)
+  {
+    free(_pll_msa->sequence);
+    if (_pll_msa->label)
+      free(_pll_msa->label);
+    free(_pll_msa);
+    _pll_msa = nullptr;
+  }
+}
+
+void MSA::append(const string& sequence, const string& header)
+{
+  if(_length && sequence.length() != (size_t) _length)
+    throw runtime_error{string("Tried to insert sequence to MSA of unequal length: ") + sequence};
+
+  _sequences.push_back(sequence);
+
+  if (!header.empty())
+  {
+    _labels.push_back(header);
+    _label_id_map[header] = _labels.size() - 1;
+  }
+
+  if (!_length)
+  {
+    _length = sequence.length();
+    if (!_num_sites)
+      _num_sites = _length;
+    _weights.assign(_length, 1.);
+  }
+
+  _dirty = true;
+}
+
+void MSA::compress_patterns(const unsigned int * charmap)
+{
+  update_pll_msa();
+
+  assert(_pll_msa->count && _pll_msa->length);
+
+  const unsigned int * w = pll_compress_site_patterns(_pll_msa->sequence,
+                                                      charmap,
+                                                      _pll_msa->count,
+                                                      &(_pll_msa->length));
+
+  if (!w)
+    throw runtime_error("Pattern compression failed!");
+
+  _length = _pll_msa->length;
+  _weights = WeightVector(w, w + _pll_msa->length);
+
+  _dirty = false;
+}
+
+
+const pll_msa_t * MSA::pll_msa() const
+{
+  update_pll_msa();
+  return _pll_msa;
+}
+
+void MSA::update_pll_msa() const
+{
+  if (!_pll_msa)
+  {
+    _pll_msa = (pll_msa_t *) calloc(1, sizeof(pll_msa_t));
+    _dirty = true;
+  }
+
+  assert(_labels.empty() || _labels.size() == _sequences.size());
+
+  if (_dirty)
+  {
+    _pll_msa->count = size();
+    _pll_msa->length = length();
+
+    size_t i = 0;
+    _pll_msa->sequence = (char **) calloc(_pll_msa->count, sizeof(char *));
+    for (const auto& entry : _sequences)
+    {
+      _pll_msa->sequence[i] = (char *) entry.c_str();
+      ++i;
+    }
+    assert(i == size());
+
+    if (!_labels.empty())
+    {
+      i = 0;
+      _pll_msa->label = (char **) calloc(_pll_msa->count, sizeof(char *));
+      for (const auto& entry : _labels)
+      {
+        _pll_msa->label[i] = (char *) entry.c_str();
+        ++i;
+      }
+      assert(i == size());
+    }
+
+    _dirty = false;
+  }
+}
+
+void MSA::states(size_t states)
+{
+  _states = states;
+  if (size() > 0)
+  {
+    _probs.resize(size());
+    for (auto& v: _probs)
+    {
+      v.resize(_length * _states);
+    }
+  }
+}
+
+ProbVector::const_iterator MSA::probs(size_t index, size_t site) const
+{
+  return _probs.at(index).cbegin() + site * _states;
+}
+
+ProbVector::iterator MSA::probs(size_t index, size_t site)
+{
+  return _probs.at(index).begin() + site * _states;
+}
+
+bool MSA::normalized() const
+{
+  if (!probabilistic())
+    return true;
+
+  for (const auto& pv: _probs)
+  {
+    for (auto p: pv)
+    {
+      if (p > 1.)
+        return false;
+    }
+  }
+
+  return true;
+}
+
+doubleVector MSA::state_freqs() const
+{
+  assert(_states > 0);
+
+  double sum = 0;
+  doubleVector freqs(_states, 0.);
+  for (const auto& pv: _probs)
+  {
+    for (auto p = pv.cbegin(); p != pv.cend();)
+    {
+      for (size_t i = 0; i < _states; ++i, ++p)
+      {
+        freqs[i] += *p;
+        sum += *p;
+      }
+    }
+  }
+
+  assert(sum > 0.);
+
+//  double total_bases = _length * size();
+  for (size_t i = 0; i < _states; ++i)
+    freqs[i] /= sum;
+
+  return freqs;
+}
+
+void MSA::remove_sites(const std::vector<size_t>& site_indices)
+{
+  if (site_indices.empty())
+    return;
+
+  assert(_length);
+
+  auto sorted_indicies = site_indices;
+
+  std::sort(sorted_indicies.begin(), sorted_indicies.end());
+
+  if (sorted_indicies.back() >= _length)
+    throw out_of_range("Invalid site index");
+
+  auto new_length = _length - sorted_indicies.size();
+  for (auto& s: _sequences)
+  {
+    size_t pos = 0;
+    auto ignore = sorted_indicies.cbegin();
+    for (size_t i = 0; i < s.size(); ++i)
+    {
+      if (i != *ignore)
+      {
+        s[pos++] = s[i];
+      }
+      else
+        ignore++;
+    }
+    assert(pos == new_length);
+    s.resize(new_length);
+  }
+
+  if (!_weights.empty())
+  {
+    assert(_weights.size() == _length);
+    size_t pos = 0;
+    auto ignore = sorted_indicies.cbegin();
+    for (size_t i = 0; i < _weights.size(); ++i)
+    {
+      if (i != *ignore)
+      {
+        _weights[pos++] = _weights[i];
+      }
+      else
+        ignore++;
+    }
+    assert(pos == new_length);
+    _weights.resize(new_length);
+  }
+
+  _length = new_length;
+  _dirty = true;
+}
+
diff --git a/src/MSA.hpp b/src/MSA.hpp
new file mode 100644
index 0000000..4bba3c3
--- /dev/null
+++ b/src/MSA.hpp
@@ -0,0 +1,91 @@
+#ifndef RAXML_MSA_HPP_
+#define RAXML_MSA_HPP_
+
+#include "common.h"
+
+typedef unsigned int WeightType;
+typedef std::vector<WeightType> WeightVector;
+typedef std::vector<WeightVector> WeightVectorList;
+typedef std::unordered_map<size_t, WeightVector> WeightVectorMap;
+
+typedef std::vector<double> ProbVector;
+typedef std::vector<ProbVector> ProbVectorList;
+
+class MSA
+{
+public:
+//  typedef std::unordered_map<std::string, std::string> container;
+  typedef std::vector<std::string> container;
+  typedef typename container::iterator        iterator;
+  typedef typename container::const_iterator  const_iterator;
+
+  MSA() : _length(0), _num_sites(0), _states(0), _pll_msa(NULL), _dirty(false) {};
+  MSA(const unsigned int num_sites) : _length(0), _num_sites(num_sites),
+      _states(0), _pll_msa(nullptr), _dirty(false) {};
+  MSA(const pll_msa_t * pll_msa);
+  MSA(MSA&& other);
+  MSA(const MSA& other) = delete;
+
+  ~MSA();
+
+  MSA& operator=(MSA&& other);
+  MSA& operator=(const MSA& other) = delete;
+
+  void append(const std::string& sequence, const std::string& header = "");
+  void compress_patterns(const unsigned int * charmap);
+
+  size_t size() const { return _sequences.size(); }
+  size_t length() const { return _length; }
+  size_t num_sites() const { return _num_sites; }
+  size_t num_patterns() const { return _weights.size(); }
+  const WeightVector& weights() const {return _weights; }
+  const NameIdMap& label_id_map() const { return _label_id_map; }
+  const pll_msa_t * pll_msa() const;
+
+  const std::string& label(size_t index) const { return _labels.at(index); }
+  const std::string& at(const std::string& label) const
+  { return _sequences.at(_label_id_map.at(label)); }
+  const std::string& at(size_t index) const { return _sequences.at(index); }
+  const std::string& operator[](const std::string& label) const { return at(label); }
+  const std::string& operator[](size_t index) const { return at(index); }
+  std::string& operator[](size_t index) { return _sequences.at(index); }
+
+  bool probabilistic() const { return _states > 0; }
+  bool normalized() const;
+  size_t states() const { return _states; }
+  void states(size_t states);
+  const ProbVector& probs(size_t index) const { return _probs.at(index); }
+  ProbVector::const_iterator probs(size_t index, size_t site) const;
+  ProbVector::iterator probs(size_t index, size_t site);
+
+  doubleVector state_freqs() const;
+
+  void num_sites(const unsigned int sites) { _num_sites = sites; }
+  void remove_sites(const std::vector<size_t>& site_indices);
+
+  //Iterator Compatibility
+  iterator begin() { return _sequences.begin(); }
+  iterator end() { return _sequences.end(); }
+  const_iterator begin() const { return _sequences.cbegin(); }
+  const_iterator end() const { return _sequences.cend(); }
+  const_iterator cbegin() { return _sequences.cbegin(); }
+  const_iterator cend() { return _sequences.cend(); }
+
+private:
+  // Data Members
+  size_t _length;
+  size_t _num_sites;
+  container _sequences;
+  container _labels;
+  NameIdMap _label_id_map;
+  WeightVector _weights;
+  ProbVectorList _probs;
+  size_t _states;
+  mutable pll_msa_t * _pll_msa;
+  mutable bool _dirty;
+
+  void update_pll_msa() const;
+  void free_pll_msa() noexcept;
+};
+
+#endif /* RAXML_MSA_HPP_ */
diff --git a/src/Model.cpp b/src/Model.cpp
new file mode 100644
index 0000000..2ff89ff
--- /dev/null
+++ b/src/Model.cpp
@@ -0,0 +1,553 @@
+#include "Model.hpp"
+
+using namespace std;
+
+const vector<int> ALL_MODEL_PARAMS = {PLLMOD_OPT_PARAM_FREQUENCIES, PLLMOD_OPT_PARAM_SUBST_RATES,
+                                      PLLMOD_OPT_PARAM_PINV, PLLMOD_OPT_PARAM_ALPHA,
+                                      PLLMOD_OPT_PARAM_FREE_RATES, PLLMOD_OPT_PARAM_RATE_WEIGHTS,
+                                      PLLMOD_OPT_PARAM_BRANCH_LEN_SCALER,
+                                      PLLMOD_OPT_PARAM_BRANCHES_ITERATIVE};
+
+const unordered_map<DataType,unsigned int,EnumClassHash>  DATATYPE_STATES { {DataType::dna, 4},
+                                                                            {DataType::protein, 20},
+                                                                            {DataType::binary, 2},
+                                                                            {DataType::diploid10, 10}
+                                                                          };
+
+const unordered_map<DataType, const unsigned int *,EnumClassHash>  DATATYPE_MAPS {
+  {DataType::dna, pll_map_nt},
+  {DataType::protein, pll_map_aa},
+  {DataType::binary, pll_map_bin},
+  {DataType::diploid10, pll_map_diploid10}
+};
+
+Model::Model (DataType data_type, const std::string &model_string) :
+    _data_type(data_type)
+{
+  // RAxML compatibility hack, TODO: generic model name aliases
+  const string model_string_tmp = model_string == "DNA" ? "GTR+G+F" : model_string;
+
+  init_from_string(model_string_tmp);
+}
+
+const unsigned int * Model::charmap() const
+{
+  return DATATYPE_MAPS.at(_data_type);
+}
+
+void Model::init_from_string(const std::string &model_string)
+{
+  size_t pos = model_string.find_first_of("+");
+  const string model_name = pos == string::npos ? model_string : model_string.substr(0, pos);
+  const string model_opts = pos == string::npos ? "" : model_string.substr(pos);
+
+  /* guess data type */
+  autodetect_data_type(model_name);
+
+  /* set number of states based on datatype (unless already set) */
+  _num_states = DATATYPE_STATES.at(_data_type);
+
+  pllmod_mixture_model_t * mix_model = init_mix_model(model_name);
+
+  init_model_opts(model_opts, *mix_model);
+
+  pllmod_util_model_mixture_destroy(mix_model);
+}
+
+
+void Model::autodetect_data_type(const std::string &model_name)
+{
+  if (_data_type == DataType::autodetect)
+  {
+    if (pllmod_util_model_exists_genotype(model_name.c_str()))
+      _data_type = DataType::diploid10;
+    else if (pllmod_util_model_exists_protein(model_name.c_str()) ||
+             pllmod_util_model_exists_protmix(model_name.c_str()))
+    {
+      _data_type = DataType::protein;
+    }
+    else
+    {
+      _data_type = DataType::dna;
+    }
+  }
+}
+
+pllmod_mixture_model_t * Model::init_mix_model(const std::string &model_name)
+{
+  const char * model_cstr = model_name.c_str();
+  pllmod_mixture_model_t * mix_model = nullptr;
+
+  if (pllmod_util_model_exists_protmix(model_cstr))
+  {
+    mix_model = pllmod_util_model_info_protmix(model_cstr);
+  }
+  else
+  {
+    pllmod_subst_model_t * modinfo =  NULL;
+
+    /* initialize parameters from the model */
+    if (_data_type == DataType::protein)
+    {
+      modinfo =  pllmod_util_model_info_protein(model_cstr);
+    }
+    else if (_data_type == DataType::dna)
+    {
+      modinfo =  pllmod_util_model_info_dna(model_cstr);
+    }
+    else if (_data_type == DataType::diploid10)
+    {
+      modinfo =  pllmod_util_model_info_genotype(model_cstr);
+    }
+
+    // TODO: user models must be defined explicitly
+//    /* pre-defined model not found; assume model string encodes rate symmetries */
+//    if (!modinfo)
+//      modinfo =  pllmod_util_model_create_custom("USER", _num_states, NULL, NULL, model_cstr, NULL);
+
+    if (!modinfo)
+      throw runtime_error("Invalid model name: " + model_name);
+
+    /* create pseudo-mixture with 1 component */
+    mix_model = pllmod_util_model_mixture_create(modinfo->name, 1, &modinfo, NULL, NULL,
+                                                  PLLMOD_UTIL_MIXTYPE_FIXED);
+  }
+
+  return mix_model;
+}
+
+void Model::init_model_opts(const std::string &model_opts, const pllmod_mixture_model_t& mix_model)
+{
+  _alpha = 1.0;
+  _pinv = 0.0;
+  _brlen_scaler = 1.0;
+  _name = string(mix_model.name);
+
+  /* set rate heterogeneity defaults from model */
+  _num_ratecats = mix_model.ncomp;
+  _num_submodels = mix_model.ncomp;
+  _rate_het = mix_model.mix_type;
+
+  /* allocate space for all subst matrices */
+  for (size_t i = 0; i < mix_model.ncomp; ++i)
+    _submodels.emplace_back(*mix_model.models[i]);
+
+  /* set default param optimization modes */
+  for (auto param: ALL_MODEL_PARAMS)
+    _param_mode[param] = ParamValue::undefined;
+
+  _param_mode[PLLMOD_OPT_PARAM_FREQUENCIES] =
+      mix_model.models[0]->freqs ? ParamValue::model : ParamValue::ML;
+
+  _param_mode[PLLMOD_OPT_PARAM_SUBST_RATES] =
+      mix_model.models[0]->rates ? ParamValue::model : ParamValue::ML;
+
+  const char *s = model_opts.c_str();
+  const char *tmp;
+
+  /* parse the rest and set additional model params */
+  int rate_cats;
+  ParamValue param_mode;
+  while(*s != '\0')
+  {
+    // skip "+"
+    s++;
+
+    switch(toupper(*s))
+    {
+      case '\0':
+      case '+':
+        break;
+      case 'F':
+        switch (toupper(*(s+1)))
+        {
+          case '\0':
+          case '+':
+          case 'C':
+            param_mode = ParamValue::empirical;
+            break;
+          case 'O':
+            param_mode = ParamValue::ML;
+            break;
+          case 'E':
+            param_mode = ParamValue::equal;
+            break;
+          default:
+            throw runtime_error(string("Invalid frequencies specification: ") + s);
+        }
+        _param_mode[PLLMOD_OPT_PARAM_FREQUENCIES] = param_mode;
+        break;
+      case 'I':
+        switch (toupper(*(s+1)))
+        {
+          case '\0':
+          case '+':
+          case 'O':
+            param_mode = ParamValue::ML;
+            break;
+          case 'C':
+            param_mode = ParamValue::empirical;
+            break;
+          case '{':
+            if (sscanf(s+1, "{%lf}", &_pinv) == 1 && ((tmp = strchr(s, '}')) != NULL))
+            {
+              param_mode = ParamValue::user;
+              s = tmp+1;
+            }
+            else
+              throw runtime_error(string("Invalid p-inv specification: ") + s);
+            break;
+          default:
+            throw runtime_error(string("Invalid p-inv specification: ") + s);
+        }
+        _param_mode[PLLMOD_OPT_PARAM_PINV] = param_mode;
+        break;
+      case 'R':
+      case 'G':
+        /* allow to override mixture ratehet mode for now */
+        _rate_het = toupper(*s) == 'R' ? PLLMOD_UTIL_MIXTYPE_FREE : PLLMOD_UTIL_MIXTYPE_GAMMA;
+        if (sscanf(s+1, "%d", &rate_cats) == 1)
+        {
+          _num_ratecats = rate_cats;
+        }
+        else if (_num_ratecats == 1)
+          _num_ratecats = 4;
+        break;
+      default:
+        throw runtime_error("Wrong model specification: " + model_opts);
+    }
+
+    // rewind to next term
+    while (*s != '+' && *s != '\0')
+      s++;
+  }
+
+  switch (_param_mode.at(PLLMOD_OPT_PARAM_FREQUENCIES))
+  {
+    case ParamValue::user:
+    case ParamValue::empirical:
+    case ParamValue::model:
+      /* nothing to do here */
+      break;
+    case ParamValue::equal:
+    case ParamValue::ML:
+      /* use equal frequencies as s a starting value for ML optimization */
+      for (auto& m: _submodels)
+        m.base_freqs(doubleVector(_num_states, 1.0 / _num_states));
+      break;
+    default:
+      assert(0);
+  }
+
+  const size_t num_srates = pllmod_util_subst_rate_count(_num_states);
+  switch (_param_mode.at(PLLMOD_OPT_PARAM_SUBST_RATES))
+  {
+    case ParamValue::user:
+    case ParamValue::empirical:
+    case ParamValue::model:
+      /* nothing to do here */
+      break;
+    case ParamValue::equal:
+    case ParamValue::ML:
+      /* use equal rates as s a starting value for ML optimization */
+      for (auto& m: _submodels)
+        m.subst_rates(doubleVector(num_srates, 1.0));
+      break;
+    default:
+      assert(0);
+  }
+
+  /* default: equal rates & weights */
+  _ratecat_rates.assign(_num_ratecats, 1.0);
+  _ratecat_weights.assign(_num_ratecats, 1.0 / _num_ratecats);
+  _ratecat_submodels.assign(_num_ratecats, 0);
+
+  if (_num_ratecats > 1)
+  {
+    /* init rate & weights according to the selected mode */
+    switch (_rate_het)
+    {
+      case PLLMOD_UTIL_MIXTYPE_FIXED:
+        assert(_num_ratecats == mix_model.ncomp);
+        /* set rates and weights from the mixture model definition */
+        _ratecat_rates.assign(mix_model.mix_rates, mix_model.mix_rates + _num_ratecats);
+        _ratecat_weights.assign(mix_model.mix_weights, mix_model.mix_weights + _num_ratecats);
+        break;
+
+      case PLLMOD_UTIL_MIXTYPE_GAMMA:
+        /* compute the discretized category rates from a gamma distribution
+           with given alpha shape and store them in rate_cats  */
+        pll_compute_gamma_cats(_alpha, _num_ratecats, _ratecat_rates.data());
+        if (_param_mode[PLLMOD_OPT_PARAM_ALPHA] == ParamValue::undefined)
+          _param_mode[PLLMOD_OPT_PARAM_ALPHA] = ParamValue::ML;
+        break;
+
+      case PLLMOD_UTIL_MIXTYPE_FREE:
+        /* use GAMMA rates as inital values -> can be changed */
+        pll_compute_gamma_cats(_alpha, _num_ratecats, _ratecat_rates.data());
+        _param_mode[PLLMOD_OPT_PARAM_FREE_RATES] = ParamValue::ML;
+        _param_mode[PLLMOD_OPT_PARAM_RATE_WEIGHTS] = ParamValue::ML;
+        break;
+
+      default:
+        throw runtime_error("Unknown rate heterogeneity model");
+    }
+
+    /* link rate categories to corresponding mixture components (R-matrix + freqs)*/
+    if (_num_submodels == _num_ratecats)
+    {
+      for (size_t i = 0; i < _num_ratecats; ++i)
+        _ratecat_submodels[i] = i;
+    }
+  }
+}
+
+std::string Model::to_string() const
+{
+  stringstream model_string;
+  model_string << name();
+
+  switch(_param_mode.at(PLLMOD_OPT_PARAM_FREQUENCIES))
+  {
+    case ParamValue::empirical:
+      model_string << "+F";
+      break;
+    case ParamValue::ML:
+      model_string << "+FO";
+      break;
+    case ParamValue::equal:
+      model_string << "+FE";
+      break;
+    default:
+      break;
+  }
+
+  switch(_param_mode.at(PLLMOD_OPT_PARAM_PINV))
+  {
+    case ParamValue::empirical:
+      model_string << "+IC";
+      break;
+    case ParamValue::ML:
+      model_string << "+I";
+      break;
+    case ParamValue::user:
+      model_string << "+I{" << _pinv << "}";
+      break;
+    default:
+      break;
+  }
+
+  if (_num_ratecats > 1)
+  {
+    if (_rate_het == PLLMOD_UTIL_MIXTYPE_GAMMA)
+    {
+      model_string << "+G" << _num_ratecats;
+      if (_param_mode.at(PLLMOD_OPT_PARAM_ALPHA) == ParamValue::user)
+        model_string << "{" << _alpha << "}";
+    }
+    else if (_rate_het == PLLMOD_UTIL_MIXTYPE_FREE)
+    {
+      model_string << "+R" << _num_ratecats;
+    }
+  }
+
+  return model_string.str();
+}
+
+int Model::params_to_optimize() const
+{
+  int params_to_optimize = 0;
+
+  for (auto param: ALL_MODEL_PARAMS)
+  {
+    if (_param_mode.at(param) == ParamValue::ML)
+      params_to_optimize |= param;
+  }
+
+  return params_to_optimize;
+}
+
+void assign(Model& model, const pllmod_msa_stats_t * stats)
+{
+  /* either compute empirical P-inv, or set the fixed user-specified value */
+  switch (model.param_mode(PLLMOD_OPT_PARAM_PINV))
+  {
+    case ParamValue::empirical:
+      model.pinv(stats->inv_prop);
+      break;
+    case ParamValue::ML:
+      /* use half of empirical pinv as a starting value */
+      model.pinv(stats->inv_prop / 2);
+      break;
+    case ParamValue::user:
+    case ParamValue::undefined:
+      /* nothing to do here */
+      break;
+    default:
+      assert(0);
+  }
+
+   /* assign empirical base frequencies */
+  switch (model.param_mode(PLLMOD_OPT_PARAM_FREQUENCIES))
+  {
+    case ParamValue::empirical:
+//      if (_model.data_type == DataType::diploid10)
+//        /* for now, use a separate function to compute emp. freqs for diploid10 data*/
+//        alloc_freqs = base_freqs = get_diploid10_empirircal_freqs(msa);
+//      else
+      {
+//        base_freqs = pllmod_msa_empirical_frequencies(partition);
+        assert(stats->freqs && stats->states == model.num_states());
+        model.base_freqs(doubleVector(stats->freqs, stats->freqs + stats->states));
+      }
+      break;
+    case ParamValue::user:
+    case ParamValue::equal:
+    case ParamValue::ML:
+    case ParamValue::model:
+      /* nothing to do here */
+      break;
+    default:
+      assert(0);
+  }
+
+  /* assign empirical substitution rates */
+  switch (model.param_mode(PLLMOD_OPT_PARAM_SUBST_RATES))
+  {
+    case ParamValue::empirical:
+    {
+//      double * subst_rates = pllmod_msa_empirical_subst_rates(partition);
+      size_t n_subst_rates = pllmod_util_subst_rate_count(stats->states);
+      model.subst_rates(doubleVector(stats->subst_rates,
+                                     stats->subst_rates + n_subst_rates));
+      break;
+    }
+    case ParamValue::equal:
+    case ParamValue::user:
+    case ParamValue::ML:
+    case ParamValue::model:
+      /* nothing to do here */
+      break;
+    default:
+      assert(0);
+  }
+}
+
+void assign(Model& model, const pll_partition_t * partition)
+{
+  if (model.num_states() == partition->states &&
+      model.num_submodels() == partition->rate_matrices)
+  {
+    model.pinv(partition->prop_invar[0]);
+    for (size_t i = 0; i < model.num_submodels(); ++i)
+    {
+      model.base_freqs(i, doubleVector(partition->frequencies[i],
+                                       partition->frequencies[i] + partition->states));
+
+      size_t n_subst_rates = pllmod_util_subst_rate_count(partition->states);
+      model.subst_rates(i, doubleVector(partition->subst_params[i],
+                                        partition->subst_params[i] + n_subst_rates));
+    }
+
+    if (partition->rate_cats > 1)
+    {
+      model.ratecat_rates(doubleVector(partition->rates,
+                                       partition->rates + partition->rate_cats));
+      model.ratecat_weights(doubleVector(partition->rate_weights,
+                                         partition->rate_weights + partition->rate_cats));
+    }
+  }
+  else
+    throw runtime_error("incompatible partition!");
+}
+
+void assign(pll_partition_t * partition, const Model& model)
+{
+  if (model.num_states() == partition->states &&
+      model.num_submodels() == partition->rate_matrices)
+  {
+    /* set rate categories & weights */
+    pll_set_category_rates(partition, model.ratecat_rates().data());
+    pll_set_category_weights(partition, model.ratecat_weights().data());
+
+    /* now iterate over rate matrices and set all params */
+    for (size_t i = 0; i < partition->rate_matrices; ++i)
+    {
+      /* set base frequencies */
+      assert(!model.base_freqs(i).empty());
+      pll_set_frequencies(partition, i, model.base_freqs(i).data());
+
+      /* set substitution rates */
+      assert(!model.subst_rates(i).empty());
+      pll_set_subst_params(partition, i, model.subst_rates(i).data());
+
+      /* set p-inv value */
+      pll_update_invariant_sites_proportion (partition, i, model.pinv());
+    }
+  }
+  else
+    throw runtime_error("incompatible partition!");
+}
+
+static string get_param_mode_str(ParamValue mode)
+{
+  return ParamValueNames[(size_t) mode];
+}
+
+static string get_ratehet_mode_str(const Model& m)
+{
+  if (m.num_ratecats() == 1)
+    return "NONE";
+  else
+    return (m.ratehet_mode() == PLLMOD_UTIL_MIXTYPE_GAMMA) ? "GAMMA" :
+            (m.ratehet_mode() == PLLMOD_UTIL_MIXTYPE_FREE) ? "FREE" : "FIXED";
+}
+
+LogStream& operator<<(LogStream& stream, const Model& m)
+{
+  if (m.param_mode(PLLMOD_OPT_PARAM_BRANCH_LEN_SCALER) != ParamValue::undefined)
+    stream << "   Speed ("  << get_param_mode_str(m.param_mode(PLLMOD_OPT_PARAM_BRANCH_LEN_SCALER))
+             << "): " << m.brlen_scaler() << endl;
+
+  stream << "   Rate heterogeneity: " << get_ratehet_mode_str(m);
+  if (m.num_ratecats() > 1)
+  {
+    stream << " (" << m.num_ratecats() << " cats)";
+    if (m.ratehet_mode() == PLLMOD_UTIL_MIXTYPE_GAMMA)
+      stream << ",  alpha: " << setprecision(3) << m.alpha() << " ("  << get_param_mode_str(m.param_mode(PLLMOD_OPT_PARAM_ALPHA))
+                 << ")";
+    stream << ",  weights&rates: ";
+    for (size_t i = 0; i < m.num_ratecats(); ++i)
+      stream << "(" << m.ratecat_weights()[i] << "," << m.ratecat_rates()[i] << ") ";
+  }
+  stream << endl;
+
+  if (m.param_mode(PLLMOD_OPT_PARAM_PINV) != ParamValue::undefined)
+    stream << "   P-inv (" << get_param_mode_str(m.param_mode(PLLMOD_OPT_PARAM_PINV)) << "): " <<
+               m.pinv() << endl;
+
+  stream << "   Base frequencies (" << get_param_mode_str(m.param_mode(PLLMOD_OPT_PARAM_FREQUENCIES)) << "): ";
+  for (size_t i = 0; i < m.num_submodels(); ++i)
+  {
+    if (m.num_submodels() > 1)
+      stream << "\nM" << i << ": ";
+
+    for (size_t j = 0; j < m.base_freqs(i).size(); ++j)
+      stream << setprecision(3) << m.base_freqs(i)[j] << " ";
+  }
+  stream << endl;
+
+  stream << "   Substitution rates (" << get_param_mode_str(m.param_mode(PLLMOD_OPT_PARAM_SUBST_RATES)) << "): ";
+  for (size_t i = 0; i < m.num_submodels(); ++i)
+  {
+    if (m.num_submodels() > 1)
+      stream << "\nM " << i << ": ";
+
+    for (size_t j = 0; j < m.subst_rates(i).size(); ++j)
+      stream << m.subst_rates(i)[j] << " ";
+  }
+  stream << endl;
+
+  return stream;
+}
+
diff --git a/src/Model.hpp b/src/Model.hpp
new file mode 100644
index 0000000..283803c
--- /dev/null
+++ b/src/Model.hpp
@@ -0,0 +1,135 @@
+#ifndef RAXML_MODEL_H_
+#define RAXML_MODEL_H_
+
+#include "common.h"
+
+class SubstitutionModel
+{
+public:
+  SubstitutionModel(const pllmod_subst_model_t& sm) :
+    _states(sm.states), _name(sm.name)
+  {
+    if (sm.freqs)
+      _base_freqs.assign(sm.freqs, sm.freqs + sm.states);
+    if (sm.rates)
+      _subst_rates.assign(sm.rates, sm.rates + sm.states*(sm.states-1)/2);
+    if (sm.rate_sym)
+      _rate_sym.assign(sm.rate_sym, sm.rate_sym + sm.states*(sm.states-1)/2);
+    if (sm.freq_sym)
+      _freq_sym.assign(sm.freq_sym, sm.freq_sym + sm.states);
+  };
+
+  // getters
+  unsigned int states() const;
+  std::string name() const;
+  const doubleVector& base_freqs() const { return _base_freqs; };
+  const doubleVector& subst_rates() const { return _subst_rates; };
+  const intVector& rate_sym() const { return _rate_sym; };
+  const intVector& freq_sym() const { return _freq_sym; };
+
+  // setters
+  void base_freqs(const doubleVector& v)
+  {
+//    std::cout << "expected: " << _states << ", got: " << v.size() << std::endl;
+    if (v.size() != _states)
+      throw std::invalid_argument("Invalid size of base_freqs vector!");
+
+    _base_freqs = v;
+  };
+
+  void subst_rates(const doubleVector& v)
+  {
+    if (v.size() != _states * (_states - 1) / 2)
+      throw std::invalid_argument("Invalid size of subst_rates vector!");
+
+    _subst_rates = v;
+  };
+
+private:
+  unsigned int _states;
+  std::string _name;
+  doubleVector _base_freqs;
+  doubleVector _subst_rates;
+  intVector _rate_sym;
+  intVector _freq_sym;
+};
+
+class Model
+{
+public:
+  Model (DataType data_type = DataType::autodetect, const std::string &model_string = "GTR");
+  Model (const std::string &model_string) : Model(DataType::autodetect, model_string) {};
+
+  Model(const Model& other) = default;
+
+  /* getters */
+  DataType data_type() const { return _data_type; };
+  unsigned int num_states() const { return _num_states; };
+  std::string name() const { return _name; };
+
+  const unsigned int* charmap() const;
+  const SubstitutionModel submodel(size_t i) const { return _submodels.at(i); };
+
+  unsigned int ratehet_mode() const { return _rate_het; };
+  unsigned int num_ratecats() const { return _num_ratecats; };
+  unsigned int num_submodels() const { return _num_submodels; };
+  const doubleVector& ratecat_rates() const { return _ratecat_rates; };
+  const doubleVector& ratecat_weights() const { return _ratecat_weights; };
+  const std::vector<unsigned int>& ratecat_submodels() const { return _ratecat_submodels; };
+
+  double alpha() const { return _alpha; };
+  double pinv() const { return _pinv; };
+  double brlen_scaler() const { return _brlen_scaler; };
+  const doubleVector& base_freqs(unsigned int i) const { return _submodels.at(i).base_freqs(); };
+  const doubleVector& subst_rates(unsigned int i) const { return _submodels.at(i).subst_rates(); };
+
+  std::string to_string() const;
+  int params_to_optimize() const;
+  ParamValue param_mode(int param) const { return _param_mode.at(param); };
+
+  /* setters */
+  void alpha(double value) { _alpha = value; };
+  void pinv(double value) { _pinv = value; };
+  void brlen_scaler(double value) { _brlen_scaler = value; };
+  void base_freqs(size_t i, const doubleVector& value) { _submodels.at(i).base_freqs(value); };
+  void subst_rates(size_t i, const doubleVector& value) { _submodels.at(i).subst_rates(value); };
+  void base_freqs(const doubleVector& value) { for (SubstitutionModel& s: _submodels) s.base_freqs(value); };
+  void subst_rates(const doubleVector& value) { for (SubstitutionModel& s: _submodels) s.subst_rates(value); };
+  void ratecat_rates(doubleVector const& value) { _ratecat_rates = value; };
+  void ratecat_weights(doubleVector const& value) { _ratecat_weights = value; };
+
+  /* initialization */
+  void init_from_string(const std::string& model_string);
+
+private:
+  std::string _name;
+  DataType _data_type;
+  unsigned int _num_states;
+
+  unsigned int _rate_het;
+  unsigned int _num_ratecats;
+  unsigned int _num_submodels;
+  doubleVector _ratecat_rates;
+  doubleVector _ratecat_weights;
+  std::vector<unsigned int> _ratecat_submodels;
+
+  double _alpha;
+  double _pinv;
+  double _brlen_scaler;
+
+  std::vector<SubstitutionModel> _submodels;
+
+  std::unordered_map<int,ParamValue> _param_mode;
+
+  void autodetect_data_type(const std::string& model_name);
+  pllmod_mixture_model_t * init_mix_model(const std::string& model_name);
+  void init_model_opts(const std::string& model_opts, const pllmod_mixture_model_t& mix_model);
+};
+
+void assign(Model& model, const pllmod_msa_stats_t * stats);
+void assign(Model& model, const pll_partition_t * partition);
+void assign(pll_partition_t * partition, const Model& model);
+
+LogStream& operator<<(LogStream& stream, const Model& m);
+
+#endif /* RAXML_MODEL_H_ */
diff --git a/src/Optimizer.cpp b/src/Optimizer.cpp
new file mode 100644
index 0000000..2bcdcab
--- /dev/null
+++ b/src/Optimizer.cpp
@@ -0,0 +1,245 @@
+#include "Optimizer.hpp"
+
+using namespace std;
+
+Optimizer::Optimizer (const Options &opts) :
+    _lh_epsilon(opts.lh_epsilon), _spr_radius(opts.spr_radius), _spr_cutoff(opts.spr_cutoff)
+{
+}
+
+Optimizer::~Optimizer ()
+{
+  // TODO Auto-generated destructor stub
+}
+
+double Optimizer::optimize(TreeInfo& treeinfo, double lh_epsilon)
+{
+  double new_loglh = treeinfo.loglh();
+
+//  if (!params_to_optimize)
+//    return new_loglh;
+
+  int iter_num = 0;
+  double cur_loglh;
+  do
+  {
+    cur_loglh = new_loglh;
+
+    treeinfo.optimize_params_all(lh_epsilon);
+
+    new_loglh = treeinfo.loglh();
+
+//      printf("old: %f, new: %f\n", cur_loglh, new_loglh);
+    assert(cur_loglh - new_loglh < -new_loglh * RAXML_DOUBLE_TOLERANCE);
+
+    iter_num++;
+//    LOG_PROGRESS(new_loglh) << "Iteration %d: logLH = %f\n", iter_num, new_loglh);
+  }
+  while (new_loglh - cur_loglh > lh_epsilon);
+
+  return new_loglh;
+}
+
+double Optimizer::optimize_topology(TreeInfo& treeinfo, CheckpointManager& cm)
+{
+  const double fast_modopt_eps = 10.;
+  const double interim_modopt_eps = 3.;
+
+  SearchState local_search_state = cm.search_state();
+  auto& search_state = ParallelContext::master_thread() ? cm.search_state() : local_search_state;
+  ParallelContext::barrier();
+
+  /* set references such that we can work directly with checkpoint values */
+  double &loglh = search_state.loglh;
+  int& iter = search_state.iteration;
+  spr_round_params& spr_params = search_state.spr_params;
+  int& best_fast_radius = search_state.fast_spr_radius;
+
+  CheckpointStep resume_step = search_state.step;
+
+  /* Compute initial LH of the starting tree */
+  loglh = treeinfo.loglh();
+
+  auto do_step = [&search_state,resume_step](CheckpointStep step) -> bool
+      {
+        if (step >= resume_step)
+        {
+          search_state.step = step;
+          return true;
+        }
+        else
+          return false;;
+      };
+
+  if (do_step(CheckpointStep::brlenOpt))
+  {
+    cm.update_and_write(treeinfo);
+    LOG_PROGRESS(loglh) << "Initial branch length optimization" << endl;
+    loglh = treeinfo.optimize_branches(fast_modopt_eps, 1);
+  }
+
+  /* Initial fast model optimization */
+  if (do_step(CheckpointStep::modOpt1))
+  {
+    cm.update_and_write(treeinfo);
+    LOG_PROGRESS(loglh) << "Model parameter optimization (eps = " << fast_modopt_eps << ")" << endl;
+    loglh = optimize(treeinfo, fast_modopt_eps);
+  //  print_model_params(treeinfo, useropt);
+
+    /* start spr rounds from the beginning */
+    iter = 0;
+  }
+
+  // do SPRs
+  const int radius_limit = min(22, (int) treeinfo.pll_treeinfo().tip_count - 3 );
+  const int radius_step = 5;
+
+//  treeinfo->counter = 0;
+
+  if (_spr_radius > 0)
+    best_fast_radius = _spr_radius;
+  else
+  {
+    /* auto detect best radius for fast SPRs */
+
+    if (do_step(CheckpointStep::radiusDetect))
+    {
+      if (iter == 0)
+      {
+        spr_params.thorough = 0;
+        spr_params.radius_min = 1;
+        best_fast_radius = spr_params.radius_max = 5;
+        spr_params.ntopol_keep = 0;
+        spr_params.subtree_cutoff = 0.;
+      }
+
+      double best_loglh = loglh;
+
+      while (spr_params.radius_min < radius_limit)
+      {
+        cm.update_and_write(treeinfo);
+
+        ++iter;
+        LOG_PROGRESS(best_loglh) << "AUTODETECT spr round " << iter << " (radius: " <<
+            spr_params.radius_max << ")" << endl;
+        loglh = treeinfo.spr_round(spr_params);
+
+        if (!loglh)
+          throw runtime_error("ERROR in SPR round: " + string(pll_errmsg));
+
+        if (loglh - best_loglh > 0.1)
+        {
+          /* LH improved, try to increase the radius */
+          best_fast_radius = spr_params.radius_max;
+          spr_params.radius_min += radius_step;
+          spr_params.radius_max += radius_step;
+          best_loglh = loglh;
+        }
+        else
+          break;
+      }
+    }
+  }
+
+  LOG_PROGRESS(loglh) << "SPR radius for FAST iterations: " << best_fast_radius << " (" <<
+                 (_spr_radius > 0 ? "user-specified" : "autodetect") << ")" << endl;
+
+  if (do_step(CheckpointStep::modOpt2))
+  {
+    cm.update_and_write(treeinfo);
+
+    /* optimize model parameters a bit more thoroughly */
+    LOG_PROGRESS(loglh) << "Model parameter optimization (eps = " <<
+                                                            interim_modopt_eps << ")" << endl;
+    loglh = optimize(treeinfo, interim_modopt_eps);
+
+    /* reset iteration counter for fast SPRs */
+    iter = 0;
+
+    /* initialize search params */
+    spr_params.thorough = 0;
+    spr_params.radius_min = 1;
+    spr_params.radius_max = best_fast_radius;
+    spr_params.ntopol_keep = 20;
+    spr_params.subtree_cutoff = _spr_cutoff;
+    spr_params.reset_cutoff_info(loglh);
+  }
+
+  double old_loglh;
+
+  if (do_step(CheckpointStep::fastSPR))
+  {
+    do
+    {
+      cm.update_and_write(treeinfo);
+      ++iter;
+      old_loglh = loglh;
+      LOG_PROGRESS(old_loglh) << (spr_params.thorough ? "SLOW" : "FAST") <<
+          " spr round " << iter << " (radius: " << spr_params.radius_max << ")" << endl;
+      loglh = treeinfo.spr_round(spr_params);
+
+      /* optimize ALL branches */
+      loglh = treeinfo.optimize_branches(_lh_epsilon, 1);
+    }
+    while (loglh - old_loglh > _lh_epsilon);
+  }
+
+  if (do_step(CheckpointStep::modOpt3))
+  {
+    cm.update_and_write(treeinfo);
+    LOG_PROGRESS(loglh) << "Model parameter optimization (eps = " << 1.0 << ")" << endl;
+    loglh = optimize(treeinfo, 1.0);
+
+    /* init slow SPRs */
+    spr_params.thorough = 1;
+    spr_params.radius_min = 1;
+    spr_params.radius_max = radius_step;
+    iter = 0;
+  }
+
+  if (do_step(CheckpointStep::slowSPR))
+  {
+    do
+    {
+      cm.update_and_write(treeinfo);
+      ++iter;
+      old_loglh = loglh;
+      LOG_PROGRESS(old_loglh) << (spr_params.thorough ? "SLOW" : "FAST") <<
+          " spr round " << iter << " (radius: " << spr_params.radius_max << ")" << endl;
+      loglh = treeinfo.spr_round(spr_params);
+
+      /* optimize ALL branches */
+      loglh = treeinfo.optimize_branches(_lh_epsilon, 1);
+
+      bool impr = (loglh - old_loglh > _lh_epsilon);
+      if (impr)
+      {
+        /* got improvement in thorough mode: reset min radius to 1 */
+        spr_params.radius_min = 1;
+        /* reset max radius to 5; or maybe better keep old value? */
+        spr_params.radius_max = radius_step;
+      }
+      else
+      {
+        /* no improvement in thorough mode: set min radius to old max,
+         * and increase max radius by the step */
+        spr_params.radius_min = spr_params.radius_max + 1;
+        spr_params.radius_max += radius_step;
+      }
+    }
+    while (spr_params.radius_min >= 0 && spr_params.radius_min < radius_limit);
+  }
+
+  /* Final thorough model optimization */
+  if (do_step(CheckpointStep::modOpt4))
+  {
+    cm.update_and_write(treeinfo);
+    LOG_PROGRESS(loglh) << "Model parameter optimization (eps = " << _lh_epsilon << ")" << endl;
+    loglh = optimize(treeinfo, _lh_epsilon);
+  }
+
+  if (do_step(CheckpointStep::finish))
+    cm.update_and_write(treeinfo);
+
+  return loglh;
+}
diff --git a/src/Optimizer.hpp b/src/Optimizer.hpp
new file mode 100644
index 0000000..0831019
--- /dev/null
+++ b/src/Optimizer.hpp
@@ -0,0 +1,23 @@
+#ifndef RAXML_OPTIMIZER_H_
+#define RAXML_OPTIMIZER_H_
+
+#include "TreeInfo.hpp"
+#include "Checkpoint.hpp"
+
+class Optimizer
+{
+public:
+  Optimizer (const Options& opts);
+  virtual
+  ~Optimizer ();
+
+  double optimize(TreeInfo& treeinfo, double lh_epsilon);
+  double optimize(TreeInfo& treeinfo) { return optimize(treeinfo, _lh_epsilon); };
+  double optimize_topology(TreeInfo& treeinfo, CheckpointManager& cm);
+private:
+  double _lh_epsilon;
+  int _spr_radius;
+  double _spr_cutoff;
+};
+
+#endif /* RAXML_OPTIMIZER_H_ */
diff --git a/src/Options.cpp b/src/Options.cpp
new file mode 100644
index 0000000..dd46b27
--- /dev/null
+++ b/src/Options.cpp
@@ -0,0 +1,152 @@
+#include "Options.hpp"
+//#include <stdlib.h>
+
+using namespace std;
+
+string Options::output_fname(const string& suffix) const
+{
+  return (outfile_prefix.empty() ? msa_file : outfile_prefix) + ".raxml." + suffix;
+}
+
+void Options::set_default_outfile(std::string& fname, const std::string& suffix)
+{
+  if (fname.empty())
+    fname = output_fname(suffix);
+}
+
+void Options::set_default_outfiles()
+{
+  set_default_outfile(outfile_names.log, "log");
+  set_default_outfile(outfile_names.checkpoint, "ckp");
+  set_default_outfile(outfile_names.start_tree, "startTree");
+  set_default_outfile(outfile_names.best_tree, "bestTree");
+  set_default_outfile(outfile_names.ml_trees, "mlTrees");
+  set_default_outfile(outfile_names.bootstrap_trees, "bootstraps");
+  set_default_outfile(outfile_names.support_tree, "support");
+}
+
+bool Options::result_files_exist()
+{
+  return sysutil_file_exists(best_tree_file()) || sysutil_file_exists(bootstrap_trees_file()) ||
+      sysutil_file_exists(support_tree_file());
+}
+
+static string get_simd_arch_name(unsigned int simd_arch)
+{
+  switch(simd_arch)
+  {
+    case PLL_ATTRIB_ARCH_CPU:
+      return "NONE";
+      break;
+    case PLL_ATTRIB_ARCH_SSE:
+      return "SSE3";
+      break;
+    case PLL_ATTRIB_ARCH_AVX:
+      return "AVX";
+      break;
+    case PLL_ATTRIB_ARCH_AVX2:
+      return "AVX2";
+      break;
+    case PLL_ATTRIB_ARCH_AVX512:
+      return "AVX512";
+      break;
+    default:
+      return "UNKNOWN";
+  }
+}
+
+std::ostream& operator<<(std::ostream& stream, const Options& opts)
+{
+  stream << "RAxML-NG was called as follows:" << endl << endl << opts.cmdline << endl << endl;
+
+  stream << "Analysis options:" << endl;
+
+  stream << "  run mode: ";
+  switch(opts.command)
+  {
+    case Command::search:
+      stream << "ML tree search";
+      break;
+    case Command::evaluate:
+      stream << "Evaluate tree likelihood";
+      break;
+    case Command::bootstrap:
+      stream << "Bootstrapping";
+      break;
+    case Command::all:
+      stream << "ML tree search + bootstrapping";
+      break;
+    default:
+      break;
+  }
+  stream << endl;
+
+  stream << "  start tree(s): ";
+  switch(opts.start_tree)
+  {
+    case StartingTree::random:
+      stream << "random";
+      break;
+    case StartingTree::parsimony:
+      stream << "parsimony";
+      break;
+    case StartingTree::user:
+      stream << "user";
+      break;
+  }
+  if (opts.num_searches > 1)
+    stream << " (" << opts.num_searches << ")";
+  stream << endl;
+
+  stream << "  random seed: " << opts.random_seed << endl;
+  stream << "  tip-inner: " << (opts.use_tip_inner ? "ON" : "OFF") << endl;
+  stream << "  pattern compression: " << (opts.use_pattern_compression ? "ON" : "OFF") << endl;
+
+  if (opts.command == Command::search)
+  {
+    if (opts.spr_radius > 0)
+      stream << "  fast spr radius: " << opts.spr_radius << endl;
+    else
+      stream << "  fast spr radius: AUTO" << endl;
+
+    if (opts.spr_cutoff > 0.)
+      stream << "  spr subtree cutoff: " << opts.spr_cutoff << endl;
+    else
+      stream << "  spr subtree cutoff: OFF" << endl;
+  }
+
+  stream << "  branch lengths: ";
+  stream << (opts.optimize_brlen ? "ML estimate" : "user-specified") << " (";
+  if (opts.brlen_linkage == PLLMOD_TREE_BRLEN_SCALED)
+    stream << "proportional";
+  else if (opts.brlen_linkage == PLLMOD_TREE_BRLEN_UNLINKED)
+    stream << "unlinked";
+  else
+    stream << "linked";
+
+  stream << ")" << endl;
+
+
+  stream << "  SIMD kernels: " << get_simd_arch_name(opts.simd_arch) << endl;
+
+  stream << "  parallelization: ";
+  if (opts.num_ranks > 1 && opts.num_threads > 1)
+  {
+    stream << "hybrid MPI+PTHREADS (" << opts.num_ranks <<  " ranks x " <<
+        opts.num_threads <<  " threads)" << endl;
+  }
+  else if (opts.num_ranks > 1)
+    stream <<  "MPI (" << opts.num_ranks << " ranks)" << endl;
+  else if (opts.num_threads > 1)
+    stream << "PTHREADS (" << opts.num_threads << " threads)" << endl;
+  else
+    stream << "NONE/sequential" << endl;
+
+  stream << endl;
+
+  return stream;
+}
+
+
+
+
diff --git a/src/Options.hpp b/src/Options.hpp
new file mode 100644
index 0000000..8a37cc7
--- /dev/null
+++ b/src/Options.hpp
@@ -0,0 +1,94 @@
+#ifndef RAXML_OPTIONS_HPP_
+#define RAXML_OPTIONS_HPP_
+
+#include "common.h"
+#include "PartitionedMSA.hpp"
+
+struct OutputFileNames
+{
+  std::string log;
+  std::string checkpoint;       /* checkpoint file */
+  std::string start_tree;
+  std::string best_tree;
+  std::string ml_trees;
+  std::string bootstrap_trees;
+  std::string support_tree;
+};
+
+class Options
+{
+public:
+  Options() : cmdline(""), command(Command::none), use_tip_inner(true),
+  use_pattern_compression(true), use_prob_msa(false), use_rate_scalers(false),
+  optimize_model(true), optimize_brlen(true), redo_mode(false), log_level(LogLevel::progress),
+  msa_format(FileFormat::autodetect), data_type(DataType::autodetect),
+  random_seed(0), start_tree(StartingTree::random), lh_epsilon(DEF_LH_EPSILON), spr_radius(-1),
+  spr_cutoff(1.0), brlen_linkage(PLLMOD_TREE_BRLEN_SCALED), simd_arch(PLL_ATTRIB_ARCH_CPU),
+  num_searches(1), num_bootstraps(100),
+  tree_file(""), msa_file(""), model_file(""), outfile_prefix(""),
+  num_threads(1), num_ranks(1)
+  {};
+
+  ~Options() = default;
+
+  std::string cmdline;
+
+  Command command;
+
+  bool use_tip_inner;
+  bool use_pattern_compression;
+  bool use_prob_msa;
+  bool use_rate_scalers;
+
+  bool optimize_model;
+  bool optimize_brlen;
+
+  bool redo_mode;
+
+  LogLevel log_level;
+  FileFormat msa_format;
+  DataType data_type;
+  long random_seed;
+  StartingTree start_tree;
+  double lh_epsilon;
+  int spr_radius;
+  double spr_cutoff;
+  int brlen_linkage;
+  unsigned int simd_arch;
+
+  unsigned int num_searches;
+  unsigned int num_bootstraps;
+
+  /* I/O */
+  std::string tree_file;
+  std::string msa_file;
+  std::string model_file;     /* could be also model string */
+  std::string outfile_prefix;
+  OutputFileNames outfile_names;
+
+  /* parallelization stuff */
+  unsigned int num_threads;     /* number of threads */
+  unsigned int num_ranks;       /* number of MPI ranks */
+
+  std::string output_fname(const std::string& suffix) const;
+
+  const std::string& log_file() const { return outfile_names.log; }
+  const std::string& checkp_file() const { return outfile_names.checkpoint; }
+  const std::string& start_tree_file() const { return outfile_names.start_tree; }
+  const std::string& best_tree_file() const { return outfile_names.best_tree; }
+  const std::string& ml_trees_file() const { return outfile_names.ml_trees; }
+  const std::string& bootstrap_trees_file() const { return outfile_names.bootstrap_trees; }
+  const std::string& support_tree_file() const { return outfile_names.support_tree; }
+
+  void set_default_outfiles();
+
+  bool result_files_exist();
+
+private:
+  void set_default_outfile(std::string& fname, const std::string& suffix);
+};
+
+std::ostream& operator<<(std::ostream& stream, const Options& opts);
+
+
+#endif /* RAXML_OPTIONS_HPP_ */
diff --git a/src/ParallelContext.cpp b/src/ParallelContext.cpp
new file mode 100644
index 0000000..1faf631
--- /dev/null
+++ b/src/ParallelContext.cpp
@@ -0,0 +1,298 @@
+#include "ParallelContext.hpp"
+
+#include "Options.hpp"
+
+using namespace std;
+
+// TODO: estimate based on #threads and #partitions?
+#define PARALLEL_BUF_SIZE (128 * 1024)
+
+size_t ParallelContext::_num_threads = 1;
+size_t ParallelContext::_num_ranks = 1;
+size_t ParallelContext::_rank_id = 0;
+thread_local size_t ParallelContext::_thread_id = 0;
+std::vector<ThreadType> ParallelContext::_threads;
+std::vector<char> ParallelContext::_parallel_buf;
+std::unordered_map<ThreadIDType, ParallelContext> ParallelContext::_thread_ctx_map;
+MutexType ParallelContext::mtx;
+
+void ParallelContext::init_mpi(int argc, char * argv[])
+{
+#ifdef _RAXML_MPI
+  {
+    int tmp;
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &tmp);
+    _rank_id = (size_t) tmp;
+    MPI_Comm_size(MPI_COMM_WORLD, &tmp);
+    _num_ranks = (size_t) tmp;
+//    printf("size: %lu, rank: %lu\n", _num_ranks, _rank_id);
+  }
+#else
+  UNUSED(argc);
+  UNUSED(argv);
+#endif
+}
+
+void ParallelContext::start_thread(size_t thread_id, const std::function<void()>& thread_main)
+{
+  ParallelContext::_thread_id = thread_id;
+  thread_main();
+}
+
+void ParallelContext::init_pthreads(const Options& opts, const std::function<void()>& thread_main)
+{
+  _num_threads = opts.num_threads;
+  _parallel_buf.reserve(PARALLEL_BUF_SIZE);
+
+#ifdef _RAXML_PTHREADS
+  /* Launch threads */
+  for (size_t i = 1; i < _num_threads; ++i)
+  {
+    _threads.emplace_back(ParallelContext::start_thread, i, thread_main);
+  }
+#endif
+}
+
+void ParallelContext::finalize(bool force)
+{
+#ifdef _RAXML_PTHREADS
+  for (thread& t: _threads)
+  {
+    if (force)
+      t.detach();
+    else
+      t.join();
+  }
+  _threads.clear();
+#endif
+
+#ifdef _RAXML_MPI
+  if (force)
+    MPI_Abort(MPI_COMM_WORLD, -1);
+  else
+    MPI_Barrier(MPI_COMM_WORLD);
+
+  MPI_Finalize();
+#endif
+}
+
+void ParallelContext::barrier()
+{
+#ifdef _RAXML_MPI
+  mpi_barrier();
+#endif
+
+#ifdef _RAXML_PTHREADS
+  thread_barrier();
+#endif
+}
+
+void ParallelContext::mpi_barrier()
+{
+#ifdef _RAXML_MPI
+  if (_thread_id == 0 && _num_ranks > 1)
+    MPI_Barrier(MPI_COMM_WORLD);
+#endif
+}
+
+void ParallelContext::thread_barrier()
+{
+  static volatile unsigned int barrier_counter = 0;
+  static thread_local volatile int myCycle = 0;
+  static volatile int proceed = 0;
+
+  __sync_fetch_and_add( &barrier_counter, 1);
+
+  if(_thread_id == 0)
+  {
+    while(barrier_counter != ParallelContext::_num_threads);
+    barrier_counter = 0;
+    proceed = !proceed;
+  }
+  else
+  {
+    while(myCycle == proceed);
+    myCycle = !myCycle;
+  }
+}
+
+void ParallelContext::thread_reduce(double * data, size_t size, int op)
+{
+  /* synchronize */
+  thread_barrier();
+
+  double *double_buf = (double*) _parallel_buf.data();
+
+  /* collect data from threads */
+  size_t i, j;
+  for (i = 0; i < size; ++i)
+    double_buf[_thread_id * size + i] = data[i];
+
+  /* synchronize */
+  thread_barrier();
+
+  /* reduce */
+  for (i = 0; i < size; ++i)
+  {
+    switch(op)
+    {
+      case PLLMOD_TREE_REDUCE_SUM:
+      {
+        data[i] = 0.;
+        for (j = 0; j < ParallelContext::_num_threads; ++j)
+          data[i] += double_buf[j * size + i];
+      }
+      break;
+      case PLLMOD_TREE_REDUCE_MAX:
+      {
+        data[i] = _parallel_buf[i];
+        for (j = 1; j < ParallelContext::_num_threads; ++j)
+          data[i] = max(data[i], double_buf[j * size + i]);
+      }
+      break;
+      case PLLMOD_TREE_REDUCE_MIN:
+      {
+        data[i] = _parallel_buf[i];
+        for (j = 1; j < ParallelContext::_num_threads; ++j)
+          data[i] = min(data[i], double_buf[j * size + i]);
+      }
+      break;
+    }
+  }
+
+  //needed?
+//  parallel_barrier(useropt);
+}
+
+
+void ParallelContext::parallel_reduce(double * data, size_t size, int op)
+{
+#ifdef _RAXML_PTHREADS
+  if (_num_threads > 1)
+    thread_reduce(data, size, op);
+#endif
+
+#ifdef _RAXML_MPI
+  if (_num_ranks > 1)
+  {
+    thread_barrier();
+
+    if (_thread_id == 0)
+    {
+      MPI_Op reduce_op;
+      if (op == PLLMOD_TREE_REDUCE_SUM)
+        reduce_op = MPI_SUM;
+      else if (op == PLLMOD_TREE_REDUCE_MAX)
+        reduce_op = MPI_MAX;
+      else if (op == PLLMOD_TREE_REDUCE_MIN)
+        reduce_op = MPI_MIN;
+      else
+        assert(0);
+
+#if 1
+      MPI_Allreduce(MPI_IN_PLACE, data, size, MPI_DOUBLE, reduce_op, MPI_COMM_WORLD);
+#else
+      // not sure if MPI_IN_PLACE will work in all cases...
+      MPI_Allreduce(data, _parallel_buf.data(), size, MPI_DOUBLE, reduce_op, MPI_COMM_WORLD);
+      memcpy(data, _parallel_buf.data(), size * sizeof(double));
+#endif
+    }
+
+    if (_num_threads > 1)
+      thread_broadcast(0, data, size * sizeof(double));
+  }
+#endif
+}
+
+void ParallelContext::parallel_reduce_cb(void * context, double * data, size_t size, int op)
+{
+  ParallelContext::parallel_reduce(data, size, op);
+  UNUSED(context);
+}
+
+void ParallelContext::thread_broadcast(size_t source_id, void * data, size_t size)
+{
+  /* write to buf */
+  if (_thread_id == source_id)
+  {
+    memcpy((void *) _parallel_buf.data(), data, size);
+  }
+
+  /* synchronize */
+  thread_barrier();
+
+//  pthread_barrier_wait(&barrier);
+  __sync_synchronize();
+
+  /* read from buf*/
+  if (_thread_id != source_id)
+  {
+    memcpy(data, (void *) _parallel_buf.data(), size);
+  }
+
+  thread_barrier();
+}
+
+void ParallelContext::thread_send_master(size_t source_id, void * data, size_t size) const
+{
+  /* write to buf */
+  if (_thread_id == source_id && data && size)
+  {
+    memcpy((void *) _parallel_buf.data(), data, size);
+  }
+
+  /* synchronize */
+  barrier();
+
+//  pthread_barrier_wait(&barrier);
+  __sync_synchronize();
+
+  /* read from buf*/
+  if (_thread_id == 0)
+  {
+    memcpy(data, (void *) _parallel_buf.data(), size);
+  }
+
+  barrier();
+}
+
+void ParallelContext::mpi_gather_custom(std::function<int(void*,int)> prepare_send_cb,
+                                        std::function<void(void*,int)> process_recv_cb)
+{
+#ifdef _RAXML_MPI
+  /* we're gonna use _parallel_buf, so make sure other threads don't interfere... */
+  UniqueLock lock;
+
+  if (_rank_id == 0)
+  {
+    for (size_t r = 1; r < _num_ranks; ++r)
+    {
+      int recv_size;
+      MPI_Status status;
+      MPI_Probe(r, 0, MPI_COMM_WORLD, &status);
+      MPI_Get_count(&status, MPI_BYTE, &recv_size);
+
+//      printf("recv: %lu\n", recv_size);
+
+      _parallel_buf.reserve(recv_size);
+
+      MPI_Recv((void*) _parallel_buf.data(), recv_size, MPI_BYTE,
+               r, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+      process_recv_cb(_parallel_buf.data(), recv_size);
+    }
+  }
+  else
+  {
+    auto send_size = prepare_send_cb(_parallel_buf.data(), _parallel_buf.capacity());
+//    printf("sent: %lu\n", send_size);
+
+    MPI_Send(_parallel_buf.data(), send_size, MPI_BYTE, 0, 0, MPI_COMM_WORLD);
+  }
+#else
+  UNUSED(prepare_send_cb);
+  UNUSED(process_recv_cb);
+#endif
+}
+
diff --git a/src/ParallelContext.hpp b/src/ParallelContext.hpp
new file mode 100644
index 0000000..c8c905c
--- /dev/null
+++ b/src/ParallelContext.hpp
@@ -0,0 +1,87 @@
+#ifndef RAXML_PARALLELCONTEXT_HPP_
+#define RAXML_PARALLELCONTEXT_HPP_
+
+#include <vector>
+#include <unordered_map>
+#include <memory>
+
+#ifdef _RAXML_MPI
+#include <mpi.h>
+#endif
+
+#ifdef _RAXML_PTHREADS
+#include <thread>
+#include <mutex>
+typedef std::thread ThreadType;
+typedef std::thread::id ThreadIDType;
+typedef std::mutex MutexType;
+typedef std::unique_lock<std::mutex> LockType;
+#else
+typedef int ThreadType;
+typedef size_t ThreadIDType;
+typedef int MutexType;
+typedef int LockType;
+#endif
+
+class Options;
+
+class ParallelContext
+{
+public:
+  static void init_mpi(int argc, char * argv[]);
+  static void init_pthreads(const Options& opts, const std::function<void()>& thread_main);
+
+  static void finalize(bool force = false);
+
+  static size_t num_procs() { return _num_ranks * _num_threads; }
+  static size_t num_threads() { return _num_threads; }
+  static size_t num_ranks() { return _num_ranks; }
+
+  static void parallel_reduce_cb(void * context, double * data, size_t size, int op);
+  static void thread_reduce(double * data, size_t size, int op);
+  static void thread_broadcast(size_t source_id, void * data, size_t size);
+  void thread_send_master(size_t source_id, void * data, size_t size) const;
+
+  static void mpi_gather_custom(std::function<int(void*,int)> prepare_send_cb,
+                                std::function<void(void*,int)> process_recv_cb);
+
+  static bool master() { return proc_id() == 0; }
+  static bool master_rank() { return _rank_id == 0; }
+  static bool master_thread() { return _thread_id == 0; }
+  static size_t thread_id() { return _thread_id; }
+  static size_t proc_id() { return _rank_id * _num_threads + _thread_id; }
+
+  static void barrier();
+  static void thread_barrier();
+  static void mpi_barrier();
+
+  /* static singleton, no instantiation/copying/moving */
+  ParallelContext() = delete;
+  ParallelContext(const ParallelContext& other) = delete;
+  ParallelContext(ParallelContext&& other) = delete;
+  ParallelContext& operator=(const ParallelContext& other) = delete;
+  ParallelContext& operator=(ParallelContext&& other) = delete;
+
+  class UniqueLock
+  {
+  public:
+    UniqueLock() : _lock(mtx) {}
+  private:
+    LockType _lock;
+  };
+private:
+  static std::vector<ThreadType> _threads;
+  static size_t _num_threads;
+  static size_t _num_ranks;
+  static std::vector<char> _parallel_buf;
+  static std::unordered_map<ThreadIDType, ParallelContext> _thread_ctx_map;
+  static MutexType mtx;
+
+  static size_t _rank_id;
+  static thread_local size_t _thread_id;
+
+  static void start_thread(size_t thread_id, const std::function<void()>& thread_main);
+  static void parallel_reduce(double * data, size_t size, int op);
+};
+
+#endif /* RAXML_PARALLELCONTEXT_HPP_ */
diff --git a/src/PartitionAssignment.cpp b/src/PartitionAssignment.cpp
new file mode 100644
index 0000000..df68fe5
--- /dev/null
+++ b/src/PartitionAssignment.cpp
@@ -0,0 +1,32 @@
+#include "PartitionAssignment.hpp"
+
+std::ostream& operator<<(std::ostream& stream, const PartitionAssignment& pa)
+{
+  stream << "part#\tstart\tlength" << std::endl;
+  for (auto const& range: pa)
+    stream << range.part_id << "\t" << range.start << "\t" << range.length << std::endl;
+  return stream;
+}
+
+std::ostream& operator<<(std::ostream& stream, const PartitionAssignmentList& pal)
+{
+  stream << "thread#\tpart#\tstart\tlength" << std::endl;
+  size_t i = 0;
+  for (auto const& pa: pal)
+  {
+    for (auto const& range: pa)
+      stream << i << "\t" << range.part_id << "\t" <<
+                range.start << "\t" << range.length << std::endl;
+    stream << std::endl;
+    ++i;
+  }
+  return stream;
+}
+
+std::ostream& operator<<(std::ostream& stream, const PartitionAssignmentStats& stats)
+{
+  stream << "partitions/thread: " << stats.min_thread_parts << "-" << stats.max_thread_parts <<
+      ", patterns/thread: " << stats.min_thread_sites << "-" << stats.max_thread_sites;
+  return stream;
+}
+
diff --git a/src/PartitionAssignment.hpp b/src/PartitionAssignment.hpp
new file mode 100644
index 0000000..26a2bb8
--- /dev/null
+++ b/src/PartitionAssignment.hpp
@@ -0,0 +1,80 @@
+#ifndef RAXML_PARTITIONASSIGNMENT_HPP_
+#define RAXML_PARTITIONASSIGNMENT_HPP_
+
+#include <vector>
+#include <ostream>
+#include <algorithm>
+#include <limits>
+
+struct PartitionRange
+{
+  PartitionRange() : part_id(0), start(0), length(0) {}
+  PartitionRange(size_t part_id, size_t start, size_t length):
+    part_id(part_id), start(start), length(length) {};
+
+  bool master() const { return start == 0; };
+
+  size_t part_id;
+  size_t start;
+  size_t length;
+};
+
+struct PartitionAssignment
+{
+  typedef std::vector<PartitionRange>         container;
+  typedef typename container::iterator        iterator;
+  typedef typename container::const_iterator  const_iterator;
+
+  PartitionAssignment() : _weight(0.0) {}
+
+  size_t num_parts() const { return _part_range_list.size(); }
+  size_t weight() const { return (size_t) _weight; }
+  const PartitionRange& operator[](size_t i) const { return _part_range_list.at(i); }
+  const_iterator find(size_t part_id) const
+  {
+    return std::find_if( _part_range_list.cbegin(),  _part_range_list.cend(),
+                         [part_id](const PartitionRange& r) { return (r.part_id == part_id);} );
+  };
+
+  void assign_sites(size_t partition_id, size_t offset, size_t length)
+  {
+    _part_range_list.emplace_back(partition_id, offset, length);
+    _weight += length;
+  }
+
+  const_iterator begin() const { return _part_range_list.cbegin(); };
+  const_iterator end() const { return _part_range_list.cend(); };
+
+private:
+  container _part_range_list;
+  double _weight;
+};
+
+typedef std::vector<PartitionAssignment> PartitionAssignmentList;
+
+struct PartitionAssignmentStats
+{
+  PartitionAssignmentStats(const PartitionAssignmentList& part_assign)
+  {
+    max_thread_sites = max_thread_parts = 0;
+    min_thread_sites = min_thread_parts = std::numeric_limits<size_t>::max();
+    for (auto pa: part_assign)
+    {
+      min_thread_sites = std::min(min_thread_sites, pa.weight());
+      min_thread_parts = std::min(min_thread_parts, pa.num_parts());
+      max_thread_sites = std::max(max_thread_sites, pa.weight());
+      max_thread_parts = std::max(max_thread_parts, pa.num_parts());
+    }
+  }
+
+  size_t min_thread_sites;
+  size_t min_thread_parts;
+  size_t max_thread_sites;
+  size_t max_thread_parts;
+};
+
+std::ostream& operator<<(std::ostream& stream, const PartitionAssignment& pa);
+std::ostream& operator<<(std::ostream& stream, const PartitionAssignmentList& pal);
+std::ostream& operator<<(std::ostream& stream, const PartitionAssignmentStats& stats);
+
+#endif /* RAXML_PARTITIONASSIGNMENT_HPP_ */
diff --git a/src/PartitionInfo.cpp b/src/PartitionInfo.cpp
new file mode 100644
index 0000000..b4066f8
--- /dev/null
+++ b/src/PartitionInfo.cpp
@@ -0,0 +1,128 @@
+#include "PartitionInfo.hpp"
+
+#include "Options.hpp"
+
+using namespace std;
+
+PartitionInfo::~PartitionInfo ()
+{
+  if (_stats)
+    pllmod_msa_destroy_stats(_stats);
+}
+
+size_t PartitionInfo::mark_partition_sites(unsigned int part_num, std::vector<unsigned int>& site_part)
+{
+  size_t start, end, stride;
+  size_t i;
+  size_t sites_assigned = 0;
+  const char * range = _range_string.c_str();
+
+  do
+  {
+    while(*range == ',')
+      range++;
+
+    int read = 0;
+    /* try to parse strided format first */
+    read = sscanf(range, "%lu-%lu\\%lu", &start, &end, &stride);
+    if (read != 3)
+    {
+      /* try to parse contiguous range format first */
+      stride = 1;
+      read = sscanf(range, "%lu-%lu", &start, &end);
+      if (read != 2)
+      {
+        /* finally, check if we have a single column */
+        read = sscanf(range, "%lu", &start);
+        end = start;
+      }
+    }
+    if (read && start >= 1 && end <= site_part.size() && start <= end)
+    {
+      /* remember indices are 1-based in the partition file! */
+      for (i = start-1; i <= end-1; ++i)
+      {
+        if ((i - start + 1) % stride == 0)
+        {
+          if (!site_part[i])
+          {
+            site_part[i] = part_num;
+            sites_assigned++;
+          }
+          else
+            throw MultiplePartitionForSiteException(*this, i+1);
+        }
+      }
+    }
+    else
+      throw InvalidPartitionRangeException(*this);
+  }
+  while ((range = strchr(range, ',')) != NULL);
+
+  return sites_assigned;
+}
+
+void PartitionInfo::compress_patterns()
+{
+  _msa.compress_patterns(model().charmap());
+}
+
+pllmod_msa_stats_t * PartitionInfo::compute_stats(unsigned long stats_mask) const
+{
+  const unsigned int * weights = _msa.weights().empty() ? nullptr : _msa.weights().data();
+  pllmod_msa_stats_t * stats = pllmod_msa_compute_stats(_msa.pll_msa(), _model.num_states(),
+                                                        _model.charmap(), weights, stats_mask);
+
+  if (!stats)
+    throw runtime_error(pll_errmsg);
+
+  if ((stats_mask & PLLMOD_MSA_STATS_FREQS) &&_msa.probabilistic() &&
+      _model.param_mode(PLLMOD_OPT_PARAM_FREQUENCIES) == ParamValue::empirical)
+  {
+    assert(stats->states == _msa.states());
+    assert(stats->freqs);
+
+    auto freqs = _msa.state_freqs();
+    memcpy(stats->freqs, freqs.data(), _msa.states() * sizeof(double));
+  }
+
+  return stats;
+}
+
+const pllmod_msa_stats_t * PartitionInfo::stats() const
+{
+  if (!_stats)
+  {
+    unsigned long stats_mask = PLLMOD_MSA_STATS_GAP_PROP;
+    stats_mask |= PLLMOD_MSA_STATS_FREQS;
+    stats_mask |= PLLMOD_MSA_STATS_INV_PROP;
+
+    if (_model.param_mode(PLLMOD_OPT_PARAM_SUBST_RATES) == ParamValue::empirical)
+      stats_mask |= PLLMOD_MSA_STATS_SUBST_RATES;
+
+    _stats = compute_stats(stats_mask);
+  }
+
+  return _stats;
+};
+
+
+void PartitionInfo::set_modeL_empirical_params()
+{
+  stats();
+
+  // check for zero state frequencies
+  if (_model.param_mode(PLLMOD_OPT_PARAM_FREQUENCIES) == ParamValue::empirical)
+  {
+    for (unsigned int i = 0; i < _stats->states; ++i)
+    {
+      if (!(_stats->freqs[i] > 0.))
+        throw runtime_error("Frequency of state " + to_string(i) +
+                            " in partition " + _name + " is 0!\n"
+                            "Please either change your partitioning scheme or "
+                            "use model state frequencies for this partition!");
+    }
+  }
+
+  assign(_model, stats());
+}
diff --git a/src/PartitionInfo.hpp b/src/PartitionInfo.hpp
new file mode 100644
index 0000000..7c8737b
--- /dev/null
+++ b/src/PartitionInfo.hpp
@@ -0,0 +1,119 @@
+#ifndef RAXML_PARTITIONINFO_HPP_
+#define RAXML_PARTITIONINFO_HPP_
+
+#include "common.h"
+#include "Model.hpp"
+#include "MSA.hpp"
+
+class Options;
+
+class PartitionInfo
+{
+public:
+  PartitionInfo () :
+    _name(""), _range_string(""), _model(), _msa(), _stats(nullptr) {};
+
+  PartitionInfo (const std::string &name, DataType data_type,
+                 const std::string &model_string, const std::string &range_string = "") :
+    _name(name), _range_string(range_string), _model(data_type, model_string), _msa(),
+    _stats(nullptr) {};
+
+  virtual ~PartitionInfo ();
+
+  PartitionInfo (PartitionInfo&& other)
+  {
+    _name = std::move(other._name);
+    _range_string = std::move(other._range_string);
+    _model = std::move(other._model);
+    _msa = std::move(_msa);
+    _stats = other._stats;
+    other._stats = nullptr;
+  }
+
+  // getters
+  const std::string& name() const { return _name; };
+  const Model& model() const { return _model; };
+  const std::string& range_string() const { return _range_string; };
+  const MSA& msa() const { return _msa; };
+  MSA& msa() { return _msa; };
+  const pllmod_msa_stats_t * stats() const;
+  pllmod_msa_stats_t * compute_stats(unsigned long stats_mask) const;
+
+  // setters
+  void msa(MSA&& msa) { _msa = std::move(msa); };
+  void model(Model&& model) { _model = std::move(model); };
+  void model(const Model& model) { _model = model; };
+  void name(const std::string& value) { _name = value; };
+  void range_string(const std::string& value) { _range_string = value; };
+
+  // operations
+  size_t mark_partition_sites(unsigned int part_num, std::vector<unsigned int>& site_part);
+  void compress_patterns();
+  void set_modeL_empirical_params();
+
+private:
+  std::string _name;
+  std::string _range_string;
+  Model _model;
+  MSA _msa;
+  mutable pllmod_msa_stats_t * _stats;
+};
+
+
+class InvalidPartitionRangeException : public RaxmlException
+{
+public:
+  InvalidPartitionRangeException(const PartitionInfo& pinfo) :
+    RaxmlException("")
+  {
+    _message = format_message("Invalid range in partition %s: %s",
+                              pinfo.name().c_str(), pinfo.range_string().c_str());
+  }
+};
+
+class MultiplePartitionForSiteException : public RaxmlException
+{
+public:
+  MultiplePartitionForSiteException(const PartitionInfo& pinfo1, size_t site) :
+    RaxmlException(""), _site(site), part1_name(pinfo1.name())
+  { }
+
+  size_t site() const { return _site; };
+
+  void pinfo2(const PartitionInfo& pinfo2) { part2_name = pinfo2.name(); }
+
+  virtual const std::string message() const
+  {
+    return format_message("Alignment site %u assigned to multiple partitions: \"%s\" and \"%s\"!",
+                          _site, part1_name.c_str(), part2_name.c_str());
+  };
+
+private:
+  size_t _site;
+  std::string part1_name;
+  std::string part2_name;
+};
+
+class MissingPartitionForSiteException : public RaxmlException
+{
+public:
+  MissingPartitionForSiteException() : RaxmlException("")
+  { }
+
+  size_t count() const { return _unassigned_sites.size(); }
+  const std::vector<size_t>& sites() const { return _unassigned_sites; }
+
+  void add_unassigned_site(size_t site) { _unassigned_sites.push_back(site); }
+
+  virtual const std::string message() const
+  {
+    return format_message("Found %u site(s) which are not assigned to any partition.\n"
+        "Please fix your data!", _unassigned_sites.size());
+  };
+
+private:
+  std::vector<size_t> _unassigned_sites;
+};
+
+
+#endif /* RAXML_PARTITIONINFO_HPP_ */
diff --git a/src/PartitionedMSA.cpp b/src/PartitionedMSA.cpp
new file mode 100644
index 0000000..1bb857e
--- /dev/null
+++ b/src/PartitionedMSA.cpp
@@ -0,0 +1,128 @@
+#include "PartitionedMSA.hpp"
+
+using namespace std;
+
+PartitionedMSA::~PartitionedMSA ()
+{
+  // TODO Auto-generated destructor stub
+}
+
+PartitionedMSA& PartitionedMSA::operator=(PartitionedMSA&& other)
+{
+  _part_list = std::move(other._part_list);
+  _full_msa = std::move(other._full_msa);
+   return *this;
+}
+
+std::vector<unsigned int> PartitionedMSA::get_site_part_assignment()
+{
+  const size_t full_len = _full_msa.num_sites();
+
+  std::vector<unsigned int> spa(full_len);
+
+  size_t p = 0;
+  for (auto& pinfo: _part_list)
+  {
+    try
+    {
+      pinfo.mark_partition_sites(p+1, spa);
+    }
+    catch (MultiplePartitionForSiteException& e)
+    {
+      e.pinfo2(_part_list.at(spa[e.site()-1]-1));
+      throw e;
+    }
+    p++;
+  }
+
+  /* check if all sites were assigned to partitions */
+  MissingPartitionForSiteException e_unassinged;
+  for (size_t i = 0; i < full_len; ++i)
+  {
+    if (!spa[i])
+    {
+      LOG_INFO << "ERROR: Alignment site " << i+1 << " is not assigned to any partition!" << endl;
+      e_unassinged.add_unassigned_site(i+1);
+    }
+  }
+
+  if (e_unassinged.count() > 0)
+    throw e_unassinged;
+
+  return spa;
+}
+
+void PartitionedMSA::split_msa()
+{
+  if (part_count() > 1)
+  {
+    auto site_part = get_site_part_assignment();
+
+    /* split MSA into partitions */
+    pll_msa_t ** part_msa_list =
+        pllmod_msa_split(_full_msa.pll_msa(), site_part.data(), part_count());
+
+    for (size_t p = 0; p < part_count(); ++p)
+    {
+      part_msa(p, part_msa_list[p]);
+      pll_msa_destroy(part_msa_list[p]);
+    }
+    free(part_msa_list);
+  }
+  else
+    part_msa(0, std::move(_full_msa));
+}
+
+void PartitionedMSA::compress_patterns()
+{
+  for (PartitionInfo& pinfo: _part_list)
+  {
+    pinfo.compress_patterns();
+  }
+}
+
+size_t PartitionedMSA::total_length() const
+{
+  size_t sum = 0;
+
+  for (const auto& pinfo: _part_list)
+  {
+    sum += pinfo.msa().length();
+  }
+
+  return sum;
+}
+
+void PartitionedMSA::set_model_empirical_params()
+{
+  for (PartitionInfo& pinfo: _part_list)
+  {
+    pinfo.set_modeL_empirical_params();
+  }
+}
+
+std::ostream& operator<<(std::ostream& stream, const PartitionedMSA& part_msa)
+{
+  for (size_t p = 0; p < part_msa.part_count(); ++p)
+  {
+    const PartitionInfo& pinfo = part_msa.part_info(p);
+    stream << "Partition " << p << ": " << pinfo.name() << endl;
+    stream << "Model: " << pinfo.model().to_string() << endl;
+    if (pinfo.msa().num_patterns())
+    {
+      stream << "Alignment sites / patterns: " << pinfo.msa().num_sites() <<
+          " / " << pinfo.msa().num_patterns() << endl;
+    }
+    else
+      stream << "Alignment sites: " << pinfo.msa().num_sites() << endl;
+
+//    stream << fixed;
+    stream << "Gaps: " << setprecision(2) << (pinfo.stats()->gap_prop * 100) << " %" << endl;
+    stream << "Invariant sites: " << setprecision(2) << (pinfo.stats()->inv_prop * 100) << " %" << endl;
+    stream << endl;
+  }
+
+  return stream;
+}
+
+
diff --git a/src/PartitionedMSA.hpp b/src/PartitionedMSA.hpp
new file mode 100644
index 0000000..4d17675
--- /dev/null
+++ b/src/PartitionedMSA.hpp
@@ -0,0 +1,58 @@
+#ifndef RAXML_PARTITIONEDMSA_HPP_
+#define RAXML_PARTITIONEDMSA_HPP_
+
+#include "PartitionInfo.hpp"
+
+class PartitionedMSA
+{
+public:
+  PartitionedMSA () {};
+  virtual
+  ~PartitionedMSA ();
+
+  // copy/move constructors and assignments
+  PartitionedMSA (PartitionedMSA&& other) = default;
+  PartitionedMSA& operator=(PartitionedMSA&& other);
+
+  // getters
+  const MSA& full_msa() const { return (part_count() == 1) ? _part_list.at(0).msa() : _full_msa; };
+  size_t part_count() const { return _part_list.size(); };
+  const PartitionInfo& part_info(size_t index) const { return _part_list.at(index); };
+  const Model& model(size_t index) const { return _part_list.at(index).model(); };
+  const std::vector<PartitionInfo>& part_list() const { return _part_list; };
+  std::vector<PartitionInfo>& part_list() { return _part_list; };
+  size_t total_length() const;
+
+  // setters
+  void full_msa(MSA&& msa) { _full_msa = std::move(msa); };
+  void part_msa(size_t index, MSA&& msa) { _part_list.at(index).msa(std::move(msa)); };
+  void part_msa(size_t index, const pll_msa_t * pll_msa)
+  {
+    _part_list.at(index).msa(MSA(pll_msa));
+  };
+  void model(size_t index, Model&& m) { return _part_list.at(index).model(std::move(m)); };
+  void model(size_t index, const Model& m) { return _part_list.at(index).model(m); };
+
+  // operations
+  void append_part_info(PartitionInfo&& part_info) { _part_list.push_back(std::move(part_info)); };
+
+  template <class... Args>
+  void emplace_part_info (Args&&... args)
+  {
+    _part_list.emplace_back(std::forward<Args>(args)...);
+  }
+
+  void split_msa();
+  void compress_patterns();
+  void set_model_empirical_params();
+
+private:
+  std::vector<PartitionInfo> _part_list;
+  MSA _full_msa;
+
+  std::vector<unsigned int> get_site_part_assignment();
+};
+
+std::ostream& operator<<(std::ostream& stream, const PartitionedMSA& part_msa);
+
+#endif /* RAXML_PARTITIONEDMSA_HPP_ */
diff --git a/src/Tree.cpp b/src/Tree.cpp
new file mode 100644
index 0000000..111c401
--- /dev/null
+++ b/src/Tree.cpp
@@ -0,0 +1,265 @@
+#include <algorithm>
+
+#include "Tree.hpp"
+
+using namespace std;
+
+Tree::Tree (const Tree& other) : BasicTree(other._num_tips),
+    _pll_utree_start(other.pll_utree_copy())
+{
+}
+
+Tree::Tree (Tree&& other) : BasicTree(other._num_tips), _pll_utree_start(other._pll_utree_start)
+{
+  other._num_tips = 0;
+  other._pll_utree_start = nullptr;
+  swap(_pll_utree_tips, other._pll_utree_tips);
+}
+
+Tree& Tree::operator=(const Tree& other)
+{
+  if (this != &other)
+  {
+    if (_pll_utree_start)
+      pll_utree_destroy(_pll_utree_start, nullptr);
+
+    _pll_utree_start = other.pll_utree_copy();
+    _num_tips = other._num_tips;
+  }
+
+  return *this;
+}
+
+Tree& Tree::operator=(Tree&& other)
+{
+  if (this != &other)
+  {
+    _num_tips = 0;
+    _pll_utree_tips.clear();
+    if (_pll_utree_start)
+    {
+      pll_utree_destroy(_pll_utree_start, nullptr);
+     _pll_utree_start = nullptr;
+    }
+
+    swap(_num_tips, other._num_tips);
+    swap(_pll_utree_start, other._pll_utree_start);
+    swap(_pll_utree_tips, other._pll_utree_tips);
+  }
+
+  return *this;
+}
+
+Tree::~Tree ()
+{
+  if (_pll_utree_start)
+    pll_utree_destroy(_pll_utree_start, nullptr);
+}
+
+Tree Tree::buildRandom(size_t num_tips, const char * const* tip_labels)
+{
+  Tree tree;
+
+  tree._num_tips = num_tips;
+  tree._pll_utree_start = pllmod_utree_create_random(num_tips, tip_labels);
+
+  return tree;
+}
+
+Tree Tree::buildRandom(const MSA& msa)
+{
+  return Tree::buildRandom(msa.size(), (const char * const*) msa.pll_msa()->label);
+}
+
+Tree Tree::buildParsimony(const PartitionedMSA& parted_msa, unsigned int random_seed,
+                           unsigned int attributes, unsigned int * score)
+{
+  Tree tree;
+  unsigned int lscore;
+  unsigned int *pscore = score ? score : &lscore;
+
+  const MSA& msa = parted_msa.full_msa();
+
+  // temporary workaround
+  unsigned int num_states = parted_msa.model(0).num_states();
+  const unsigned int * map = parted_msa.model(0).charmap();
+  const unsigned int * weights = msa.weights().empty() ? nullptr : msa.weights().data();
+
+  tree._num_tips = msa.size();
+
+
+  tree._pll_utree_start = pllmod_utree_create_parsimony(msa.size(),
+                                                        msa.length(),
+                                                        msa.pll_msa()->label,
+                                                        msa.pll_msa()->sequence,
+                                                        weights,
+                                                        map,
+                                                        num_states,
+                                                        attributes,
+                                                        random_seed,
+                                                        pscore);
+
+  if (!tree._pll_utree_start)
+    throw runtime_error("ERROR building parsimony tree: " + string(pll_errmsg));
+
+  return tree;
+}
+
+Tree Tree::loadFromFile(const std::string& file_name)
+{
+  Tree tree;
+
+  pll_utree_t * utree;
+  pll_rtree_t * rtree;
+
+  unsigned int tip_count;
+  if (!(rtree = pll_rtree_parse_newick(file_name.c_str(), &tip_count)))
+  {
+    if (!(utree = pll_utree_parse_newick(file_name.c_str(), &tip_count)))
+    {
+      throw runtime_error("ERROR reading tree file: " + string(pll_errmsg));
+    }
+  }
+  else
+  {
+//    LOG_INFO << "NOTE: You provided a rooted tree; it will be automatically unrooted." << endl;
+    utree = pll_rtree_unroot(rtree);
+
+    /* optional step if using default PLL clv/pmatrix index assignments */
+    pll_utree_reset_template_indices(utree, tip_count);
+  }
+
+  tree._num_tips = tip_count;
+  tree._pll_utree_start = utree;
+
+  return tree;
+}
+
+PllTreeVector const& Tree::tip_nodes() const
+{
+ if (_pll_utree_tips.empty() && _num_tips > 0)
+ {
+   _pll_utree_tips.resize(_num_tips);
+
+   pll_utree_query_tipnodes(_pll_utree_start, _pll_utree_tips.data());
+ }
+
+ return _pll_utree_tips;
+}
+
+IdNameVector Tree::tip_labels() const
+{
+  IdNameVector result;
+  for (auto const& node: tip_nodes())
+    result.emplace_back(node->clv_index, string(node->label));
+
+  assert(!result.empty());
+
+  return result;
+}
+
+void Tree::reset_tip_ids(const NameIdMap& label_id_map)
+{
+  if (label_id_map.size() != _num_tips)
+    throw invalid_argument("Invalid map size");
+
+  tip_nodes();
+  for (auto& node: _pll_utree_tips)
+  {
+    const unsigned int tip_id = label_id_map.at(node->label);
+    node->clv_index = node->node_index = tip_id;
+  }
+}
+
+
+void Tree::fix_missing_brlens(double new_brlen)
+{
+  pllmod_utree_set_length_recursive(_pll_utree_start, new_brlen, 1);
+}
+
+PllTreeVector Tree::subnodes() const
+{
+  std::vector<pll_utree_t*> subnodes;
+
+  if (_num_tips > 0)
+  {
+    subnodes.resize(num_subnodes());
+
+    pllmod_utree_traverse_apply(_pll_utree_start, nullptr,
+                                [](pll_utree * node, void * data) -> int
+                                { auto list = (pll_utree_t**) data;
+                                  list[node->node_index] = node;
+                                  if (node->next)
+                                  {
+                                    list[node->next->node_index] = node->next;
+                                    list[node->next->next->node_index] = node->next->next;
+                                  }
+//                                  printf("node: %u\n", node->node_index);
+                                  return 1;
+                                },
+                                nullptr,
+                                (void*) subnodes.data());
+  }
+
+  return subnodes;
+}
+
+TreeTopology Tree::topology() const
+{
+  TreeTopology topol;
+
+  for (auto n: subnodes())
+  {
+    if (n->node_index < n->back->node_index)
+      topol.emplace_back(n->node_index, n->back->node_index, n->length);
+  }
+
+//  for (auto& branch: topol)
+//    printf("%u %u %lf\n", branch.left_node_id, branch.right_node_id, branch.length);
+
+  assert(topol.size() == num_branches());
+
+  return topol;
+}
+
+void Tree::topology(const TreeTopology& topol)
+{
+  if (topol.size() != num_branches())
+    throw runtime_error("Incompatible topology!");
+
+  auto allnodes = subnodes();
+  unsigned int pmatrix_index = 0;
+  for (const auto& branch: topol)
+  {
+    pll_utree_t * left_node = allnodes.at(branch.left_node_id);
+    pll_utree_t * right_node = allnodes.at(branch.right_node_id);
+    pllmod_utree_connect_nodes(left_node, right_node, branch.length);
+
+    // important: make sure all branches have distinct pmatrix indices!
+    left_node->pmatrix_index = right_node->pmatrix_index = pmatrix_index++;
+//    printf("%u %u %lf %d\n", branch.left_node_id, branch.right_node_id, branch.length, left_node->pmatrix_index);
+  }
+
+  assert(pmatrix_index == num_branches());
+}
+
+TreeCollection::const_iterator TreeCollection::best() const
+{
+  return std::max_element(_trees.cbegin(), _trees.cend(),
+                          [](const value_type& a, const value_type& b) -> bool
+                          { return a.first < b.first; }
+                         );
+}
+
+void TreeCollection::push_back(double score, const Tree& tree)
+{
+  _trees.emplace_back(score, tree.topology());
+}
+
+void TreeCollection::push_back(double score, TreeTopology&& topol)
+{
+  _trees.emplace_back(score, topol);
+}
+
+
+
diff --git a/src/Tree.hpp b/src/Tree.hpp
new file mode 100644
index 0000000..9f8a6e8
--- /dev/null
+++ b/src/Tree.hpp
@@ -0,0 +1,104 @@
+#ifndef RAXML_TREE_HPP_
+#define RAXML_TREE_HPP_
+
+#include "common.h"
+#include "PartitionedMSA.hpp"
+
+struct TreeBranch
+{
+  TreeBranch() : left_node_id(0), right_node_id(0), length(0.) {};
+  TreeBranch(size_t left_node_id, size_t right_node_id, double length) :
+    left_node_id(left_node_id), right_node_id(right_node_id), length(length) {};
+
+  size_t left_node_id;
+  size_t right_node_id;
+  double length;
+};
+
+typedef std::vector<TreeBranch> TreeTopology;
+
+typedef std::vector<pll_utree_t*> PllTreeVector;
+
+class BasicTree
+{
+public:
+  BasicTree(size_t num_tips) : _num_tips(num_tips) {}
+
+  bool empty() const { return _num_tips == 0; };
+  size_t num_tips() const { return _num_tips; };
+  size_t num_inner() const { return _num_tips - 2; };
+  size_t num_nodes() const { return _num_tips + _num_tips - 2; };
+  size_t num_subnodes() const { return _num_tips + num_inner() * 3; };
+  size_t num_branches() const { return _num_tips + _num_tips - 3; };
+
+protected:
+  size_t _num_tips;
+};
+
+class Tree : public BasicTree
+{
+public:
+  Tree() : BasicTree(0), _pll_utree_start(nullptr) {};
+  Tree(size_t num_tips, pll_utree_t* pll_utree_start) :
+    BasicTree(num_tips), _pll_utree_start(pll_utree_clone(pll_utree_start)) {};
+
+  Tree (const Tree& other);
+  Tree& operator=(const Tree& other);
+  Tree (Tree&& other);
+  Tree& operator=(Tree&& other);
+
+  virtual ~Tree();
+
+  static Tree buildRandom(size_t num_tips, const char * const* tip_labels);
+  static Tree buildRandom(const MSA& msa);
+  static Tree buildParsimony(const PartitionedMSA& parted_msa, unsigned int random_seed,
+                             unsigned int attributes, unsigned int * score = nullptr);
+  static Tree loadFromFile(const std::string& file_name);
+
+  IdNameVector tip_labels() const;
+
+  TreeTopology topology() const;
+  void topology(const TreeTopology& topol);
+
+  // TODO: use move semantics to transfer ownership?
+  pll_utree_t * pll_utree_copy() const { return pll_utree_clone(_pll_utree_start); };
+  const pll_utree_t& pll_utree_start() const { return *_pll_utree_start; };
+
+  void fix_missing_brlens(double new_brlen = RAXML_BRLEN_DEFAULT);
+  void reset_tip_ids(const NameIdMap& label_id_map);
+
+protected:
+  pll_utree_t* _pll_utree_start;
+
+  mutable PllTreeVector _pll_utree_tips;
+
+  PllTreeVector const& tip_nodes() const;
+  PllTreeVector subnodes() const;
+};
+
+typedef std::pair<double, TreeTopology> ScoredTopology;
+
+class TreeCollection
+{
+public:
+  typedef std::vector<ScoredTopology> container_type;
+  typedef container_type::const_iterator const_iterator;
+  typedef container_type::value_type value_type;
+
+  size_t size() const { return  _trees.size(); }
+  const_iterator best() const;
+  value_type::first_type best_score() const { return best()->first; };
+  const value_type::second_type& best_topology() const { return best()->second; };
+
+  const_iterator begin() const { return _trees.cbegin(); }
+  const_iterator end() const { return _trees.cend(); }
+
+  void clear() { _trees.clear(); };
+  void push_back(double score, const Tree& tree);
+  void push_back(double score, TreeTopology&& topol);
+
+private:
+  container_type _trees;
+};
+
+#endif /* RAXML_TREE_HPP_ */
diff --git a/src/TreeInfo.cpp b/src/TreeInfo.cpp
new file mode 100644
index 0000000..1d28b70
--- /dev/null
+++ b/src/TreeInfo.cpp
@@ -0,0 +1,471 @@
+#include <algorithm>
+
+#include "TreeInfo.hpp"
+#include "ParallelContext.hpp"
+
+using namespace std;
+
+TreeInfo::TreeInfo (const Options &opts, const Tree& tree, const PartitionedMSA& parted_msa,
+                    const PartitionAssignment& part_assign)
+{
+  init(opts, tree, parted_msa, part_assign, std::vector<uintVector>());
+}
+
+TreeInfo::TreeInfo (const Options &opts, const Tree& tree, const PartitionedMSA& parted_msa,
+                    const PartitionAssignment& part_assign,
+                    const std::vector<uintVector>& site_weights)
+{
+  init(opts, tree, parted_msa, part_assign, site_weights);
+}
+
+void TreeInfo::init(const Options &opts, const Tree& tree, const PartitionedMSA& parted_msa,
+                    const PartitionAssignment& part_assign,
+                    const std::vector<uintVector>& site_weights)
+{
+  _pll_treeinfo = pllmod_treeinfo_create(tree.pll_utree_copy(), tree.num_tips(),
+                                         parted_msa.part_count(), opts.brlen_linkage);
+
+  if (!_pll_treeinfo)
+    throw runtime_error("ERROR creating treeinfo structure: " + string(pll_errmsg));
+
+  if (ParallelContext::num_procs() > 1)
+  {
+    pllmod_treeinfo_set_parallel_context(_pll_treeinfo, (void *) nullptr,
+                                         ParallelContext::parallel_reduce_cb);
+  }
+
+  // init partitions
+  int optimize_branches = opts.optimize_brlen ? PLLMOD_OPT_PARAM_BRANCHES_ITERATIVE : 0;
+  if (opts.optimize_model && opts.brlen_linkage == PLLMOD_TREE_BRLEN_SCALED &&
+      parted_msa.part_count() > 1)
+  {
+    optimize_branches |= PLLMOD_OPT_PARAM_BRANCH_LEN_SCALER;
+  }
+
+  for (size_t p = 0; p < parted_msa.part_count(); ++p)
+  {
+    const PartitionInfo& pinfo = parted_msa.part_info(p);
+    int params_to_optimize = opts.optimize_model ? pinfo.model().params_to_optimize() : 0;
+    params_to_optimize |= optimize_branches;
+
+    PartitionAssignment::const_iterator part_range = part_assign.find(p);
+    if (part_range != part_assign.end())
+    {
+      /* create and init PLL partition structure */
+      const auto& weights = site_weights.empty() ? pinfo.msa().weights() : site_weights.at(p);
+      pll_partition_t * partition = create_pll_partition(opts, pinfo, *part_range, weights);
+
+      int retval = pllmod_treeinfo_init_partition(_pll_treeinfo, p, partition,
+                                                  params_to_optimize,
+                                                  pinfo.model().alpha(),
+                                                  pinfo.model().ratecat_submodels().data(),
+                                                  pinfo.model().submodel(0).rate_sym().data());
+
+      if (!retval)
+        throw runtime_error("ERROR adding treeinfo partition: " + string(pll_errmsg));
+
+      if (part_range->master())
+        _parts_master.insert(p);
+    }
+    else
+    {
+      // this partition will be processed by other threads, but we still need to know
+      // which parameters to optimize
+      _pll_treeinfo->params_to_optimize[p] = params_to_optimize;
+    }
+  }
+}
+
+TreeInfo::~TreeInfo ()
+{
+  if (_pll_treeinfo)
+  {
+    pll_utree_destroy(_pll_treeinfo->root, NULL);
+    pllmod_treeinfo_destroy(_pll_treeinfo);
+  }
+}
+
+Tree TreeInfo::tree() const
+{
+  return _pll_treeinfo ? Tree(_pll_treeinfo->tip_count, _pll_treeinfo->root) : Tree();
+}
+
+double TreeInfo::loglh(bool incremental)
+{
+  return pllmod_treeinfo_compute_loglh(_pll_treeinfo, incremental ? 1 : 0);
+}
+
+void TreeInfo::model(size_t partition_id, const Model& model)
+{
+  if (partition_id >= _pll_treeinfo->partition_count)
+    throw out_of_range("Partition ID out of range");
+
+  if (!_pll_treeinfo->partitions[partition_id])
+    return;
+
+  assign(_pll_treeinfo->partitions[partition_id], model);
+  _pll_treeinfo->alphas[partition_id] = model.alpha();
+  if (_pll_treeinfo->brlen_scalers)
+    _pll_treeinfo->brlen_scalers[partition_id] = model.brlen_scaler();
+}
+
+//#define DBG printf
+
+double TreeInfo::optimize_branches(double lh_epsilon, double brlen_smooth_factor)
+{
+  /* update all CLVs and p-matrices before calling BLO */
+  double new_loglh = loglh();
+
+  if (_pll_treeinfo->params_to_optimize[0] & PLLMOD_OPT_PARAM_BRANCHES_ITERATIVE)
+  {
+    new_loglh = -1 * pllmod_opt_optimize_branch_lengths_local_multi(_pll_treeinfo->partitions,
+                                                                    _pll_treeinfo->partition_count,
+                                                                    _pll_treeinfo->root,
+                                                                    _pll_treeinfo->param_indices,
+                                                                    _pll_treeinfo->deriv_precomp,
+                                                                    _pll_treeinfo->brlen_scalers,
+                                                                    RAXML_BRLEN_MIN,
+                                                                    RAXML_BRLEN_MAX,
+                                                                    lh_epsilon,
+                                                                    brlen_smooth_factor * RAXML_BRLEN_SMOOTHINGS,
+                                                                    -1,  /* radius */
+                                                                    1,    /* keep_update */
+                                                                    _pll_treeinfo->parallel_context,
+                                                                    _pll_treeinfo->parallel_reduce_cb
+                                                                    );
+    DBG("\t - after brlen: logLH = %f\n", new_loglh);
+
+    if (pll_errno)
+      throw runtime_error("ERROR in branch lenght optimization: " + string(pll_errmsg));
+  }
+
+  /* optimize brlen scalers, if needed */
+  if (_pll_treeinfo->brlen_linkage == PLLMOD_TREE_BRLEN_SCALED &&
+      _pll_treeinfo->partition_count > 1)
+  {
+    new_loglh = -1 * pllmod_algo_opt_onedim_treeinfo(_pll_treeinfo,
+                                                    PLLMOD_OPT_PARAM_BRANCH_LEN_SCALER,
+                                                    RAXML_BRLEN_SCALER_MIN,
+                                                    RAXML_BRLEN_SCALER_MAX,
+                                                    RAXML_PARAM_EPSILON);
+
+    /* normalize scalers and scale the branches accordingly */
+    pllmod_treeinfo_normalize_brlen_scalers(_pll_treeinfo);
+
+    DBG("\t - after brlen scalers: logLH = %f\n", new_loglh);
+  }
+
+
+  return new_loglh;
+}
+
+double TreeInfo::optimize_params(int params_to_optimize, double lh_epsilon)
+{
+  double new_loglh;
+
+  /* optimize SUBSTITUTION RATES */
+  if (params_to_optimize & PLLMOD_OPT_PARAM_SUBST_RATES)
+  {
+    new_loglh = -1 * pllmod_algo_opt_subst_rates_treeinfo(_pll_treeinfo,
+                                                          0,
+                                                          PLLMOD_OPT_MIN_SUBST_RATE,
+                                                          PLLMOD_OPT_MAX_SUBST_RATE,
+                                                          RAXML_BFGS_FACTOR,
+                                                          RAXML_PARAM_EPSILON);
+    DBG("\t - after rates: logLH = %f\n", new_loglh);
+  }
+
+  /* optimize BASE FREQS */
+  if (params_to_optimize & PLLMOD_OPT_PARAM_FREQUENCIES)
+  {
+    new_loglh = -1 * pllmod_algo_opt_frequencies_treeinfo(_pll_treeinfo,
+                                                          0,
+                                                          PLLMOD_OPT_MIN_FREQ,
+                                                          PLLMOD_OPT_MAX_FREQ,
+                                                          RAXML_BFGS_FACTOR,
+                                                          RAXML_PARAM_EPSILON);
+
+    DBG("\t - after freqs: logLH = %f\n", new_loglh);
+  }
+
+  /* optimize ALPHA */
+  if (params_to_optimize & PLLMOD_OPT_PARAM_ALPHA)
+  {
+    new_loglh = -1 * pllmod_algo_opt_onedim_treeinfo(_pll_treeinfo,
+                                                      PLLMOD_OPT_PARAM_ALPHA,
+                                                      PLLMOD_OPT_MIN_ALPHA,
+                                                      PLLMOD_OPT_MAX_ALPHA,
+                                                      RAXML_PARAM_EPSILON);
+    DBG("\t - after alpha: logLH = %f\n", new_loglh);
+  }
+
+  if (params_to_optimize & PLLMOD_OPT_PARAM_PINV)
+  {
+    new_loglh = -1 * pllmod_algo_opt_onedim_treeinfo(_pll_treeinfo,
+                                                      PLLMOD_OPT_PARAM_PINV,
+                                                      PLLMOD_OPT_MIN_PINV,
+                                                      PLLMOD_OPT_MAX_PINV,
+                                                      RAXML_PARAM_EPSILON);
+
+    DBG("\t - after p-inv: logLH = %f\n", new_loglh);
+  }
+
+  /* optimize FREE RATES and WEIGHTS */
+  if (params_to_optimize & PLLMOD_OPT_PARAM_FREE_RATES)
+  {
+    new_loglh = -1 * pllmod_algo_opt_rates_weights_treeinfo (_pll_treeinfo,
+                                                          RAXML_FREERATE_MIN,
+                                                          RAXML_FREERATE_MAX,
+                                                          RAXML_BFGS_FACTOR,
+                                                          RAXML_PARAM_EPSILON);
+
+    /* normalize scalers and scale the branches accordingly */
+    if (_pll_treeinfo->brlen_linkage == PLLMOD_TREE_BRLEN_SCALED &&
+        _pll_treeinfo->partition_count > 1)
+      pllmod_treeinfo_normalize_brlen_scalers(_pll_treeinfo);
+
+    DBG("\t - after freeR: logLH = %f\n", new_loglh);
+    DBG("\t - after freeR/crosscheck: logLH = %f\n", loglh());
+  }
+
+  if (params_to_optimize & PLLMOD_OPT_PARAM_BRANCHES_ITERATIVE)
+  {
+    new_loglh = optimize_branches(lh_epsilon, 0.25);
+  }
+
+  return new_loglh;
+}
+
+double TreeInfo::spr_round(spr_round_params& params)
+{
+  return pllmod_algo_spr_round(_pll_treeinfo, params.radius_min, params.radius_max,
+                               params.ntopol_keep, params.thorough,
+                               RAXML_BRLEN_MIN, RAXML_BRLEN_MAX, RAXML_BRLEN_SMOOTHINGS,
+                               0.1,
+                               params.subtree_cutoff > 0. ? &params.cutoff_info : nullptr,
+                               params.subtree_cutoff);
+}
+
+
+void assign(PartitionedMSA& parted_msa, const TreeInfo& treeinfo)
+{
+  const pllmod_treeinfo_t& pll_treeinfo = treeinfo.pll_treeinfo();
+
+  if (parted_msa.part_count() != pll_treeinfo.partition_count)
+    throw runtime_error("Incompatible arguments");
+
+  for (size_t p = 0; p < parted_msa.part_count(); ++p)
+  {
+    if (!pll_treeinfo.partitions[p])
+      continue;
+
+    Model model(parted_msa.model(p));
+    assign(model, treeinfo, p);
+    parted_msa.model(p, move(model));
+  }
+}
+
+void assign(Model& model, const TreeInfo& treeinfo, size_t partition_id)
+{
+  const pllmod_treeinfo_t& pll_treeinfo = treeinfo.pll_treeinfo();
+
+  if (partition_id >= pll_treeinfo.partition_count)
+    throw out_of_range("Partition ID out of range");
+
+  if (!pll_treeinfo.partitions[partition_id])
+    return;
+
+  assign(model, pll_treeinfo.partitions[partition_id]);
+  model.alpha(pll_treeinfo.alphas[partition_id]);
+  if (pll_treeinfo.brlen_scalers)
+    model.brlen_scaler(pll_treeinfo.brlen_scalers[partition_id]);
+}
+
+void build_clv(ProbVector::const_iterator probs, size_t sites, WeightVector::const_iterator weights,
+               pll_partition_t* partition, bool normalize, std::vector<double>& clv)
+{
+  const auto states = partition->states;
+  const auto states_padded = partition->states_padded;
+  auto clvp = clv.begin();
+
+  for (size_t i = 0; i < sites; ++i)
+  {
+    if (weights[i] > 0)
+    {
+      double sum = 0.;
+      for (size_t j = 0; j < states; ++j)
+        sum += probs[j];
+
+      for (size_t j = 0; j < states; ++j)
+      {
+        if (sum > 0.)
+          clvp[j] =  normalize ? probs[j] / sum : probs[j];
+        else
+          clvp[j] = 1.0;
+      }
+
+      clvp += states_padded;
+    }
+
+    /* NB: clv has to be padded, but msa arrays are not! */
+    probs += states;
+  }
+
+  assert(clvp == clv.end());
+}
+
+void set_partition_tips(const Options& opts, const MSA& msa, const PartitionRange& part_region,
+                        pll_partition_t* partition)
+{
+  /* set pattern weights */
+  if (!msa.weights().empty())
+    pll_set_pattern_weights(partition, msa.weights().data() + part_region.start);
+
+  if (opts.use_prob_msa && msa.probabilistic())
+  {
+    assert(!(partition->attributes & PLL_ATTRIB_PATTERN_TIP));
+    assert(partition->states == msa.states());
+
+    auto normalize = !msa.normalized();
+    auto weights_start = msa.weights().cbegin() + part_region.start;
+
+    // we need a libpll function for that!
+    auto clv_size = partition->sites * partition->states_padded;
+    std::vector<double> tmp_clv(clv_size);
+    for (size_t i = 0; i < msa.size(); ++i)
+    {
+      auto prob_start = msa.probs(i, part_region.start);
+      build_clv(prob_start, partition->sites, weights_start, partition, normalize, tmp_clv);
+      pll_set_tip_clv(partition, i, tmp_clv.data());
+    }
+  }
+  else
+  {
+    for (size_t i = 0; i < msa.size(); ++i)
+    {
+      pll_set_tip_states(partition, i, partition->map, msa.at(i).c_str() + part_region.start);
+    }
+  }
+}
+
+void set_partition_tips(const Options& opts, const MSA& msa, const PartitionRange& part_region,
+                        pll_partition_t* partition, const WeightVector& weights)
+{
+  assert(!weights.empty());
+
+  const auto pstart = part_region.start;
+  const auto pend = part_region.start + part_region.length;
+
+  /* compress weights array by removing all zero entries */
+  uintVector comp_weights;
+  for (size_t j = pstart; j < pend; ++j)
+  {
+    if (weights[j] > 0)
+      comp_weights.push_back(weights[j]);
+  }
+
+  /* now set tip sequences, ignoring all columns with zero weights */
+  if (opts.use_prob_msa && msa.probabilistic())
+  {
+    assert(!(partition->attributes & PLL_ATTRIB_PATTERN_TIP));
+    assert(partition->states == msa.states());
+
+    auto normalize = !msa.normalized();
+    auto weights_start = msa.weights().cbegin() + part_region.start;
+
+    // we need a libpll function for that!
+    auto clv_size = part_region.length * partition->states_padded;
+    std::vector<double> tmp_clv(clv_size);
+    for (size_t i = 0; i < msa.size(); ++i)
+    {
+      auto prob_start = msa.probs(i, part_region.start);
+      build_clv(prob_start, part_region.length, weights_start, partition, normalize, tmp_clv);
+      pll_set_tip_clv(partition, i, tmp_clv.data());
+    }
+  }
+  else
+  {
+    std::vector<char> bs_seq(part_region.length);
+    for (size_t i = 0; i < msa.size(); ++i)
+    {
+      const char * full_seq = msa.at(i).c_str();
+      size_t pos = 0;
+      for (size_t j = pstart; j < pend; ++j)
+      {
+        if (weights[j] > 0)
+          bs_seq[pos++] = full_seq[j];
+      }
+      assert(pos == comp_weights.size());
+
+      pll_set_tip_states(partition, i, partition->map, bs_seq.data());
+    }
+  }
+
+  pll_set_pattern_weights(partition, comp_weights.data());
+}
+
+pll_partition_t* create_pll_partition(const Options& opts, const PartitionInfo& pinfo,
+                                      const PartitionRange& part_region, const uintVector& weights)
+{
+  const MSA& msa = pinfo.msa();
+  const Model& model = pinfo.model();
+
+  unsigned int attrs = opts.simd_arch;
+
+  if (opts.use_rate_scalers && model.num_ratecats() > 1)
+  {
+    attrs |= PLL_ATTRIB_RATE_SCALERS;
+
+    if (model.num_states() != 4 ||
+        (opts.simd_arch != PLL_ATTRIB_ARCH_AVX && opts.simd_arch != PLL_ATTRIB_ARCH_AVX2))
+    {
+      throw runtime_error("Per-rate scalers are implemented for DNA with AVX/AVX2 vectorization only!\n");
+    }
+  }
+
+  if (opts.use_tip_inner)
+  {
+    assert(!(opts.use_prob_msa));
+    // TODO: use proper auto-tuning
+    const unsigned long min_len_ti = 100;
+    if ((unsigned long) msa.length() > min_len_ti)
+      attrs |= PLL_ATTRIB_PATTERN_TIP;
+  }
+
+  /* part_length doesn't include columns with zero weight */
+  const size_t part_length = weights.empty() ? part_region.length :
+                             std::count_if(weights.begin() + part_region.start,
+                                           weights.begin() + part_region.start + part_region.length,
+                                           [](uintVector::value_type w) -> bool
+                                             { return w > 0; }
+                                           );
+
+  BasicTree tree(msa.size());
+  pll_partition_t * partition = pll_partition_create(
+      tree.num_tips(),         /* number of tip sequences */
+      tree.num_inner(),        /* number of CLV buffers */
+      model.num_states(),      /* number of states in the data */
+      part_length,             /* number of alignment sites/patterns */
+      model.num_submodels(),   /* number of different substitution models (LG4 = 4) */
+      tree.num_branches(),     /* number of probability matrices */
+      model.num_ratecats(),    /* number of (GAMMA) rate categories */
+      tree.num_inner(),        /* number of scaling buffers */
+      attrs                    /* list of flags (SSE3/AVX, TIP-INNER special cases etc.) */
+  );
+
+  if (!partition)
+    throw runtime_error("ERROR creating pll_partition: " + string(pll_errmsg));
+
+  partition->map = model.charmap();
+
+  if (part_length == part_region.length)
+    set_partition_tips(opts, msa, part_region, partition);
+  else
+    set_partition_tips(opts, msa, part_region, partition, weights);
+
+  assign(partition, model);
+
+  return partition;
+}
+
+
diff --git a/src/TreeInfo.hpp b/src/TreeInfo.hpp
new file mode 100644
index 0000000..48310ad
--- /dev/null
+++ b/src/TreeInfo.hpp
@@ -0,0 +1,74 @@
+#ifndef RAXML_TREEINFO_HPP_
+#define RAXML_TREEINFO_HPP_
+
+#include "common.h"
+#include "Tree.hpp"
+#include "Options.hpp"
+#include "PartitionAssignment.hpp"
+
+struct spr_round_params
+{
+  bool thorough;
+  int radius_min;
+  int radius_max;
+  int ntopol_keep;
+  double subtree_cutoff;
+  cutoff_info_t cutoff_info;
+
+  void reset_cutoff_info(double loglh)
+  {
+    cutoff_info.lh_dec_count = 0;
+    cutoff_info.lh_dec_sum = 0.;
+    cutoff_info.lh_cutoff = loglh / -1000.0;
+  }
+};
+
+class TreeInfo
+{
+public:
+  TreeInfo (const Options &opts, const Tree& tree, const PartitionedMSA& parted_msa,
+            const PartitionAssignment& part_assign);
+  TreeInfo (const Options &opts, const Tree& tree, const PartitionedMSA& parted_msa,
+            const PartitionAssignment& part_assign, const std::vector<uintVector>& site_weights);
+  virtual
+  ~TreeInfo ();
+
+  const pllmod_treeinfo_t& pll_treeinfo() const { return *_pll_treeinfo; }
+  const pll_utree& pll_utree_root() const { assert(_pll_treeinfo); return *_pll_treeinfo->root; }
+
+  Tree tree() const;
+  void tree(const Tree& tree) { _pll_treeinfo->root = tree.pll_utree_copy(); }
+
+  /* in parallel mode, partition can be share among multiple threads and TreeInfo objects;
+   * this method returns list of partition IDs for which this thread is designated as "master"
+   * and thus responsible for e.g. sending model parameters to the main thread. */
+  const IDSet& parts_master() const { return _parts_master; }
+
+  void model(size_t partition_id, const Model& model);
+
+  double loglh(bool incremental = false);
+  double optimize_params(int params_to_optimize, double lh_epsilon);
+  double optimize_params_all(double lh_epsilon)
+  { return optimize_params(PLLMOD_OPT_PARAM_ALL, lh_epsilon); } ;
+  double optimize_model(double lh_epsilon)
+  { return optimize_params(PLLMOD_OPT_PARAM_ALL & ~PLLMOD_OPT_PARAM_BRANCHES_ITERATIVE, lh_epsilon); } ;
+  double optimize_branches(double lh_epsilon, double brlen_smooth_factor);
+  double spr_round(spr_round_params& params);
+
+private:
+  pllmod_treeinfo_t * _pll_treeinfo;
+  IDSet _parts_master;
+
+  void init(const Options &opts, const Tree& tree, const PartitionedMSA& parted_msa,
+            const PartitionAssignment& part_assign, const std::vector<uintVector>& site_weights);
+};
+
+void assign(PartitionedMSA& parted_msa, const TreeInfo& treeinfo);
+void assign(Model& model, const TreeInfo& treeinfo, size_t partition_id);
+
+
+pll_partition_t* create_pll_partition(const Options& opts, const PartitionInfo& pinfo,
+                                      const PartitionRange& part_region, const uintVector& weights);
+
+
+#endif /* RAXML_TREEINFO_HPP_ */
diff --git a/src/bootstrap/BootstrapGenerator.cpp b/src/bootstrap/BootstrapGenerator.cpp
new file mode 100644
index 0000000..2d275ae
--- /dev/null
+++ b/src/bootstrap/BootstrapGenerator.cpp
@@ -0,0 +1,60 @@
+#include "BootstrapGenerator.hpp"
+
+BootstrapGenerator::BootstrapGenerator ()
+{
+  // TODO Auto-generated constructor stub
+
+}
+
+BootstrapGenerator::~BootstrapGenerator ()
+{
+  // TODO Auto-generated destructor stub
+}
+
+BootstrapReplicate BootstrapGenerator::generate(const PartitionedMSA& parted_msa,
+                                                unsigned long random_seed)
+{
+  BootstrapReplicate result;
+
+  RandomGenerator gen(random_seed);
+
+  for (const auto& pinfo: parted_msa.part_list())
+    result.site_weights.emplace_back(generate(pinfo.msa(), gen));
+
+  return result;
+}
+
+WeightVector BootstrapGenerator::generate(const MSA& msa, unsigned long random_seed)
+{
+  RandomGenerator gen(random_seed);
+
+  return generate(msa, gen);
+}
+
+WeightVector BootstrapGenerator::generate(const MSA& msa, RandomGenerator& gen)
+{
+  auto orig_weights = msa.weights();
+  unsigned int orig_len = msa.num_sites();
+  unsigned int comp_len = msa.length();
+
+  WeightVector result(comp_len, 0);
+  WeightVector w_buf(orig_len, 0);
+
+  std::uniform_int_distribution<unsigned int> distr(0, orig_len-1);
+
+  for (unsigned int i = 0; i < orig_len; ++i)
+  {
+    auto site = distr(gen);
+    w_buf[site]++;
+  }
+
+  unsigned int pos = 0;
+  for (unsigned int i = 0; i < comp_len; ++i)
+  {
+    for (unsigned int j = 0; j < orig_weights[i]; ++j)
+      result[i] += w_buf[pos++];
+  }
+
+  return result;
+}
+
diff --git a/src/bootstrap/BootstrapGenerator.hpp b/src/bootstrap/BootstrapGenerator.hpp
new file mode 100644
index 0000000..603fc66
--- /dev/null
+++ b/src/bootstrap/BootstrapGenerator.hpp
@@ -0,0 +1,31 @@
+#ifndef RAXML_BOOTSTRAP_BOOTSTRAPGENERATOR_HPP_
+#define RAXML_BOOTSTRAP_BOOTSTRAPGENERATOR_HPP_
+
+#include <random>
+
+#include "../PartitionedMSA.hpp"
+
+typedef std::default_random_engine RandomGenerator;
+
+struct BootstrapReplicate
+{
+  WeightVectorList site_weights;
+};
+
+typedef std::vector<BootstrapReplicate> BootstrapReplicateList;
+
+class BootstrapGenerator
+{
+public:
+  BootstrapGenerator ();
+  virtual
+  ~BootstrapGenerator ();
+
+  BootstrapReplicate generate(const PartitionedMSA& parted_msa, unsigned long random_seed);
+  WeightVector generate(const MSA& msa, unsigned long random_seed);
+
+private:
+  WeightVector generate(const MSA& msa, RandomGenerator& gen);
+};
+
+#endif /* RAXML_BOOTSTRAP_BOOTSTRAPGENERATOR_HPP_ */
diff --git a/src/bootstrap/BootstrapTree.cpp b/src/bootstrap/BootstrapTree.cpp
new file mode 100644
index 0000000..76e1357
--- /dev/null
+++ b/src/bootstrap/BootstrapTree.cpp
@@ -0,0 +1,106 @@
+#include "BootstrapTree.hpp"
+
+#include "../common.h"
+
+using namespace std;
+
+BootstrapTree::BootstrapTree (const Tree& tree) : Tree(tree), _num_bs_trees(0), _num_splits(0)
+{
+  _pll_splits_hash = nullptr;
+
+  add_splits_to_hashtable(_pll_utree_start, true);
+}
+
+BootstrapTree::~BootstrapTree ()
+{
+  pll_utree_destroy(_pll_utree_start, nullptr);
+  pllmod_utree_split_hashtable_destroy(_pll_splits_hash);
+  free(_node_split_map);
+}
+
+void BootstrapTree::add_bootstrap_tree(const Tree& tree)
+{
+  if (tree.num_tips() != _num_tips)
+    throw runtime_error("Incompatible tree!");
+
+  add_splits_to_hashtable(&tree.pll_utree_start(), false);
+  _num_bs_trees++;
+}
+
+void BootstrapTree::add_splits_to_hashtable(const pll_utree_t* root, bool ref_tree)
+{
+  unsigned int n_splits;
+  int ** node_split_map = ref_tree ? &_node_split_map : nullptr;
+  int update_only = ref_tree ? 0 : 1;
+
+  pll_split_t * ref_splits = pllmod_utree_split_create((pll_utree_t*) root,
+                                                       _num_tips,
+                                                       &n_splits,
+                                                       node_split_map);
+
+  _pll_splits_hash = pllmod_utree_split_hashtable_insert(_pll_splits_hash,
+                                                         ref_splits,
+                                                         _num_tips,
+                                                         n_splits,
+                                                         update_only);
+
+  pllmod_utree_split_destroy(ref_splits);
+
+  if (ref_tree)
+    _num_splits = n_splits;
+}
+
+void BootstrapTree::calc_support()
+{
+  vector<unsigned char> support(_pll_splits_hash->entry_count);
+
+  for (unsigned int i = 0; i < _pll_splits_hash->table_size; ++i)
+  {
+    bitv_hash_entry_t * e =  _pll_splits_hash->table[i];
+    while (e != NULL)
+    {
+//      printf("split %d, support %d\n", e->bip_number, e->support);
+      support[e->bip_number] = (unsigned char) (e->support - 1) * 100 / _num_bs_trees;
+      e = e->next;
+    }
+  }
+
+//  printf("\n\n");
+//  for (size_t i = 0; i < 3 * (_num_tips - 2); ++i)
+//    printf("node_id %d, split_id %d\n", i, _node_split_map[i]);
+//  printf("\n\n");
+
+  PllTreeVector inner(num_inner());
+
+  pll_utree_query_innernodes(_pll_utree_start, inner.data());
+
+  vector<bool> split_plotted(_num_splits);
+  for (auto node: inner)
+  {
+    assert(node->next);
+
+    auto n = node;
+    do
+    {
+      if (pllmod_utree_is_tip(n->back))
+        continue;
+
+      auto node_id = n->node_index - _num_tips;
+      auto split_id = _node_split_map[node_id];
+      assert(split_id >= 0);
+
+      if (split_plotted[split_id])
+        continue;
+
+      n->label = n->next->label = n->next->next->label =
+          strdup(std::to_string(support[split_id]).c_str());
+
+      split_plotted[split_id].flip();
+
+      break;
+    }
+    while ((n = n->next) != node);
+  }
+}
+
+
diff --git a/src/bootstrap/BootstrapTree.hpp b/src/bootstrap/BootstrapTree.hpp
new file mode 100644
index 0000000..904f875
--- /dev/null
+++ b/src/bootstrap/BootstrapTree.hpp
@@ -0,0 +1,27 @@
+#ifndef RAXML_BOOTSTRAP_BOOTSTRAPTREE_HPP_
+#define RAXML_BOOTSTRAP_BOOTSTRAPTREE_HPP_
+
+#include "../Tree.hpp"
+
+class BootstrapTree : public Tree
+{
+public:
+  BootstrapTree (const Tree& tree);
+
+  virtual
+  ~BootstrapTree ();
+
+  void add_bootstrap_tree(const Tree& tree);
+
+  void calc_support();
+
+private:
+  size_t _num_bs_trees;
+  size_t _num_splits;
+  bitv_hashtable_t* _pll_splits_hash;
+  int * _node_split_map;
+
+  void add_splits_to_hashtable(const pll_utree_t* root, bool update_only);
+};
+
+#endif /* RAXML_BOOTSTRAP_BOOTSTRAPTREE_HPP_ */
diff --git a/src/common.h b/src/common.h
new file mode 100644
index 0000000..488aebe
--- /dev/null
+++ b/src/common.h
@@ -0,0 +1,75 @@
+#ifndef RAXML_COMMON_H_
+#define RAXML_COMMON_H_
+
+#include <unistd.h>
+
+#include <string>
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+#include <vector>
+#include <set>
+#include <unordered_map>
+#include <stdexcept>
+
+extern "C" {
+//#include <libpll/pll.h>
+#include <libpll/pll_optimize.h>
+#include <libpll/pll_msa.h>
+#include <libpll/pll_tree.h>
+#include <libpll/pllmod_util.h>
+#include <libpll/pllmod_algorithm.h>
+}
+
+#include "types.hpp"
+#include "constants.hpp"
+#include "ParallelContext.hpp"
+#include "log.hpp"
+
+#define RAXML_DOUBLE_TOLERANCE    1e-14
+
+// defaults
+#define DEF_LH_EPSILON            0.1
+#define OPT_LH_EPSILON            0.1
+#define RAXML_PARAM_EPSILON       0.01
+#define RAXML_BFGS_FACTOR         1e7
+
+#define RAXML_BRLEN_SMOOTHINGS    32
+#define RAXML_BRLEN_DEFAULT       0.1
+#define RAXML_BRLEN_MIN           1.0e-6
+#define RAXML_BRLEN_MAX           100.
+#define RAXML_BRLEN_TOLERANCE     1.0e-7
+
+#define RAXML_FREERATE_MIN        0.001
+#define RAXML_FREERATE_MAX        100.
+
+#define RAXML_BRLEN_SCALER_MIN    0.01
+#define RAXML_BRLEN_SCALER_MAX    100.
+
+/* used to supress compiler warnings about unused args */
+#define UNUSED(expr) while (0) { (void)(expr); }
+
+// cpu features
+#define RAXML_CPU_SSE3  (1<<0)
+#define RAXML_CPU_AVX   (1<<1)
+#define RAXML_CPU_FMA3  (1<<2)
+#define RAXML_CPU_AVX2  (1<<3)
+
+/* system utils */
+void sysutil_fatal(const char * format, ...);
+void sysutil_fatal_libpll();
+
+double sysutil_gettime();
+void sysutil_show_rusage();
+unsigned long sysutil_get_memused();
+unsigned long sysutil_get_memtotal();
+
+unsigned long sysutil_get_cpu_features();
+unsigned int sysutil_simd_autodetect();
+
+double sysutil_elapsed_seconds();
+
+std::string sysutil_realpath(const std::string& path);
+bool sysutil_file_exists(const std::string& fname, int access_mode = F_OK);
+
+#endif /* RAXML_COMMON_H_ */
diff --git a/src/constants.hpp b/src/constants.hpp
new file mode 100644
index 0000000..6e79724
--- /dev/null
+++ b/src/constants.hpp
@@ -0,0 +1,27 @@
+#ifndef RAXML_CONSTANTS_HPP_
+#define RAXML_CONSTANTS_HPP_
+
+/* 1 = 1   | 2 = 2    | 3 = 16 | 4 = 4    | 5 = 32 | 6 = 64 | 7 = 0 | 8 = 8
+ * 9 = 128 | 10 = 256 | 11 = 0 | 12 = 512 | 13 = 0 | 14 = 0 | 15 = 1023      */
+const unsigned int pll_map_diploid10[256] =
+ {
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0, 1023,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0, 1023,
+   0,  1,   0,   2,  0,  0,  0,    4,    0,   0,  0, 512,  0,   16, 1023, 1023,
+   0,  0,  32,  64,  8,  8,  0,  128, 1023, 256,  0,   0,  0,    0,    0,    0,
+   0,  1,   0,   2,  0,  0,  0,    4,    0,   0,  0, 512,  0,   16, 1023, 1023,
+   0,  0,  32,  64,  8,  8,  0,    0, 1023, 256,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0,
+   0,  0,   0,   0,  0,  0,  0,    0,    0,   0,  0,   0,  0,    0,    0,    0
+ };
+
+
+#endif /* RAXML_CONSTANTS_HPP_ */
diff --git a/src/io/NewickStream.cpp b/src/io/NewickStream.cpp
new file mode 100644
index 0000000..f31ef4d
--- /dev/null
+++ b/src/io/NewickStream.cpp
@@ -0,0 +1,35 @@
+#include "file_io.hpp"
+
+NewickStream& operator<<(NewickStream& stream, const pll_utree_t& tree)
+{
+  char * newick_str = pll_utree_export_newick(&tree);
+  stream << newick_str << std::endl;
+  free(newick_str);
+  return stream;
+}
+
+NewickStream& operator<<(NewickStream& stream, const Tree& tree)
+{
+  stream << tree.pll_utree_start();
+  return stream;
+}
+
+NewickStream& operator>>(NewickStream& stream, Tree& tree)
+{
+//  stream.getline()
+//  char * newick_str = pll_utree_export_newick((pll_utree_t *) &tree);
+//  stream << newick_str << std::endl;
+//  free(newick_str);
+
+  tree.fix_missing_brlens();
+  assert(0);
+  return stream;
+}
+
+NewickStream& operator<<(NewickStream& stream, const BootstrapTree& tree)
+{
+  stream << tree.pll_utree_start();
+  return stream;
+}
+
+
diff --git a/src/io/binary_io.cpp b/src/io/binary_io.cpp
new file mode 100644
index 0000000..90e75fb
--- /dev/null
+++ b/src/io/binary_io.cpp
@@ -0,0 +1,102 @@
+#include "binary_io.hpp"
+
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const SubstitutionModel& sm)
+{
+  stream << sm.base_freqs();
+  stream << sm.subst_rates();
+
+  return stream;
+}
+
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const Model& m)
+{
+//  if (m.param_mode(PLLMOD_OPT_PARAM_BRANCH_LEN_SCALER) != ParamValue::undefined)
+  stream << m.brlen_scaler();
+
+//  if (m.param_mode(PLLMOD_OPT_PARAM_PINV) != ParamValue::undefined)
+    stream << m.pinv();
+
+  stream << m.num_ratecats();
+
+  if (m.num_ratecats() > 1)
+  {
+    stream << m.ratehet_mode();
+    if (m.ratehet_mode() == PLLMOD_UTIL_MIXTYPE_GAMMA)
+      stream << m.alpha();
+
+    stream << m.ratecat_weights();
+    stream << m.ratecat_rates();
+  }
+
+  /* store subst model parameters only if they were estimated */
+  if (m.param_mode(PLLMOD_OPT_PARAM_FREQUENCIES) == ParamValue::ML ||
+      m.param_mode(PLLMOD_OPT_PARAM_SUBST_RATES) == ParamValue::ML)
+  {
+    stream << m.num_submodels();
+    for (size_t i = 0; i < m.num_submodels(); ++i)
+    {
+      stream << m.submodel(i);
+    }
+  }
+
+  return stream;
+}
+
+BasicBinaryStream& operator>>(BasicBinaryStream& stream, Model& m)
+{
+//  if (m.param_mode(PLLMOD_OPT_PARAM_BRANCH_LEN_SCALER) != ParamValue::undefined)
+  m.brlen_scaler(stream.get<double>());
+
+//  if (m.param_mode(PLLMOD_OPT_PARAM_PINV) != ParamValue::undefined)
+
+  m.pinv(stream.get<double>());
+
+  auto num_ratecats = stream.get<unsigned int>();
+
+  assert(num_ratecats == m.num_ratecats());
+
+  if (m.num_ratecats() > 1)
+  {
+    auto ratehet_mode = stream.get<unsigned int>();
+    assert(ratehet_mode == m.ratehet_mode());
+
+    if (m.ratehet_mode() == PLLMOD_UTIL_MIXTYPE_GAMMA)
+      m.alpha(stream.get<double>());
+
+    m.ratecat_weights(stream.get<doubleVector>());
+    m.ratecat_rates(stream.get<doubleVector>());
+  }
+
+  if (m.param_mode(PLLMOD_OPT_PARAM_FREQUENCIES) == ParamValue::ML ||
+      m.param_mode(PLLMOD_OPT_PARAM_SUBST_RATES) == ParamValue::ML)
+  {
+    auto num_submodels = stream.get<unsigned int>();
+    assert(num_submodels == m.num_submodels());
+    for (size_t i = 0; i < m.num_submodels(); ++i)
+    {
+      m.base_freqs(i, stream.get<doubleVector>());
+      m.subst_rates(i, stream.get<doubleVector>());
+    }
+  }
+
+  return stream;
+}
+
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const TreeCollection& c)
+{
+  stream << c.size();
+  for (const auto tree: c)
+    stream << tree.first << tree.second;
+  return stream;
+}
+
+BasicBinaryStream& operator>>(BasicBinaryStream& stream, TreeCollection& c)
+{
+  auto size = stream.get<size_t>();
+  for (size_t i = 0; i < size; ++i)
+  {
+    auto score = stream.get<double>();
+    c.push_back(score, stream.get<TreeTopology>());
+  }
+  return stream;
+}
diff --git a/src/io/binary_io.hpp b/src/io/binary_io.hpp
new file mode 100644
index 0000000..ab83782
--- /dev/null
+++ b/src/io/binary_io.hpp
@@ -0,0 +1,162 @@
+#ifndef RAXML_BINARY_IO_HPP_
+#define RAXML_BINARY_IO_HPP_
+
+#include "../Model.hpp"
+#include "../Tree.hpp"
+
+class BasicBinaryStream
+{
+public:
+  virtual ~BasicBinaryStream() {}
+
+  template<typename T>
+  T get()
+  {
+    T tmp;
+    *this >> tmp;
+//    get(&tmp, sizeof(T));
+    return tmp;
+  }
+
+  void get(void *data, size_t size) { read(data, size); };
+  void put(const void *data, size_t size) { write(data, size); };
+
+protected:
+  virtual void read(void *data, size_t size) = 0;
+  virtual void write(const void *data, size_t size) = 0;
+};
+
+class BinaryStream : public BasicBinaryStream
+{
+public:
+  BinaryStream(char * buf, size_t size)
+  {
+    _buf = buf;
+    _ptr = buf;
+    _size = size;
+  }
+
+  ~BinaryStream() {}
+
+//  template<typename T>
+//  T get()
+//  {
+//    T tmp;
+//    *this >> tmp;
+//    return tmp;
+//  }
+
+  const char* buf() { return _buf; }
+  size_t size() const { return _size; }
+  size_t pos() const { return _ptr - _buf;}
+  char* reset() { _ptr = _buf; return _buf; }
+
+protected:
+  void write(const void *data, size_t size)
+  {
+    if (_ptr + size > _buf + _size)
+      throw std::out_of_range("BinaryStream::put");
+
+    memcpy(_ptr, data, size);
+    _ptr += size;
+  }
+
+  void read(void *data, size_t size)
+  {
+    if (_ptr + size > _buf + _size)
+      throw std::out_of_range("BinaryStream::get");
+
+    memcpy(data, _ptr, size);
+    _ptr += size;
+  }
+
+private:
+  char * _buf;
+  char * _ptr;
+  size_t _size;
+};
+
+class BinaryFileStream : public BasicBinaryStream
+{
+public:
+  BinaryFileStream(const std::string fname, std::ios_base::openmode mode) :
+    _fstream(fname, std::ios::binary | mode) {}
+
+protected:
+  void write(const void *data, size_t size) { _fstream.write((char*) data, size); }
+  void read(void *data, size_t size) { _fstream.read((char*) data, size); }
+
+private:
+  std::fstream _fstream;
+};
+
+template<typename T>
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, T v)
+{
+  stream.put(&v, sizeof(T));
+
+  return stream;
+}
+
+template<typename T>
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const std::vector<T>& vec)
+{
+  stream << vec.size();
+  for (auto const& v: vec)
+    stream << v;
+
+  return stream;
+}
+
+/**
+ *  Reading
+ */
+
+template<typename T>
+BasicBinaryStream& operator>>(BasicBinaryStream& stream, T& v)
+{
+  stream.get(&v, sizeof(T));
+
+//  std::cout << "read value: " << v << std::endl;
+
+  return stream;
+}
+
+template<typename T>
+BasicBinaryStream& operator>>(BasicBinaryStream& stream, std::vector<T>& vec)
+{
+  size_t vec_size;
+  stream >> vec_size;
+  vec.resize(vec_size);
+
+  for (auto& v: vec)
+    stream >> v;
+
+  return stream;
+}
+
+//BinaryStream& operator>>(BinaryStream& stream, SubstitutionModel& sm)
+//{
+//  stream >> sm.base_freqs();
+//  stream >> sm.subst_rates();
+//
+//  return stream;
+//}
+
+/**
+ * Model I/O
+ */
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const SubstitutionModel& sm);
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const Model& m);
+
+BasicBinaryStream& operator>>(BasicBinaryStream& stream, Model& m);
+
+/**
+ * TreeCollection I/O
+ */
+BasicBinaryStream& operator<<(BasicBinaryStream& stream, const TreeCollection& c);
+BasicBinaryStream& operator>>(BasicBinaryStream& stream, TreeCollection& c);
+
+#endif
+
+
diff --git a/src/io/file_io.hpp b/src/io/file_io.hpp
new file mode 100644
index 0000000..5cbbe79
--- /dev/null
+++ b/src/io/file_io.hpp
@@ -0,0 +1,88 @@
+#ifndef RAXML_FILE_IO_HPP_
+#define RAXML_FILE_IO_HPP_
+
+#include <fstream>
+
+#include "../Tree.hpp"
+#include "../bootstrap/BootstrapTree.hpp"
+
+class NewickStream : public std::fstream
+{
+public:
+  NewickStream(std::string fname) : std::fstream(fname, std::ios::out) {};
+  NewickStream(std::string fname, std::ios_base::openmode mode) :
+    std::fstream(fname, mode) {};
+};
+
+class MSAFileStream
+{
+public:
+  MSAFileStream(const std::string& fname) :
+    _fname(fname) {}
+
+  const std::string& fname() const { return _fname; };
+
+private:
+  std::string _fname;
+};
+
+class PhylipStream : public MSAFileStream
+{
+public:
+  PhylipStream(const std::string& fname) : MSAFileStream(fname) {}
+};
+
+class FastaStream : public MSAFileStream
+{
+public:
+  FastaStream(const std::string& fname) : MSAFileStream(fname) {}
+};
+
+class CATGStream : public MSAFileStream
+{
+public:
+  CATGStream(const std::string& fname) : MSAFileStream(fname) {}
+};
+
+
+class RaxmlPartitionStream : public std::fstream
+{
+public:
+  RaxmlPartitionStream(std::string fname) : std::fstream(fname, std::ios::out), _offset(0) {}
+  RaxmlPartitionStream(std::string fname, std::ios_base::openmode mode) :
+    std::fstream(fname, mode), _offset(0) {}
+
+  void reset() { _offset = 0; }
+  void put_range(size_t part_len)
+  {
+    *this << (_offset+1) << "-" << (_offset+part_len);
+    _offset += part_len;
+  }
+
+private:
+  size_t _offset;
+};
+
+NewickStream& operator<<(NewickStream& stream, const pll_utree_t& tree);
+NewickStream& operator<<(NewickStream& stream, const Tree& tree);
+NewickStream& operator>>(NewickStream& stream, Tree& tree);
+
+NewickStream& operator<<(NewickStream& stream, const BootstrapTree& tree);
+//NewickStream& operator>>(NewickStream& stream, BootstrapTree& tree);
+
+PhylipStream& operator>>(PhylipStream& stream, MSA& msa);
+FastaStream& operator>>(FastaStream& stream, MSA& msa);
+CATGStream& operator>>(CATGStream& stream, MSA& msa);
+MSA msa_load_from_file(const std::string &filename, const FileFormat format);
+
+PhylipStream& operator<<(PhylipStream& stream, MSA& msa);
+PhylipStream& operator<<(PhylipStream& stream, PartitionedMSA& msa);
+
+RaxmlPartitionStream& operator>>(RaxmlPartitionStream& stream, PartitionInfo& part_info);
+RaxmlPartitionStream& operator>>(RaxmlPartitionStream& stream, PartitionedMSA& parted_msa);
+
+RaxmlPartitionStream& operator<<(RaxmlPartitionStream& stream, const PartitionInfo& part_info);
+RaxmlPartitionStream& operator<<(RaxmlPartitionStream& stream, const PartitionedMSA& parted_msa);
+
+
+#endif /* RAXML_FILE_IO_HPP_ */
diff --git a/src/io/msa_streams.cpp b/src/io/msa_streams.cpp
new file mode 100644
index 0000000..497ef55
--- /dev/null
+++ b/src/io/msa_streams.cpp
@@ -0,0 +1,278 @@
+#include <stdexcept>
+#include <algorithm>
+#include <map>
+
+#include "file_io.hpp"
+
+using namespace std;
+
+FastaStream& operator>>(FastaStream& stream, MSA& msa)
+{
+  /* open the file */
+  auto file = pll_fasta_open(stream.fname().c_str(), pll_map_fasta);
+  if (!file)
+    throw runtime_error{"Cannot open file " + stream.fname()};
+
+  char * sequence = NULL;
+  char * header = NULL;
+  long sequence_length;
+  long header_length;
+  long sequence_number;
+
+  /* read sequences and make sure they are all of the same length */
+  int sites = 0;
+
+  /* read the first sequence separately, so that the MSA object can be constructed */
+  pll_fasta_getnext(file, &header, &header_length, &sequence, &sequence_length, &sequence_number);
+  sites = sequence_length;
+
+  if (sites == -1 || sites == 0)
+    throw runtime_error{"Unable to read MSA file"};
+
+  msa = MSA(sites);
+
+  msa.append(sequence, header);
+
+  free(sequence);
+  free(header);
+
+  /* read the rest */
+  while (pll_fasta_getnext(file, &header, &header_length, &sequence, &sequence_length, &sequence_number))
+  {
+    if (sites && sites != sequence_length)
+      throw runtime_error{"MSA file does not contain equal size sequences"};
+
+    if (!sites) sites = sequence_length;
+
+    msa.append(sequence, header);
+    free(sequence);
+    free(header);
+  }
+
+  if (pll_errno != PLL_ERROR_FILE_EOF)
+    throw runtime_error{"Error while reading file: " +  stream.fname()};
+
+  pll_fasta_close(file);
+
+  return stream;
+}
+
+PhylipStream& operator>>(PhylipStream& stream, MSA& msa)
+{
+  unsigned int sequence_count;
+  pll_msa_t * pll_msa = pll_phylip_parse_msa(stream.fname().c_str(), &sequence_count);
+  if (pll_msa)
+  {
+    msa = MSA(pll_msa);
+    pll_msa_destroy(pll_msa);
+  }
+  else
+    throw runtime_error{"Unable to parse PHYLIP file: " +  stream.fname()};
+
+  return stream;
+}
+
+PhylipStream& operator<<(PhylipStream& stream, MSA& msa)
+{
+  auto retval = pllmod_msa_save_phylip(msa.pll_msa(), stream.fname().c_str());
+
+  if (!retval)
+    throw runtime_error{pll_errmsg};
+
+  return stream;
+}
+
+PhylipStream& operator<<(PhylipStream& stream, PartitionedMSA& msa)
+{
+  ofstream fs(stream.fname());
+
+  auto taxa = msa.full_msa().size();
+  auto sites = msa.total_length();
+  fs << taxa << " " << sites << endl;
+
+  for (size_t i = 0; i < taxa; ++i)
+  {
+    fs << msa.full_msa().label(i) << " ";
+    for (const auto& p: msa.part_list())
+    {
+      fs << p.msa().at(i);
+    }
+    fs << endl;
+  }
+
+  return stream;
+}
+
+
+CATGStream& operator>>(CATGStream& stream, MSA& msa)
+{
+  ifstream fs;
+  fs.open(stream.fname());
+
+  /* read alignment dimensions */
+  size_t taxa_count, site_count;
+
+  try
+  {
+    fs >> taxa_count >> site_count;
+  }
+  catch(exception& e)
+  {
+    LOG_DEBUG << e.what() << endl;
+    throw runtime_error("Invalid alignment dimensions!");
+  }
+
+  LOG_DEBUG << "CATG: taxa: " << taxa_count << ", sites: " << site_count << endl;
+
+  string dummy(site_count, '*');
+
+  /* read taxa names */
+  try
+  {
+    string taxon_name;
+    for (size_t i = 0; i < taxa_count; ++i)
+    {
+      fs >> taxon_name;
+      msa.append(dummy, taxon_name);
+      LOG_DEBUG << "CATG: taxon " << i << ": " << taxon_name << endl;
+    }
+  }
+  catch (exception& e)
+  {
+    LOG_DEBUG << e.what() << endl;
+    throw runtime_error("Wrong number of taxon labels!");
+  }
+
+  assert (msa.size() == taxa_count);
+
+  /* this is mapping for DNA: CATG -> ACGT, for other datatypes we assume 1:1 mapping */
+  std::vector<size_t> state_map({1, 0, 3, 2});
+
+  string cons_str, prob_str;
+
+  /* read alignment, remember that the matrix is transposed! */
+  for (size_t i = 0; i < msa.num_sites(); ++i)
+  {
+    /* read consensus sequences */
+    fs >> cons_str;
+
+    LOG_DEBUG << "CATG: site " << i << " consesus seq: " << cons_str << endl;
+
+    if (cons_str.length() != msa.size())
+      throw runtime_error("Wrong length of consensus sequence for site " + to_string(i+1) + "!");
+
+    std::vector<double> probs;
+
+    for (size_t j = 0; j < msa.size(); ++j)
+    {
+      msa[j][i] = cons_str[j];
+
+      fs >> prob_str;
+
+      if (msa.states() == 0)
+      {
+        auto states = std::count_if(prob_str.cbegin(), prob_str.cend(),
+                                    [](char c) -> bool { return c == ','; }) + 1;
+        msa.states(states);
+
+        /* see above: for datatypes other than DNA we assume 1:1 mapping */
+        if (states != 4)
+          state_map.clear();
+
+        LOG_DEBUG << "CATG: number of states: " << states << endl;
+      }
+
+      auto site_probs = msa.probs(j, i);
+
+      istringstream ss(prob_str);
+      size_t k = 0;
+      for (string token; getline(ss, token, ','); ++k)
+      {
+        if (state_map.empty())
+          site_probs[k] = stod(token);
+        else
+          site_probs[state_map[k]] = stod(token);
+      }
+
+      if (k != msa.states())
+        throw runtime_error("Wrong number of state probabilities for site " + to_string(i+1) + "!");
+    }
+  }
+
+#ifdef CATG_DEBUG
+  {
+    PhylipStream ps("catgout.phy");
+    ps << msa;
+  }
+#endif
+
+  return stream;
+}
+
+MSA msa_load_from_file(const std::string &filename, const FileFormat format)
+{
+  MSA msa;
+
+  typedef pair<FileFormat, string> FormatNamePair;
+  static vector<FormatNamePair> msa_formats = { {FileFormat::phylip, "PHYLIP"},
+                                                 {FileFormat::fasta, "FASTA"},
+                                                 {FileFormat::catg, "CATG"},
+                                                 {FileFormat::vcf, "VCF"},
+                                                 {FileFormat::binary, "RAxML-binary"} };
+
+  auto fmt_begin = msa_formats.cbegin();
+  auto fmt_end = msa_formats.cend();
+
+  if (format != FileFormat::autodetect)
+  {
+    fmt_begin = std::find_if(msa_formats.begin(), msa_formats.end(),
+                             [format](const FormatNamePair& p) { return p.first == format; });
+    assert(fmt_begin != msa_formats.cend());
+    fmt_end = fmt_begin + 1;
+  }
+
+  for (; fmt_begin != fmt_end; fmt_begin++)
+  {
+    try
+    {
+      switch (fmt_begin->first)
+      {
+        case FileFormat::fasta:
+        {
+          FastaStream s(filename);
+          s >> msa;
+          return msa;
+          break;
+        }
+        case FileFormat::phylip:
+        {
+          PhylipStream s(filename);
+          s >> msa;
+          return msa;
+          break;
+        }
+        case FileFormat::catg:
+        {
+          CATGStream s(filename);
+          s >> msa;
+          return msa;
+          break;
+        }
+        default:
+          throw runtime_error("Unsupported MSA file format!");
+      }
+    }
+    catch(exception &e)
+    {
+      if (format == FileFormat::autodetect)
+        LOG_DEBUG << "Failed to load as " << fmt_begin->second << ": " << e.what() << endl;
+      else
+        throw runtime_error("Error loading MSA: " + string(e.what()));
+    }
+  }
+
+  throw runtime_error("Error loading MSA: cannot parse any format supported by RAxML-NG!");
+}
+
+
+
diff --git a/src/io/part_info.cpp b/src/io/part_info.cpp
new file mode 100644
index 0000000..505d447
--- /dev/null
+++ b/src/io/part_info.cpp
@@ -0,0 +1,79 @@
+#include "file_io.hpp"
+
+RaxmlPartitionStream& operator>>(RaxmlPartitionStream& stream, PartitionInfo& part_info)
+{
+  std::ostringstream strstream;
+  std::streambuf * pbuf = strstream.rdbuf();
+
+  bool model_set = false;
+  bool eof = false;
+  while (!eof)
+  {
+    int c = stream.get();
+    switch (c)
+    {
+      case ' ':
+      case '\t':
+        /* ignore whitespace */
+        break;
+      case '\r':
+      case '\n':
+      case EOF:
+        part_info.range_string(strstream.str());
+        eof = true;
+        break;
+      case ',':
+        if (!model_set)
+        {
+          part_info.model(Model(strstream.str()));
+          strstream.str("");
+          strstream.clear();
+          model_set = true;
+        }
+        else
+          pbuf->sputc(c);
+        break;
+      case '=':
+        part_info.name(strstream.str());
+        strstream.str("");
+        strstream.clear();
+        break;
+      default:
+        pbuf->sputc(c);
+    }
+  }
+
+  return stream;
+}
+
+RaxmlPartitionStream& operator>>(RaxmlPartitionStream& stream, PartitionedMSA& parted_msa)
+{
+  while (stream.peek() != EOF)
+  {
+    PartitionInfo pinfo;
+    stream >> pinfo;
+    parted_msa.append_part_info(std::move(pinfo));
+  }
+  return stream;
+}
+
+RaxmlPartitionStream& operator<<(RaxmlPartitionStream& stream, const PartitionInfo& part_info)
+{
+  stream << part_info.model().to_string() << ", ";
+  stream << part_info.name() << " = ";
+  stream.put_range(part_info.msa().length());
+  stream << std::endl;
+  return stream;
+}
+
+RaxmlPartitionStream& operator<<(RaxmlPartitionStream& stream, const PartitionedMSA& parted_msa)
+{
+  stream.reset();
+  for (const auto& pinfo: parted_msa.part_list())
+    stream << pinfo;
+
+  return stream;
+}
+
+
+
diff --git a/src/log.cpp b/src/log.cpp
new file mode 100644
index 0000000..d3b3498
--- /dev/null
+++ b/src/log.cpp
@@ -0,0 +1,88 @@
+#include "log.hpp"
+#include "common.h"
+
+using namespace std;
+
+void LogStream::add_stream(std::ostream* stream)
+{
+  if (stream)
+  {
+    *stream << fixed;
+    _streams.push_back(stream);
+  }
+}
+
+Logging::Logging() : _log_level(LogLevel::info), _logfile()
+{
+  /* add file stream to the list, even though it's to attached to a file yet */
+  _full_stream.add_stream(&_logfile);
+}
+
+Logging& Logging::instance()
+{
+  static Logging instance;
+
+  return instance;
+}
+
+LogStream& Logging::logstream(LogLevel level)
+{
+  if (ParallelContext::master() && level <= _log_level)
+    return _full_stream;
+  else
+    return _empty_stream;
+}
+
+void Logging::set_log_filename(const std::string& fname)
+{
+  _logfile.open(fname, ofstream::out);
+}
+
+void Logging::add_log_stream(std::ostream* stream)
+{
+  if (stream)
+    _full_stream.add_stream(stream);
+}
+
+void Logging::set_log_level(LogLevel level)
+{
+  _log_level = level;
+}
+
+Logging& logger()
+{
+  return Logging::instance();
+}
+
+TimeStamp::TimeStamp() : secs(sysutil_elapsed_seconds())
+{
+};
+
+LogStream& operator<<(LogStream& logstream, std::ostream& (*pf)(std::ostream&))
+{
+  for (auto s: logstream.streams())
+    *s << pf;
+
+  return logstream;
+}
+
+LogStream& operator<<(LogStream& logstream, const TimeStamp& ts)
+{
+  const unsigned int hh = ts.secs / 3600;
+  const unsigned int mm = (ts.secs - hh * 3600) / 60;
+  const unsigned int ss = (ts.secs - hh * 3600 - mm * 60);
+
+  logstream << setfill('0') << setw(2) << hh << ":" <<
+                      setw(2) << mm << ":" <<
+                      setw(2) << ss;
+
+  return logstream;
+}
+
+LogStream& operator<<(LogStream& logstream, const ProgressInfo& prog)
+{
+
+  logstream << "[" << TimeStamp() << " " << FMT_LH(prog.loglh) << "] ";
+
+  return logstream;
+}
diff --git a/src/log.hpp b/src/log.hpp
new file mode 100644
index 0000000..d78b39d
--- /dev/null
+++ b/src/log.hpp
@@ -0,0 +1,109 @@
+#ifndef RAXML_LOG_HPP_
+#define RAXML_LOG_HPP_
+
+#include <fstream>
+#include <vector>
+#include <memory>
+
+enum class LogLevel
+{
+  none = 0,
+  error,
+  warning,
+  info,
+  progress,
+  verbose,
+  debug
+};
+
+struct TimeStamp
+{
+  TimeStamp();
+
+  double secs;
+};
+
+struct ProgressInfo
+{
+  ProgressInfo(double loglh) : loglh(loglh) {};
+
+  double loglh;
+};
+
+typedef std::vector<std::ostream*> StreamList;
+
+class LogStream
+{
+public:
+  LogStream() {};
+  LogStream(const StreamList& streams) { _streams = streams; };
+
+  StreamList& streams() { return _streams;};
+
+  void add_stream(std::ostream* stream);
+
+private:
+  StreamList _streams;
+};
+
+class Logging
+{
+public:
+  static Logging& instance();
+
+  LogStream& logstream(LogLevel level);
+  LogLevel log_level() const;
+
+  void set_log_filename(const std::string& fname);
+  void add_log_stream(std::ostream* stream);
+  void set_log_level(LogLevel level);
+
+  /* singleton: remove copy/move constructors and assignment ops */
+  Logging(const Logging& other) = delete;
+  Logging(Logging&& other) = delete;
+  Logging& operator=(const Logging& other) = delete;
+  Logging& operator=(Logging&& other) = delete;
+
+private:
+  Logging();
+
+  LogLevel _log_level;
+  std::ofstream _logfile;
+  LogStream _empty_stream;
+  LogStream _full_stream;
+};
+
+Logging& logger();
+
+#define RAXML_LOG(level) logger().logstream(level)
+
+#define LOG_ERROR RAXML_LOG(LogLevel::error)
+#define LOG_WARN RAXML_LOG(LogLevel::warning)
+#define LOG_INFO RAXML_LOG(LogLevel::info)
+#define LOG_DEBUG RAXML_LOG(LogLevel::debug)
+#define LOG_PROGR RAXML_LOG(LogLevel::progress)
+#define LOG_VERB RAXML_LOG(LogLevel::verbose)
+
+#define LOG_INFO_TS LOG_INFO << "[" << TimeStamp() << "] "
+#define LOG_VERB_TS LOG_VERB << "[" << TimeStamp() << "] "
+#define LOG_PROGRESS(loglh) LOG_PROGR << ProgressInfo(loglh)
+
+#define FMT_LH(loglh) setprecision(6) << loglh
+#define FMT_PREC3(val) setprecision(3) << val
+
+template <class T>
+LogStream& operator<<(LogStream& logstream, const T& object)
+{
+  for (auto s: logstream.streams())
+    *s << object;
+
+  return logstream;
+}
+
+LogStream& operator<<(LogStream& logstream, std::ostream& (*pf)(std::ostream&));
+
+LogStream& operator<<(LogStream& logstream, const ProgressInfo& prog);
+
+LogStream& operator<<(LogStream& logstream, const TimeStamp& ts);
+
+#endif /* RAXML_LOG_HPP_ */
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..d30a6e6
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,798 @@
+/*
+    Copyright (C) 2017 Alexey Kozlov, Alexandros Stamatakis, Diego Darriba, Tomas Flouri
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+    Contact: Alexey Kozlov <Alexey.Kozlov at h-its.org>,
+    Heidelberg Institute for Theoretical Studies,
+    Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
+*/
+#include <algorithm>
+#include <chrono>
+
+#include <memory>
+
+#include "version.h"
+#include "common.h"
+#include "MSA.hpp"
+#include "Options.hpp"
+#include "CommandLineParser.hpp"
+#include "Optimizer.hpp"
+#include "PartitionInfo.hpp"
+#include "TreeInfo.hpp"
+#include "io/file_io.hpp"
+#include "io/binary_io.hpp"
+#include "ParallelContext.hpp"
+#include "LoadBalancer.hpp"
+#include "bootstrap/BootstrapGenerator.hpp"
+
+using namespace std;
+
+typedef vector<Tree> TreeList;
+struct RaxmlInstance
+{
+  Options opts;
+  PartitionedMSA parted_msa;
+  TreeList start_trees;
+  BootstrapReplicateList bs_reps;
+  PartitionAssignmentList proc_part_assign;
+  unique_ptr<BootstrapTree> bs_tree;
+
+  /* this is just a dummy random tree used for convenience, e,g, if we need tip labels or
+   * just 'any' valid tree for the alignment at hand */
+  Tree random_tree;
+};
+
+void print_banner()
+{
+  LOG_INFO << endl << "RAxML-NG v. " << RAXML_VERSION << " released on " << RAXML_DATE <<
+      " by The Exelixis Lab." << endl;
+  LOG_INFO << "Authors: Alexey Kozlov, Alexandros Stamatakis, Diego Darriba, "
+              "Tomas Flouri, Benoit Morel." << endl;
+  LOG_INFO << "Latest version: https://github.com/amkozlov/raxml-ng" << endl;
+  LOG_INFO << "Questions/problems/suggestions? "
+              "Please visit: https://groups.google.com/forum/#!forum/raxml" << endl;
+  LOG_INFO << endl << "WARNING: This is a BETA release, please use at your own risk!" << endl << endl;
+}
+
+void init_part_info(RaxmlInstance& instance)
+{
+  auto& opts = instance.opts;
+  auto& parted_msa = instance.parted_msa;
+
+  /* check if model is a file */
+  if (sysutil_file_exists(opts.model_file))
+  {
+    // read partition definitions from file
+    RaxmlPartitionStream partfile(opts.model_file, ios::in);
+    partfile >> parted_msa;
+
+//    DBG("partitions found: %d\n", useropt->part_count);
+  }
+  else
+  {
+    // create and init single pseudo-partition
+    parted_msa.emplace_part_info("noname", opts.data_type, opts.model_file);
+  }
+
+  /* make sure that linked branch length mode is set for unpartitioned alignments */
+  if (parted_msa.part_count() == 1)
+    opts.brlen_linkage = PLLMOD_TREE_BRLEN_LINKED;
+
+  int lg4x_count = 0;
+
+  for (const auto& pinfo: parted_msa.part_list())
+  {
+//    LOG_INFO << "|" << pinfo.name() << "|   |" << pinfo.model().to_string() << "|   |" <<
+//        pinfo.range_string() << "|" << endl;
+
+    if (pinfo.model().name() == "LG4X")
+      lg4x_count++;
+  }
+
+  if (parted_msa.part_count() > 1 && lg4x_count > 0 && opts.brlen_linkage == PLLMOD_TREE_BRLEN_LINKED)
+  {
+    throw runtime_error("LG4X model is not supported in linked branch length mode, sorry :(");
+  }
+}
+
+void check_msa(RaxmlInstance& instance)
+{
+  LOG_VERB_TS << "Checking the alignment...\n";
+
+  const auto& full_msa = instance.parted_msa.full_msa();
+  const auto pll_msa = full_msa.pll_msa();
+
+  unsigned long stats_mask = PLLMOD_MSA_STATS_DUP_TAXA | PLLMOD_MSA_STATS_DUP_SEQS;
+
+  pllmod_msa_stats_t * stats = pllmod_msa_compute_stats(pll_msa,
+                                                        4,
+                                                        pll_map_nt, // map is not used here
+                                                        NULL,
+                                                        stats_mask);
+
+  if (!stats)
+    throw runtime_error(pll_errmsg);
+
+  if (stats->dup_taxa_pairs_count > 0)
+  {
+    LOG_ERROR << "\nERROR: Duplicate sequence names found: " << stats->dup_taxa_pairs_count << endl;
+    for (unsigned long c = 0; c < stats->dup_taxa_pairs_count; ++c)
+    {
+      const unsigned long idx1 = stats->dup_taxa_pairs[c*2];
+      const unsigned long idx2 = stats->dup_taxa_pairs[c*2+1];
+      LOG_ERROR << "ERROR: Sequences " << idx1 << " and " << idx2 << " have identical name: " <<
+          pll_msa->label[idx1] << endl;
+    }
+    throw runtime_error("Please fix your alignment!");
+  }
+
+  if (stats->dup_seqs_pairs_count > 0)
+  {
+    LOG_WARN << "\nWARNING: Duplicate sequences found: " << stats->dup_seqs_pairs_count << endl;
+    for (unsigned long c = 0; c < stats->dup_seqs_pairs_count; ++c)
+    {
+      const unsigned long idx1 = stats->dup_seqs_pairs[c*2];
+      const unsigned long idx2 = stats->dup_seqs_pairs[c*2+1];
+      LOG_WARN << "WARNING: Sequences " << pll_msa->label[idx1] << " and " <<
+          pll_msa->label[idx2] << " are exactly identical!" << endl;
+    }
+  }
+
+  pllmod_msa_destroy_stats(stats);
+
+  std::set<size_t> gap_seqs;
+  size_t total_gap_cols = 0;
+  size_t part_num = 0;
+  for (auto& pinfo: instance.parted_msa.part_list())
+  {
+    stats_mask = PLLMOD_MSA_STATS_GAP_SEQS | PLLMOD_MSA_STATS_GAP_COLS;
+
+    pllmod_msa_stats_t * stats = pinfo.compute_stats(stats_mask);
+
+    if (stats->gap_cols_count > 0)
+    {
+      total_gap_cols += stats->gap_cols_count;
+      std::vector<size_t> gap_cols(stats->gap_cols, stats->gap_cols + stats->gap_cols_count);
+      pinfo.msa().remove_sites(gap_cols);
+    }
+
+    std::set<size_t> cur_gap_seq(stats->gap_seqs, stats->gap_seqs + stats->gap_seqs_count);
+
+    if (!part_num)
+    {
+      gap_seqs = cur_gap_seq;
+    }
+    else
+    {
+      for (auto e: gap_seqs)
+      {
+        if (cur_gap_seq.find(e) == cur_gap_seq.end())
+          gap_seqs.erase(e);
+      }
+    }
+
+    pllmod_msa_destroy_stats(stats);
+
+    part_num++;
+  }
+
+  if (total_gap_cols > 0)
+  {
+    LOG_WARN << "\nWARNING: Fully undetermined columns found: " << total_gap_cols << endl;
+//    for (unsigned long c = 0; c < stats->gap_cols_count; ++c)
+//      LOG_VERB << "WARNING: Column " << stats->gap_cols[c]+1 << " contains only gaps!" << endl;
+  }
+
+  if (!gap_seqs.empty())
+  {
+   LOG_WARN << "\nWARNING: Fully undetermined sequences found: " << stats->gap_seqs_count << endl;
+   for (auto c : gap_seqs)
+     LOG_VERB << "WARNING: Sequence " << pll_msa->label[c] << " contains only gaps!" << endl;
+  }
+
+
+  if (total_gap_cols > 0 || !gap_seqs.empty() > 0)
+  {
+    // save reduced MSA and partition files
+    auto reduced_msa_fname = instance.opts.output_fname("reduced.phy");
+    PhylipStream ps(reduced_msa_fname);
+
+    ps << instance.parted_msa;
+
+    LOG_INFO << "\nNOTE: Reduced alignment (with gap-only sequences and columns removed) was printed to:\n";
+    LOG_INFO << sysutil_realpath(reduced_msa_fname) << endl;
+
+    // save reduced partition file
+    if (sysutil_file_exists(instance.opts.model_file))
+    {
+      auto reduced_part_fname = instance.opts.output_fname("reduced.partition");
+      RaxmlPartitionStream ps(reduced_part_fname, ios::out);
+
+      ps << instance.parted_msa;
+
+      LOG_INFO << "\nNOTE: The corresponding reduced partition file was printed to:\n";
+      LOG_INFO << sysutil_realpath(reduced_part_fname) << endl;
+    }
+  }
+
+}
+
+void load_msa(RaxmlInstance& instance)
+{
+  const auto& opts = instance.opts;
+  auto& parted_msa = instance.parted_msa;
+
+  LOG_INFO_TS << "Reading alignment from file: " << opts.msa_file << endl;
+
+  /* load MSA */
+  auto msa = msa_load_from_file(opts.msa_file, opts.msa_format);
+
+  LOG_INFO_TS << "Loaded alignment with " << msa.size() << " taxa and " <<
+      msa.num_sites() << " sites" << endl;
+
+  if (msa.probabilistic() && opts.use_prob_msa)
+  {
+    instance.opts.use_pattern_compression = false;
+    instance.opts.use_tip_inner = false;
+
+    if (parted_msa.part_count() > 1)
+      throw runtime_error("Partitioned probabilistic alignments are not supported yet, sorry...");
+  }
+  else
+    instance.opts.use_prob_msa = false;
+
+  parted_msa.full_msa(std::move(msa));
+
+  LOG_VERB_TS << "Extracting partitions... " << endl;
+
+  parted_msa.split_msa();
+
+  /* check alignment */
+  check_msa(instance);
+
+  if (opts.use_pattern_compression)
+    parted_msa.compress_patterns();
+
+  parted_msa.set_model_empirical_params();
+
+  LOG_INFO << endl;
+
+  LOG_INFO << "Alignment comprises " << parted_msa.part_count() << " partitions and " <<
+      parted_msa.total_length() << " patterns\n" << endl;
+
+  LOG_INFO << parted_msa;
+
+  LOG_INFO << endl;
+}
+
+Tree generate_tree(const RaxmlInstance& instance, StartingTree type)
+{
+  Tree tree;
+
+  const auto& opts = instance.opts;
+  const auto& parted_msa = instance.parted_msa;
+  const auto& msa = parted_msa.full_msa();
+
+  switch (type)
+  {
+    case StartingTree::user:
+    {
+      assert(!opts.tree_file.empty());
+
+      /* parse the unrooted binary tree in newick format, and store the number
+         of tip nodes in tip_nodes_count */
+      tree = Tree::loadFromFile(opts.tree_file);
+
+      LOG_DEBUG << "Loaded user starting tree with " << tree.num_tips() << " taxa from: "
+                           << opts.tree_file << endl;
+
+      if (msa.size() > tree.num_tips())
+        sysutil_fatal("Alignment file contains more sequences than expected");
+      else if (msa.size() != tree.num_tips())
+        sysutil_fatal("Some taxa are missing from the alignment file");
+
+      break;
+    }
+    case StartingTree::random:
+      /* no starting tree provided, generate a random one */
+      assert(opts.command != Command::evaluate);
+
+      LOG_DEBUG << "Generating a random starting tree with " << msa.size() << " taxa" << endl;
+
+      tree = Tree::buildRandom(msa);
+
+      break;
+    case StartingTree::parsimony:
+    {
+      LOG_DEBUG << "Generating a parsimony starting tree with " << msa.size() << " taxa" << endl;
+
+      unsigned int score;
+      tree = Tree::buildParsimony(parted_msa, rand(), opts.simd_arch, &score);
+
+      LOG_DEBUG << "Parsimony score of the starting tree: " << score << endl;
+
+      break;
+    }
+    default:
+      sysutil_fatal("Unknown starting tree type: %d\n", opts.start_tree);
+  }
+
+  assert(!tree.empty());
+
+  return tree;
+}
+
+void load_checkpoint(RaxmlInstance& instance, CheckpointManager& cm)
+{
+  /* init checkpoint and set to the manager */
+  {
+    Checkpoint ckp;
+    for (size_t p = 0; p < instance.parted_msa.part_count(); ++p)
+      ckp.models[p] = instance.parted_msa.part_info(p).model();
+
+    // this is a "template" tree, which provides tip labels and node ids
+    ckp.tree = instance.random_tree;
+
+    cm.checkpoint(move(ckp));
+  }
+
+  if (!instance.opts.redo_mode && cm.read())
+  {
+    const auto& ckp = cm.checkpoint();
+    for (const auto& m: ckp.models)
+      instance.parted_msa.model(m.first, m.second);
+
+    LOG_INFO_TS << "NOTE: Resuming execution from checkpoint " <<
+        "(logLH: " << ckp.loglh() <<
+        ", ML trees: " << ckp.ml_trees.size() <<
+        ", bootstraps: " << ckp.bs_trees.size() <<
+        ")"
+        << endl;
+  }
+}
+
+void build_start_trees(RaxmlInstance& instance, CheckpointManager& cm)
+{
+  const auto& opts = instance.opts;
+  const auto& parted_msa = instance.parted_msa;
+  const auto& msa = parted_msa.full_msa();
+
+  switch (opts.start_tree)
+  {
+    case StartingTree::user:
+      LOG_INFO_TS << "Loading user starting tree(s) from: " << opts.tree_file << endl;
+      break;
+    case StartingTree::random:
+      LOG_INFO_TS << "Generating random starting tree(s) with " << msa.size() << " taxa" << endl;
+      break;
+    case StartingTree::parsimony:
+      LOG_INFO_TS << "Generating parsimony starting tree(s) with " << msa.size() << " taxa" << endl;
+      break;
+    default:
+      assert(0);
+  }
+
+  for (size_t i = 0; i < opts.num_searches; ++i)
+  {
+    auto tree = generate_tree(instance, opts.start_tree);
+
+    // TODO: skip generation
+    if (i < cm.checkpoint().ml_trees.size())
+      continue;
+
+    /* fix missing branch lengths */
+    tree.fix_missing_brlens();
+
+    /* make sure tip indices are consistent between MSA and pll_tree */
+    tree.reset_tip_ids(msa.label_id_map());
+
+    instance.start_trees.emplace_back(move(tree));
+  }
+
+  if (::ParallelContext::master_rank())
+  {
+    NewickStream nw_start(opts.start_tree_file());
+    for (auto const& tree: instance.start_trees)
+      nw_start << tree;
+  }
+}
+
+void balance_load(RaxmlInstance& instance)
+{
+  PartitionAssignment part_sizes;
+
+  /* init list of partition sizes */
+  size_t i = 0;
+  for (auto const& pinfo: instance.parted_msa.part_list())
+  {
+    part_sizes.assign_sites(i, 0, pinfo.msa().length());
+    ++i;
+  }
+
+//  SimpleLoadBalancer balancer;
+  KassianLoadBalancer balancer;
+
+  instance.proc_part_assign = balancer.get_all_assignments(part_sizes, ParallelContext::num_procs());
+
+  LOG_INFO_TS << "Data distribution: " << PartitionAssignmentStats(instance.proc_part_assign) << endl;
+  LOG_VERB << endl << instance.proc_part_assign;
+}
+
+void generate_bootstraps(RaxmlInstance& instance, const Checkpoint& checkp)
+{
+  if (instance.opts.command == Command::bootstrap || instance.opts.command == Command::all)
+  {
+    BootstrapGenerator bg;
+    for (size_t b = 0; b < instance.opts.num_bootstraps; ++b)
+    {
+      auto seed = rand();
+
+      /* check if this BS was already computed in the previous run and saved in checkpoint */
+      if (b < checkp.bs_trees.size())
+        continue;
+
+      instance.bs_reps.emplace_back(bg.generate(instance.parted_msa, seed));
+    }
+  }
+}
+
+void draw_bootstrap_support(RaxmlInstance& instance, const Checkpoint& checkp)
+{
+  Tree tree = checkp.tree;
+  tree.topology(checkp.ml_trees.best_topology());
+
+  instance.bs_tree.reset(new BootstrapTree(tree));
+
+  for (auto bs: checkp.bs_trees)
+  {
+    tree.topology(bs.second);
+    instance.bs_tree->add_bootstrap_tree(tree);
+  }
+  instance.bs_tree->calc_support();
+}
+
+void print_final_output(const RaxmlInstance& instance, const Checkpoint& checkp)
+{
+  auto const& opts = instance.opts;
+
+  LOG_INFO << "\nOptimized model parameters:" << endl;
+
+  assert(checkp.models.size() == instance.parted_msa.part_count());
+
+  for (size_t p = 0; p < instance.parted_msa.part_count(); ++p)
+  {
+    LOG_INFO << "\n   Partition " << p << ": " << instance.parted_msa.part_info(p).name().c_str() << endl;
+    LOG_INFO << checkp.models.at(p);
+  }
+
+  if (opts.command == Command::search || opts.command == Command::all)
+  {
+    auto best = checkp.ml_trees.best();
+
+    LOG_INFO << "\nFinal LogLikelihood: " << FMT_LH(best->first) << endl << endl;
+
+    Tree best_tree = checkp.tree;
+
+    best_tree.topology(best->second);
+
+    NewickStream nw_result(opts.best_tree_file());
+    nw_result << best_tree;
+
+    if (checkp.ml_trees.size() > 1)
+    {
+      NewickStream nw(opts.ml_trees_file(), std::ios::out);
+      Tree ml_tree = checkp.tree;
+      for (auto topol: checkp.ml_trees)
+      {
+        ml_tree.topology(topol.second);
+        nw << ml_tree;
+      }
+
+      LOG_INFO << "All ML trees saved to: " << sysutil_realpath(opts.ml_trees_file()) << endl;
+    }
+
+    LOG_INFO << "Best ML tree saved to: " << sysutil_realpath(opts.best_tree_file()) << endl;
+
+    if (opts.command == Command::all)
+    {
+      assert(instance.bs_tree);
+
+      NewickStream nw(opts.support_tree_file(), std::ios::out);
+      nw << *instance.bs_tree;
+
+      LOG_INFO << "Best ML tree with bootstrap support values saved to: " <<
+          sysutil_realpath(opts.support_tree_file()) << endl;
+    }
+  }
+
+  if (opts.command == Command::bootstrap || opts.command == Command::all)
+  {
+    // TODO now only master process writes the output, this will have to change with
+    // coarse-grained parallelization scheme (parallel start trees/bootstraps)
+//    NewickStream nw(opts.bootstrap_trees_file(), std::ios::out | std::ios::app);
+    NewickStream nw(opts.bootstrap_trees_file(), std::ios::out);
+
+    Tree bs_tree = checkp.tree;
+    for (auto topol: checkp.bs_trees)
+    {
+      bs_tree.topology(topol.second);
+      nw << bs_tree;
+    }
+
+    LOG_INFO << "Bootstrap trees saved to: " << sysutil_realpath(opts.bootstrap_trees_file()) << endl;
+  }
+
+  LOG_INFO << "\nExecution log saved to: " << sysutil_realpath(opts.log_file()) << endl;
+
+  LOG_INFO << "\nElapsed time: " << FMT_PREC3(sysutil_elapsed_seconds()) << " seconds\n";
+  if (checkp.elapsed_seconds > 0.)
+  {
+    LOG_INFO << "Total analysis time: " <<
+                FMT_PREC3(checkp.elapsed_seconds + sysutil_elapsed_seconds()) << " seconds\n";
+  }
+  LOG_INFO << endl;
+}
+
+void thread_main(const RaxmlInstance& instance, CheckpointManager& cm)
+{
+  unique_ptr<TreeInfo> treeinfo;
+
+//  printf("thread %lu / %lu\n", ParallelContext::thread_id(), ParallelContext::num_procs());
+
+  /* wait until master thread prepares all global data */
+  ParallelContext::thread_barrier();
+
+  auto const& master_msa = instance.parted_msa;
+  auto const& opts = instance.opts;
+
+  /* get partitions assigned to the current thread */
+  auto const& part_assign = instance.proc_part_assign.at(ParallelContext::proc_id());
+
+  if ((opts.command == Command::search || opts.command == Command::all) &&
+      !instance.start_trees.empty())
+  {
+
+    LOG_INFO << "\nStarting ML tree search with " << opts.num_searches <<
+        " distinct starting trees" << endl << endl;
+
+    size_t start_tree_num = cm.checkpoint().ml_trees.size();
+    bool use_ckp_tree = cm.checkpoint().search_state.step != CheckpointStep::start;
+    for (const auto& tree: instance.start_trees)
+    {
+      assert(!tree.empty());
+
+      start_tree_num++;
+
+      if (use_ckp_tree)
+      {
+        treeinfo.reset(new TreeInfo(opts, cm.checkpoint().tree, master_msa, part_assign));
+        use_ckp_tree = false;
+      }
+      else
+        treeinfo.reset(new TreeInfo(opts, tree, master_msa, part_assign));
+
+  //    if (!treeinfo)
+  //      treeinfo.reset(new TreeInfo(opts, tree, master_msa, part_assign));
+  //    else
+  //      treeinfo->tree(tree);
+
+      Optimizer optimizer(opts);
+      if (opts.command == Command::search || opts.command == Command::all)
+      {
+        optimizer.optimize_topology(*treeinfo, cm);
+      }
+      else
+      {
+        LOG_INFO << "\nInitial LogLikelihood: " << FMT_LH(treeinfo->loglh()) << endl << endl;
+        optimizer.optimize(*treeinfo);
+      }
+
+      LOG_PROGR << endl;
+      LOG_INFO_TS << "ML tree search #" << start_tree_num <<
+          ", logLikelihood: " << FMT_LH(cm.checkpoint().loglh()) << endl;
+      LOG_PROGR << endl;
+
+      cm.save_ml_tree();
+      cm.reset_search_state();
+    }
+  }
+
+  ParallelContext::thread_barrier();
+
+  if (!instance.bs_reps.empty())
+  {
+    if (opts.command == Command::all)
+    {
+      LOG_INFO << endl;
+      LOG_INFO_TS << "ML tree search completed, best tree logLH: " <<
+          FMT_LH(cm.checkpoint().ml_trees.best_score()) << endl << endl;
+    }
+
+    LOG_INFO_TS << "Starting bootstrapping analysis with " << opts.num_bootstraps
+             << " replicates." << endl << endl;
+  }
+
+  /* infer bootstrap trees if needed */
+  size_t bs_num = cm.checkpoint().bs_trees.size();
+  for (const auto bs: instance.bs_reps)
+  {
+    ++bs_num;
+
+//    Tree tree = Tree::buildRandom(master_msa.full_msa());
+    /* for now, use the same random tree for all bootstraps */
+    const Tree& tree = instance.random_tree;
+    treeinfo.reset(new TreeInfo(opts, tree, master_msa, part_assign, bs.site_weights));
+
+    Optimizer optimizer(opts);
+    optimizer.optimize_topology(*treeinfo, cm);
+
+    LOG_PROGR << endl;
+    LOG_INFO_TS << "Bootstrap tree #" << bs_num <<
+                ", logLikelihood: " << FMT_LH(cm.checkpoint().loglh()) << endl;
+    LOG_PROGR << endl;
+
+    cm.save_bs_tree();
+    cm.reset_search_state();
+  }
+
+  ParallelContext::thread_barrier();
+}
+
+void master_main(RaxmlInstance& instance, CheckpointManager& cm)
+{
+  auto const& opts = instance.opts;
+  auto& parted_msa = instance.parted_msa;
+
+  init_part_info(instance);
+
+  load_msa(instance);
+
+  /* init template tree */
+  instance.random_tree = generate_tree(instance, StartingTree::random);
+
+  /* load checkpoint */
+  load_checkpoint(instance, cm);
+
+  /* load/create starting tree */
+  build_start_trees(instance, cm);
+
+  LOG_VERB << endl << "Initial model parameters:" << endl;
+  for (size_t p = 0; p < parted_msa.part_count(); ++p)
+  {
+    LOG_VERB << "   Partition: " << parted_msa.part_info(p).name() << endl <<
+        parted_msa.model(p) << endl;
+  }
+
+  /* run load balancing algorithm */
+  balance_load(instance);
+
+  /* generate bootstrap replicates */
+  generate_bootstraps(instance, cm.checkpoint());
+
+  thread_main(instance, cm);
+
+  if (ParallelContext::master_rank())
+  {
+    if (opts.command == Command::all)
+      draw_bootstrap_support(instance, cm.checkpoint());
+
+    print_final_output(instance, cm.checkpoint());
+
+    /* analysis finished successfully, remove checkpoint file */
+    cm.remove();
+  }
+}
+
+void clean_exit(int retval)
+{
+  ParallelContext::finalize(retval != EXIT_SUCCESS);
+  exit(retval);
+}
+
+int main(int argc, char** argv)
+{
+  int retval = EXIT_SUCCESS;
+
+  RaxmlInstance instance;
+
+  ParallelContext::init_mpi(argc, argv);
+
+  instance.opts.num_ranks = ParallelContext::num_ranks();
+
+  logger().add_log_stream(&cout);
+
+  CommandLineParser cmdline;
+  try
+  {
+    cmdline.parse_options(argc, argv, instance.opts);
+  }
+  catch (OptionException &e)
+  {
+    LOG_INFO << "ERROR: " << e.what() << std::endl;
+    clean_exit(EXIT_FAILURE);
+  }
+
+  /* handle trivial commands first */
+  switch (instance.opts.command)
+  {
+    case Command::help:
+      print_banner();
+      cmdline.print_help();
+      clean_exit(EXIT_SUCCESS);
+      break;
+    case Command::version:
+      print_banner();
+      clean_exit(EXIT_SUCCESS);
+      break;
+    default:
+      break;
+  }
+
+  /* now get to the real stuff */
+  srand(instance.opts.random_seed);
+  logger().set_log_level(instance.opts.log_level);
+  logger().set_log_filename(instance.opts.log_file());
+
+  print_banner();
+  LOG_INFO << instance.opts;
+
+  switch (instance.opts.command)
+  {
+    case Command::evaluate:
+    case Command::search:
+    case Command::bootstrap:
+    case Command::all:
+    {
+      try
+      {
+        if (instance.opts.redo_mode)
+        {
+          LOG_WARN << "WARNING: Running in REDO mode: existing checkpoints are ignored, "
+              "and all result files will be overwritten!" << endl << endl;
+        }
+        else
+        {
+          if (instance.opts.result_files_exist())
+            throw runtime_error("Result files for the run with prefix `" +
+                                (instance.opts.outfile_prefix.empty() ?
+                                    instance.opts.msa_file : instance.opts.outfile_prefix) +
+                                "` already exist!\n"
+                                "Please either choose a new prefix, remove old files, or add "
+                                "--redo command line switch to overwrite them.");
+        }
+
+        CheckpointManager cm(instance.opts.checkp_file());
+        ParallelContext::init_pthreads(instance.opts, std::bind(thread_main,
+                                                                std::cref(instance),
+                                                                std::ref(cm)));
+
+        master_main(instance, cm);
+      }
+      catch(exception& e)
+      {
+        LOG_ERROR << endl << "ERROR: " << e.what() << endl << endl;
+        retval = EXIT_FAILURE;
+      }
+      break;
+    }
+    case Command::none:
+    default:
+      LOG_ERROR << "Unknown command!" << endl;
+      retval = EXIT_FAILURE;
+  }
+
+  clean_exit(retval);
+  return retval;
+}
diff --git a/src/sysutil.cpp b/src/sysutil.cpp
new file mode 100644
index 0000000..513038f
--- /dev/null
+++ b/src/sysutil.cpp
@@ -0,0 +1,274 @@
+#include <cpuid.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdarg.h>
+#include <limits.h>
+
+#include <chrono>
+
+#include "common.h"
+
+using namespace std;
+
+chrono::time_point<chrono::system_clock> start_time = chrono::system_clock::now();
+
+void sysutil_fatal(const char * format, ...)
+{
+  va_list argptr;
+  va_start(argptr, format);
+  vfprintf(stderr, format, argptr);
+  va_end(argptr);
+  fprintf(stderr, "\n");
+  exit(EXIT_FAILURE);
+}
+
+void sysutil_fatal_libpll()
+{
+  sysutil_fatal("ERROR(%d): %s\n", pll_errno, pll_errmsg);
+}
+
+void * xmemalign(size_t size, size_t alignment)
+{
+  void * t = NULL;
+  int err = posix_memalign(& t, alignment, size);
+
+  if (err || !t)
+    sysutil_fatal("Unable to allocate enough memory.");
+
+  return t;
+}
+
+double sysutil_gettime()
+{
+#ifdef WIN32 // WINDOWS build
+        FILETIME tm;
+        ULONGLONG t;
+#if defined(NTDDI_WIN8) && NTDDI_VERSION >= NTDDI_WIN8 // >= WIN8
+        GetSystemTimePreciseAsFileTime( &tm );
+#else // < WIN8
+        GetSystemTimeAsFileTime( &tm );
+#endif
+        t = ((ULONGLONG)tm.dwHighDateTime << 32) | (ULONGLONG)tm.dwLowDateTime;
+        return (double)t / 10000000.0;
+#else // Unixoid build
+  struct timeval ttime;
+  gettimeofday(&ttime , NULL);
+  return ttime.tv_sec + ttime.tv_usec * 0.000001;
+#endif
+}
+
+void sysutil_show_rusage()
+{
+  struct rusage r_usage;
+  getrusage(RUSAGE_SELF, & r_usage);
+  
+  fprintf(stderr,
+          "Time: %.3fs (user)", 
+          r_usage.ru_utime.tv_sec * 1.0 + (double)r_usage.ru_utime.tv_usec * 1.0e-6);
+  fprintf(stderr,
+          " %.3fs (sys)",
+          r_usage.ru_stime.tv_sec * 1.0 + r_usage.ru_stime.tv_usec * 1.0e-6);
+
+#if defined __APPLE__
+  /* Mac: ru_maxrss gives the size in bytes */
+  fprintf(stderr, " Memory: %.0fMB\n", r_usage.ru_maxrss * 1.0e-6);
+#else
+  /* Linux: ru_maxrss gives the size in kilobytes  */
+  fprintf(stderr, " Memory: %.0fMB\n", r_usage.ru_maxrss * 1.0e-3);
+#endif
+}
+
+unsigned long sysutil_get_memused()
+{
+  struct rusage r_usage;
+  getrusage(RUSAGE_SELF, & r_usage);
+
+#if defined __APPLE__
+  /* Mac: ru_maxrss gives the size in bytes */
+  return (unsigned long)(r_usage.ru_maxrss);
+#else
+  /* Linux: ru_maxrss gives the size in kilobytes  */
+  return (unsigned long)r_usage.ru_maxrss * 1024;
+#endif
+}
+
+unsigned long sysutil_get_memtotal()
+{
+#if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
+
+  long phys_pages = sysconf(_SC_PHYS_PAGES);
+  long pagesize = sysconf(_SC_PAGESIZE);
+
+  if ((phys_pages == -1) || (pagesize == -1))
+    sysutil_fatal("Cannot determine amount of RAM");
+
+  // sysconf(3) notes that pagesize * phys_pages can overflow, such as
+  // when long is 32-bits and there's more than 4GB RAM.  Since vsearch
+  // apparently targets LP64 systems like x86_64 linux, this will not
+  // arise in practice on the intended platform.
+
+  if (pagesize > LONG_MAX / phys_pages)
+    return LONG_MAX;
+  else
+    return (unsigned long)pagesize * (unsigned long)phys_pages;
+
+#elif defined(__APPLE__)
+
+  int mib [] = { CTL_HW, HW_MEMSIZE };
+  int64_t ram = 0;
+  size_t length = sizeof(ram);
+  if(-1 == sysctl(mib, 2, &ram, &length, NULL, 0))
+    sysutil_fatal("Cannot determine amount of RAM");
+  return ram;
+
+#else
+
+  struct sysinfo si;
+  if (sysinfo(&si))
+    sysutil_fatal("Cannot determine amount of RAM");
+  return si.totalram * si.mem_unit;
+
+#endif
+}
+
+static void get_cpuid(int32_t out[4], int32_t x)
+{
+  __cpuid_count(x, 0, out[0], out[1], out[2], out[3]);
+}
+
+unsigned long sysutil_get_cpu_features()
+{
+  unsigned long features = 0;
+
+  // adapted from: https://github.com/Mysticial/FeatureDetector
+
+  //  OS Features
+//  OS_x64 = detect_OS_x64();
+//  OS_AVX = detect_OS_AVX();
+//  OS_AVX512 = detect_OS_AVX512();
+
+  int info[4];
+  get_cpuid(info, 0);
+  int nIds = info[0];
+
+  get_cpuid(info, 0x80000000);
+  u_int32_t nExIds = info[0];
+
+  //  Detect Features
+  if (nIds >= 0x00000001)
+  {
+    get_cpuid(info, 0x00000001);
+
+//      HW_MMX    = (info[3] & ((int)1 << 23)) != 0;
+//      HW_SSE    = (info[3] & ((int)1 << 25)) != 0;
+//      HW_SSE2   = (info[3] & ((int)1 << 26)) != 0;
+
+    if (info[2] & ((int)1 <<  0))
+      features |= RAXML_CPU_SSE3;
+
+//      HW_SSSE3  = (info[2] & ((int)1 <<  9)) != 0;
+//      HW_SSE41  = (info[2] & ((int)1 << 19)) != 0;
+//      HW_SSE42  = (info[2] & ((int)1 << 20)) != 0;
+//      HW_AES    = (info[2] & ((int)1 << 25)) != 0;
+
+    if (info[2] & ((int)1 << 28))
+      features |= RAXML_CPU_AVX;
+
+    if (info[2] & ((int)1 << 12))
+      features |= RAXML_CPU_FMA3;
+
+//      HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
+  }
+
+  if (nIds >= 0x00000007)
+  {
+    get_cpuid(info, 0x00000007);
+
+    if (info[1] & ((int)1 << 5))
+      features |= RAXML_CPU_AVX2;
+
+//      HW_BMI1         = (info[1] & ((int)1 <<  3)) != 0;
+//      HW_BMI2         = (info[1] & ((int)1 <<  8)) != 0;
+//      HW_ADX          = (info[1] & ((int)1 << 19)) != 0;
+//      HW_MPX          = (info[1] & ((int)1 << 14)) != 0;
+//      HW_SHA          = (info[1] & ((int)1 << 29)) != 0;
+//      HW_PREFETCHWT1  = (info[2] & ((int)1 <<  0)) != 0;
+//
+//      HW_AVX512_F     = (info[1] & ((int)1 << 16)) != 0;
+//      HW_AVX512_CD    = (info[1] & ((int)1 << 28)) != 0;
+//      HW_AVX512_PF    = (info[1] & ((int)1 << 26)) != 0;
+//      HW_AVX512_ER    = (info[1] & ((int)1 << 27)) != 0;
+//      HW_AVX512_VL    = (info[1] & ((int)1 << 31)) != 0;
+//      HW_AVX512_BW    = (info[1] & ((int)1 << 30)) != 0;
+//      HW_AVX512_DQ    = (info[1] & ((int)1 << 17)) != 0;
+//      HW_AVX512_IFMA  = (info[1] & ((int)1 << 21)) != 0;
+//      HW_AVX512_VBMI  = (info[2] & ((int)1 <<  1)) != 0;
+  }
+
+  if (nExIds >= 0x80000001)
+  {
+    get_cpuid(info, 0x80000001);
+//      HW_x64   = (info[3] & ((int)1 << 29)) != 0;
+//      HW_ABM   = (info[2] & ((int)1 <<  5)) != 0;
+//      HW_SSE4a = (info[2] & ((int)1 <<  6)) != 0;
+//      HW_FMA4  = (info[2] & ((int)1 << 16)) != 0;
+//      HW_XOP   = (info[2] & ((int)1 << 11)) != 0;
+  }
+
+  return features;
+}
+
+unsigned int sysutil_simd_autodetect()
+{
+  unsigned long features = sysutil_get_cpu_features();
+  if ((features & RAXML_CPU_AVX2) && (features & RAXML_CPU_FMA3))
+    return PLL_ATTRIB_ARCH_AVX2;
+  else if (features & RAXML_CPU_AVX)
+    return PLL_ATTRIB_ARCH_AVX;
+  else if (features & RAXML_CPU_SSE3)
+    return PLL_ATTRIB_ARCH_SSE;
+  else
+    return PLL_ATTRIB_ARCH_CPU;
+}
+
+double sysutil_elapsed_seconds()
+{
+  chrono::time_point<chrono::system_clock> end_time = chrono::system_clock::now();
+  chrono::duration<double> elapsed_seconds = end_time - start_time;
+  return elapsed_seconds.count();
+}
+
+std::string sysutil_realpath(const std::string& path)
+{
+  char * real_path = realpath(path.c_str(), NULL);
+  if (real_path)
+  {
+    const string result(real_path);
+    free(real_path);
+    return result;
+  }
+  else
+  {
+    switch(errno)
+    {
+      case EACCES:
+        throw ios_base::failure("Can't access file: " + path);
+        break;
+      case ENOENT:
+        throw ios_base::failure("File doesn't exist: " + path);
+        break;
+      case ELOOP:
+      case ENAMETOOLONG:
+        throw ios_base::failure("Path too long or too many symlinks: " + path);
+        break;
+      default:
+        throw ios_base::failure("Unknown I/O error: " + path);
+    }
+  }
+}
+
+bool sysutil_file_exists(const std::string& fname, int access_mode)
+{
+  return access(fname.c_str(), access_mode) == 0;
+}
+
diff --git a/src/types.hpp b/src/types.hpp
new file mode 100644
index 0000000..7b65f65
--- /dev/null
+++ b/src/types.hpp
@@ -0,0 +1,104 @@
+#ifndef RAXML_TYPES_HPP_
+#define RAXML_TYPES_HPP_
+
+enum class StartingTree
+{
+  random,
+  parsimony,
+  user
+};
+
+enum class Command
+{
+  none = 0,
+  help,
+  version,
+  evaluate,
+  search,
+  bootstrap,
+  all
+};
+
+enum class FileFormat
+{
+  autodetect = 0,
+  fasta,
+  phylip,
+  vcf,
+  catg,
+  binary
+};
+
+enum class DataType
+{
+  autodetect = 0,
+  dna,
+  protein,
+  binary,
+  multistate,
+  diploid10
+};
+
+enum class ParamValue
+{
+  undefined = 0,
+  equal = 1,
+  user = 2,
+  model = 3,
+  empirical = 4,
+  ML = 5
+};
+
+const std::string ParamValueNames[] = {"undefined", "equal", "user", "model", "empirical", "ML"};
+
+typedef std::vector<double> doubleVector;
+typedef std::vector<int> intVector;
+typedef std::vector<unsigned int> uintVector;
+typedef std::pair<size_t,std::string> IdNamePair;
+typedef std::vector<IdNamePair> IdNameVector;
+typedef std::unordered_map<size_t,std::string> IdNameMap;
+typedef std::unordered_map<std::string,size_t> NameIdMap;
+typedef std::set<size_t> IDSet;
+
+/*
+ * workaround needed for using enum as std::map key
+ * code from: http://stackoverflow.com/a/24847480
+ * */
+struct EnumClassHash
+{
+  template <typename T>
+  std::size_t operator()(T t) const
+  {
+      return static_cast<std::size_t>(t);
+  }
+};
+
+/* generic exception class */
+class RaxmlException : public std::exception
+{
+public:
+  RaxmlException(const std::string& message)
+  : _message(message)
+  {
+  }
+
+  virtual const char* what() const noexcept { return message().c_str(); }
+
+  virtual const std::string message() const { return _message; };
+
+protected:
+  std::string _message;
+
+  template<typename ... Args>
+  std::string format_message(const std::string& fmt, Args ... args) const
+  {
+    size_t size = std::snprintf(nullptr, 0, fmt.c_str(), args ...) + 1;
+    std::string msg;
+    msg.reserve(size);
+    std::snprintf(&msg[0], size, fmt.c_str(), args ...);
+    return msg;
+  }
+};
+
+
+#endif /* RAXML_TYPES_HPP_ */
diff --git a/src/version.h b/src/version.h
new file mode 100644
index 0000000..cb0e6c9
--- /dev/null
+++ b/src/version.h
@@ -0,0 +1,2 @@
+#define RAXML_VERSION "0.1.0 BETA"
+#define RAXML_DATE "18.03.2017"
diff --git a/test/src/CMakeLists.txt b/test/src/CMakeLists.txt
new file mode 100644
index 0000000..8836fd3
--- /dev/null
+++ b/test/src/CMakeLists.txt
@@ -0,0 +1,49 @@
+find_package (GTest)
+
+if(NOT GTEST_FOUND)
+    message (STATUS "GTest not found")
+    message (WARNING "Skipping building tests.")
+    return()
+endif()
+
+message (STATUS "Building tests")
+
+file (GLOB_RECURSE raxml_test_sources ${PROJECT_SOURCE_DIR}/test/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cpp)
+
+# sources list now has 2 Main.cpp, old has to be removed
+list(REMOVE_ITEM raxml_test_sources "${PROJECT_SOURCE_DIR}/src/main.cpp")
+
+include_directories (${PROJECT_SOURCE_DIR})
+
+set (EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/test/bin)
+
+add_executable        (raxml_test_module ${raxml_test_sources})
+
+target_link_libraries (raxml_test_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_algorithm.a)
+target_link_libraries (raxml_test_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_binary.a)
+target_link_libraries (raxml_test_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_optimize.a)
+target_link_libraries (raxml_test_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_msa.a)
+target_link_libraries (raxml_test_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_tree.a)
+target_link_libraries (raxml_test_module ${RAXML_LOCALDEPS_DIR}/lib/libpll_util.a)
+target_link_libraries (raxml_test_module ${RAXML_LOCALDEPS_DIR}/lib/libpll.a)
+target_link_libraries (raxml_test_module m)
+
+target_link_libraries (raxml_test_module ${GTEST_BOTH_LIBRARIES} -pthread)
+
+target_link_libraries (raxml_test_module ${MPI_CXX_LIBRARIES})
+
+if(MPI_COMPILE_FLAGS)
+  set_target_properties(raxml_test_module PROPERTIES
+  COMPILE_FLAGS "${MPI_COMPILE_FLAGS}")
+endif()
+
+if(MPI_LINK_FLAGS)
+  set_target_properties(raxml_test_module PROPERTIES
+    LINK_FLAGS "${MPI_LINK_FLAGS}")
+endif()
+
+set_target_properties (raxml_test_module PROPERTIES OUTPUT_NAME raxml_test)
+set_target_properties (raxml_test_module PROPERTIES PREFIX "")
+
+
+add_test (raxml_test ${PROJECT_SOURCE_DIR}/test/bin/raxml_test)
diff --git a/test/src/CommandLineParserTest.cpp b/test/src/CommandLineParserTest.cpp
new file mode 100644
index 0000000..97773d9
--- /dev/null
+++ b/test/src/CommandLineParserTest.cpp
@@ -0,0 +1,158 @@
+#include "RaxmlTest.hpp"
+
+#include "src/CommandLineParser.hpp"
+
+using namespace std;
+
+#define MAX_ARGS 256
+
+::testing::internal::Mutex mutx;
+
+void cmdline_to_argv(char * cmd, int &argc, char** argv)
+{
+  argc = 0;
+  char *saveptr;
+  char *p2 = strtok_r(cmd, " ", &saveptr);
+  while (p2 && argc <= MAX_ARGS)
+    {
+      argv[argc++] = p2;
+      p2 = strtok_r(0, " ", &saveptr);
+    }
+  argv[argc] = 0;
+}
+
+void parse_options(string &cmd, CommandLineParser &parser, Options &opts,
+                   bool except_throw)
+{
+  int argc;
+  char * argv[MAX_ARGS];
+
+  cmdline_to_argv(&cmd[0], argc, argv);
+  mutx.Lock();
+  try
+  {
+    parser.parse_options(argc, argv, opts);
+    EXPECT_FALSE(except_throw) << "Exception expected but not thrown";
+  }
+  catch (exception &e)
+  {
+    EXPECT_TRUE(except_throw) << "Exception: " << e.what() << std::endl;
+  }
+  mutx.Unlock();
+}
+
+TEST(CommandLineParserTest, help)
+{
+  // buildup
+  CommandLineParser parser;
+  Options options;
+
+  // tests
+  string cmd = "raxml-ng";
+  parse_options(cmd, parser, options, false);
+  EXPECT_EQ(Command::help, options.command);
+}
+
+TEST(CommandLineParserTest, search_wrong)
+{
+  // buildup
+  CommandLineParser parser;
+  Options options;
+
+  // wrong: --msa switsch missing
+  string cmd = "raxml-ng --threads 2";
+  parse_options(cmd, parser, options, true);
+}
+
+TEST(CommandLineParserTest, search_minimal)
+{
+  // buildup
+  CommandLineParser parser;
+  Options options;
+
+  // minimal valid command line
+  string cmd = "raxml-ng --msa data.fa";
+  parse_options(cmd, parser, options, false);
+  EXPECT_EQ(Command::search, options.command);
+  EXPECT_EQ("data.fa", options.msa_file);
+}
+
+TEST(CommandLineParserTest, search_complex1)
+{
+  // buildup
+  CommandLineParser parser;
+  Options options;
+
+  // a more involved example
+  string cmd = "raxml-ng --msa data.fa --tree start.tre --model JC69 --threads 4 --seed 9";
+  parse_options(cmd, parser, options, false);
+  EXPECT_EQ(Command::search, options.command);
+  EXPECT_EQ("data.fa", options.msa_file);
+  EXPECT_EQ(StartingTree::user, options.start_tree);
+  EXPECT_EQ("start.tre", options.tree_file);
+  EXPECT_EQ("JC69", options.model_file);
+  EXPECT_EQ(4, options.num_threads);
+  EXPECT_EQ(9, options.random_seed);
+}
+
+TEST(CommandLineParserTest, search_complex2)
+{
+  // buildup
+  CommandLineParser parser;
+  Options options;
+
+  // fancy search command
+  string cmd = "raxml-ng --search --msa data.fa --tree random --model part.txt "
+      "--prefix myRun --prob-msa ON --brlen linked --spr-radius 10 --spr-cutoff 0.5";
+  parse_options(cmd, parser, options, false);
+  EXPECT_EQ(Command::search, options.command);
+  EXPECT_EQ(StartingTree::random, options.start_tree);
+  EXPECT_EQ("myRun", options.outfile_prefix);
+  EXPECT_TRUE(options.use_prob_msa);
+  EXPECT_EQ(PLLMOD_TREE_BRLEN_LINKED, options.brlen_linkage);
+  EXPECT_EQ(10, options.spr_radius);
+  EXPECT_DOUBLE_EQ(0.5, options.spr_cutoff);
+}
+
+TEST(CommandLineParserTest, eval_wrong)
+{
+  // buildup
+  CommandLineParser parser;
+  Options options;
+
+  // wrong: evaluate without a tree
+  string cmd = "raxml-ng --evaluate --msa data.fa";
+  parse_options(cmd, parser, options, true);
+  EXPECT_EQ(Command::evaluate, options.command);
+}
+
+TEST(CommandLineParserTest, eval_minimal)
+{
+  // buildup
+  CommandLineParser parser;
+  Options options;
+
+  // valid evaluate command line
+  string cmd = "raxml-ng --evaluate --msa data.fa --tree start.tre";
+  parse_options(cmd, parser, options, false);
+  EXPECT_EQ(Command::evaluate, options.command);
+  EXPECT_TRUE(options.optimize_model);
+  EXPECT_TRUE(options.optimize_brlen);
+}
+
+TEST(CommandLineParserTest, eval_complex)
+{
+  // buildup
+  CommandLineParser parser;
+  Options options;
+
+  // evaluate with more settings
+  string cmd = "raxml-ng --evaluate --msa data.fa --tree start.tre --opt-model OFF "
+      "--opt-branches OFF --lh-epsilon 0.02";
+  parse_options(cmd, parser, options, false);
+  EXPECT_EQ(Command::evaluate, options.command);
+  EXPECT_FALSE(options.optimize_model);
+  EXPECT_FALSE(options.optimize_brlen);
+  EXPECT_DOUBLE_EQ(0.02, options.lh_epsilon);
+}
+
diff --git a/test/src/ModelTest.cpp b/test/src/ModelTest.cpp
new file mode 100644
index 0000000..70c9670
--- /dev/null
+++ b/test/src/ModelTest.cpp
@@ -0,0 +1,81 @@
+#include "RaxmlTest.hpp"
+
+#include "src/Model.hpp"
+
+using namespace std;
+
+TEST(ModelTest, defaults)
+{
+  // buildup
+  auto model = Model();
+
+  // tests
+  EXPECT_EQ(model.to_string(), "GTR+F+G4");
+  EXPECT_EQ(model.data_type(), DataType::dna);
+  EXPECT_EQ(model.name(), "GTR");
+  EXPECT_EQ(model.num_states(), 4);
+  EXPECT_EQ(model.ratehet_mode(), PLLMOD_UTIL_MIXTYPE_GAMMA);
+  EXPECT_EQ(model.num_ratecats(), 4);
+  EXPECT_EQ(model.params_to_optimize(), PLLMOD_OPT_PARAM_SUBST_RATES | PLLMOD_OPT_PARAM_ALPHA);
+}
+
+TEST(ModelTest, GTR)
+{
+  // buildup
+  auto model = Model(DataType::autodetect, "GTR");
+
+  // tests
+  EXPECT_EQ(model.to_string(), "GTR+FO");
+  EXPECT_EQ(model.data_type(), DataType::dna);
+  EXPECT_EQ(model.name(), "GTR");
+  EXPECT_EQ(model.num_states(), 4);
+  EXPECT_EQ(model.ratehet_mode(), PLLMOD_UTIL_MIXTYPE_FIXED);
+  EXPECT_EQ(model.num_ratecats(), 1);
+  EXPECT_EQ(model.params_to_optimize(), PLLMOD_OPT_PARAM_SUBST_RATES | PLLMOD_OPT_PARAM_FREQUENCIES);
+}
+
+TEST(ModelTest, JCI_user)
+{
+  // buildup
+  auto model = Model(DataType::autodetect, "JC+G+I{0.7}");
+
+  // tests
+  EXPECT_EQ(model.to_string(), "JC+I{0.7}+G4");
+  EXPECT_EQ(model.data_type(), DataType::dna);
+  EXPECT_EQ(model.name(), "JC");
+  EXPECT_EQ(model.num_states(), 4);
+  EXPECT_EQ(model.ratehet_mode(), PLLMOD_UTIL_MIXTYPE_GAMMA);
+  EXPECT_EQ(model.num_ratecats(), 4);
+  EXPECT_EQ(model.params_to_optimize(), PLLMOD_OPT_PARAM_ALPHA);
+  EXPECT_EQ(model.pinv(), 0.7);
+}
+
+TEST(ModelTest, LGFI)
+{
+  // buildup
+  auto model = Model(DataType::autodetect, "LG+F+I+G8");
+
+  // tests
+  EXPECT_EQ(model.to_string(), "LG+F+I+G8");
+  EXPECT_EQ(model.data_type(), DataType::protein);
+  EXPECT_EQ(model.name(), "LG");
+  EXPECT_EQ(model.num_states(), 20);
+  EXPECT_EQ(model.ratehet_mode(), PLLMOD_UTIL_MIXTYPE_GAMMA);
+  EXPECT_EQ(model.num_ratecats(), 8);
+  EXPECT_EQ(model.params_to_optimize(), PLLMOD_OPT_PARAM_PINV | PLLMOD_OPT_PARAM_ALPHA);
+}
+
+TEST(ModelTest, LG4X)
+{
+  // buildup
+  auto model = Model(DataType::autodetect, "LG4X");
+
+  // tests
+  EXPECT_EQ(model.to_string(), "LG4X+R4");
+  EXPECT_EQ(model.data_type(), DataType::protein);
+  EXPECT_EQ(model.name(), "LG4X");
+  EXPECT_EQ(model.num_states(), 20);
+  EXPECT_EQ(model.ratehet_mode(), PLLMOD_UTIL_MIXTYPE_FREE);
+  EXPECT_EQ(model.num_ratecats(), 4);
+  EXPECT_EQ(model.params_to_optimize(), PLLMOD_OPT_PARAM_FREE_RATES | PLLMOD_OPT_PARAM_RATE_WEIGHTS);
+}
diff --git a/test/src/RaxmlTest.hpp b/test/src/RaxmlTest.hpp
new file mode 100644
index 0000000..9a43652
--- /dev/null
+++ b/test/src/RaxmlTest.hpp
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <gtest/gtest.h>
+
+#include "src/Options.hpp"
+
+// The testing environment
+class RaxmlTest : public ::testing::Environment {
+public:
+  // You can remove any or all of the following functions if its body
+  // is empty.
+
+  RaxmlTest() {};
+
+  virtual ~RaxmlTest() {
+    // You can do clean-up work that doesn't throw exceptions here.
+  }
+
+  // If the constructor and destructor are not enough for setting up
+  // and cleaning up each test, you can define the following methods:
+
+  virtual void SetUp() {
+    // Code here will be called immediately after the constructor (right
+    // before each test).
+  }
+
+  virtual void TearDown() {
+    // Code here will be called immediately after each test (right
+    // before the destructor).
+  }
+
+  // Objects declared here can be used by all tests in the test case for Foo.
+  std::string data_dir;
+  std::string out_dir;
+  Options options;
+
+};
+
+extern RaxmlTest* env;
diff --git a/test/src/mainTest.cpp b/test/src/mainTest.cpp
new file mode 100644
index 0000000..20cde9e
--- /dev/null
+++ b/test/src/mainTest.cpp
@@ -0,0 +1,34 @@
+#include <gtest/gtest.h>
+#include <string>
+#include <iostream>
+
+#include "RaxmlTest.hpp"
+
+RaxmlTest* env;
+
+int main(int argc, char** argv)
+{
+  env = new RaxmlTest();
+
+  // Set data dir using the program path.
+  std::string call = argv[0];
+  std::size_t found = call.find_last_of("/\\");
+  if (found != std::string::npos) {
+      env->data_dir = call.substr(0,found) + "/../data/";
+  }
+
+//  env->tree_file = std::string(env->data_dir);
+//  env->tree_file += "ref.tre";
+
+//  env->out_dir  = std::string("/tmp/epatest/");
+//  std::string cmd("mkdir ");
+//  cmd += env->out_dir.c_str();
+//  system(cmd.c_str());
+
+  ::testing::InitGoogleTest(&argc, argv);
+//  MPI_INIT(&argc, &argv);
+  ::testing::AddGlobalTestEnvironment(env);
+  auto result = RUN_ALL_TESTS();
+//  MPI_FINALIZE();
+  return result;
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/raxml-ng.git