[med-svn] [libbpp-phyl-omics] 05/06: Imported Upstream version 2.2.0

Andreas Tille tille at debian.org
Wed Apr 13 14:21:56 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository libbpp-phyl-omics.

commit f9473fbf349e1da3238f80099f105a04d92d8dd1
Author: Andreas Tille <tille at debian.org>
Date:   Wed Apr 13 16:12:45 2016 +0200

    Imported Upstream version 2.2.0
---
 CMakeLists.txt                                     |   9 +-
 ChangeLog                                          |   8 +
 Doxyfile                                           |   2 +-
 bpp-phyl-omics.spec                                |   6 +-
 debian/changelog                                   |   6 +
 debian/control                                     |   6 +-
 debian/copyright                                   |   6 +-
 genIncludes.sh                                     |  35 ++++
 .../Io/Maf/AbstractDistanceEstimationMafIterator.h |   4 +-
 src/Bpp/Seq/Io/Maf/CountClustersMafStatistics.cpp  |   2 +-
 ...imumLikelihoodDistanceEstimationMafIterator.cpp |   2 +-
 ...aximumLikelihoodDistanceEstimationMafIterator.h |   4 +-
 .../Maf/MaximumLikelihoodModelFitMafStatistics.cpp | 115 ++++++++++++
 .../Maf/MaximumLikelihoodModelFitMafStatistics.h   | 202 +++++++++++++++++++++
 src/CMakeLists.txt                                 |   2 +
 15 files changed, 387 insertions(+), 22 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7701b9f..8ee7e14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -99,18 +99,14 @@ IF (DOXYGEN_FOUND)
     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
 ENDIF (DOXYGEN_FOUND)
 
-# Python, needed for install:
-FIND_PACKAGE(PythonInterp)
-FIND_PACKAGE(PythonLibs)
-
 ENDIF(NO_DEP_CHECK)
 
 # Packager
 SET(CPACK_PACKAGE_NAME "libbpp-phyl-omics")
 SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
-SET(CPACK_PACKAGE_VERSION "2.1.0")
+SET(CPACK_PACKAGE_VERSION "2.2.0")
 SET(CPACK_PACKAGE_VERSION_MAJOR "2")
-SET(CPACK_PACKAGE_VERSION_MINOR "1")
+SET(CPACK_PACKAGE_VERSION_MINOR "2")
 SET(CPACK_PACKAGE_VERSION_PATCH "0")
 SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Phylogen-Omics library")
 SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
@@ -128,7 +124,6 @@ SET(CPACK_SOURCE_IGNORE_FILES
  ".*\\\\.deb"
  ".*\\\\.rpm"
  ".*\\\\.dmg"
- ".*\\\\.sh"
  ".*\\\\..*\\\\.swp"
  "src/\\\\..*"
  "src/libbpp*"
diff --git a/ChangeLog b/ChangeLog
index 2f52f93..212e980 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+26/09/14 -*- Version 2.2.0. -*-
+
+04/06/14 Julien Dutheil
+* Added non-homogeneous model fitting.
+
+12/04/14 Julien Dutheil
+* Added model fitting and parameter estimation.
+
 07/03/13 -*- Version 2.1.0. -*-
 
 21/01/13 Julien Dutheil
diff --git a/Doxyfile b/Doxyfile
index 667b746..e86870e 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -32,7 +32,7 @@ PROJECT_NAME           = bpp-phyl-omics
 # This could be handy for archiving the generated documentation or 
 # if some version control system is used.
 
-PROJECT_NUMBER         = 2.1.0
+PROJECT_NUMBER         = 2.2.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description 
 # for a project that appears at the top of each page and should give viewer 
diff --git a/bpp-phyl-omics.spec b/bpp-phyl-omics.spec
index 96167a9..c10db79 100644
--- a/bpp-phyl-omics.spec
+++ b/bpp-phyl-omics.spec
@@ -1,5 +1,5 @@
 %define _basename bpp-phyl-omics
-%define _version 2.1.0
+%define _version 2.2.0
 %define _release 1
 %define _prefix /usr
 
@@ -189,8 +189,10 @@ exit 0
 %{_prefix}/include/*
 
 %changelog
+* Fri Sep 26 2014 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.2.0-1
+- Added model fitting and parameter estimations.
 * Thu Mar 07 2013 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.1.0-1
 - Initial release. Contains tools to build phylogenies along a genome alignment (distance methods only for now).
 * Tue Nov 06 2012 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.0.3-1
-- First draft of the spec file
+- First draft of the spec file.
 
diff --git a/debian/changelog b/debian/changelog
index f54ee77..7143020 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+libbpp-phyl-omics (2.2.0-1) unstable; urgency=low
+
+  * Added model fitting and parameter estimations.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr>  Fri, 26 Sep 2014 14:00:00 +0100
+
 libbpp-phyl-omics (2.1.0-1) unstable; urgency=low
 
   * Contain tools to apply phylogenetic method along a genome alignment. Only distance methods are implemented for now.
diff --git a/debian/control b/debian/control
index 991938f..e7f57c3 100644
--- a/debian/control
+++ b/debian/control
@@ -4,14 +4,14 @@ Priority: optional
 Maintainer: Loic Dachary <loic at dachary.org>
 Uploaders: Julien Dutheil <julien.dutheil at univ-montp2.fr>
 Build-Depends: debhelper (>= 5), cmake (>= 2.6),
-  libbpp-phyl-dev (>= 2.1.0), libbpp-seq-omics-dev (>= 2.1.0)
+  libbpp-phyl-dev (>= 2.2.0), libbpp-seq-omics-dev (>= 2.2.0)
 Standards-Version: 3.9.4
 
 Package: libbpp-phyl-omics-dev
 Section: libdevel
 Architecture: any
 Depends: libbpp-phyl-omics1 (= ${binary:Version}), ${misc:Depends},
-  libbpp-phyl-dev (>= 2.1.0), libbpp-seq-omics-dev (>= 2.1.0)
+  libbpp-phyl-dev (>= 2.2.0), libbpp-seq-omics-dev (>= 2.2.0)
 Description: Bio++ Phylogenetics library: genomics components.
  Contains the Bio++ sequence classes dedicated to phylogenomics.
 
@@ -19,7 +19,7 @@ Package: libbpp-phyl-omics1
 Section: libs
 Architecture: any
 Depends: ${shlibs:Depends}, ${misc:Depends},
-  libbpp-phyl9 (>= 2.1.0), libbpp-seq-omics1 (>= 2.1.0)
+  libbpp-phyl9 (>= 2.2.0), libbpp-seq-omics1 (>= 2.2.0)
 Description: Bio++ Phylogenetics library: genomics components.
  Contains the Bio++ sequence classes dedicated to phylogenomics.
 
diff --git a/debian/copyright b/debian/copyright
index 071c61f..311ae74 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -1,5 +1,5 @@
 This package was debianized by Julien Dutheil <julien.dutheil at univ-montp2.fr> on
-Wed, 06 Mar 2013 14:34:00 +0100.
+Fri, 26 Sep 2014 14:00:00 +0100.
 
 It was downloaded from <http://biopp.univ-montp2.fr/Repositories/sources>
 
@@ -9,7 +9,7 @@ Upstream Author:
 
 Copyright: 
 
-    Copyright (C) 2013 Bio++ Development Team
+    Copyright (C) 2014 Bio++ Development Team
 
 License:
 
@@ -30,7 +30,7 @@ License:
 On Debian systems, the complete text of the GNU General
 Public License can be found in `/usr/share/common-licenses/GPL'.
 
-The Debian packaging is (C) 2013, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
+The Debian packaging is (C) 2014, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
 is licensed under the GPL, see above.
 
 The provided software is distributed under the CeCILL license:
diff --git a/genIncludes.sh b/genIncludes.sh
new file mode 100755
index 0000000..56710e9
--- /dev/null
+++ b/genIncludes.sh
@@ -0,0 +1,35 @@
+#! /bin/bash
+
+createGeneric() {
+  echo "-- Creating generic include file: $1.all"
+  #Writes "$1.all", #including the .h/.all files found in $1; subdirectories are processed first:
+  local dirs=() dir file  # locals: a recursive call must not clobber its caller's state
+  for file in "$1"/*
+  do
+    if [ -d "$file" ]
+    then
+      # Remember the subdirectory, the recursion itself happens below:
+      dirs+=( "$file" )
+    fi
+  done
+  for dir in "${dirs[@]}"
+  do
+    createGeneric "$dir"
+  done
+  #Now list all files, including newly created .all files:
+  if [ -f "$1.all" ]
+  then
+    rm "$1.all"
+  fi
+  dir=$(basename "$1")
+  for file in "$1"/*
+  do
+    if [ -f "$file" ] && { [ "${file##*.}" = "h" ] || [ "${file##*.}" = "all" ]; }
+    then
+      file=$(basename "$file")
+      echo "#include \"$dir/$file\"" >> "$1.all"
+    fi
+  done;
+}
+
+createGeneric "$1"
diff --git a/src/Bpp/Seq/Io/Maf/AbstractDistanceEstimationMafIterator.h b/src/Bpp/Seq/Io/Maf/AbstractDistanceEstimationMafIterator.h
index d6b0858..b26daca 100644
--- a/src/Bpp/Seq/Io/Maf/AbstractDistanceEstimationMafIterator.h
+++ b/src/Bpp/Seq/Io/Maf/AbstractDistanceEstimationMafIterator.h
@@ -69,8 +69,8 @@ class AbstractDistanceEstimationMafIterator:
     {
       MafBlock* block = iterator_->nextBlock();
       if (!block) return 0;
-      DistanceMatrix* dist = estimateDistanceMatrixForBlock(*block);
-      block->setProperty(getPropertyName(), dist);
+      auto_ptr<DistanceMatrix> dist(estimateDistanceMatrixForBlock(*block));
+      block->setProperty(getPropertyName(), dist.release());
       return block;
     }
 
diff --git a/src/Bpp/Seq/Io/Maf/CountClustersMafStatistics.cpp b/src/Bpp/Seq/Io/Maf/CountClustersMafStatistics.cpp
index 217b60d..9b9921b 100644
--- a/src/Bpp/Seq/Io/Maf/CountClustersMafStatistics.cpp
+++ b/src/Bpp/Seq/Io/Maf/CountClustersMafStatistics.cpp
@@ -55,7 +55,7 @@ unsigned int CountClustersMafStatistics::getNumberOfClusters_(const Node* node,
   if (h < threshold_) {
     nClust++;
   } else {
-    for (unsigned int i = 0; i < node->getNumberOfSons(); ++i) {
+    for (int i = 0; i < static_cast<int>(node->getNumberOfSons()); ++i) {
       nClust += getNumberOfClusters_((*node)[i], heights);
     }
   }
diff --git a/src/Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.cpp b/src/Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.cpp
index 32f9a40..57709b7 100644
--- a/src/Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.cpp
+++ b/src/Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.cpp
@@ -1,5 +1,5 @@
 //
-// File: MaximumLikelihooDistanceEstimationMafIterators.cpp
+// File: MaximumLikelihoodDistanceEstimationMafIterators.cpp
 // Created by: Julien Dutheil
 // Created on: Nov 13 2012
 //
diff --git a/src/Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.h b/src/Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.h
index 430f496..61f683d 100644
--- a/src/Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.h
+++ b/src/Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.h
@@ -1,5 +1,5 @@
 //
-// File: MaximumLikelihooDistanceEstimationMafIterators.h
+// File: MaximumLikelihoodDistanceEstimationMafIterators.h
 // Created by: Julien Dutheil
 // Created on: Nov 13 2012
 //
@@ -54,7 +54,7 @@ class MaximumLikelihoodDistanceEstimationMafIterator:
   public AbstractDistanceEstimationMafIterator
 {
   private:
-    auto_ptr<DistanceEstimation> distEst_;
+    std::auto_ptr<DistanceEstimation> distEst_;
     double propGapsToKeep_; //Exclude sites with too many gaps
     bool gapsAsUnresolved_;  //For most models, should be yes as they do not allow for gap characters
     std::string paramOpt_;
diff --git a/src/Bpp/Seq/Io/Maf/MaximumLikelihoodModelFitMafStatistics.cpp b/src/Bpp/Seq/Io/Maf/MaximumLikelihoodModelFitMafStatistics.cpp
new file mode 100644
index 0000000..b019918
--- /dev/null
+++ b/src/Bpp/Seq/Io/Maf/MaximumLikelihoodModelFitMafStatistics.cpp
@@ -0,0 +1,115 @@
+//
+// File: MaximumLikelihoodModelFitMafStatistics.cpp
+// Created by: Julien Dutheil
+// Created on: Mar 25 2014
+//
+
+/*
+Copyright or © or Copr. Bio++ Development Team
+
+This software is a computer program whose purpose is to test the
+homogeneity of the substitution process of a given alignment.
+
+This software is governed by the CeCILL  license under French law and
+abiding by the rules of distribution of free software.  You can  use, 
+modify and/ or redistribute the software under the terms of the CeCILL
+license as circulated by CEA, CNRS and INRIA at the following URL
+"http://www.cecill.info". 
+
+As a counterpart to the access to the source code and  rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty  and the software's author,  the holder of the
+economic rights,  and the successive licensors  have only  limited
+liability. 
+
+In this respect, the user's attention is drawn to the risks associated
+with loading,  using,  modifying and/or developing or reproducing the
+software by the user in light of its specific status of free software,
+that may mean  that it is complicated to manipulate,  and  that  also
+therefore means  that it is reserved for developers  and  experienced
+professionals having in-depth computer knowledge. Users are therefore
+encouraged to load and test the software's suitability as regards their
+requirements in conditions enabling the security of their systems and/or 
+data to be ensured and,  more generally, to use and operate it in the 
+same conditions as regards security. 
+
+The fact that you are presently reading this means that you have had
+knowledge of the CeCILL license and that you accept its terms.
+*/
+
+#include "MaximumLikelihoodModelFitMafStatistics.h"
+
+//From bpp-seq:
+#include <Bpp/Seq/Container/SiteContainerTools.h>
+
+//From bpp-phyl:
+#include <Bpp/Phyl/Likelihood/RHomogeneousTreeLikelihood.h>
+#include <Bpp/Phyl/Likelihood/RNonHomogeneousTreeLikelihood.h>
+#include <Bpp/Phyl/Model/SubstitutionModelSetTools.h>
+#include <Bpp/Phyl/OptimizationTools.h>
+
+using namespace bpp;
+using namespace std;
+
+const string MaximumLikelihoodModelFitMafStatistics::NO_PROPERTY = "RESERVED_NOPROPERTY";
+
+void MaximumLikelihoodModelFitMafStatistics::compute(const MafBlock& block)
+{
+  //First we get the alignment, filtering gap sites according to propGapsToKeep_:
+  auto_ptr<SiteContainer> sites(SiteContainerTools::removeGapSites(block.getAlignment(), propGapsToKeep_));
+  if (gapsAsUnresolved_)
+    SiteContainerTools::changeGapsToUnknownCharacters(*sites);
+
+  //Second we get the tree (the default one if set, otherwise the one stored in the block):
+  const Tree* tree = 0;
+  if (!tree_.get()) {
+    //No default tree is given, we try to retrieve one from the block:
+    if (!block.hasProperty(treePropertyIn_))
+      throw Exception("MaximumLikelihoodModelFitMafStatistics::compute. No property available for " + treePropertyIn_);
+    try {
+      tree = &(dynamic_cast<const Tree&>(block.getProperty(treePropertyIn_)));
+      if (tree->isRooted())
+        throw Exception("MaximumLikelihoodModelFitMafStatistics::compute. Tree must be unrooted.");
+    } catch (bad_cast& e) {
+      throw Exception("MaximumLikelihoodModelFitMafStatistics::compute. A property was found for '" + treePropertyIn_ + "' but does not appear to contain a phylogenetic tree.");
+    }
+  } else {
+    tree = tree_.get();
+  }
+
+  //We build a new TreeLikelihood object:
+  auto_ptr<DiscreteRatesAcrossSitesTreeLikelihood> tl;
+
+  if (rootFreqs_.get()) {
+    modelSet_.reset(SubstitutionModelSetTools::createHomogeneousModelSet(model_->clone(), rootFreqs_->clone(), tree));
+    tl.reset(new RNonHomogeneousTreeLikelihood(*tree, *sites, modelSet_.get(), rDist_.get(), false, true, false));
+    //Initialize the list of parameters to estimate, only once:
+    if (initParameters_.size() == 0)
+      init_(); //so far, even if tree changed, parameter names are supposedly the same. This might not be true in some complex cases...
+  } else {
+    tl.reset(new RHomogeneousTreeLikelihood(*tree, *sites, model_.get(), rDist_.get(), false, false, true));
+  }
+  tl->initialize();
+  tl->setParameters(fixedParameters_);
+
+  //We optimize the parameters listed in initParameters_:
+  unsigned int nbIt = OptimizationTools::optimizeNumericalParameters2(tl.get(), initParameters_, 0, 0.000001, 10000, 0, 0, false, false, 0);
+
+  //And we save interesting parameter values, one result per name in parametersOut_:
+  result_.setValue("NbIterations", static_cast<double>(nbIt));
+  for (size_t i = 0;i < parametersOut_.size(); ++i) {
+    result_.setValue(parametersOut_[i], tl->getParameterValue(parametersOut_[i]));
+  }
+}
+
+void MaximumLikelihoodModelFitMafStatistics::init_() {
+  //Collect the parameters to estimate: model set if root frequencies are given, single model otherwise.
+  if (!rootFreqs_.get())
+    initParameters_.addParameters(model_->getIndependentParameters());
+  else
+    initParameters_.addParameters(modelSet_->getIndependentParameters());
+  //The rate distribution parameters always come next:
+  initParameters_.addParameters(rDist_->getIndependentParameters());
+  initParameters_.deleteParameters(fixedParameters_.getParameterNames()); //Fixed parameters are not estimated.
+}
+
diff --git a/src/Bpp/Seq/Io/Maf/MaximumLikelihoodModelFitMafStatistics.h b/src/Bpp/Seq/Io/Maf/MaximumLikelihoodModelFitMafStatistics.h
new file mode 100644
index 0000000..6609080
--- /dev/null
+++ b/src/Bpp/Seq/Io/Maf/MaximumLikelihoodModelFitMafStatistics.h
@@ -0,0 +1,202 @@
+//
+// File: MaximumLikelihoodModelFitMafStatistics.h
+// Created by: Julien Dutheil
+// Created on: Mar 25 2014
+//
+
+/*
+Copyright or © or Copr. Bio++ Development Team
+
+This software is a computer program whose purpose is to test the
+homogeneity of the substitution process of a given alignment.
+
+This software is governed by the CeCILL  license under French law and
+abiding by the rules of distribution of free software.  You can  use, 
+modify and/ or redistribute the software under the terms of the CeCILL
+license as circulated by CEA, CNRS and INRIA at the following URL
+"http://www.cecill.info". 
+
+As a counterpart to the access to the source code and  rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty  and the software's author,  the holder of the
+economic rights,  and the successive licensors  have only  limited
+liability. 
+
+In this respect, the user's attention is drawn to the risks associated
+with loading,  using,  modifying and/or developing or reproducing the
+software by the user in light of its specific status of free software,
+that may mean  that it is complicated to manipulate,  and  that  also
+therefore means  that it is reserved for developers  and  experienced
+professionals having in-depth computer knowledge. Users are therefore
+encouraged to load and test the software's suitability as regards their
+requirements in conditions enabling the security of their systems and/or 
+data to be ensured and,  more generally, to use and operate it in the 
+same conditions as regards security. 
+
+The fact that you are presently reading this means that you have had
+knowledge of the CeCILL license and that you accept its terms.
+*/
+
+#ifndef _MAXIMUMLIKELIHOODMODELFITMAFSTATISTICS_H_
+#define _MAXIMUMLIKELIHOODMODELFITMAFSTATISTICS_H_
+
+#include <Bpp/Seq/Io/Maf/MafStatistics.h>
+#include <Bpp/Seq/Container/SiteContainer.h>
+
+//From bpp-phyl:
+#include <Bpp/Phyl/Model/SubstitutionModel.h>
+#include <Bpp/Phyl/Model/SubstitutionModelSetTools.h>
+#include <Bpp/Phyl/Model/FrequenciesSet/NucleotideFrequenciesSet.h>
+#include <Bpp/Phyl/Tree.h>
+#include <Bpp/Phyl/Likelihood/DiscreteRatesAcrossSitesTreeLikelihood.h>
+
+//From bpp-core
+#include <Bpp/Numeric/Prob/DiscreteDistribution.h>
+
+namespace bpp {
+
+/**
+ * @brief Fit a substitution model by maximum likelihood on each MAF block.
+ *
+ * All nucleotide substitution models and rate distributions are supported.
+ * Only time-homogeneous models are allowed though.
+ */
+class MaximumLikelihoodModelFitMafStatistics:
+  public AbstractMafStatistics
+{
+
+  private:
+    std::auto_ptr<SubstitutionModel> model_;
+    std::auto_ptr<SubstitutionModelSet> modelSet_; //Only used in case of non-stationary model.
+    std::auto_ptr<DiscreteDistribution> rDist_;
+    std::auto_ptr<NucleotideFrequenciesSet> rootFreqs_;
+    std::string treePropertyIn_;
+    std::auto_ptr<const Tree> tree_; //Tree used for all blocks, or null when the tree is read from each block.
+    std::vector<std::string> parametersOut_;
+    bool reestimateBrLen_; //NOTE(review): only stored and copied in this header - confirm it is used by compute().
+    double propGapsToKeep_; //Exclude sites with too many gaps
+    bool gapsAsUnresolved_;  //For most models, should be yes as they do not allow for gap characters
+    ParameterList initParameters_; //Filled by init_().
+    ParameterList fixedParameters_;
+
+  public:
+    /**
+     * @brief Build a new model fitting statistic, reading the tree from a property of each block.
+     *
+     * A tree must be associated to each block before this analysis can be run.
+     *
+     * @param model The substitution model (stored in an auto_ptr member, which takes ownership).
+     * @param rDist The distribution of rates (stored in an auto_ptr member, which takes ownership).
+     * @param rootFreqs Root frequencies for non-stationary model. If set to 0, then a stationary model is assumed.
+     * @param treePropertyIn The name of the property where the input tree is stored for each block.
+     * @param parametersOut Parameters to output.
+     * @param fixedParameters Parameter which should not be estimated but fixed to the given value instead.
+     * @param reestimateBrLen If the branch length from the tree should be reestimated (otherwise kept as is).
+     * @param propGapsToKeep The maximum gap frequency in a site to include it in the analysis.
+     * @param gapsAsUnresolved Tell if gap characters should be considered as unresolved states. In most cases it should be set to true, as very few substitution models consider gaps as genuine states.
+     */
+    MaximumLikelihoodModelFitMafStatistics(
+        SubstitutionModel* model,
+        DiscreteDistribution* rDist,
+        NucleotideFrequenciesSet* rootFreqs,
+        const std::string& treePropertyIn,
+        const std::vector<std::string>& parametersOut,
+        const ParameterList& fixedParameters,
+        bool reestimateBrLen = true,
+        double propGapsToKeep = 0,
+        bool gapsAsUnresolved = true):
+      AbstractMafStatistics(),
+      model_(model), modelSet_(0), rDist_(rDist), rootFreqs_(rootFreqs),
+      treePropertyIn_(treePropertyIn), tree_(0), parametersOut_(parametersOut),
+      reestimateBrLen_(reestimateBrLen), propGapsToKeep_(propGapsToKeep), gapsAsUnresolved_(gapsAsUnresolved),
+      initParameters_(), fixedParameters_(fixedParameters)
+    {
+      if (!rootFreqs)
+        init_();
+      //Otherwise we do not initialize parameters as the tree might change for each block.
+      //We therefore have to initialize once for each block.
+    }
+
+    /**
+     * @brief Build a new model fitting statistic, using a fixed tree.
+     *
+     * This analysis uses the same input tree for all blocks.
+     *
+     * @param model The substitution model (stored in an auto_ptr member, which takes ownership).
+     * @param rDist The distribution of rates (stored in an auto_ptr member, which takes ownership).
+     * @param rootFreqs Root frequencies for non-stationary model. If set to 0, then a stationary model is assumed.
+     * @param tree The tree to use for fitting the model (stored in an auto_ptr member, which takes ownership).
+     * @param parametersOut Parameters to output.
+     * @param fixedParameters Parameter which should not be estimated but fixed to the given value instead.
+     * @param reestimateBrLen If the branch length from the tree should be reestimated (otherwise kept as is).
+     * @param propGapsToKeep The maximum gap frequency in a site to include it in the analysis.
+     * @param gapsAsUnresolved Tell if gap characters should be considered as unresolved states. In most cases it should be set to true, as very few substitution models consider gaps as genuine states.
+     */
+    MaximumLikelihoodModelFitMafStatistics(
+        SubstitutionModel* model,
+        DiscreteDistribution* rDist,
+        NucleotideFrequenciesSet* rootFreqs,
+        const Tree* tree,
+        const std::vector<std::string>& parametersOut,
+        const ParameterList& fixedParameters,
+        bool reestimateBrLen = true,
+        double propGapsToKeep = 0,
+        bool gapsAsUnresolved = true):
+      AbstractMafStatistics(),
+      model_(model), modelSet_(0), rDist_(rDist), rootFreqs_(rootFreqs),
+      treePropertyIn_(NO_PROPERTY), tree_(tree), parametersOut_(parametersOut), //tree_ must be set: no block property is looked up here.
+      reestimateBrLen_(reestimateBrLen), propGapsToKeep_(propGapsToKeep), gapsAsUnresolved_(gapsAsUnresolved),
+      initParameters_(), fixedParameters_(fixedParameters)
+    {
+      if (rootFreqs)
+        modelSet_.reset(SubstitutionModelSetTools::createHomogeneousModelSet(model->clone(), rootFreqs->clone(), tree));
+      init_();
+    }
+
+  private: //Copy is disabled: model, rate distribution, root frequencies and tree are not duplicated.
+    MaximumLikelihoodModelFitMafStatistics(const MaximumLikelihoodModelFitMafStatistics& mafstat):
+      AbstractMafStatistics(),
+      model_(0), modelSet_(0), rDist_(0), rootFreqs_(0),
+      treePropertyIn_(mafstat.treePropertyIn_), tree_(0), parametersOut_(mafstat.parametersOut_),
+      reestimateBrLen_(mafstat.reestimateBrLen_), propGapsToKeep_(mafstat.propGapsToKeep_), gapsAsUnresolved_(mafstat.gapsAsUnresolved_),
+      initParameters_(mafstat.initParameters_), fixedParameters_(mafstat.fixedParameters_)
+    {}
+
+    MaximumLikelihoodModelFitMafStatistics& operator=(const MaximumLikelihoodModelFitMafStatistics& mafstat)
+    {
+      model_.reset();
+      modelSet_.reset();
+      rDist_.reset();
+      rootFreqs_.reset();
+      treePropertyIn_ = mafstat.treePropertyIn_;
+      tree_.reset();
+      parametersOut_ = mafstat.parametersOut_;
+      reestimateBrLen_ = mafstat.reestimateBrLen_;
+      propGapsToKeep_ = mafstat.propGapsToKeep_;
+      gapsAsUnresolved_ = mafstat.gapsAsUnresolved_;
+      initParameters_ = mafstat.initParameters_;
+      fixedParameters_ = mafstat.fixedParameters_;
+      return *this;
+    }
+
+  public:
+    std::string getShortName() const { return "MLModelFit"; }
+    std::string getFullName() const { return "Maximum Likelihood Model Fitting"; }
+    void compute(const MafBlock& block);
+    std::vector<std::string> getSupportedTags() const {
+      std::vector<std::string> tags;
+      tags.push_back("NbIterations");
+      tags.insert(tags.end(), parametersOut_.begin(), parametersOut_.end());
+      return tags;
+    }
+
+    static const std::string NO_PROPERTY; //Placeholder property name used when a fixed tree is given.
+
+  private:
+    void init_();
+};
+
+} //end of namespace bpp.
+
+#endif //_MAXIMUMLIKELIHOODMODELFITMAFSTATISTICS_H_
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7ab1ee6..6b918e6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -6,6 +6,7 @@
 SET(CPP_FILES
   Bpp/Seq/Io/Maf/CountDistanceEstimationMafIterator.cpp
   Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.cpp
+  Bpp/Seq/Io/Maf/MaximumLikelihoodModelFitMafStatistics.cpp
   Bpp/Seq/Io/Maf/DistanceBasedPhylogenyReconstructionMafIterator.cpp
   Bpp/Seq/Io/Maf/TreeManipulationMafIterators.cpp
   Bpp/Seq/Io/Maf/OutputTreeMafIterator.cpp
@@ -16,6 +17,7 @@ SET(H_FILES
   Bpp/Seq/Io/Maf/AbstractDistanceEstimationMafIterator.h
   Bpp/Seq/Io/Maf/CountDistanceEstimationMafIterator.h
   Bpp/Seq/Io/Maf/MaximumLikelihoodDistanceEstimationMafIterator.h
+  Bpp/Seq/Io/Maf/MaximumLikelihoodModelFitMafStatistics.h
   Bpp/Seq/Io/Maf/AbstractPhylogenyReconstructionMafIterator.h
   Bpp/Seq/Io/Maf/DistanceBasedPhylogenyReconstructionMafIterator.h
   Bpp/Seq/Io/Maf/TreeManipulationMafIterators.h

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libbpp-phyl-omics.git



More information about the debian-med-commit mailing list