[med-svn] [bppsuite] 05/10: Import Upstream version 0.8.0
Andreas Tille
tille at debian.org
Wed Jun 14 11:36:59 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository bppsuite.
commit d425f377c746af036c606a1348e7ee380d22e167
Author: Andreas Tille <tille at debian.org>
Date: Wed Jun 14 13:23:24 2017 +0200
Import Upstream version 0.8.0
---
CMakeLists.txt | 183 ++++---
ChangeLog | 11 +
.../MaximumLikelihood/Codons/BranchModel/ML.bpp | 4 +-
Examples/MaximumLikelihood/Codons/M0/ML.bpp | 7 +-
Examples/MaximumLikelihood/Codons/M1/ML.bpp | 10 +-
Examples/MaximumLikelihood/Codons/M2/ML.bpp | 4 +-
.../Nucleotides/Homogeneous/ML.bpp | 1 +
Examples/Parsimony/Pars.bpp | 2 +
.../{ => Homogeneous}/SeqGen.bpp | 16 +-
.../{ => HomogeneousCovarion}/SeqGen.bpp | 17 +-
.../{ => NonHomogeneous}/SeqGen.bpp | 27 +-
.../SequenceSimulation/SeqGenHomogeneousModel.bpp | 9 -
.../SeqGenNonHomogeneousModel.bpp | 22 -
bppSuite.spec | 100 ----
bppSuite/CMakeLists.txt | 5 +
bppSuite/bppAlnScore.cpp | 335 ++++++------
bppSuite/bppAncestor.cpp | 34 +-
bppSuite/bppDist.cpp | 17 +-
bppSuite/bppML.cpp | 75 ++-
bppSuite/bppMixedLikelihoods.cpp | 507 +++++++++++++++++
bppSuite/bppPars.cpp | 31 +-
bppSuite/bppPhyloSampler.cpp | 12 +-
bppSuite/bppReRoot.cpp | 12 +-
bppSuite/bppSeqGen.cpp | 71 ++-
bppSuite/bppSeqMan.cpp | 15 +-
bppsuite.spec | 136 +++++
debian/bppsuite.manpages | 1 +
debian/changelog | 7 +
debian/control | 1 +
debian/copyright | 8 +-
debian/rules | 10 +-
doc/bppsuite.texi | 598 ++++++++++++++++-----
man/CMakeLists.txt | 23 +-
man/bppmixedlikelihoods.1.txt | 42 ++
34 files changed, 1729 insertions(+), 624 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b96e901..95081cd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@
# Created: 22/08/2009
# Global parameters
-CMAKE_MINIMUM_REQUIRED(VERSION 2.6.4)
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
PROJECT(bppsuite CXX)
IF(NOT CMAKE_BUILD_TYPE)
@@ -12,7 +12,7 @@ IF(NOT CMAKE_BUILD_TYPE)
FORCE)
ENDIF()
-SET(CMAKE_CXX_FLAGS "-Wall -Weffc++ -Wshadow")
+SET(CMAKE_CXX_FLAGS "-Wall -Weffc++ -Wshadow -Wconversion")
IF(NOT NO_VIRTUAL_COV)
SET(NO_VIRTUAL_COV FALSE CACHE BOOL
"Disable covariant return type with virtual inheritance, for compilers that do not support it."
@@ -30,10 +30,24 @@ IF(NOT NO_DEP_CHECK)
FORCE)
ENDIF(NOT NO_DEP_CHECK)
+IF(NOT DOC_COMPRESS)
+ SET(DOC_COMPRESS gzip CACHE STRING
+ "Set program for compressing documentation."
+ FORCE)
+ENDIF(NOT DOC_COMPRESS)
+
+IF(NOT DOC_COMPRESS_EXT)
+ SET(DOC_COMPRESS_EXT gz CACHE STRING
+ "Set extension of compressed documentation."
+ FORCE)
+ENDIF(NOT DOC_COMPRESS_EXT)
+
+
IF(NO_DEP_CHECK)
MESSAGE("-- Dependencies checking disabled. Only distribution can be built.")
ELSE(NO_DEP_CHECK)
+
#static linkage?
IF(NOT BUILD_STATIC)
SET(BUILD_STATIC FALSE CACHE BOOL
@@ -56,9 +70,82 @@ IF(NOT DEFINED MAN)
ENDIF(NOT DEFINED MAN)
#find executables for documentation
-FIND_PROGRAM(MAKEINFO_EXE NAMES makeinfo)
+IF(MAN)
+ FIND_PROGRAM(NROFF_EXE NAMES nroff)
+ IF(NROFF_EXE)
+ MESSAGE("-- Found nroff here: ${NROFF_EXE}")
+ MESSAGE(" Adding targets: man")
+
+ ADD_CUSTOM_TARGET(man
+ ALL
+ COMMAND cp bppml.1.txt bppml.1
+ COMMAND ${DOC_COMPRESS} -f bppml.1
+ COMMAND cp bppseqgen.1.txt bppseqgen.1
+ COMMAND ${DOC_COMPRESS} -f bppseqgen.1
+ COMMAND cp bppdist.1.txt bppdist.1
+ COMMAND ${DOC_COMPRESS} -f bppdist.1
+ COMMAND cp bpppars.1.txt bpppars.1
+ COMMAND ${DOC_COMPRESS} -f bpppars.1
+ COMMAND cp bppseqman.1.txt bppseqman.1
+ COMMAND ${DOC_COMPRESS} -f bppseqman.1
+ COMMAND cp bppconsense.1.txt bppconsense.1
+ COMMAND ${DOC_COMPRESS} -f bppconsense.1
+ COMMAND cp bppancestor.1.txt bppancestor.1
+ COMMAND ${DOC_COMPRESS} -f bppancestor.1
+ COMMAND cp bppreroot.1.txt bppreroot.1
+ COMMAND ${DOC_COMPRESS} -f bppreroot.1
+ COMMAND cp bppphysamp.1.txt bppphysamp.1
+ COMMAND ${DOC_COMPRESS} -f bppphysamp.1
+ COMMAND cp bpptreedraw.1.txt bpptreedraw.1
+ COMMAND ${DOC_COMPRESS} -f bpptreedraw.1
+ COMMAND cp bppalnscore.1.txt bppalnscore.1
+ COMMAND ${DOC_COMPRESS} -f bppalnscore.1
+ COMMAND cp bppmixedlikelihoods.1.txt bppmixedlikelihoods.1
+ COMMAND ${DOC_COMPRESS} -f bppmixedlikelihoods.1
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/man
+ )
+ SET_DIRECTORY_PROPERTIES(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "man/bppml.1.${DOC_COMPRESS_EXT};man/bppseqgen.1.${DOC_COMPRESS_EXT};man/bppdist.1.${DOC_COMPRESS_EXT};man/bpppars.1.${DOC_COMPRESS_EXT};man/bppseqman.1.${DOC_COMPRESS_EXT};man/bppconsense.1.${DOC_COMPRESS_EXT};man/bppancestor.1.${DOC_COMPRESS_EXT};man/bppreroot.1.${DOC_COMPRESS_EXT};man/bppphysamp.1.${DOC_COMPRESS_EXT};man/bpptreedraw.1.${DOC_COMPRESS_EXT};man/bppalnscore.1.${DOC_COMPRESS_EXT};man/bppmixedlikelihoods.1. [...]
+
+ ELSE()
+ MESSAGE(FATAL_ERROR "Program nroff required but not found.")
+ ENDIF()
+ENDIF(MAN)
+
+# Optional Texinfo documentation: when INFO is enabled, makeinfo is mandatory
+# and configuration stops with a fatal error if it cannot be found.
+IF(INFO)
+ FIND_PROGRAM(MAKEINFO_EXE NAMES makeinfo)
+ IF(MAKEINFO_EXE)
+ MESSAGE("-- Found makeinfo here: ${MAKEINFO_EXE}")
+ MESSAGE(" Adding targets: info, html")
+
+ # Inside this branch INFO is known to be set, so 'info' is always attached to 'all'.
+ SET(ADD_INFO_TO "ALL")
+ MESSAGE(" Adding target info to target all")
+
+ # Runs makeinfo on bppsuite.texi from within the doc directory of the source tree.
+ ADD_CUSTOM_TARGET(info
+ ${ADD_INFO_TO}
+ COMMAND ${MAKEINFO_EXE} bppsuite.texi
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/doc
+ )
+
+ # HTML rendering of the same manual; not given ALL, so it is built only on request.
+ ADD_CUSTOM_TARGET(html
+ COMMAND ${MAKEINFO_EXE} --html --css-ref=http://www.w3.org/StyleSheets/Core/Steely bppsuite.texi
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/doc
+ )
+ ELSE(MAKEINFO_EXE)
+ # The message must be a plain quoted argument: a character glued in front of the
+ # opening quote turns it into a legacy unquoted argument and the quotes get printed.
+ MESSAGE(FATAL_ERROR "Program makeinfo required but not found.")
+ ENDIF(MAKEINFO_EXE)
+ENDIF(INFO)
+
FIND_PROGRAM(PDFTEX_EXE NAMES pdftex)
-FIND_PROGRAM(NROFF_EXE NAMES nroff)
+IF(PDFTEX_EXE)
+ MESSAGE("-- Found pdftex here: ${PDFTEX_EXE}")
+ MESSAGE(" Adding target: pdf")
+
+ ADD_CUSTOM_TARGET(pdf
+ COMMAND ${PDFTEX_EXE} bppsuite.texi
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/doc
+ )
+ SET_DIRECTORY_PROPERTIES(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "doc/bppsuite.pdf;doc/bppsuite.aux;doc/bppsuite.cp;doc/bppsuite.fn;doc/bppsuite.info;doc/bppsuite.ky;doc/bppsuite.log;doc/bppsuite.pg;doc/bppsuite.toc;doc/bppsuite.tp;doc/bppsuite.vr")
+ENDIF(PDFTEX_EXE)
#here is a useful function:
MACRO(IMPROVED_FIND_LIBRARY OUTPUT_LIBS lib_name include_to_find)
@@ -106,71 +193,6 @@ IMPROVED_FIND_LIBRARY(LIBS bpp-phyl Bpp/Phyl/Tree.h)
IMPROVED_FIND_LIBRARY(LIBS bpp-seq Bpp/Seq/Alphabet/Alphabet.h)
IMPROVED_FIND_LIBRARY(LIBS bpp-core Bpp/Clonable.h)
-IF(MAKEINFO_EXE)
- MESSAGE("-- Found makeinfo here: ${MAKEINFO_EXE}")
- MESSAGE(" Adding targets: info, html")
-
- IF(INFO)
- SET(ADD_INFO_TO "ALL")
- MESSAGE(" Adding target info to target all")
- ENDIF(INFO)
-
- ADD_CUSTOM_TARGET(info
- ${ADD_INFO_TO}
- COMMAND ${MAKEINFO_EXE} bppsuite.texi
- WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/doc
- )
-
- ADD_CUSTOM_TARGET(html
- COMMAND ${MAKEINFO_EXE} --html --css-ref=http://www.w3.org/StyleSheets/Core/Steely bppsuite.texi
- WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/doc
- )
-ENDIF(MAKEINFO_EXE)
-
-IF(PDFTEX_EXE)
- MESSAGE("-- Found pdftex here: ${PDFTEX_EXE}")
- MESSAGE(" Adding target: pdf")
-
- ADD_CUSTOM_TARGET(pdf
- COMMAND ${PDFTEX_EXE} bppsuite.texi
- WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/doc
- )
- SET_DIRECTORY_PROPERTIES(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "doc/bppsuite.pdf;doc/bppsuite.aux;doc/bppsuite.cp;doc/bppsuite.fn;doc/bppsuite.info;doc/bppsuite.ky;doc/bppsuite.log;doc/bppsuite.pg;doc/bppsuite.toc;doc/bppsuite.tp;doc/bppsuite.vr")
-ENDIF(PDFTEX_EXE)
-
-IF(NROFF_EXE)
- MESSAGE("-- Found nroff here: ${NROFF_EXE}")
- MESSAGE(" Adding targets: man")
-
- ADD_CUSTOM_TARGET(man
- ALL
- COMMAND cp bppml.1.txt bppml.1
- COMMAND gzip -f bppml.1
- COMMAND cp bppseqgen.1.txt bppseqgen.1
- COMMAND gzip -f bppseqgen.1
- COMMAND cp bppdist.1.txt bppdist.1
- COMMAND gzip -f bppdist.1
- COMMAND cp bpppars.1.txt bpppars.1
- COMMAND gzip -f bpppars.1
- COMMAND cp bppseqman.1.txt bppseqman.1
- COMMAND gzip -f bppseqman.1
- COMMAND cp bppconsense.1.txt bppconsense.1
- COMMAND gzip -f bppconsense.1
- COMMAND cp bppancestor.1.txt bppancestor.1
- COMMAND gzip -f bppancestor.1
- COMMAND cp bppreroot.1.txt bppreroot.1
- COMMAND gzip -f bppreroot.1
- COMMAND cp bppphysamp.1.txt bppphysamp.1
- COMMAND gzip -f bppphysamp.1
- COMMAND cp bpptreedraw.1.txt bpptreedraw.1
- COMMAND gzip -f bpptreedraw.1
- COMMAND cp bppalnscore.1.txt bppalnscore.1
- COMMAND gzip -f bppalnscore.1
- WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/man
- )
- SET_DIRECTORY_PROPERTIES(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "man/bppml.1.gz;man/bppseqgen.1.gz;man/bppdist.1.gz;man/bpppars.1.gz;man/bppseqman.1.gz;man/bppconsense.1.gz;man/bppancestor.1.gz;man/bppreroot.1.gz;man/bppphysamp.1.gz;man/bpptreedraw.1.gz;man/bppalnscore.1.gz")
-ENDIF(NROFF_EXE)
-
# Subdirectories
ADD_SUBDIRECTORY(bppSuite)
ADD_SUBDIRECTORY(doc)
@@ -181,9 +203,9 @@ ENDIF(NO_DEP_CHECK)
# Packager
SET(CPACK_PACKAGE_NAME "bppsuite")
SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
-SET(CPACK_PACKAGE_VERSION "0.7.0")
+SET(CPACK_PACKAGE_VERSION "0.8.0")
SET(CPACK_PACKAGE_VERSION_MAJOR "0")
-SET(CPACK_PACKAGE_VERSION_MINOR "7")
+SET(CPACK_PACKAGE_VERSION_MINOR "8")
SET(CPACK_PACKAGE_VERSION_PATCH "0")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Program Suite")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
@@ -230,7 +252,7 @@ SET(CPACK_SOURCE_IGNORE_FILES
"doc/bppsuite\\\\.pg"
"doc/bppsuite\\\\.aux"
"doc/bppsuite\\\\.pdf"
- "man/.*\\\\.1.gz"
+ "man/.*\\\\.1.${DOC_COMPRESS_EXT}"
"debian/tmp"
"debian/bppsuite/"
"debian/bppsuite\\\\.substvars"
@@ -245,10 +267,25 @@ IF (MACOS)
ENDIF()
SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
+SET(CPACK_DEBSOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}_${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.orig")
INCLUDE(CPack)
#This adds the 'dist' target
-add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
+ADD_CUSTOM_TARGET(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
+# 'clean' is not (yet) a first class target. However, we need to clean the directories before building the sources:
+# Only Makefile-based generators are handled, since the commands below invoke
+# '${CMAKE_MAKE_PROGRAM} clean' and 'rm'.
+IF("${CMAKE_GENERATOR}" MATCHES "Make")
+ # Run the build tool's 'clean' in the current build directory before 'dist'.
+ # CMAKE_CURRENT_DIR is not a CMake variable (it expands to nothing and leaves
+ # WORKING_DIRECTORY without a value), so the binary directory is named explicitly.
+ ADD_CUSTOM_TARGET(make_clean
+ COMMAND ${CMAKE_MAKE_PROGRAM} clean
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ )
+ ADD_DEPENDENCIES(dist make_clean)
+
+ # Remove the compressed man pages that the 'man' target writes into the source tree,
+ # so they do not end up in the source package.
+ ADD_CUSTOM_TARGET(make_clean_man
+ COMMAND rm -f *.${DOC_COMPRESS_EXT}
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/man
+ )
+ ADD_DEPENDENCIES(dist make_clean_man)
+ENDIF()
IF(NOT NO_DEP_CHECK)
IF (UNIX)
diff --git a/ChangeLog b/ChangeLog
index eeb7b49..3127dc8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+08/03/13 -*- Version 0.8.0 -*-
+
+22/01/13 Julien Dutheil
+* Compiles with -Wconversion
+
+14/01/13 Mathieu Groussin
+* Added COaLA model for proteins in bppML and bppSeqGen.
+
+23/11/12 Laurent Guéguen
+* New bppMixedLikelihoods program
+
08/02/12 -*- Version 0.7.0 -*-
21/12/11 Julien Dutheil
diff --git a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp b/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
index 8421ebc..24eb17f 100644
--- a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
@@ -48,12 +48,12 @@ nonhomogeneous=one_per_branch
#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa,\
# YN98.freq_Codon.123_Full.theta, YN98.freq_Codon.123_Full.theta1, YN98.freq_Codon.123_Full.theta2
#These lines are for the F3X4 option:
-nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.freq*
+nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.123_*
nonhomogeneous.stationarity=yes
#Only if stationarity is set to false:
nonhomogeneous.root_freq=
-rate_distribution = Uniform //Gamma(n=4, alpha=0.358)
+rate_distribution = Constant() //Gamma(n=4, alpha=0.358)
# Likelihood recursion option:
# - simple: derivatives takes more time to compute, but likelihood computation is faster.
diff --git a/Examples/MaximumLikelihood/Codons/M0/ML.bpp b/Examples/MaximumLikelihood/Codons/M0/ML.bpp
index 1b0561a..e6130ce 100644
--- a/Examples/MaximumLikelihood/Codons/M0/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M0/ML.bpp
@@ -1,6 +1,7 @@
#Example data set adapted from PAML
#
-#logL = 65442.80353609310986939818
+#logL0 = -1084.23740828789
+#logL = -1056.00142355909
# Global variables:
DATA = lysozymeLarge
@@ -43,9 +44,9 @@ init.brlen.method = Input
# See the manual for a description of the syntax and available options.
#
model = YN98(kappa=1, omega=1.0, frequencies=F0)
-nonhomogeneous=no
+nonhomogeneous = no
-rate_distribution = Uniform //Gamma(n=4, alpha=0.358)
+rate_distribution = Constant() //Gamma(n=4, alpha=0.358)
# Likelihood recursion option:
# - simple: derivatives takes more time to compute, but likelihood computation is faster.
diff --git a/Examples/MaximumLikelihood/Codons/M1/ML.bpp b/Examples/MaximumLikelihood/Codons/M1/ML.bpp
index e60badd..7fbe48b 100644
--- a/Examples/MaximumLikelihood/Codons/M1/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M1/ML.bpp
@@ -22,7 +22,7 @@ input.sequence.format=Fasta
input.sequence.sites_to_use = all
# Specify a maximum amount of gaps: may be an absolute number or a percentage.
input.sequence.max_gap_allowed = 50%
-
+input.sequence.max_unresolved_allowed = 50%
input.sequence.remove_stop_codons = yes
# ----------------------------------------------------------------------------------------
@@ -42,10 +42,10 @@ init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
-model = YNGKP_M1(kappa=1, omega=1.0, frequencies=F3X4, p0=1.0, initFreqs=observed)
+model = YNGKP_M1(kappa=1, omega=0.5, frequencies=F1X4, p0=0.5, initFreqs=observed, observedPseudoCount=1)
nonhomogeneous=no
-rate_distribution = Uniform //Gamma(n=4, alpha=0.358)
+rate_distribution = Constant //Gamma(n=4, alpha=0.358)
# Likelihood recursion option:
# - simple: derivatives takes more time to compute, but likelihood computation is faster.
@@ -71,7 +71,7 @@ likelihood.recursion_simple.compression = recursive
# Should we reestimate likelihood parameters? Tree topology will not be optimized.
# (recommanded)
-optimization = D-Brent(derivatives=Newton, nstep=10)
+optimization = FullD(derivatives=Newton)
# Tell if the parameter should be transformed in order to remove constraints.
# This can improves the optimization, but might be a bit slower.
@@ -85,7 +85,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter = YNGKP_M1.freq*
+optimization.ignore_parameter = YNGKP_M1.*Full*
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
diff --git a/Examples/MaximumLikelihood/Codons/M2/ML.bpp b/Examples/MaximumLikelihood/Codons/M2/ML.bpp
index 8ffd713..fb0cf66 100644
--- a/Examples/MaximumLikelihood/Codons/M2/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M2/ML.bpp
@@ -45,7 +45,7 @@ init.brlen.method = Input
model = YNGKP_M2(kappa=1, omega0=0.5, omega2=2.0, frequencies=F3X4, theta1=0.33333, theta2=0.5, initFreqs=observed)
nonhomogeneous=no
-rate_distribution = Uniform //Gamma(n=4, alpha=0.358)
+rate_distribution = Constant() //Gamma(n=4, alpha=0.358)
# Likelihood recursion option:
# - simple: derivatives takes more time to compute, but likelihood computation is faster.
@@ -85,7 +85,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter = YNGKP_M1.freq*
+optimization.ignore_parameter = YNGKP_M2.*Full*
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
diff --git a/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp b/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
index 2c47a19..9f54e4d 100644
--- a/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
+++ b/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
@@ -21,6 +21,7 @@ input.sequence.format=Phylip(order=sequential, type=extended, split=spaces)
input.sequence.sites_to_use = all
# Specify a maximum amount of gaps: may be an absolute number or a percentage.
input.sequence.max_gap_allowed = 50%
+input.sequence.max_unresolved_allowed = 100%
# ----------------------------------------------------------------------------------------
# Input tree file
diff --git a/Examples/Parsimony/Pars.bpp b/Examples/Parsimony/Pars.bpp
index b6660b6..054554a 100644
--- a/Examples/Parsimony/Pars.bpp
+++ b/Examples/Parsimony/Pars.bpp
@@ -7,6 +7,8 @@ DATA = LSU
# DNA, RNA or Protein
alphabet=DNA
+use.gaps=no
+
# The sequence file to use (sequences must be aligned!)
input.sequence.file=../Data/$(DATA).phy
diff --git a/Examples/SequenceSimulation/SeqGen.bpp b/Examples/SequenceSimulation/Homogeneous/SeqGen.bpp
similarity index 67%
copy from Examples/SequenceSimulation/SeqGen.bpp
copy to Examples/SequenceSimulation/Homogeneous/SeqGen.bpp
index b05133d..118e3ff 100644
--- a/Examples/SequenceSimulation/SeqGen.bpp
+++ b/Examples/SequenceSimulation/Homogeneous/SeqGen.bpp
@@ -3,7 +3,7 @@
alphabet = DNA
# Input tree to use:
-input.tree.file = ../Data/LSUrooted.dnd
+input.tree.file = ../../Data/LSUrooted.dnd
input.tree.format=Newick
# Print a tree with ids as bootstrap values.
@@ -27,12 +27,16 @@ output.sequence.format = Fasta()
# Homogeneous model?
# no => Homogeneous case
# general => Specify the model by hand.
-nonhomogeneous = general
+nonhomogeneous = no
-# Models are described in a separate file.
-# Choose one of the two following examples:
-//param = SeqGenHomogeneousModel.bpp
-param = SeqGenNonHomogeneousModel.bpp
+# Options for homogeneous and one-per_branch models:
+
+# Available models.
+# For proteins, the DCmutt method is used for JTT92 and DSO78.
+# You can use the 'empirical' option to specify another model.
+# JCnuc, K80, T92, HKY85, F84, TN93, JCprot, DSO78, JTT92 or empirical
+# Append +G2001 or +TS98 to the model name to add a covarion model.
+model = HKY85(kappa=2.843, theta=0.7, theta1=0.4, theta2=0.6)
# Rate Across Sites variation
rate_distribution = Gamma(n=4, alpha=0.358)
diff --git a/Examples/SequenceSimulation/SeqGen.bpp b/Examples/SequenceSimulation/HomogeneousCovarion/SeqGen.bpp
similarity index 63%
copy from Examples/SequenceSimulation/SeqGen.bpp
copy to Examples/SequenceSimulation/HomogeneousCovarion/SeqGen.bpp
index b05133d..a723b6a 100644
--- a/Examples/SequenceSimulation/SeqGen.bpp
+++ b/Examples/SequenceSimulation/HomogeneousCovarion/SeqGen.bpp
@@ -3,7 +3,7 @@
alphabet = DNA
# Input tree to use:
-input.tree.file = ../Data/LSUrooted.dnd
+input.tree.file = ../../Data/LSUrooted.dnd
input.tree.format=Newick
# Print a tree with ids as bootstrap values.
@@ -27,13 +27,14 @@ output.sequence.format = Fasta()
# Homogeneous model?
# no => Homogeneous case
# general => Specify the model by hand.
-nonhomogeneous = general
+nonhomogeneous = no
-# Models are described in a separate file.
-# Choose one of the two following examples:
-//param = SeqGenHomogeneousModel.bpp
-param = SeqGenNonHomogeneousModel.bpp
+# Options for homogeneous and one-per_branch models:
-# Rate Across Sites variation
-rate_distribution = Gamma(n=4, alpha=0.358)
+# Available models.
+# For proteins, the DCmutt method is used for JTT92 and DSO78.
+# You can use the 'empirical' option to specify another model.
+# JCnuc, K80, T92, HKY85, F84, TN93, JCprot, DSO78, JTT92 or empirical
+# Append +G2001 or +TS98 to the model name to add a covarion model.
+model = G01(model=HKY85(kappa=2.843, theta=0.7, theta1=0.4, theta2=0.6), rdist=Gamma(n=4, alpha=0.358), nu=1)
diff --git a/Examples/SequenceSimulation/SeqGen.bpp b/Examples/SequenceSimulation/NonHomogeneous/SeqGen.bpp
similarity index 53%
rename from Examples/SequenceSimulation/SeqGen.bpp
rename to Examples/SequenceSimulation/NonHomogeneous/SeqGen.bpp
index b05133d..0b2adc5 100644
--- a/Examples/SequenceSimulation/SeqGen.bpp
+++ b/Examples/SequenceSimulation/NonHomogeneous/SeqGen.bpp
@@ -3,7 +3,7 @@
alphabet = DNA
# Input tree to use:
-input.tree.file = ../Data/LSUrooted.dnd
+input.tree.file = ../../Data/LSUrooted.dnd
input.tree.format=Newick
# Print a tree with ids as bootstrap values.
@@ -29,10 +29,27 @@ output.sequence.format = Fasta()
# general => Specify the model by hand.
nonhomogeneous = general
-# Models are described in a separate file.
-# Choose one of the two following examples:
-//param = SeqGenHomogeneousModel.bpp
-param = SeqGenNonHomogeneousModel.bpp
+# How to deal with root frequencies:
+# balanced : all frequencies are set to 1/size of the alphabet
+# observed : use observed counts in the data set
+# init : manually set the frequencies (they have to sum to one)
+# For nucleotides, one can use the GC rate (theta) parametrization, like in the T92 model:
+# balancedGC: ancestral GC set to 0.5
+# observedGC: use observed GC content
+# initGC : manually set the ancestral GC content.
+nonhomogeneous.root_freq = GC(theta=0.9)
+
+# Now the general heterogeneous case:
+# Specify the number of distincts models.
+nonhomogeneous.number_of_models = 2
+# Set up each model:
+model1=T92(kappa=2, theta=0.1)
+model1.nodes_id=0:62 # The Ids of the nodes to which this model should be assigned.
+
+model2=T92(kappa=1, theta=0.9)
+model2.nodes_id=63:155
+
+#etc
# Rate Across Sites variation
rate_distribution = Gamma(n=4, alpha=0.358)
diff --git a/Examples/SequenceSimulation/SeqGenHomogeneousModel.bpp b/Examples/SequenceSimulation/SeqGenHomogeneousModel.bpp
deleted file mode 100644
index 64f4e0a..0000000
--- a/Examples/SequenceSimulation/SeqGenHomogeneousModel.bpp
+++ /dev/null
@@ -1,9 +0,0 @@
-# Options for homogeneous and one-per_branch models:
-
-# Available models.
-# For proteins, the DCmutt method is used for JTT92 and DSO78.
-# You can use the 'empirical' option to specify another model.
-# JCnuc, K80, T92, HKY85, F84, TN93, JCprot, DSO78, JTT92 or empirical
-# Append +G2001 or +TS98 to the model name to add a covarion model.
-model = HKY85(kappa=2.843, theta=0.7, theta1=0.4, theta2=0.6)
-
diff --git a/Examples/SequenceSimulation/SeqGenNonHomogeneousModel.bpp b/Examples/SequenceSimulation/SeqGenNonHomogeneousModel.bpp
deleted file mode 100644
index 538723e..0000000
--- a/Examples/SequenceSimulation/SeqGenNonHomogeneousModel.bpp
+++ /dev/null
@@ -1,22 +0,0 @@
-# How to deal with root frequencies:
-# balanced : all frequencies are set to 1/size of the alphabet
-# observed : use observed counts in the data set
-# init : manually set the frequencies (they have to sum to one)
-# For nucleotides, one can use the GC rate (theta) parametrization, like in the T92 model:
-# balancedGC: ancestral GC set to 0.5
-# observedGC: use observed GC content
-# initGC : manually set the ancestral GC content.
-nonhomogeneous.root_freq = GC(theta=0.9)
-
-# Now the general heterogeneous case:
-# Specify the number of distincts models.
-nonhomogeneous.number_of_models = 2
-# Set up each model:
-model1=T92(kappa=2, theta=0.1)
-model1.nodes_id=0:62 # The Ids of the nodes to which this model should be assigned.
-
-model2=T92(kappa=1, theta=0.9)
-model2.nodes_id=63:155
-
-#etc
-
diff --git a/bppSuite.spec b/bppSuite.spec
deleted file mode 100644
index 85723b7..0000000
--- a/bppSuite.spec
+++ /dev/null
@@ -1,100 +0,0 @@
-%define name bppsuite
-%define version 0.7.0
-%define release 1
-%define _prefix /usr
-
-Summary: The Bio++ Program Suite.
-Name: %{name}
-Version: %{version}
-Release: %{release}
-Vendor: Julien Dutheil
-Source: http://download.gna.org/bppsuite/%{name}-%{version}.tar.gz
-License: CeCILL 2
-Group: System Environment/Libraries
-BuildRoot: %{_builddir}/%{name}-root
-Packager: Julien Dutheil
-Prefix: %{_prefix}
-AutoReq: yes
-AutoProv: yes
-
-%description
-Bio++ program suite includes programs:
- - BppML for maximum likelihood analysis,
- - BppSeqGen for sequences simulation,
- - BppAncestor for ancestral states reconstruction,
- - BppDist for distance methods,
- - BppPars for parsimony analysis,
- - BppSeqMan for file conversion and sequence manipulation,
- - BppConsense for building consensus tree and computing bootstrap values,
- - BppPhySamp for phylogenetic sampling,
- - BppReRoot for tree rerooting.
- - BppTreeDraw for tree drawing.
- - BppAlnScore for comparing alignments and computing alignment scores.
-
-%prep
-%setup -q
-
-%build
-CFLAGS="-I%{_prefix}/include $RPM_OPT_FLAGS"
-CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=%{_prefix}"
-if [ %{_lib} == 'lib64' ] ; then
- CMAKE_FLAGS="$CMAKE_FLAGS -DLIB_SUFFIX=64"
-fi
-cmake $CMAKE_FLAGS .
-make
-make info
-
-%install
-rm -rf $RPM_BUILD_ROOT
-make DESTDIR=$RPM_BUILD_ROOT install
-
-%clean
-rm -rf $RPM_BUILD_ROOT
-
-%post -p /sbin/ldconfig
-
-%postun -p /sbin/ldconfig
-
-%files
-%defattr(-,root,root)
-%doc AUTHORS.txt COPYING.txt INSTALL.txt ChangeLog
-%{_prefix}/bin/bppml
-%{_prefix}/bin/bppseqgen
-%{_prefix}/bin/bppancestor
-%{_prefix}/bin/bppdist
-%{_prefix}/bin/bpppars
-%{_prefix}/bin/bppseqman
-%{_prefix}/bin/bppconsense
-%{_prefix}/bin/bppphysamp
-%{_prefix}/bin/bppreroot
-%{_prefix}/bin/bpptreedraw
-%{_prefix}/share/info/bppsuite.info.gz
-%{_prefix}/share/man/man1/bppml.1.gz
-%{_prefix}/share/man/man1/bppseqgen.1.gz
-%{_prefix}/share/man/man1/bppancestor.1.gz
-%{_prefix}/share/man/man1/bpppars.1.gz
-%{_prefix}/share/man/man1/bppdist.1.gz
-%{_prefix}/share/man/man1/bppconsense.1.gz
-%{_prefix}/share/man/man1/bppseqman.1.gz
-%{_prefix}/share/man/man1/bppreroot.1.gz
-%{_prefix}/share/man/man1/bppphysamp.1.gz
-%{_prefix}/share/man/man1/bpptreedraw.1.gz
-
-%changelog
-* Wed Feb 15 2012 Julien Dutheil <julien.dutheil at univ-montp2.fr>
-- BppSuite 0.7.0 release
-* Thu Jun 09 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr>
-- BppSuite 0.6.2 release
-* Mon Feb 28 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr>
-- BppSuite 0.6.1 release
-* Mon Feb 07 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr>
-- BppSuite 0.6.0 release
-* Thu Mar 25 2010 Julien Dutheil <julien.dutheil at univ-montp2.fr>
-- BppSuite 0.5.0 release
-* Wed Jun 10 2009 Julien Dutheil <jdutheil at birc.au.dk>
-- BppSuite 0.4.0 release
-* Thu Dec 11 2008 Julien Dutheil <jdutheil at birc.au.dk>
-- BppSuite 0.3.1 release
-* Thu Sep 23 2008 Julien Dutheil <jdutheil at birc.au.dk>
-- BppSuite 0.3.0 release
-
diff --git a/bppSuite/CMakeLists.txt b/bppSuite/CMakeLists.txt
index 594751a..af337cc 100644
--- a/bppSuite/CMakeLists.txt
+++ b/bppSuite/CMakeLists.txt
@@ -30,6 +30,10 @@ ADD_EXECUTABLE(bppancestor bppAncestor.cpp)
TARGET_LINK_LIBRARIES(bppancestor ${LIBS})
SET_TARGET_PROPERTIES(bppancestor PROPERTIES LINK_SEARCH_END_STATIC ${BUILD_STATIC})
+ADD_EXECUTABLE(bppmixedlikelihoods bppMixedLikelihoods.cpp)
+TARGET_LINK_LIBRARIES(bppmixedlikelihoods ${LIBS})
+SET_TARGET_PROPERTIES(bppmixedlikelihoods PROPERTIES LINK_SEARCH_END_STATIC ${BUILD_STATIC})
+
ADD_EXECUTABLE(bppreroot bppReRoot.cpp)
TARGET_LINK_LIBRARIES(bppreroot ${LIBS})
SET_TARGET_PROPERTIES(bppreroot PROPERTIES LINK_SEARCH_END_STATIC ${BUILD_STATIC})
@@ -55,6 +59,7 @@ INSTALL(TARGETS
bppseqman
bppconsense
bppancestor
+ bppmixedlikelihoods
bppreroot
bppphysamp
bpptreedraw
diff --git a/bppSuite/bppAlnScore.cpp b/bppSuite/bppAlnScore.cpp
index 7feb507..4690198 100644
--- a/bppSuite/bppAlnScore.cpp
+++ b/bppSuite/bppAlnScore.cpp
@@ -5,37 +5,37 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team
-
-This software is a computer program whose purpose is to simulate sequence
-data according to a phylogenetic tree and an evolutionary model.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
+ Copyright or © or Copr. Bio++ Development Team
+
+ This software is a computer program whose purpose is to simulate sequence
+ data according to a phylogenetic tree and an evolutionary model.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
// From the STL:
#include <iostream>
@@ -75,148 +75,175 @@ int main(int args, char** argv)
cout << "* Author: J. Dutheil Last Modif. 15/12/11 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
-
+
if (args == 1)
{
help();
return 0;
}
-
- try {
-
- BppApplication bppalnscore(args, argv, "BppAlnScore");
- bppalnscore.startTimer();
-
- // Get alphabet
- Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppalnscore.getParams(), "", false, true, true);
-
- // Get the test alignment:
- auto_ptr<SiteContainer> sitesTest(SequenceApplicationTools::getSiteContainer(alphabet, bppalnscore.getParams(), ".test", false, true));
-
- // Get the reference alignment:
- auto_ptr<SiteContainer> sitesRef(SequenceApplicationTools::getSiteContainer(alphabet, bppalnscore.getParams(), ".ref", false, true));
-
- //We check if the two alignments are compatible:
- vector<string> namesTest = sitesTest->getSequencesNames();
- vector<string> namesRef = sitesRef->getSequencesNames();
- if (namesTest != namesRef) {
- ApplicationTools::displayTask("Reorder sequences in ref. alignment", true);
- auto_ptr<AlignedSequenceContainer> tmp(new AlignedSequenceContainer(sitesRef->getAlphabet()));
- for (size_t i = 0; i < namesTest.size(); ++i) {
- ApplicationTools::displayGauge(i, namesTest.size() - 1);
- try {
- tmp->addSequence(sitesRef->getSequence(namesTest[i]));
- } catch(SequenceNotFoundException& ex) {
- throw Exception("ERROR!!! Reference alignment should contain the same sequences as the test alignment!");
- }
- }
- ApplicationTools::displayTaskDone();
- sitesRef = tmp;
- }
- //Build alignment indexes:
- RowMatrix<unsigned int> indexTest, indexRef;
- SiteContainerTools::getSequencePositions(*sitesTest, indexTest);
- SiteContainerTools::getSequencePositions(*sitesRef, indexRef);
-
- //Now build scores:
- int na = ApplicationTools::getIntParameter("score.na", bppalnscore.getParams(), 0);
- ApplicationTools::displayResult("NA value to used", na);
- vector<int> cs = SiteContainerTools::getColumnScores(indexTest, indexRef, na);
- vector<double> sps = SiteContainerTools::getSumOfPairsScores(indexTest, indexRef, static_cast<double>(na));
-
- //Should scores be averaged for words?
- size_t wsize = ApplicationTools::getParameter<size_t>("score.word_size", bppalnscore.getParams(), 1);
- size_t phase = 0;
- if (wsize > 1) {
- ApplicationTools::displayResult("Scores uniformized for words of size", wsize);
- string phaseOpt = ApplicationTools::getStringParameter("score.phase", bppalnscore.getParams(), "1");
- if (TextTools::isDecimalInteger(phaseOpt)) {
- phase = TextTools::toInt(phaseOpt);
- if (phase == 0)
- throw Exception("ERROR: positions are 1-based.");
- phase--;
- } else {
- //We look for the first occurrence of the given motif:
- try {
- BasicSequence motif("motif", phaseOpt, sitesTest->getAlphabet());
- ApplicationTools::displayResult("Phase based on 1st occurence of", motif.toString());
- unsigned int pos = sitesTest->getNumberOfSites();
- for (unsigned int i = 0; i < sitesTest->getNumberOfSequences(); ++i) {
- unsigned int p = SequenceTools::findFirstOf(sitesTest->getSequence(i), motif);
- if (p < pos)
- pos = p;
+ try
+ {
+ BppApplication bppalnscore(args, argv, "BppAlnScore");
+ bppalnscore.startTimer();
+
+ // Get alphabet
+ Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppalnscore.getParams(), "", false, true, true);
+
+ // Get the test alignment:
+ auto_ptr<SiteContainer> sitesTest(SequenceApplicationTools::getSiteContainer(alphabet, bppalnscore.getParams(), ".test", false, true));
+
+ // Get the reference alignment:
+ auto_ptr<SiteContainer> sitesRef(SequenceApplicationTools::getSiteContainer(alphabet, bppalnscore.getParams(), ".ref", false, true));
+
+ // We check if the two alignments are compatible:
+ vector<string> namesTest = sitesTest->getSequencesNames();
+ vector<string> namesRef = sitesRef->getSequencesNames();
+ if (namesTest != namesRef)
+ {
+ ApplicationTools::displayTask("Reorder sequences in ref. alignment", true);
+ auto_ptr<AlignedSequenceContainer> tmp(new AlignedSequenceContainer(sitesRef->getAlphabet()));
+ for (size_t i = 0; i < namesTest.size(); ++i)
+ {
+ ApplicationTools::displayGauge(i, namesTest.size() - 1);
+ try
+ {
+ tmp->addSequence(sitesRef->getSequence(namesTest[i]));
+ }
+ catch (SequenceNotFoundException& ex)
+ {
+ throw Exception("ERROR!!! Reference alignment should contain the same sequences as the test alignment!");
}
- phase = pos;
- } catch(Exception& ex) {
- throw Exception("Error, unvalid motif specified for phase option.");
}
+ ApplicationTools::displayTaskDone();
+ sitesRef = tmp;
}
- ApplicationTools::displayResult("First word starts at", phase + 1);
- //Now perform the smoothing:
- size_t i;
- for (i = 0; i < phase; ++i) {
- cs[i] = 0;
- sps[i] = 0;
- }
- for (; i + wsize <= cs.size(); i += wsize) {
- //First compute minimum criterion:
- int csmin = 1;
- double spsmin = 1;
- for (size_t j = i; j < i + wsize; ++j) {
- if (cs[j] < csmin) csmin = cs[j];
- if (sps[j] < spsmin) spsmin = sps[j];
+ // Build alignment indexes:
+ RowMatrix<size_t> indexTest, indexRef;
+ SiteContainerTools::getSequencePositions(*sitesTest, indexTest);
+ SiteContainerTools::getSequencePositions(*sitesRef, indexRef);
+
+ // Now build scores:
+ int na = ApplicationTools::getIntParameter("score.na", bppalnscore.getParams(), 0);
+ ApplicationTools::displayResult("NA value to used", na);
+ vector<int> cs = SiteContainerTools::getColumnScores(indexTest, indexRef, na);
+ vector<double> sps = SiteContainerTools::getSumOfPairsScores(indexTest, indexRef, static_cast<double>(na));
+
+ // Should scores be averaged for words?
+ size_t wsize = ApplicationTools::getParameter<size_t>("score.word_size", bppalnscore.getParams(), 1);
+ size_t phase = 0;
+ if (wsize > 1)
+ {
+ ApplicationTools::displayResult("Scores uniformized for words of size", wsize);
+ string phaseOpt = ApplicationTools::getStringParameter("score.phase", bppalnscore.getParams(), "1");
+ if (TextTools::isDecimalInteger(phaseOpt))
+ {
+ phase = TextTools::toInt(phaseOpt);
+ if (phase == 0)
+ throw Exception("ERROR: positions are 1-based.");
+ phase--;
+ }
+ else
+ {
+ // We look for the first occurrence of the given motif:
+ try
+ {
+ BasicSequence motif("motif", phaseOpt, sitesTest->getAlphabet());
+ ApplicationTools::displayResult("Phase based on 1st occurence of", motif.toString());
+ size_t pos = sitesTest->getNumberOfSites();
+ for (size_t i = 0; i < sitesTest->getNumberOfSequences(); ++i)
+ {
+ size_t p = SequenceTools::findFirstOf(sitesTest->getSequence(i), motif);
+ if (p < pos)
+ pos = p;
+ }
+ phase = pos;
+ }
+ catch (Exception& ex)
+ {
+ throw Exception("Error, unvalid motif specified for phase option.");
+ }
}
- //Assign min to all positions in word:
- for (size_t j = i; j < i + wsize; ++j) {
- cs[j] = csmin;
- sps[j] = spsmin;
+ ApplicationTools::displayResult("First word starts at", phase + 1);
+
+ // Now perform the smoothing:
+ size_t i;
+ for (i = 0; i < phase; ++i)
+ {
+ cs[i] = 0;
+ sps[i] = 0;
+ }
+ for ( ; i + wsize <= cs.size(); i += wsize)
+ {
+ // First compute minimum criterion:
+ int csmin = 1;
+ double spsmin = 1;
+ for (size_t j = i; j < i + wsize; ++j)
+ {
+ if (cs[j] < csmin)
+ csmin = cs[j];
+ if (sps[j] < spsmin)
+ spsmin = sps[j];
+ }
+ // Assign min to all positions in word:
+ for (size_t j = i; j < i + wsize; ++j)
+ {
+ cs[j] = csmin;
+ sps[j] = spsmin;
+ }
+ }
+ for ( ; i < cs.size(); ++i)
+ {
+ cs[i] = 0;
+ sps[i] = 0;
}
}
- for (; i < cs.size(); ++i) {
- cs[i] = 0;
- sps[i] = 0;
- }
- }
- //Output scores to file:
- string outputScores = ApplicationTools::getAFilePath("output.scores", bppalnscore.getParams(), false, false);
- if (outputScores != "none") {
- ApplicationTools::displayResult("Output scores to", outputScores);
- ofstream output(outputScores.c_str(), ios::out);
- output << "Site\tColumnScore\tSumOfPairsScore" << endl;
- for (size_t i = 0; i < cs.size(); ++i) {
- output << sitesTest->getSite(i).getPosition() << "\t" << cs[i] << "\t" << sps[i] << endl;
+ // Output scores to file:
+ string outputScores = ApplicationTools::getAFilePath("output.scores", bppalnscore.getParams(), false, false);
+ if (outputScores != "none")
+ {
+ ApplicationTools::displayResult("Output scores to", outputScores);
+ ofstream output(outputScores.c_str(), ios::out);
+ output << "Site\tColumnScore\tSumOfPairsScore" << endl;
+ for (size_t i = 0; i < cs.size(); ++i)
+ {
+ output << sitesTest->getSite(i).getPosition() << "\t" << cs[i] << "\t" << sps[i] << endl;
+ }
+ output.close();
}
- output.close();
- }
- //Create a sequence filter:
- string outputFilter = ApplicationTools::getAFilePath("output.mase", bppalnscore.getParams(), false, false);
- if (outputFilter != "none") {
- ApplicationTools::displayResult("Output mase with site filter to", outputFilter);
- double spsThreshold = ApplicationTools::getDoubleParameter("output.sps_thresholds", bppalnscore.getParams(), 0.8);
- ApplicationTools::displayResult("Threshold for SPS", spsThreshold);
-
- MultiRange<unsigned int> csRanges;
- MultiRange<unsigned int> spsRanges;
- for (size_t i = 0; i < cs.size(); ++i) {
- if (cs[i] == 1) csRanges.addRange(Range<unsigned int>(i, i + 1));
- if (sps[i] >= spsThreshold) spsRanges.addRange(Range<unsigned int>(i, i + 1));
+ // Create a sequence filter:
+ string outputFilter = ApplicationTools::getAFilePath("output.mase", bppalnscore.getParams(), false, false);
+ if (outputFilter != "none")
+ {
+ ApplicationTools::displayResult("Output mase with site filter to", outputFilter);
+ double spsThreshold = ApplicationTools::getDoubleParameter("output.sps_thresholds", bppalnscore.getParams(), 0.8);
+ ApplicationTools::displayResult("Threshold for SPS", spsThreshold);
+
+ MultiRange<size_t> csRanges;
+ MultiRange<size_t> spsRanges;
+ for (size_t i = 0; i < cs.size(); ++i)
+ {
+ if (cs[i] == 1)
+ csRanges.addRange(Range<size_t>(i, i + 1));
+ if (sps[i] >= spsThreshold)
+ spsRanges.addRange(Range<size_t>(i, i + 1));
+ }
+
+ MaseHeader header;
+ header.setSiteSelection("CS", csRanges);
+ header.setSiteSelection("SPS", spsRanges);
+ Mase writer;
+ writer.writeMeta(outputFilter, *sitesTest, header);
}
- MaseHeader header;
- header.setSiteSelection("CS", csRanges);
- header.setSiteSelection("SPS", spsRanges);
- Mase writer;
- writer.writeMeta(outputFilter, *sitesTest, header);
+ // We're done!
+ bppalnscore.done();
}
-
- //We're done!
- bppalnscore.done();
-
- } catch(exception & e) {
+ catch (exception& e)
+ {
cout << e.what() << endl;
return 1;
}
diff --git a/bppSuite/bppAncestor.cpp b/bppSuite/bppAncestor.cpp
index 4d0f723..40a2bee 100644
--- a/bppSuite/bppAncestor.cpp
+++ b/bppSuite/bppAncestor.cpp
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS
+Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to estimate
phylogenies and evolutionary parameters from a dataset according to
@@ -60,6 +60,7 @@ using namespace std;
#include <Bpp/Seq/Alphabet/Alphabet.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
#include <Bpp/Seq/Container/SequenceContainerTools.h>
+#include <Bpp/Seq/Container/SiteContainerTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
// From PhylLib:
@@ -71,6 +72,7 @@ using namespace std;
#include <Bpp/Phyl/Model/MarkovModulatedSubstitutionModel.h>
#include <Bpp/Phyl/Model/SubstitutionModelSet.h>
#include <Bpp/Phyl/Model/SubstitutionModelSetTools.h>
+#include <Bpp/Phyl/Model/RateDistribution/ConstantRateDistribution.h>
#include <Bpp/Phyl/Io/Newick.h>
using namespace bpp;
@@ -138,10 +140,12 @@ int main(int args, char ** argv)
ApplicationTools::displayResult("Writing tagged tree to", treeWIdPath);
treeWriter.write(ttree, treeWIdPath);
delete tree;
- cout << "BppML's done." << endl;
+ cout << "BppAncestor's done." << endl;
exit(0);
}
+ bool checkTree = ApplicationTools::getBooleanParameter("input.tree.check_root", bppancestor.getParams(), true, "", true, false);
+
DRTreeLikelihood *tl;
string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppancestor.getParams(), "no", "", true, false);
ApplicationTools::displayResult("Heterogeneous model", nhOpt);
@@ -149,22 +153,26 @@ int main(int args, char ** argv)
SubstitutionModel *model = 0;
SubstitutionModelSet *modelSet = 0;
DiscreteDistribution *rDist = 0;
- unsigned int nbStates;
+ size_t nbStates;
if (nhOpt == "no")
{
model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppancestor.getParams());
if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
- if(model->getNumberOfStates() > model->getAlphabet()->getSize())
+ if (model->getNumberOfStates() > model->getAlphabet()->getSize())
{
//Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
+ rDist = new ConstantRateDistribution();
}
else
{
rDist = PhylogeneticsApplicationTools::getRateDistribution(bppancestor.getParams());
}
- tl = new DRHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true);
+ if (dynamic_cast<MixedSubstitutionModel*>(model))
+ tl = new DRHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, checkTree, true, true);
+ else
+ tl = new DRHomogeneousTreeLikelihood(*tree, *sites, model, rDist, checkTree);
+
nbStates = model->getNumberOfStates();
}
else if (nhOpt == "one_per_branch")
@@ -174,7 +182,7 @@ int main(int args, char ** argv)
if (model->getNumberOfStates() > model->getAlphabet()->getSize())
{
//Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
+ rDist = new ConstantRateDistribution();
}
else
{
@@ -192,6 +200,8 @@ int main(int args, char ** argv)
vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppancestor.getParams(), ',', "");
modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, globalParameters);
model = 0;
+ if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet))
+ throw Exception("Non-homogeneous mixed substitution ancestor reconstruction not implemented, sorry!");
tl = new DRNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true);
nbStates = modelSet->getNumberOfStates();
}
@@ -208,6 +218,8 @@ int main(int args, char ** argv)
{
rDist = PhylogeneticsApplicationTools::getRateDistribution(bppancestor.getParams());
}
+ if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet))
+ throw Exception("Non-homogeneous mixed substitution ancestor reconstruction not implemented, sorry!");
tl = new DRNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true);
nbStates = modelSet->getNumberOfStates();
}
@@ -296,10 +308,10 @@ int main(int args, char ** argv)
ofstream out(outputFile.c_str(), ios::out);
TreeTemplate<Node> ttree(*tree);
vector<Node *> nodes = ttree.getInnerNodes();
- unsigned int nbNodes = nodes.size();
+ size_t nbNodes = nodes.size();
// Get the rate class with maximum posterior probability:
- vector<unsigned int> classes = tl->getRateClassWithMaxPostProbOfEachSite();
+ vector<size_t> classes = tl->getRateClassWithMaxPostProbOfEachSite();
// Get the posterior rate, i.e. rate averaged over all posterior probabilities:
Vdouble rates = tl->getPosteriorRateOfEachSite();
// Get the ancestral sequences:
@@ -313,7 +325,7 @@ int main(int args, char ** argv)
colNames.push_back("lnL");
colNames.push_back("rc");
colNames.push_back("pr");
- for (unsigned int i = 0; i < nbNodes; i++) {
+ for (size_t i = 0; i < nbNodes; i++) {
Node *node = nodes[i];
colNames.push_back("max." + TextTools::toString(node->getId()));
if (probs) {
@@ -339,7 +351,7 @@ int main(int args, char ** argv)
vector<string> row(colNames.size());
DataTable* infos = new DataTable(colNames);
- for (unsigned int i = 0; i < sites->getNumberOfSites(); i++)
+ for (size_t i = 0; i < sites->getNumberOfSites(); i++)
{
double lnL = tl->getLogLikelihoodForASite(i);
const Site* currentSite = &sites->getSite(i);
diff --git a/bppSuite/bppDist.cpp b/bppSuite/bppDist.cpp
index a47c69c..a88ac87 100644
--- a/bppSuite/bppDist.cpp
+++ b/bppSuite/bppDist.cpp
@@ -6,7 +6,7 @@
//
/*
-Copyright or © or Copr. CNRS
+Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to estimate
phylogenies and evolutionary parameters from a dataset according to
@@ -55,6 +55,7 @@ using namespace std;
// From SeqLib:
#include <Bpp/Seq/Alphabet/Alphabet.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
+#include <Bpp/Seq/Container/SiteContainerTools.h>
#include <Bpp/Seq/SiteTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
@@ -66,6 +67,7 @@ using namespace std;
#include <Bpp/Phyl/Distance.all>
#include <Bpp/Phyl/OptimizationTools.h>
#include <Bpp/Phyl/Model/MarkovModulatedSubstitutionModel.h>
+#include <Bpp/Phyl/Model/RateDistribution/ConstantRateDistribution.h>
using namespace bpp;
@@ -112,10 +114,10 @@ int main(int args, char ** argv)
SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppdist.getParams());
DiscreteDistribution* rDist = 0;
- if(model->getNumberOfStates() > model->getAlphabet()->getSize())
+ if (model->getNumberOfStates() > model->getAlphabet()->getSize())
{
//Markov-modulated Markov model!
- rDist = new ConstantDistribution(1.);
+ rDist = new ConstantRateDistribution();
}
else
{
@@ -165,14 +167,14 @@ int main(int args, char ** argv)
OutputStream* messenger =
(mhPath == "none") ? 0 :
(mhPath == "std") ? ApplicationTools::message :
- new StlOutputStream(auto_ptr<ostream>(new ofstream(mhPath.c_str(), ios::out)));
+ new StlOutputStream(new ofstream(mhPath.c_str(), ios::out));
ApplicationTools::displayResult("Message handler", mhPath);
string prPath = ApplicationTools::getAFilePath("optimization.profiler", bppdist.getParams(), false, false);
OutputStream* profiler =
(prPath == "none") ? 0 :
(prPath == "std") ? ApplicationTools::message :
- new StlOutputStream(auto_ptr<ostream>(new ofstream(prPath.c_str(), ios::out)));
+ new StlOutputStream(new ofstream(prPath.c_str(), ios::out));
if(profiler) profiler->setPrecision(20);
ApplicationTools::displayResult("Profiler", prPath);
@@ -215,7 +217,7 @@ int main(int args, char ** argv)
//Here it is:
ofstream warn("warnings", ios::out);
ApplicationTools::warning = new StlOutputStreamWrapper(&warn);
- tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore, !ignoreBrLen, false, type, tolerance, nbEvalMax, profiler, messenger, optVerbose);
+ tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore, !ignoreBrLen, type, tolerance, nbEvalMax, profiler, messenger, optVerbose);
warn.close();
delete ApplicationTools::warning;
ApplicationTools::warning = ApplicationTools::message;
@@ -301,7 +303,6 @@ int main(int args, char ** argv)
*distMethod,
parametersToIgnore,
ignoreBrLen,
- false,
type,
tolerance,
nbEvalMax,
@@ -327,8 +328,6 @@ int main(int args, char ** argv)
delete alphabet;
delete sites;
- delete model;
- delete rDist;
delete distMethod;
delete tree;
diff --git a/bppSuite/bppML.cpp b/bppSuite/bppML.cpp
index 10643e7..a2bc90d 100644
--- a/bppSuite/bppML.cpp
+++ b/bppSuite/bppML.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS
+ Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to estimate
phylogenies and evolutionary parameters from a dataset according to
@@ -60,6 +60,7 @@ using namespace std;
// From SeqLib:
#include <Bpp/Seq/Alphabet/Alphabet.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
+#include <Bpp/Seq/Container/SiteContainerTools.h>
#include <Bpp/Seq/SiteTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
@@ -70,6 +71,9 @@ using namespace std;
#include <Bpp/Phyl/App/PhylogeneticsApplicationTools.h>
#include <Bpp/Phyl/OptimizationTools.h>
#include <Bpp/Phyl/Model.all>
+#include <Bpp/Phyl/Model/Protein/CoalaCore.h>
+#include <Bpp/Phyl/Model/RateDistribution/ConstantRateDistribution.h>
+#include <Bpp/Phyl/Model/FrequenciesSet/MvaFrequenciesSet.h>
#include <Bpp/Phyl/Io/Newick.h>
using namespace bpp;
@@ -89,11 +93,12 @@ void help()
int main(int args, char** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Maximum Likelihood Computation, version 1.5.0 *" << endl;
+ cout << "* Bio++ Maximum Likelihood Computation, version 1.6.0 *" << endl;
cout << "* *" << endl;
- cout << "* Authors: J. Dutheil Last Modif. 07/02/11 *" << endl;
+ cout << "* Authors: J. Dutheil Last Modif. 29/01/13 *" << endl;
cout << "* B. Boussau *" << endl;
- cout << "* L. Gueguen *" << endl;
+ cout << "* L. Guéguen *" << endl;
+ cout << "* M. Groussin *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -156,7 +161,10 @@ int main(int args, char** argv)
KeyvalTools::parseProcedure(initBrLenMethod, cmdName, cmdArgs);
if (cmdName == "Input")
{
- // Do nothing!
+ // Is the root has to be moved to the midpoint position along the branch that contains it ? If no, do nothing!
+ string midPointRootBrLengths = ApplicationTools::getStringParameter("midPointRootBrLengths", cmdArgs, "no", "", true, false);
+ if(midPointRootBrLengths == "yes")
+ TreeTools::constrainedMidPointRooting(*tree);
}
else if (cmdName == "Equal")
{
@@ -199,7 +207,7 @@ int main(int args, char** argv)
{
TreeTemplate<Node> ttree(*tree);
vector<Node*> nodes = ttree.getNodes();
- for (unsigned int i = 0; i < nodes.size(); i++)
+ for (size_t i = 0; i < nodes.size(); i++)
{
if (nodes[i]->isLeaf())
nodes[i]->setName(TextTools::toString(nodes[i]->getId()) + "_" + nodes[i]->getName());
@@ -219,6 +227,7 @@ int main(int args, char** argv)
string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppml.getParams(), "no", "", true, false);
ApplicationTools::displayResult("Heterogeneous model", nhOpt);
+ bool checkTree = ApplicationTools::getBooleanParameter("input.tree.check_root", bppml.getParams(), true, "", true, false);
bool optimizeTopo = ApplicationTools::getBooleanParameter("optimization.topology", bppml.getParams(), false, "", true, false);
unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bppml.getParams(), 0, "", true, false);
@@ -235,14 +244,14 @@ int main(int args, char** argv)
if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
{
// Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
+ rDist = new ConstantRateDistribution();
}
else
{
rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
}
if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new NNIHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true, true);
+ tl = new NNIHomogeneousTreeLikelihood(*tree, *sites, model, rDist, checkTree, true);
else
throw Exception("Topology estimation with Mixed model not supported yet, sorry :(");
}
@@ -253,7 +262,7 @@ int main(int args, char** argv)
if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
{
// Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
+ rDist = new ConstantRateDistribution();
}
else
{
@@ -266,25 +275,25 @@ int main(int args, char** argv)
string compression = ApplicationTools::getStringParameter("likelihood.recursion_simple.compression", bppml.getParams(), "recursive", "", true, false);
ApplicationTools::displayResult("Likelihood data compression", compression);
if (compression == "simple")
- if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new RHomogeneousTreeLikelihood(*tree, *sites, model, rDist, false, true, false);
+ if (dynamic_cast<MixedSubstitutionModel*>(model))
+ tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, checkTree, true, false);
else
- tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, false, true, false);
+ tl = new RHomogeneousTreeLikelihood(*tree, *sites, model, rDist, checkTree, true, false);
else if (compression == "recursive")
if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new RHomogeneousTreeLikelihood(*tree, *sites, model, rDist, false, true, true);
+ tl = new RHomogeneousTreeLikelihood(*tree, *sites, model, rDist, checkTree, true, true);
else
- tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, false, true, true);
+ tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, checkTree, true, true);
else throw Exception("Unknown likelihood data compression method: " + compression);
}
else if (recursion == "double")
{
- if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new DRHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true);
+ if (dynamic_cast<MixedSubstitutionModel*>(model))
+ tl = new DRHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, checkTree);
else
- tl = new DRHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true);
+ tl = new DRHomogeneousTreeLikelihood(*tree, *sites, model, rDist, checkTree);
}
else throw Exception("Unknown recursion option: " + recursion);
}
@@ -295,7 +304,7 @@ int main(int args, char** argv)
if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
{
// Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
+ rDist = new ConstantRateDistribution();
}
else
{
@@ -316,6 +325,17 @@ int main(int args, char** argv)
{
rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, sites, bppml.getParams(), rateFreqs);
stationarity = !rootFreqs;
+ string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", bppml.getParams(), "");
+ if (freqDescription == "MVAprotein")
+ {
+ if (dynamic_cast<CoalaCore*>(model))
+ {
+ dynamic_cast<MvaFrequenciesSet*>(rootFreqs)->setModelName("MVAprotein");
+ dynamic_cast<MvaFrequenciesSet*>(rootFreqs)->initSet(dynamic_cast<CoalaCore*>(model));
+ }
+ else
+ throw Exception("The MVAprotein frequencies set at the root can only be used if a COaLA model is used on branches.");
+ }
}
ApplicationTools::displayBooleanResult("Stationarity assumed", stationarity);
@@ -336,7 +356,7 @@ int main(int args, char** argv)
}
else if (recursion == "double")
{
- if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet)!=NULL)
+ if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet))
throw Exception("Double recursion with non homogeneous mixed models is not implemented yet.");
// tl = new DRNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
else
@@ -351,7 +371,7 @@ int main(int args, char** argv)
if (modelSet->getNumberOfStates() >= 2 * modelSet->getAlphabet()->getSize())
{
// Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
+ rDist = new ConstantRateDistribution();
}
else
{
@@ -415,15 +435,15 @@ int main(int args, char** argv)
if (isinf(logL))
{
ApplicationTools::displayError("!!! Unexpected initial likelihood == 0.");
- CodonAlphabet *pca=dynamic_cast<CodonAlphabet*>(alphabet);
+ CodonAlphabet *pca = dynamic_cast<CodonAlphabet*>(alphabet);
if (pca) {
bool f = false;
- unsigned int s;
- for (unsigned int i = 0; i < sites->getNumberOfSites(); i++) {
+ size_t s;
+ for (size_t i = 0; i < sites->getNumberOfSites(); i++) {
if (isinf(tl->getLogLikelihoodForASite(i))) {
const Site& site=sites->getSite(i);
s = site.size();
- for (unsigned int j = 0; j < s; j++) {
+ for (size_t j = 0; j < s; j++) {
if (pca->isStop(site.getValue(j))) {
(*ApplicationTools::error << "Stop Codon at site " << site.getPosition() << " in sequence " << sites->getSequence(j).getName()).endLine();
f = true;
@@ -445,7 +465,7 @@ int main(int args, char** argv)
exit(1);
} else {
ApplicationTools::displayBooleanResult("Saturated site removal enabled", true);
- for (unsigned int i = sites->getNumberOfSites(); i > 0; --i) {
+ for (size_t i = sites->getNumberOfSites(); i > 0; --i) {
if (isinf(tl->getLogLikelihoodForASite(i - 1))) {
ApplicationTools::displayResult("Ignore saturated site", sites->getSite(i - 1).getPosition());
sites->deleteSite(i - 1);
@@ -490,7 +510,7 @@ int main(int args, char** argv)
ApplicationTools::displayResult("Output estimates to file", parametersFile);
if (parametersFile != "none")
{
- StlOutputStream out(auto_ptr<ostream>(new ofstream(parametersFile.c_str(), ios::out)));
+ StlOutputStream out(new ofstream(parametersFile.c_str(), ios::out));
out << "# Log likelihood = ";
out.setPrecision(20) << (-tl->getValue());
out.endLine();
@@ -531,7 +551,8 @@ int main(int args, char** argv)
ofstream out(infosFile.c_str(), ios::out);
// Get the rate class with maximum posterior probability:
- vector<unsigned int> classes = tl->getRateClassWithMaxPostProbOfEachSite();
+ vector<size_t> classes = tl->getRateClassWithMaxPostProbOfEachSite();
+
// Get the posterior rate, i.e. rate averaged over all posterior probabilities:
Vdouble rates = tl->getPosteriorRateOfEachSite();
diff --git a/bppSuite/bppMixedLikelihoods.cpp b/bppSuite/bppMixedLikelihoods.cpp
new file mode 100644
index 0000000..ff73b6f
--- /dev/null
+++ b/bppSuite/bppMixedLikelihoods.cpp
@@ -0,0 +1,507 @@
+//
+// File: bppMixedLikelihoods.cpp
+// Created by: Laurent Guéguen
+// Created on: lundi 12 novembre 2012, à 07h 02
+//
+
+/*
+ Copyright or © or Copr. CNRS
+
+ This software is a computer program whose purpose is to estimate
+ phylogenies and evolutionary parameters from a dataset according to
+ the maximum likelihood principle.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+// From the STL:
+#include <iostream>
+#include <iomanip>
+
+using namespace std;
+
+#include <Bpp/App/BppApplication.h>
+#include <Bpp/App/ApplicationTools.h>
+#include <Bpp/Io/FileTools.h>
+#include <Bpp/Text/TextTools.h>
+#include <Bpp/Numeric/Prob/DiscreteDistribution.h>
+#include <Bpp/Numeric/Prob/ConstantDistribution.h>
+#include <Bpp/Numeric/DataTable.h>
+#include <Bpp/Numeric/Matrix/MatrixTools.h>
+#include <Bpp/Numeric/VectorTools.h>
+#include <Bpp/Numeric/AutoParameter.h>
+
+// From SeqLib:
+#include <Bpp/Seq/SiteTools.h>
+#include <Bpp/Seq/Alphabet/Alphabet.h>
+#include <Bpp/Seq/Container/VectorSiteContainer.h>
+#include <Bpp/Seq/Container/SequenceContainerTools.h>
+#include <Bpp/Seq/Container/SiteContainerTools.h>
+#include <Bpp/Seq/App/SequenceApplicationTools.h>
+
+// From PhylLib:
+#include <Bpp/Phyl/Tree.h>
+#include <Bpp/Phyl/Likelihood.all>
+#include <Bpp/Phyl/PatternTools.h>
+#include <Bpp/Phyl/App/PhylogeneticsApplicationTools.h>
+#include <Bpp/Phyl/OptimizationTools.h>
+#include <Bpp/Phyl/Model.all>
+#include <Bpp/Phyl/Model/RateDistribution/ConstantRateDistribution.h>
+#include <Bpp/Phyl/Io/Newick.h>
+
+using namespace bpp;
+
+/******************************************************************************/
+
+/**
+ * Print the command-line usage summary of bppmixedlikelihoods on the
+ * ApplicationTools message stream, one line per call.
+ */
+void help()
+{
+  // All lines go to the same stream: bind it once instead of dereferencing it each time.
+  OutputStream& usage = *ApplicationTools::message;
+
+  (usage << "__________________________________________________________________________").endLine();
+  (usage << "bppmixedlikelihoods parameter1_name=parameter1_value ").endLine();
+  (usage << " parameter2_name=parameter2_value ... param=option_file").endLine();
+  // Blank separator line.
+  usage.endLine();
+  (usage << " Refer to the Bio++ Program Suite Manual for a list of available options.").endLine();
+  (usage << "__________________________________________________________________________").endLine();
+}
+
+/**
+ * Entry point of bppmixedlikelihoods.
+ *
+ * Reads the alphabet, the alignment, the tree and a mixed substitution model
+ * (or model set) from the program options, computes the overall log
+ * likelihood, then recomputes the per-site log likelihoods for each component
+ * of the selected mixture and writes them as a tab-separated table to the file
+ * given by the output.likelihoods.file option.
+ *
+ * @param args Number of command-line arguments, program name included.
+ * @param argv Command-line arguments, forwarded to BppApplication.
+ * @return 0 on success (or when only the usage was printed), 1 when an
+ *         exception was caught. Fatal configuration errors terminate the
+ *         process through exit(-1).
+ */
+int main(int args, char** argv)
+{
+  cout << "******************************************************************" << endl;
+  cout << "* Bio++ Computation of site likelihoods inside mixed models *" << endl;
+  cout << "* Author: L. Guéguen Created on: 12/11/12 *" << endl;
+  cout << "******************************************************************" << endl;
+  cout << endl;
+
+  // Without any argument, only print the usage summary.
+  if (args == 1)
+  {
+    help();
+    return 0;
+  }
+
+  try
+  {
+    BppApplication bppmixedlikelihoods(args, argv, "BppMixedLikelihoods");
+    bppmixedlikelihoods.startTimer();
+
+    Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppmixedlikelihoods.getParams(), "", false);
+
+    // get the data
+
+    VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppmixedlikelihoods.getParams());
+
+    VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(*allSites, bppmixedlikelihoods.getParams(), "", true, false);
+    delete allSites;
+
+    ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences()));
+    ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites()));
+
+    // Get the tree
+    Tree* tree = PhylogeneticsApplicationTools::getTree(bppmixedlikelihoods.getParams());
+    ApplicationTools::displayResult("Number of leaves", TextTools::toString(tree->getNumberOfLeaves()));
+
+
+    AbstractDiscreteRatesAcrossSitesTreeLikelihood* tl = 0;
+    string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppmixedlikelihoods.getParams(), "no", "", true, false);
+    ApplicationTools::displayResult("Heterogeneous model", nhOpt);
+
+    // Exactly one of model / modelSet is non-null once the likelihood is built.
+    MixedSubstitutionModel* model = 0;
+    MixedSubstitutionModelSet* modelSet = 0;
+    DiscreteDistribution* rDist = 0;
+
+    if (nhOpt == "no")
+    {
+      model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppmixedlikelihoods.getParams()));
+      if (model == 0)
+      {
+        cout << "Model is not a Mixed model" << endl;
+        // This is an error: report a failure status, like the other fatal exits below.
+        exit(-1);
+      }
+
+      SiteContainerTools::changeGapsToUnknownCharacters(*sites);
+      if (model->getNumberOfStates() > model->getAlphabet()->getSize())
+      {
+        // Markov-modulated Markov model!
+        rDist = new ConstantRateDistribution();
+      }
+      else
+      {
+        rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams());
+      }
+      tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true);
+    }
+    else if (nhOpt == "one_per_branch")
+    {
+      model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppmixedlikelihoods.getParams()));
+      if (model == 0)
+      {
+        cout << "Model is not a Mixed model" << endl;
+        exit(-1);
+      }
+
+      SiteContainerTools::changeGapsToUnknownCharacters(*sites);
+      if (model->getNumberOfStates() > model->getAlphabet()->getSize())
+      {
+        // Markov-modulated Markov model!
+        rDist = new ConstantRateDistribution();
+      }
+      else
+      {
+        rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams());
+      }
+      vector<double> rateFreqs;
+      if (model->getNumberOfStates() != alphabet->getSize())
+      {
+        // Markov-Modulated Markov Model...
+        // Equal rates assumed for now, may be changed later (actually, in the
+        // most general case, we should assume a rate distribution for the root also!)
+        unsigned int n = static_cast<unsigned int>(model->getNumberOfStates() / alphabet->getSize());
+        rateFreqs = vector<double>(n, 1. / static_cast<double>(n));
+      }
+      FrequenciesSet* rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, sites, bppmixedlikelihoods.getParams(), rateFreqs);
+      vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppmixedlikelihoods.getParams(), ',', "");
+      modelSet = dynamic_cast<MixedSubstitutionModelSet*>(SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, globalParameters));
+      // The cast result is dereferenced further down; fail cleanly instead of crashing.
+      if (modelSet == 0)
+        throw Exception("The non-homogeneous model set built from the model is not a mixed model set.");
+      // From here on only the model set is used (and deleted) by this function.
+      model = 0;
+      tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
+    }
+    else if (nhOpt == "general")
+    {
+      modelSet = dynamic_cast<MixedSubstitutionModelSet*>(PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, sites, bppmixedlikelihoods.getParams()));
+      if (modelSet == 0)
+      {
+        cout << "Missing a Mixed model" << endl;
+        exit(-1);
+      }
+
+      SiteContainerTools::changeGapsToUnknownCharacters(*sites);
+      if (modelSet->getNumberOfStates() > modelSet->getAlphabet()->getSize())
+      {
+        // Markov-modulated Markov model!
+        // Same constant rate distribution as in the two other branches.
+        rDist = new ConstantRateDistribution();
+      }
+      else
+      {
+        rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams());
+      }
+      tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
+    }
+    else
+      throw Exception("Unknown option for nonhomogeneous: " + nhOpt);
+    tl->initialize();
+
+    double logL = tl->getValue();
+    if (isinf(logL))
+    {
+      // This may be due to null branch lengths, leading to null likelihood!
+      ApplicationTools::displayWarning("!!! Warning!!! Likelihood is zero.");
+      ApplicationTools::displayWarning("!!! This may be due to branch length == 0.");
+      ApplicationTools::displayWarning("!!! All null branch lengths will be set to 0.000001.");
+      ParameterList pl = tl->getBranchLengthsParameters();
+      for (size_t i = 0; i < pl.size(); i++)
+      {
+        if (pl[i].getValue() < 0.000001)
+          pl[i].setValue(0.000001);
+      }
+      tl->matchParametersValues(pl);
+      logL = tl->getValue();
+    }
+    if (isinf(logL))
+    {
+      // Still infinite after the branch length correction: list every site and give up.
+      ApplicationTools::displayError("!!! Unexpected likelihood == 0.");
+      ApplicationTools::displayError("!!! Looking at each site:");
+      for (size_t i = 0; i < sites->getNumberOfSites(); i++)
+      {
+        (*ApplicationTools::error << "Site " << sites->getSite(i).getPosition() << "\tlog likelihood = " << tl->getLogLikelihoodForASite(i)).endLine();
+      }
+      ApplicationTools::displayError("!!! 0 values (inf in log) may be due to computer overflow, particularily if datasets are big (>~500 sequences).");
+      exit(-1);
+    }
+
+
+    // Write parameters to screen:
+    ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15));
+    ParameterList parameters = tl->getSubstitutionModelParameters();
+    for (size_t i = 0; i < parameters.size(); i++)
+    {
+      ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
+    }
+    parameters = tl->getRateDistributionParameters();
+    for (size_t i = 0; i < parameters.size(); i++)
+    {
+      ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
+    }
+
+
+    // /////////////////////////////////////////////
+    // Getting likelihoods per submodel
+
+    string outputFile;
+    outputFile = ApplicationTools::getAFilePath("output.likelihoods.file", bppmixedlikelihoods.getParams(), true, false);
+    ApplicationTools::displayResult("Output file for likelihoods", outputFile);
+    ofstream out(outputFile.c_str(), ios::out);
+    // Fail before the expensive computations rather than silently writing nothing.
+    if (!out)
+      throw Exception("Cannot open output file for likelihoods: " + outputFile);
+
+    size_t nSites = sites->getNumberOfSites();
+
+    // 1-based index of the model to analyse inside the model set (unused with a single model).
+    size_t nummodel = ApplicationTools::getParameter<size_t>("likelihoods.model_number", bppmixedlikelihoods.getParams(), 1, "", true, true);
+
+    string parname = ApplicationTools::getStringParameter("likelihoods.parameter_name", bppmixedlikelihoods.getParams(), "", "", true, false);
+
+    if (modelSet && ((nummodel == 0) || (nummodel > modelSet->getNumberOfModels())))
+    {
+      ApplicationTools::displayError("Bad number of model " + TextTools::toString(nummodel) + ".");
+      exit(-1);
+    }
+
+    MixedSubstitutionModel* p0 = dynamic_cast<MixedSubstitutionModel*>(model ? model : modelSet->getModel(nummodel - 1));
+
+    if (!p0)
+    {
+      ApplicationTools::displayError("Model " + TextTools::toString(nummodel) + " is not a Mixed Model.");
+      exit(-1);
+    }
+
+    bool fromBiblio = false;
+
+    // This is an ugly fix because getMixedModel is private... can't we use clone instead or const everywhere?
+    // NOTE(review): when fromBiblio is set, p0 is a detached clone, so the
+    // probability changes made below are applied to the clone and not to the
+    // model handed to the likelihood objects -- confirm that this is intended.
+    const AbstractBiblioMixedSubstitutionModel* ptmp = dynamic_cast<const AbstractBiblioMixedSubstitutionModel*>(p0);
+    if (ptmp)
+    {
+      p0 = ptmp->getMixedModel().clone();
+      fromBiblio = true;
+    }
+
+
+    // Case of a MixtureOfSubstitutionModels
+
+    MixtureOfSubstitutionModels* pMSM = dynamic_cast<MixtureOfSubstitutionModels*>(p0);
+    if (pMSM)
+    {
+      // One column for the site position, then one column per submodel.
+      vector<string> colNames;
+      colNames.push_back("Sites");
+
+      size_t nummod = pMSM->getNumberOfModels();
+      for (size_t i = 0; i < nummod; i++)
+      {
+        colNames.push_back(pMSM->getNModel(i)->getName());
+      }
+
+      // Automatic storage: the table is released when leaving this branch.
+      DataTable rates(nSites, colNames.size());
+      rates.setColumnNames(colNames);
+
+      for (size_t i = 0; i < nSites; i++)
+      {
+        const Site* currentSite = &sites->getSite(i);
+        int currentSitePosition = currentSite->getPosition();
+        rates(i, "Sites") = string("[" + TextTools::toString(currentSitePosition) + "]");
+      }
+
+      // Probabilities of the submodels, read before they are overwritten below.
+      Vdouble vprob = pMSM->getProbabilities();
+      for (size_t i = 0; i < nummod; i++)
+      {
+        string modname = pMSM->getNModel(i)->getName();
+
+        // Put all the probability mass on submodel i.
+        for (size_t j = 0; j < nummod; j++)
+        {
+          pMSM->setNProbability(j, (j == i) ? 1 : 0);
+        }
+
+        // Rebuild the likelihood object from scratch for this configuration.
+        delete tl;
+
+        if (nhOpt == "no")
+          tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true, false, true);
+        else
+          tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, false, true);
+
+        tl->initialize();
+        logL = tl->getValue();
+        Vdouble Vd = tl->getLogLikelihoodForEachSite();
+        for (size_t j = 0; j < nSites; j++)
+        {
+          rates(j, modname) = TextTools::toString(Vd[j]);
+        }
+
+        ApplicationTools::displayMessage("\n");
+        ApplicationTools::displayMessage("Model " + modname + ":");
+        ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15));
+        ApplicationTools::displayResult("Probability", TextTools::toString(vprob[i], 15));
+      }
+
+      DataTable::write(rates, out, "\t");
+    }
+
+    // Case of a MixtureOfASubstitutionModel
+
+    else
+    {
+      if (fromBiblio)
+      {
+        ApplicationTools::displayError("!!! Not available for models parametrized upon bibliography.");
+        ApplicationTools::displayError("!!! Please convert into MixedModel declaration.");
+        exit(-1);
+      }
+
+      // Nothing is written when p0 is neither of the two supported mixture types.
+      MixtureOfASubstitutionModel* pMSM2 = dynamic_cast<MixtureOfASubstitutionModel*>(p0);
+      if (pMSM2 != NULL)
+      {
+        if (parname == "")
+        {
+          ApplicationTools::displayError("Argument likelihoods.parameter_name is required.");
+          exit(-1);
+        }
+
+        size_t nummod = pMSM2->getNumberOfModels();
+
+        // vvnmod[k] holds the submodel numbers returned for "<parname>_<k+1>";
+        // collection stops at the first description that yields no submodel.
+        vector<vector<int> > vvnmod;
+        for (size_t i2 = 0; i2 < nummod; i2++)
+        {
+          string par2 = parname + "_" + TextTools::toString(i2 + 1);
+          Vint vnmod = pMSM2->getSubmodelNumbers(par2);
+          if (vnmod.size() == 0)
+            break;
+          vvnmod.push_back(vnmod);
+        }
+
+        size_t nbcl = vvnmod.size();
+
+        // Without any class, the normalisation below would be fed empty vectors.
+        if (nbcl == 0)
+        {
+          ApplicationTools::displayError("No submodel found for parameter " + parname + ".");
+          exit(-1);
+        }
+
+        Vdouble vprob = pMSM2->getProbabilities();
+
+        // vvprob[k]: probabilities of the submodels of class k; vsprob[k]: their sum.
+        vector<vector<double> > vvprob;
+        vector<double> vsprob;
+
+        for (size_t i = 0; i < nbcl; i++)
+        {
+          vector<double> vprob2;
+          for (size_t j = 0; j < vvnmod[i].size(); j++)
+          {
+            vprob2.push_back(vprob[vvnmod[i][j]]);
+          }
+
+          vvprob.push_back(vprob2);
+          vsprob.push_back(VectorTools::sum(vvprob[i]));
+        }
+
+        // Columns: Sites, nbcl "Ll_" columns, nbcl "Pr_" columns, then "mean".
+        vector<string> colNames;
+        colNames.push_back("Sites");
+
+        // dval[k]: value of the parameter in the first submodel of class k.
+        Vdouble dval;
+        for (size_t i = 0; i < nbcl; i++)
+        {
+          SubstitutionModel* pSM = pMSM2->getNModel(vvnmod[i][0]);
+          double valPar = pSM->getParameterValue(pSM->getParameterNameWithoutNamespace(parname));
+          dval.push_back(valPar);
+          colNames.push_back("Ll_" + parname + "=" + TextTools::toString(valPar));
+        }
+        for (size_t i = 0; i < nbcl; i++)
+        {
+          colNames.push_back("Pr_" + parname + "=" + TextTools::toString(dval[i]));
+        }
+        colNames.push_back("mean");
+
+        // Automatic storage: the table is released when leaving this branch.
+        DataTable rates(nSites, colNames.size());
+        rates.setColumnNames(colNames);
+
+        for (size_t i = 0; i < nSites; i++)
+        {
+          const Site* currentSite = &sites->getSite(i);
+          int currentSitePosition = currentSite->getPosition();
+          rates(i, "Sites") = TextTools::toString(currentSitePosition);
+        }
+
+        // vvd[k][j]: log likelihood of site j when only class k is enabled.
+        VVdouble vvd;
+
+        for (size_t i = 0; i < nbcl; i++)
+        {
+          string par2 = parname + "_" + TextTools::toString(i + 1);
+          // Disable every submodel, then re-enable those of class i with
+          // their probabilities rescaled by the sum over the class.
+          for (size_t j = 0; j < nummod; j++)
+            pMSM2->setNProbability(j, 0);
+
+          for (size_t j = 0; j < vvprob[i].size(); j++)
+            pMSM2->setNProbability(vvnmod[i][j], vvprob[i][j] / vsprob[i]);
+
+          // Rebuild the likelihood object from scratch for this configuration.
+          delete tl;
+
+          if (nhOpt == "no")
+            tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true, false, true);
+          else
+            tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, false, true);
+
+          tl->initialize();
+          logL = tl->getValue();
+          Vdouble vd = tl->getLogLikelihoodForEachSite();
+
+          for (size_t j = 0; j < nSites; j++)
+            rates(j, i + 1) = TextTools::toString(vd[j]);
+
+          vvd.push_back(vd);
+
+          ApplicationTools::displayMessage("\n");
+          ApplicationTools::displayMessage("Parameter " + par2 + ":");
+
+          ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15));
+          ApplicationTools::displayResult("Probability", TextTools::toString(vsprob[i], 15));
+        }
+
+        for (size_t j = 0; j < nSites; j++)
+        {
+          // log(class probability) + log likelihood of the site under that class.
+          Vdouble vd;
+          for (size_t i = 0; i < nbcl; i++)
+            vd.push_back(std::log(vsprob[i]) + vvd[i][j]);
+
+          // Presumably normalises vd in log space so that the "Pr_" columns
+          // sum to one -- TODO confirm against VectorTools::logNorm.
+          VectorTools::logNorm(vd);
+          for (size_t i = 0; i < nbcl; i++)
+            rates(j, nbcl + i + 1) = TextTools::toString(std::exp(vd[i]));
+          rates(j, 2 * nbcl + 1) = TextTools::toString(VectorTools::sumExp(vd, dval));
+        }
+
+        DataTable::write(rates, out, "\t");
+      }
+    }
+
+    // Release the likelihood first: it was built from the objects deleted after it.
+    delete tl;
+    // p0 is an owned clone only when it was extracted from a bibliography model.
+    if (fromBiblio)
+      delete p0;
+    delete alphabet;
+    delete sites;
+    delete model;
+    delete modelSet;
+    delete rDist;
+    delete tree;
+    ApplicationTools::displayMessage("\n");
+    bppmixedlikelihoods.done();
+  }
+
+  catch (exception& e)
+  {
+    cout << e.what() << endl;
+    return 1;
+  }
+
+  return 0;
+}
+
diff --git a/bppSuite/bppPars.cpp b/bppSuite/bppPars.cpp
index 2b51d0e..4065f48 100644
--- a/bppSuite/bppPars.cpp
+++ b/bppSuite/bppPars.cpp
@@ -6,7 +6,7 @@
//
/*
-Copyright or © or Copr. CNRS
+Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to estimate
phylogenies and evolutionary parameters from a dataset according to
@@ -53,6 +53,7 @@ using namespace std;
// From SeqLib:
#include <Bpp/Seq/Alphabet/Alphabet.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
+#include <Bpp/Seq/Container/SiteContainerTools.h>
#include <Bpp/Seq/SiteTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
@@ -79,13 +80,13 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Parsimony Methods, version 0.1.0 *" << endl;
+ cout << "* Bio++ Parsimony Methods, version 0.2.0 *" << endl;
cout << "* Author: J. Dutheil Created 05/05/07 *" << endl;
- cout << "* Last Modif. 08/08/09 *" << endl;
+ cout << "* Last Modif. 13/06/12 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
- if(args == 1)
+ if (args == 1)
{
help();
return 0;
@@ -96,13 +97,16 @@ int main(int args, char ** argv)
BppApplication bpppars(args, argv, "BppPars");
bpppars.startTimer();
- Alphabet * alphabet = SequenceApplicationTools::getAlphabet(bpppars.getParams(), "", false);
+ Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bpppars.getParams(), "", false);
+
+ bool includeGaps = ApplicationTools::getBooleanParameter("use.gaps", bpppars.getParams(), false, "", false, false);
+ ApplicationTools::displayBooleanResult("Use gaps", includeGaps);
- VectorSiteContainer * allSites = SequenceApplicationTools::getSiteContainer(alphabet, bpppars.getParams());
+ VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bpppars.getParams());
- VectorSiteContainer * sites = SequenceApplicationTools::getSitesToAnalyse(* allSites, bpppars.getParams());
+ VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(* allSites, bpppars.getParams(), "", true, !includeGaps, true);
delete allSites;
-
+
ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences()));
ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites()));
@@ -124,7 +128,7 @@ int main(int args, char ** argv)
else throw Exception("Unknown init tree method.");
ApplicationTools::displayTask("Initializing parsimony");
- DRTreeParsimonyScore* tp = new DRTreeParsimonyScore(*tree, *sites, false);
+ DRTreeParsimonyScore* tp = new DRTreeParsimonyScore(*tree, *sites, false, includeGaps);
delete tree;
ApplicationTools::displayTaskDone();
double score = tp->getScore();
@@ -143,7 +147,7 @@ int main(int args, char ** argv)
//Bootstrap:
unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bpppars.getParams(), 0);
- if(nbBS > 0)
+ if (nbBS > 0)
{
ApplicationTools::displayResult("Number of bootstrap samples", TextTools::toString(nbBS));
const Tree* initTree = tree;
@@ -156,7 +160,7 @@ int main(int args, char ** argv)
string bsTreesPath = ApplicationTools::getAFilePath("bootstrap.output.file", bpppars.getParams(), false, false);
ofstream *out = 0;
- if(bsTreesPath != "none")
+ if (bsTreesPath != "none")
{
ApplicationTools::displayResult("Bootstrap trees stored in file", bsTreesPath);
out = new ofstream(bsTreesPath.c_str(), ios::out);
@@ -185,7 +189,8 @@ int main(int args, char ** argv)
ApplicationTools::displayTask("Compute bootstrap values", true);
TreeTools::computeBootstrapValues(*tree, bsTrees);
ApplicationTools::displayTaskDone();
- for(unsigned int i = 0; i < nbBS; i++) delete bsTrees[i];
+ for (unsigned int i = 0; i < nbBS; i++)
+ delete bsTrees[i];
//Write resulting tree:
PhylogeneticsApplicationTools::writeTree(*tree, bpppars.getParams());
@@ -197,7 +202,7 @@ int main(int args, char ** argv)
bpppars.done();
}
- catch(exception & e)
+ catch (exception & e)
{
cout << e.what() << endl;
return 1;
diff --git a/bppSuite/bppPhyloSampler.cpp b/bppSuite/bppPhyloSampler.cpp
index d9e2fa9..9737a4b 100755
--- a/bppSuite/bppPhyloSampler.cpp
+++ b/bppSuite/bppPhyloSampler.cpp
@@ -148,12 +148,12 @@ int main(int args, char ** argv)
//Compute lengths:
vector<string> seqNames;
- vector<unsigned int> seqLen(dist->size());
+ vector<size_t> seqLen(dist->size());
string name;
- for(unsigned int i = 0; i < dist->size(); i++)
+ for(size_t i = 0; i < dist->size(); i++)
{
name = dist->getName(i);
- if(critMeth == "length.complete")
+ if (critMeth == "length.complete")
seqLen[i] = SequenceTools::getNumberOfCompleteSites(seqs->getSequence(name));
else
seqLen[i] = SequenceTools::getNumberOfSites(seqs->getSequence(name));
@@ -206,7 +206,7 @@ int main(int args, char ** argv)
else throw Exception("Unknown criterion: " + critMeth);
//Remove sequence in list:
- unsigned int pos = VectorTools::which(seqNames, dist->getName(rm));
+ size_t pos = VectorTools::which(seqNames, dist->getName(rm));
ApplicationTools::displayResult("Remove sequence", seqNames[pos]);
seqNames.erase(seqNames.begin() + pos);
@@ -239,7 +239,7 @@ int main(int args, char ** argv)
else throw Exception("Unknown criterion: " + critMeth);
//Remove sequence in list:
- unsigned int pos = VectorTools::which(seqNames, dist->getName(rm));
+ size_t pos = VectorTools::which(seqNames, dist->getName(rm));
ApplicationTools::displayResult("Remove sequence", seqNames[pos]);
seqNames.erase(seqNames.begin() + pos);
@@ -252,7 +252,7 @@ int main(int args, char ** argv)
//Write sequences to file:
AlignedSequenceContainer asc(alphabet);
- for(unsigned int i = 0; i < seqNames.size(); i++)
+ for (size_t i = 0; i < seqNames.size(); i++)
asc.addSequence(seqs->getSequence(seqNames[i]));
SequenceApplicationTools::writeAlignmentFile(asc, bppphysamp.getParams());
diff --git a/bppSuite/bppReRoot.cpp b/bppSuite/bppReRoot.cpp
index 5260c6f..a2acf93 100644
--- a/bppSuite/bppReRoot.cpp
+++ b/bppSuite/bppReRoot.cpp
@@ -171,10 +171,10 @@ int main(int args, char ** argv)
vector<string> leavesTree;
leavesTree = (*tree).getLeavesNames();
- unsigned int numNodes = tree->getNumberOfNodes() - 1;
- unsigned int numNodeWithBranchLength = 0;
+ size_t numNodes = tree->getNumberOfNodes() - 1;
+ size_t numNodeWithBranchLength = 0;
vector<Node *> nodes = tree->getNodes();
- for (unsigned int i = 0; i < nodes.size(); i++)
+ for (size_t i = 0; i < nodes.size(); i++)
{
if(nodes[i]->hasDistanceToFather())
numNodeWithBranchLength++;
@@ -187,7 +187,7 @@ int main(int args, char ** argv)
vector<string> outGroup;
bool found = false;
bool analyseOutgroupLevel = true;
- for (unsigned int t = 0; t < levelOutgroup.size() && analyseOutgroupLevel; t++)
+ for (size_t t = 0; t < levelOutgroup.size() && analyseOutgroupLevel; t++)
{
outGroup.clear();
vector<string>::iterator Iterator;
@@ -227,13 +227,13 @@ int main(int args, char ** argv)
{
bool monophylOk = true;
- for(unsigned f = 0; f < newRoot->getNumberOfSons() && monophylOk; f++)
+ for (size_t f = 0; f < newRoot->getNumberOfSons() && monophylOk; f++)
{
tempLeaves = TreeTemplateTools::getLeavesNames(*newRoot->getSon(f));
vector<string> diff;
VectorTools::diff(outGroup, tempLeaves, diff);
- unsigned int difference = diff.size();
+ size_t difference = diff.size();
if (!( (difference == 0) || (difference == tempLeaves.size()) ) )
{
//The proposed outgroup is not monophyletic. The analysis for this tree is interrupted
diff --git a/bppSuite/bppSeqGen.cpp b/bppSuite/bppSeqGen.cpp
index 04c0601..9e22698 100644
--- a/bppSuite/bppSeqGen.cpp
+++ b/bppSuite/bppSeqGen.cpp
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS
+Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to simulate sequence
data according to a phylogenetic tree and an evolutionary model.
@@ -55,6 +55,7 @@ using namespace std;
// From SeqLib:
#include <Bpp/Seq/Alphabet/Alphabet.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
+#include <Bpp/Seq/Container/SequenceContainerTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
// From PhylLib:
@@ -62,6 +63,8 @@ using namespace std;
#include <Bpp/Phyl/App/PhylogeneticsApplicationTools.h>
#include <Bpp/Phyl/Simulation.all>
#include <Bpp/Phyl/Model/SubstitutionModelSetTools.h>
+#include <Bpp/Phyl/Model/RateDistribution/ConstantRateDistribution.h>
+#include <Bpp/Phyl/Model/FrequenciesSet/MvaFrequenciesSet.h>
#include <Bpp/Phyl/Io/Newick.h>
using namespace bpp;
@@ -126,9 +129,12 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Sequence Generator, version 1.1.0 *" << endl;
- cout << "* Author: J. Dutheil *" << endl;
- cout << "* B. Boussau Last Modif. 08/08/09 *" << endl;
+ cout << "* Bio++ Sequence Generator, version 1.2.0 *" << endl;
+ cout << "* *" << endl;
+ cout << "* Authors: J. Dutheil *" << endl;
+ cout << "* B. Boussau Last Modif. 29/01/13 *" << endl;
+ cout << "* L. Gu�guen *" << endl;
+ cout << "* M. Groussin *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -204,41 +210,70 @@ int main(int args, char ** argv)
{
if(inputTrees == "multiple")
throw Exception("Multiple input trees cannot be used with non-homogeneous simulations.");
- SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, 0, bppseqgen.getParams());
+ SubstitutionModel* model = 0;
+ string modelName = ApplicationTools::getStringParameter("model", bppseqgen.getParams(), "");
+ if (!TextTools::hasSubstring(modelName,"COaLA"))
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, 0, bppseqgen.getParams());
+ else
+ {
+ //COaLA model
+ VectorSiteContainer* allSitesAln = 0;
+ allSitesAln = SequenceApplicationTools::getSiteContainer(alphabet, bppseqgen.getParams());
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, allSitesAln, bppseqgen.getParams());
+ }
+
vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppseqgen.getParams(), ',', "");
vector<double> rateFreqs;
if (model->getNumberOfStates() != alphabet->getSize())
{
//Markov-Modulated Markov Model...
- unsigned int n =(unsigned int)(model->getNumberOfStates() / alphabet->getSize());
- rateFreqs = vector<double>(n, 1./(double)n); // Equal rates assumed for now, may be changed later (actually, in the most general case,
+ unsigned int n = static_cast<unsigned int>(model->getNumberOfStates() / alphabet->getSize());
+ rateFreqs = vector<double>(n, 1./static_cast<double>(n)); // Equal rates assumed for now, may be changed later (actually, in the most general case,
// we should assume a rate distribution for the root also!!!
}
FrequenciesSet* rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, 0, bppseqgen.getParams(), rateFreqs);
+ string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", bppseqgen.getParams(), "Full(init=observed)");
+ if (freqDescription.substr(0,10) == "MVAprotein")
+ {
+ dynamic_cast<MvaFrequenciesSet*>(rootFreqs)->setModelName("MVAprotein");
+ dynamic_cast<MvaFrequenciesSet*>(rootFreqs)->initSet(dynamic_cast<CoalaCore*>(model));
+ }
modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, trees[0], globalParameters);
}
//General case:
else if (nhOpt == "general")
{
- if(inputTrees == "multiple")
+ if (inputTrees == "multiple")
throw Exception("Multiple input trees cannot be used with non-homogeneous simulations.");
- modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, 0, bppseqgen.getParams());
+ string modelName = ApplicationTools::getStringParameter("model1",bppseqgen.getParams(),"");
+ if (!TextTools::hasSubstring(modelName,"COaLA"))
+ modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, 0, bppseqgen.getParams());
+ else
+ {
+ //COaLA model
+ VectorSiteContainer* allSitesAln = 0;
+ allSitesAln = SequenceApplicationTools::getSiteContainer(alphabet, bppseqgen.getParams());
+ modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, allSitesAln, bppseqgen.getParams());
+ }
}
else throw Exception("Unknown non-homogeneous option: " + nhOpt);
- DiscreteDistribution* rDist = 0;
+ if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet))
+ throw Exception("Non-homogeneous mixed substitution sequence generation not implemented, sorry!");
+
+ DiscreteDistribution* rDist = 0;
NonHomogeneousSequenceSimulator* seqsim = 0;
SiteContainer* sites = 0;
if (infosFile != "none")
{
ifstream in(infosFile.c_str());
DataTable* infos = DataTable::read(in, "\t");
- rDist = new ConstantDistribution(1., true);
- unsigned int nbSites = infos->getNumberOfRows();
+ rDist = new ConstantRateDistribution();
+ size_t nbSites = infos->getNumberOfRows();
ApplicationTools::displayResult("Number of sites", TextTools::toString(nbSites));
vector<double> rates(nbSites);
vector<string> ratesStrings = infos->getColumn(string("pr"));
- for(unsigned int i = 0; i < nbSites; i++)
+ for (size_t i = 0; i < nbSites; i++)
{
rates[i] = TextTools::toDouble(ratesStrings[i]);
}
@@ -287,14 +322,14 @@ int main(int args, char ** argv)
if (modelSet->getNumberOfStates() > modelSet->getAlphabet()->getSize())
{
//Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
+ rDist = new ConstantRateDistribution();
}
else
{
- rDist = PhylogeneticsApplicationTools::getRateDistribution(bppseqgen.getParams());
+ rDist = PhylogeneticsApplicationTools::getRateDistribution(bppseqgen.getParams());
}
- unsigned int nbSites = ApplicationTools::getParameter<unsigned int>("number_of_sites", bppseqgen.getParams(), 100);
+ size_t nbSites = ApplicationTools::getParameter<size_t>("number_of_sites", bppseqgen.getParams(), 100);
if (trees.size() == 1)
{
seqsim = new NonHomogeneousSequenceSimulator(modelSet, rDist, trees[0]);
@@ -308,8 +343,8 @@ int main(int args, char ** argv)
ApplicationTools::displayTask("Perform simulations", true);
ApplicationTools::displayGauge(0, trees.size() - 1, '=');
seqsim = new NonHomogeneousSequenceSimulator(modelSet, rDist, trees[0]);
- unsigned int previousPos = 0;
- unsigned int currentPos = static_cast<unsigned int>(round(positions[1]*static_cast<double>(nbSites)));
+ size_t previousPos = 0;
+ size_t currentPos = static_cast<unsigned int>(round(positions[1]*static_cast<double>(nbSites)));
SequenceContainer* tmpCont1 = seqsim->simulate(currentPos - previousPos);
previousPos = currentPos;
delete seqsim;
diff --git a/bppSuite/bppSeqMan.cpp b/bppSuite/bppSeqMan.cpp
index 02f1b63..ea5c6af 100644
--- a/bppSuite/bppSeqMan.cpp
+++ b/bppSuite/bppSeqMan.cpp
@@ -52,6 +52,7 @@ using namespace std;
// From SeqLib:
#include <Bpp/Seq/SiteTools.h>
#include <Bpp/Seq/Alphabet/Alphabet.h>
+#include <Bpp/Seq/Alphabet/AlphabetTools.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
#include <Bpp/Seq/Io.all>
@@ -313,7 +314,7 @@ int main(int args, char** argv)
throw Exception("'RemoveColumnsWithStop' can only be used on alignment. You may consider using the 'CoerceToAlignment' command.");
}
- for (unsigned int i = sites->getNumberOfSites(); i > 0; i--)
+ for (size_t i = sites->getNumberOfSites(); i > 0; i--)
{
if (SiteTools::hasStopCodon(sites->getSite(i-1)))
sites->deleteSite(i - 1);
@@ -331,10 +332,10 @@ int main(int args, char** argv)
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
BasicSequence seq = sequences->getSequence(i);
- unsigned int len = seq.size();
+ size_t len = seq.size();
SequenceTools::getCDS(seq, false, true, true, false);
if (aligned) {
- for (unsigned int c = seq.size(); c < len; ++c)
+ for (size_t c = seq.size(); c < len; ++c)
seq.addElement(seq.getAlphabet()->getGapCharacterCode());
}
sc->addSequence(seq, false);
@@ -390,7 +391,7 @@ int main(int args, char** argv)
if (maxGapOption[maxGapOption.size()-1] == '%')
{
double gapFreq = TextTools::toDouble(maxGapOption.substr(0, maxGapOption.size()-1)) / 100.;
- for (unsigned int i = sites->getNumberOfSites(); i > 0; i--)
+ for (size_t i = sites->getNumberOfSites(); i > 0; i--)
{
map<int, double> freqs;
SiteTools::getFrequencies(sites->getSite(i - 1), freqs);
@@ -399,10 +400,10 @@ int main(int args, char** argv)
}
else
{
- unsigned int gapNum=TextTools::to<unsigned int>(maxGapOption);
- for (unsigned int i = sites->getNumberOfSites(); i > 0; i--)
+ size_t gapNum = TextTools::to<size_t>(maxGapOption);
+ for (size_t i = sites->getNumberOfSites(); i > 0; i--)
{
- map<int, unsigned int> counts;
+ map<int, size_t> counts;
SiteTools::getCounts(sites->getSite(i - 1), counts);
counts[-1]; //Needed in case this entry does not exist in the map. This will set it to 0.
if (counts[-1] > gapNum) sites->deleteSite(i-1);
diff --git a/bppsuite.spec b/bppsuite.spec
new file mode 100644
index 0000000..18933f0
--- /dev/null
+++ b/bppsuite.spec
@@ -0,0 +1,136 @@
+%define _basename bppsuite
+%define _version 0.8.0
+%define _release 1
+%define _prefix /usr
+
+URL: http://home.gna.org/bppsuite/
+
+Name: %{_basename}
+Version: %{_version}
+Release: %{_release}
+License: CECILL-2.0
+Vendor: The Bio++ Project
+Source: http://biopp.univ-montp2.fr/repos/sources/%{_basename}-%{_version}.tar.gz
+Summary: The Bio++ Program Suite
+Group: Productivity/Scientific/Other
+
+Requires: libbpp-phyl9 = 2.1.0
+Requires: libbpp-seq9 = 2.1.0
+Requires: libbpp-core2 = 2.1.0
+
+BuildRoot: %{_builddir}/%{_basename}-root
+BuildRequires: cmake >= 2.6.0
+BuildRequires: gcc-c++ >= 4.0.0
+BuildRequires: groff
+BuildRequires: texinfo >= 4.0.0
+BuildRequires: libbpp-core2 = 2.1.0
+BuildRequires: libbpp-core-devel = 2.1.0
+BuildRequires: libbpp-seq9 = 2.1.0
+BuildRequires: libbpp-seq-devel = 2.1.0
+BuildRequires: libbpp-phyl9 = 2.1.0
+BuildRequires: libbpp-phyl-devel = 2.1.0
+
+
+AutoReq: yes
+AutoProv: yes
+%if 0%{?mdkversion}
+%if 0%{?mdkversion} >= 201100
+BuildRequires: xz
+%define zipext xz
+%else
+BuildRequires: lzma
+%define zipext lzma
+%endif
+%else
+BuildRequires: gzip
+%define zipext gz
+%endif
+
+%description
+Bio++ program suite includes programs:
+ - BppML for maximum likelihood analysis,
+ - BppSeqGen for sequences simulation,
+ - BppAncestor for ancestral states reconstruction,
+ - BppDist for distance methods,
+ - BppPars for parsimony analysis,
+ - BppSeqMan for file conversion and sequence manipulation,
+ - BppConsense for building consensus tree and computing bootstrap values,
+ - BppPhySamp for phylogenetic sampling,
+ - BppReRoot for tree rerooting.
+ - BppTreeDraw for tree drawing.
+ - BppAlnScore for comparing alignments and computing alignment scores.
+ - BppMixedLikelihoods for computing the site per site likelihoods of submodels from a mixture model.
+
+%prep
+%setup -q
+
+%build
+CFLAGS="-I%{_prefix}/include $RPM_OPT_FLAGS"
+CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=%{_prefix}"
+if [ %{_lib} == 'lib64' ] ; then
+ CMAKE_FLAGS="$CMAKE_FLAGS -DLIB_SUFFIX=64"
+fi
+if [ %{zipext} == 'lzma' ] ; then
+ CMAKE_FLAGS="$CMAKE_FLAGS -DDOC_COMPRESS=lzma -DDOC_COMPRESS_EXT=lzma"
+fi
+if [ %{zipext} == 'xz' ] ; then
+ CMAKE_FLAGS="$CMAKE_FLAGS -DDOC_COMPRESS=xz -DDOC_COMPRESS_EXT=xz"
+fi
+
+cmake $CMAKE_FLAGS .
+make
+make info
+
+%install
+make DESTDIR=$RPM_BUILD_ROOT install
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%post -p /sbin/ldconfig
+
+%postun -p /sbin/ldconfig
+
+%files
+%defattr(-,root,root)
+%doc AUTHORS.txt COPYING.txt INSTALL.txt ChangeLog
+%{_prefix}/bin/bppml
+%{_prefix}/bin/bppseqgen
+%{_prefix}/bin/bppancestor
+%{_prefix}/bin/bppdist
+%{_prefix}/bin/bpppars
+%{_prefix}/bin/bppseqman
+%{_prefix}/bin/bppconsense
+%{_prefix}/bin/bppphysamp
+%{_prefix}/bin/bppreroot
+%{_prefix}/bin/bpptreedraw
+%{_prefix}/bin/bppalnscore
+%{_prefix}/bin/bppmixedlikelihoods
+%{_prefix}/share/info/bppsuite.info.%{zipext}
+%{_prefix}/share/man/man1/bppml.1.%{zipext}
+%{_prefix}/share/man/man1/bppseqgen.1.%{zipext}
+%{_prefix}/share/man/man1/bppancestor.1.%{zipext}
+%{_prefix}/share/man/man1/bpppars.1.%{zipext}
+%{_prefix}/share/man/man1/bppdist.1.%{zipext}
+%{_prefix}/share/man/man1/bppconsense.1.%{zipext}
+%{_prefix}/share/man/man1/bppseqman.1.%{zipext}
+%{_prefix}/share/man/man1/bppreroot.1.%{zipext}
+%{_prefix}/share/man/man1/bppphysamp.1.%{zipext}
+%{_prefix}/share/man/man1/bpptreedraw.1.%{zipext}
+%{_prefix}/share/man/man1/bppalnscore.1.%{zipext}
+%{_prefix}/share/man/man1/bppmixedlikelihoods.1.%{zipext}
+
+%changelog
+* Fri Mar 08 2013 Julien Dutheil <julien.dutheil at univ-montp2.fr> 0.8.0-1
+- New models for proteins (COaLA)
+- New program bppMixedLikelihoods
+* Wed Feb 15 2012 Julien Dutheil <julien.dutheil at univ-montp2.fr> 0.7.0-1
+- More models, sequence formats and bugs fixed. New bppAlnScore program.
+* Thu Jun 09 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr> 0.6.2-1
+* Mon Feb 28 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr> 0.6.1-1
+* Mon Feb 07 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr> 0.6.0-1
+* Thu Mar 25 2010 Julien Dutheil <julien.dutheil at univ-montp2.fr> 0.5.0-1
+* Wed Jun 10 2009 Julien Dutheil <jdutheil at birc.au.dk> 0.4.0-1
+* Thu Dec 11 2008 Julien Dutheil <jdutheil at birc.au.dk> 0.3.1-1
+* Thu Sep 23 2008 Julien Dutheil <jdutheil at birc.au.dk> 0.3.0-1
+- Initial spec file.
diff --git a/debian/bppsuite.manpages b/debian/bppsuite.manpages
index 008318e..58f176a 100644
--- a/debian/bppsuite.manpages
+++ b/debian/bppsuite.manpages
@@ -8,3 +8,4 @@ man/bppphysamp.1.gz
man/bppreroot.1.gz
man/bppconsense.1.gz
man/bpptreedraw.1.gz
+man/bppmixedlikelihoods.1.gz
diff --git a/debian/changelog b/debian/changelog
index 6516fcc..999f8e9 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+bppsuite (0.8.0-1) unstable; urgency=low
+
+ * New models for proteins (COaLA)
+ * New program bppMixedLikelihoods
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Fri, 08 Mar 2013 11:41:00 +0100
+
bppsuite (0.7.0-1) unstable; urgency=low
* Several program improvements (more models, options, etc.)
diff --git a/debian/control b/debian/control
index f7c1347..a26ceba 100644
--- a/debian/control
+++ b/debian/control
@@ -23,4 +23,5 @@ Description: Bio++ program suite
- BppReRoot for tree rerooting.
- BppTreeDraw for tree drawing.
- BppAlnScore for comparing alignments and computing alignment scores.
+ - BppMixedLikelihoods for computing site per site likelihoods of components of mixture models.
diff --git a/debian/copyright b/debian/copyright
index c5ecbbd..3bdf7c9 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -1,15 +1,15 @@
This package was debianized by Julien Dutheil <julien.dutheil at univ-montp2.fr> on
-Mon, 28 Feb 2011 09:00:00 +0100.
+Fri, 08 Mar 2013 11:41:00 +0100
It was downloaded from <http://download.gna.org/bppsuite/source>
-Upstream Author(s):
+Upstream Author:
Julien Dutheil <julien.dutheil at univ-montp2.fr>
Copyright:
- Copyright (C) 2011 Bio++ Development Team
+ Copyright (C) 2013 Bio++ Development Team
License:
@@ -27,7 +27,7 @@ License:
along with this package; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-The Debian packaging is (C) 2011, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
+The Debian packaging is (C) 2013, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
is licensed under the GPL, see `/usr/share/common-licenses/GPL'.
The provided software is distributed under the CeCILL license:
diff --git a/debian/rules b/debian/rules
index 2270004..14221ee 100755
--- a/debian/rules
+++ b/debian/rules
@@ -64,16 +64,24 @@ clean:
# Add here commands to clean up after the build process.
[ ! -f Makefile ] || $(MAKE) clean;
[ ! -f Makefile ] || rm Makefile;
- [ ! -f Makefile ] || rm bppSuite/Makefile;
+ [ ! -f bppSuite/Makefile ] || rm bppSuite/Makefile;
+ [ ! -f doc/Makefile ] || rm doc/Makefile;
+ [ ! -f man/Makefile ] || rm man/Makefile;
+ rm -f man/*.gz;
rm -f config.sub config.guess
rm -f build-stamp
rm -f CMakeCache.txt
rm -f *.cmake
rm -f bppSuite/*.cmake
#rm -f test/*.cmake
+ rm -f man/*.cmake
+ rm -f doc/*.cmake
rm -rf CMakeFiles
rm -rf bppSuite/CMakeFiles
#rm -rf test/CMakeFiles
+ rm -rf man/CMakeFiles
+ rm -rf doc/CMakeFiles
+ rm -rf doc/bppsuite.info
rm -rf _CPack_Packages
#rm -rf Testing
#rm -f DartConfiguration.tcl
diff --git a/doc/bppsuite.texi b/doc/bppsuite.texi
index 48af52f..b8f527b 100644
--- a/doc/bppsuite.texi
+++ b/doc/bppsuite.texi
@@ -21,7 +21,7 @@
@c %**end of header
@copying
-This is the manual of the Bio++ Program Suite, version 0.6.0.
+This is the manual of the Bio++ Program Suite, version 0.7.0.
Copyright @copyright{} 2007, 2008, 2009, 2010, 2011, 2012 Bio++ development team
@end copying
@@ -60,16 +60,37 @@ Common options encountered in several programs.
* Sequences:: Loading sequences/alignments.
* Tree:: Loading trees.
+* AlphabetIndex:: Setting alphabet indexes.
* Model:: Setting up a substitution model.
+* Distribution:: Setting of the discrete distributions.
* Estimation:: Estimating parameters by maximizing a likelihood function.
* WritingSequences:: Writing sequences/alignments to files.
* WritingTrees:: Writing trees to files.
+Model specification
+
+* Declaration:: Numerous declarations of models.
+* Non-homogeneity:: Specific declaration of non-homogeneous modelling.
+* FrequenciesSet:: Frequencies
+* Rates:: Rates across sites
+
+Setting up the substitution model
+
+* Nucleotide:: Nucleotide models
+* Protein:: Protein models
+* Miscellaneous:: Miscellaneous models
+* Codon:: Codon models
+* Multiple:: General multiple site models
+* Meta:: Meta models
+* Mixture:: Mixture of models
+* Linking:: Linking parameters
+
Bio++ Program Suite Reference
* bppml:: Bio++ Maximum Likelihood.
* bppseqgen:: Bio++ Sequence Generator.
* bppancestor:: Bio++ Ancestral Sequences and Rates reconstruction.
+* bppmixedlikelihoods:: Bio++ Site-Likelihoods Inside Mixed Models.
* bppdist:: Bio++ Distance Methods.
* bpppars:: Bio++ Maximum Parsimony.
* bppconsense:: Bio++ Consensus Trees.
@@ -284,7 +305,9 @@ data=LSU
@menu
* Sequences:: Loading sequences/alignments.
* Tree:: Loading trees.
+* AlphabetIndex:: Setting alphabet indexes.
* Model:: Setting up a substitution model.
+* Distribution:: Setting of the discrete distributions.
* Estimation:: Estimating parameters by maximizing a likelihood function.
* WritingSequences:: Writing sequences/alignments to files.
* WritingTrees:: Writing trees to files.
@@ -377,11 +400,19 @@ of sequence or a percentage.
Sites not matching the criterion will not be included in the analysis, but the
original site numbering will be used in the output files (if relevant).
+@item input.sequence.max_unresolved_allowed=100%
+This option only works if the program requires an alignment. Only
+works when the @option{all} option is selected. It specifies the
+maximum amount of unresolved states per site, as a number of sequence
+or a percentage. Sites not matching the criterion will not be included
+in the analysis, but the original site numbering will be used in the
+output files (if relevant).
+
@end table
@c ------------------------------------------------------------------------------------------------------------------
-@node Tree, Model, Sequences, Common
+@node Tree, AlphabetIndex, Sequences, Common
@section Reading trees
@table @command
@@ -393,6 +424,15 @@ The format of the input tree file.
@end table
+In case the input tree does not specify node identifiers, some will be generated automatically.
+Node identifiers can be output using the following option:
+@table @command
+@item output.tree_ids.file = @{@{path@}|none@}
+A tree file in newick format, with node ids instead of bootstrap
+values, and leaf names with their id as suffix.
+@end table
+In case it is supported by the program, the use of that option will cause the program to exit just after producing the tagged tree.
+
Some programs may require that your file contains several trees.
The corresponding options are then:
@@ -405,12 +445,70 @@ The format of the input tree file.
@end table
+ at c ------------------------------------------------------------------------------------------------------------------
+
+@node AlphabetIndex, Model, Tree, Common
+@section Specifying alphabet indexes
+
+Some methods require an "alphabet index" to be specified.
+Alphabet indexes associate a value with each alphabet state (Index1, e.g. a biochemical property) or for a pair of states (Index2, e.g. a biochemical distance).
+This section describes the supported indexes:
+
+ at subsection Index1
+
+ at table @command
+ at item None
+If no index should be used.
+ at item Surface, Mass, Volume, Charge @{AA@}
+Basic amino acids properties.
+ at item GranthamPolarity, GranthamVolume @{AA@}
+Grantham's polarity and volume index.
+ at item KleinCharge @{AA@}
+Klein's charge.
+@item ChouFasmanAHelix, ChouFasmanBSheet, ChouFasmanTurn @{AA@}
+Chou and Fasman scores for secondary structure prediction.
+ at item ChenGuHuangHydrophobicity @{AA@}
+Hydrophobicity according to Chen, Gu and Huang.
+ at item SEALow, SEAMedium, SEAHigh @{AA@}
+Solvent Exposed Area, percent of amino acids having a SEA below 10, between 10 and 30, or higher than 30, respectively.
+ at item User
+A user defined Index1, from a file in the AAIndex1 syntax. The input file is specified using the @command{file=@{path@}} argument.
+ at command{file}
+
+ at end table
+
+
+ at subsection Index2
+
+ at table @command
+ at item None
+If no index should be used.
+ at item Blosum50 @{AA@}
+The BLOSUM 50 amino acid distance matrix.
+ at item Grantham, Miyata @{AA@}
+Two biochemical distance matrices. Both accept an optional argument @command{symmetrical=@{boolean@}} allowing to specify if the matrix should be symmetric or not. If not, the distance measure will be signed.
+@item Diff
+Allows computing a distance matrix by taking, for each pair of states, the difference of an Index1 value.
+The Index1 to use is specified using the @command{index1=@{Index1 description@}} argument. An additional argument allows specifying whether the resulting matrix should be symmetric (@command{symmetrical=@{boolean@}}):
+ if so, the absolute difference will be used. Otherwise, the distance will be signed and d[i,j] = - d[j,i].
+ at item User
+A user defined Index2, from a file in the AAIndex2 syntax. The input file is specified using the @command{file=@{path@}} argument.
+The @command{symmetrical=@{boolean@}} argument can be used to specify whether distances should be signed or not.
+ at end table
+
@c ------------------------------------------------------------------------------------------------------------------
-@node Model, Estimation, Tree, Common
+@node Model, Distribution, AlphabetIndex, Common
@section Model specification
+@menu
+* Declaration:: Numerous declarations of models.
+* Non-homogeneity:: Specific declaration of non-homogeneous modelling.
+* FrequenciesSet:: Frequencies
+* Rates:: Rates across sites
+@end menu
+
The substitution model specification over the tree is set up in different parts.
@table @command
@item nonhomogeneous = @{no|one_per_branch|general@}
@@ -426,8 +524,20 @@ also non-homogeneous.
In combination with those models, one can also specify a distribution of site-specific rate.
+@node Declaration, Non-homogeneity, Model, Model
@subsection Setting up the substitution model
+ at menu
+* Nucleotide:: Nucleotide models
+* Protein:: Protein models
+* Miscellaneous:: Miscellaneous models
+* Codon:: Codon models
+* Multiple:: General multiple site models
+* Meta:: Meta models
+* Mixture:: Mixture of models
+* Linking:: Linking parameters
+ at end menu
+
@table @command
@item model = @{model description@}
@@ -465,105 +575,152 @@ the frequencies are computed from observed data.
@end table
+ at node Nucleotide, Protein, Declaration, Declaration
@subsubsection Nucleotide models
@table @command
@item JC69
The Jukes and Cantor model. This model has no additional parameter.
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1JCnuc.html#_details, Bio++ description}.
@item K80([kappa=@{real>0@}])
The Kimura 2 parameters model. @var{kappa} is the transition over
-transversion ratio. Default: @var{kappa}=1
+transversion ratio. Default: @var{kappa}=1. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1K80.html#_details, Bio++ description}.
+
@item F84([kappa=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@},theta2=@{real]0,1[@} ,"equilibrium frequencies"] )
Felsenstein's 1984 substitution model, with transition/transversion
ratio and 4 distinct equilibrium frequencies, set using three
independent parameters: @var{theta} is the GC content, @var{theta1} is
the proportion of G / (G + C) and @var{theta2} is the proportion of A
-/ (A + T or U).
+/ (A + T or U). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1F84.html#_details, Bio++ description}.
+
@item HKY85([kappa=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@} ,"equilibrium frequencies"])
Hasegawa, Kishino and Yano 1985's substitution model. The model is
similar to @command{F84}, but with a different implementation. The
@var{kappa} parameter used here is comparable to the one in
- at command{K80}.
+ at command{K80}. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1HKY85.html#_details, Bio++ description}.
+
@item T92([kappa=@{real>0@}, theta=@{real]0,1[@} ,"equilibrium frequencies"])
Tamura 1992's model for nucleotides, similar to @command{HKY85}, yet
-assuming that the frequencies of A = T/U and G = C.
+assuming that the frequencies of A = T/U and G = C. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1T92.html#_details, Bio++ description}.
@item TN93([kappa1=@{real>0@}, kappa2=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@} ,"equilibrium frequencies"])
Tamura and Nei 1993's model, similar to @command{HKY85}, but allowing
-for two distinct transition/transversion ratios.
+for two distinct transition/transversion ratios. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1TN93.html#_details, Bio++ description}.
+
@item GTR([a=@{real>0@}, b=@{real>0@}, c=@{real>0@}, d=@{real>0@}, e=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@} ,"equilibrium frequencies"])
The General Time-Reversible substitution model. Parameters @var{a},
@var{b}, @var{c}, @var{d}, @var{e} are the entries of the
-exchangeability matrix.
+exchangeability matrix. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1GTR.html#_details, Bio++ description}.
+
@item L95([beta=@{real>0@}, gamma=@{real>0@}, delta=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@} ,"equilibrium frequencies"])
-The strand-symmetric model of Lobry 1995, for nucleotides.
+The strand-symmetric model of Lobry 1995, for nucleotides. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1L95.html#_details, Bio++ description}.
+
@end table
+ at node Protein, Miscellaneous, Nucleotide, Declaration
@subsubsection Protein models
@table @command
@item JC69
-The Jukes and Cantor model. This model has no additional parameter.
+The Jukes and Cantor model. This model has no additional parameter. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1JCprot.html#_details, Bio++ description}.
+
@item DSO78
-Protein substitution model, using the dcmutt implementation of Kosiol and Goldman 2005.
+Protein substitution model, using the dcmutt implementation of Kosiol
+and Goldman 2005. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1DSO78.html#_details, Bio++ description}.
+
@item JTT92
-Protein substitution model, using the dcmutt implementation of Kosiol and Goldman 2005.
+Protein substitution model, using the dcmutt implementation of Kosiol
+and Goldman 2005. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1JTT92.html#_details, Bio++ description}.
+
@item WAG01
-Protein substitution model, from Whelan & Goldman 2001.
+Protein substitution model, from Whelan & Goldman 2001. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1WAG01.html#_details, Bio++ description}.
+
@item LG08
-Protein substitution model, from Le & Gascuel 2008.
+Protein substitution model, from Le & Gascuel 2008. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1LG08.html#_details, Bio++ description}.
+
@item LLG08_EX2([relrate1=@{real]0,1[@}, relproba1=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
-the meaning of the variables in the Mixture model below.
+the meaning of the variables in the Mixture model below. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1LLG08__EX2.html#_details, Bio++ description}.
+
@item LLG08_EX3([relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
-the meaning of the variables in the Mixture model below.
+the meaning of the variables in the Mixture model below. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1LLG08__EX3.html#_details, Bio++ description}.
+
@item LLG08_EHO([relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
-the meaning of the variables in the Mixture model below.
+the meaning of the variables in the Mixture model below. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1LLG08__EHO.html#_details, Bio++ description}.
+
@item LLG08_UL2([relrate1=@{real]0,1[@}, relproba1=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
-the meaning of the variables in the Mixture model below.
+the meaning of the variables in the Mixture model below. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1LLG08__UL2.html#_details, Bio++ description}.
+
@item LLG08_UL3([relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
-the meaning of the variables in the Mixture model below.
+the meaning of the variables in the Mixture model below. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1LLG08__UL3.html#_details, Bio++ description}.
+
@item DSO78+F([theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ... ,"equilibrium frequencies"])
Protein substitution model, using the dcmutt implementation of Kosiol
and Goldman 2005 and free equilibrium frequencies. The @var{thetaX}
are frequencies parameters, where X is 1 to 19. Parameter @var{theta1}
is the proportion of A, @var{theta2} is the proportion of R over
-(1-A), @var{theta3} the proportion of N over (1-A-R), etc.
+(1-A), @var{theta3} the proportion of N over (1-A-R), etc. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1DSO78.html#_details, Bio++ description}.
+
@item JTT92+F([theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., "equilibrium frequencies"])
Protein substitution model, using the dcmutt implementation of Kosiol
-and Goldman 2005 and free equilibrium frequencies.
+and Goldman 2005 and free equilibrium frequencies. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1JTT92.html#_details, Bio++ description}.
+
@item WAG01+F([theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., "equilibrium frequencies"])
Protein substitution model, from Whelan & Goldman 2001, and free
-equilibrium frequencies.
+equilibrium frequencies. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1WAG01.html#_details, Bio++ description}.
+
@item LG08+F([theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., "equilibrium frequencies"])
Protein substitution model, from Le & Gascuel 2008, and free
-equilibrium frequencies.
+equilibrium frequencies. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1LG08.html#_details, Bio++ description}.
+
@item Empirical(name=@{chars@}, file=@{path@})
Build a protein substitution model from a file in PAML format, and use
@@ -576,16 +733,20 @@ namespace, including for frequencies.
@end table
+ at node Miscellaneous, Codon, Protein, Declaration
@subsubsection Miscellaneous models
@table @command
@item Binary([kappa=@{real>0@} ,"equilibrium frequencies"])
Build the model on binary alphabet, where @var{kappa} is the relative
proportion of 1 over 0 in the equilibrium distribution. Default:
- at var{kappa}=1.
+ at var{kappa}=1. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1BinarySubstitutionModel.html#_details, Bio++ description}.
+
@end table
+ at node Codon, Multiple, Miscellaneous, Declaration
@subsubsection Codon models
Standard codon models: the optional @var{genetic_code} argument
@@ -594,11 +755,11 @@ the alphabet is used. The several values available are described
below.
@itemize
- at item EchinodermMitochondrialGeneticCode
- at item InvertebrateMitochondrialGeneticCode
- at item StandardGeneticCode
- at item VertebrateMitochondrialGeneticCode
- at item YeastMitochondrialGeneticCode
+ at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1EchinodermMitochondrialGeneticCode.html#_details, EchinodermMitochondrialGeneticCode}
+ at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1InvertebrateMitochondrialGeneticCode.html#_details,InvertebrateMitochondrialGeneticCode}
+ at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1StandardGeneticCode.html#_details, StandardGeneticCode}
+ at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1VertebrateMitochondrialGeneticCode.html#_details, VertebrateMitochondrialGeneticCode}
+ at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1YeastMitochondrialGeneticCode.html#_details, YeastMitochondrialGeneticCode}
@end itemize
The next codon models also take as argument a @var{frequencies} option
@@ -613,6 +774,20 @@ similar to the ones used in the PAML software:
@item F61: free equilibrium frequencies, stop codons set to 0.
@end itemize
+An optional argument @var{mgmtStopCodon} can be set to define how the
+frequencies computed for stop codons in the case of F1X4 and F3X4 are
+distributed to the other codons.
+
+ at itemize
+ at item uniform : each stop frequency is distributed evenly
+ at item linear : each stop frequency is distributed to the neighbour
+codons (ie 1 substitution away), in proportion to each target codon
+frequency.
+ at item quadratic (default): each stop frequency is distributed to the
+neighbour codons (ie 1 substitution away), in proportion to the square
+of each target codon frequency.
+ at end itemize
+
The same words can be used to specify root frequencies for codon
models, in the case of non stationarity.
@@ -620,30 +795,42 @@ models, in the case of non stationarity.
@item GY94([genetic_code=@{genetic code description@}, kappa=@{real>0@}, V=@{real>0@}, "equilibrium frequencies"])
Goldman and Yang (1994) substitution model for codons (default values:
- at var{kappa}=1 and @var{V}=10000).
+ at var{kappa}=1 and @var{V}=10000). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1GY94.html#_details, Bio++ description}.
+
@item MG94([genetic_code=@{genetic code descrition@}, rho=@{real>0@}, "equilibrium frequencies"])
Muse and Gaut (1994) substitution model for codons (default values:
- at var{rho}=1).
+ at var{rho}=1). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1MG94.html#_details, Bio++ description}.
+
@item YN98([genetic_code=@{genetic code description@}, kappa=@{real>0@}, omega=@{real>0@}, "equilibrium frequencies"])
Yang and Nielsen (1998) substitution model for codons (default values:
- at var{kappa}=1 and @var{omega}=1).
+ at var{kappa}=1 and @var{omega}=1). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YN98.html#_details, Bio++ description}.
+
@item YNGKP_M0([genetic_code=@{genetic code description@}, kappa=@{real>0@}, omega=@{real>0@}, "equilibrium frequencies"])
-The M0 model of PAML, ie the same as YN98.
+The M0 model of PAML, ie the same as YN98. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YN98.html#_details, Bio++ description}.
+
@item YNGKP_M1([genetic_code=@{genetic code description@},kappa=@{real>0@}, omega=@{real>0@}, p0=@{real>0 and <1 @}, "equilibrium frequencies"])
The M1a model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000) (default values: @var{kappa}=1, @var{p0}=0.5,
- at var{omega}=0.5).
+ at var{omega}=0.5). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YNGKP__M1.html#_details, Bio++ description}.
+
@item YNGKP_M2([genetic_code=@{genetic code description@},kappa=@{real>0@}, omega0=@{real>0 and <1@}, theta1=@{real>0 and <1 @}], omega1=@{real>1@}, theta2=@{real>0 and <1 @}, "equilibrium frequencies"])
The M2a model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000), with p0=theta1 and
p1=(1-theta1)*theta2 (default values: @var{kappa}=1, @var{theta1}=0.33333,
- at var{theta2}=0.5, @var{omega0}=0.5, @var{omega2}=0.5).
+ at var{theta2}=0.5, @var{omega0}=0.5, @var{omega2}=0.5). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YNGKP__M2.html#_details, Bio++ description}.
+
@item YNGKP_M3([genetic_code=@{genetic code description@}, n=@{integer>0@}, kappa=@{real>0@}, omega0=@{real>0 and <1@}, delta1=@{real>0@}, ..., delta@var{n-1}=@{real>0@}, theta1=@{real>0 and <1 @}, ..., theta@var{n-1}1=@{real>0 and <1 @}, "equilibrium frequencies"])
@@ -651,20 +838,27 @@ The M3 model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000), with @var{n} discrete values, with p0=theta1
and pk=(1-theta1)*...*(1-thetak)*theta(k+1), and
omegak=omega0+delta1+....+deltak (default values: @var{n}=3,
- at var{kappa}=1, @var{thetak}=1/(n-k+1), @var{omega0}=0.5, @var{deltak}=0.5).
+ at var{kappa}=1, @var{thetak}=1/(n-k+1), @var{omega0}=0.5,
+ at var{deltak}=0.5). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YNGKP__M3.html#_details, Bio++ description}.
+
@item YNGKP_M7(n=@{integer>0@}, genetic_code=@{genetic code description@},kappa=@{real>0@}, p=@{real>1@}, q=@{real>1 @}, "equilibrium frequencies"])
The M7 model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000), with the Beta distribution discretized in @var{n}
-classes (default values: @var{kappa}=1, @var{p}=2, @var{q}=2).
+classes (default values: @var{kappa}=1, @var{p}=2, @var{q}=2). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YNGKP__M7.html#_details, Bio++ description}.
+
@item YNGKP_M8(n=@{integer>0@}, [genetic_code=@{genetic code description@},kappa=@{real>0@}, omegas=@{real>1@}, p0=@{real>0@},p=@{real>1@}, q=@{real>1 @}, "equilibrium frequencies"])
The M8 model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000), with the Beta distribution discretized in @var{n}
classes (default values: @var{kappa}=1, @var{p}=2, @var{q}=2,
- at var{p0}=0.5, @var{omegas}=2).
+ at var{p0}=0.5, @var{omegas}=2). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YNGKP__M8.html#_details, Bio++ description}.
+
@end table
@@ -729,11 +923,14 @@ model=CodonRate(model1=T92(theta=0.5, kappa=2), \
model2=T92(theta=0.4, kappa=2), model3=JC69)
@end example
- at item CodonDistance(model=@{model name@}[, genetic_code=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1CodonRateSubstitutionModel.html#_details, Bio++ description}.
+
+ at item CodonDist(model=@{model name@}[, genetic_code=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
or
- at item CodonDistance(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}[, geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
+ at item CodonDist(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}[, geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
Substitution model on codons that takes into account the difference
between synonymous and non-synonymous substitutions.
@@ -743,24 +940,28 @@ substitution rate and synonymous substitution rate. Default value: 1.
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonDistance(model=T92)
+model=CodonDist(model=T92)
@end example
builds a model on codons, such all sites follow the same T92 model.
-The parameters names are @var{CodonDistance.123_T92.kappa} and
- at var{CodonDistance.beta}.
+The parameters names are @var{CodonDist.123_T92.kappa} and
+ at var{CodonDist.beta}.
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonDistance(model1=T92, model2=T92, model3=JC69)
+model=CodonDist(model1=T92, model2=T92, model3=JC69)
@end example
builds a model on codons, such that first and second sites follow
independent T92 models, and third site follows a JC69 model. Then the
-parameters names are @var{CodonDistance.1_T92.kappa},
- at var{CodonDistance.2_T92.kappa}, @var{CodonDistance.beta}.
+parameters names are @var{CodonDist.1_T92.kappa},
+ at var{CodonDist.2_T92.kappa}, @var{CodonDist.beta}.
+
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1CodonDistanceSubstitutionModel.html#_details, Bio++ description}.
- at item CodonRateFrequencies(model=@{model name@}, frequencies=@{frequencies set description@}[, relrate1=@{real>0@}, relrate2=@{real>0@}, "equilibrium frequencies"])
+
+ at item CodonRateFreq(model=@{model name@}, frequencies=@{frequencies set description@}[, relrate1=@{real>0@}, relrate2=@{real>0@}, "equilibrium frequencies"])
or
- at item CodonRateFrequencies(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [, relrate1=@{real>0@}, relrate2=@{real>0@}, "equilibrium frequencies"])
+ at item CodonRateFreq(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [, relrate1=@{real>0@}, relrate2=@{real>0@}, "equilibrium frequencies"])
Substitution model on codons with position specific evolution rates,
@@ -784,19 +985,22 @@ of each site is 1/3.
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonRateFrequencies(frequencies=Full())
+model=CodonRateFreq(frequencies=Full())
@end example
-has parameters @var{CodonRateFrequencies.123_K80.kappa},
- at var{CodonRateFrequencies.Full.theta_1}, ...,
- at var{CodonRateFrequencies.Full.theta_60},
- at var{CodonRateFrequencies.relrate1},
- at var{CodonRateFrequencies.relrate2}.
+has parameters @var{CodonRateFreq.123_K80.kappa},
+ at var{CodonRateFreq.Full.theta_1}, ...,
+ at var{CodonRateFreq.Full.theta_60},
+ at var{CodonRateFreq.relrate1},
+ at var{CodonRateFreq.relrate2}.
+
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1CodonRateFrequenciesSubstitutionModel.html#_details, Bio++ description}.
- at item CodonDistanceFrequencies(model=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
+ at item CodonDistFreq(model=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
or
- at item CodonDistanceFrequencies(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
+ at item CodonDistFreq(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
Substitution model on codons that takes into account the difference
between synonymous and non-synonymous substitutions. Moreover, the
@@ -816,18 +1020,21 @@ substitution rate and synonymous substitution rate. Default value: 1.
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonDistanceFrequencies(frequencies=Full())
+model=CodonDistFreq(frequencies=Full())
@end example
-has parameters @var{CodonDistanceFrequencies.012_T92.kappa},
- at var{CodonDistanceFrequencies.Full.theta_1}, ...,
- at var{CodonDistanceFrequencies.Full.theta_60},
- at var{CodonDistanceFrequencies.beta}.
+has parameters @var{CodonDistFreq.012_T92.kappa},
+ at var{CodonDistFreq.Full.theta_1}, ...,
+ at var{CodonDistFreq.Full.theta_60},
+ at var{CodonDistFreq.beta}.
- at item CodonDistancePhaseFrequencies(model=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1CodonDistanceFrequenciesSubstitutionModel.html#_details, Bio++ description}.
-or
+ at item CodonDistPhasFreq(model=@{model name@}, frequencies=@{frequencies set description@} [, geneticcode=@{genetic code description@}, beta=@{real>0@}])
+
+or
- at item CodonDistancePhaseFrequencies(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
+ at item CodonDistPhasFreq(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [, geneticcode=@{genetic code description@}, beta=@{real>0@}])
Substitution model on codons that takes into account the difference
between synonymous and non-synonymous substitutions. Moreover, the
@@ -835,7 +1042,7 @@ sustitution rates are multiplied by the product of the frequencies of
the changed nucleotides -- conditioned on the phase -- in the given
frequencies set.
-This model should be used with nucleotidic models which equilibrium
+This model should be used with nucleotidic models in which equilibrium
distribution is fixed, ans does not depend on the parameters.
Otherwise there may be problems of identifiability of the parameters.
@@ -846,10 +1053,41 @@ below.
Optional argument @var{beta} is the ratio between non-synonymous
substitution rate and synonymous substitution rate. Default value: 1.
- at end table
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1CodonDistancePhaseFrequenciesSubstitutionModel.html#_details, Bio++ description}.
+
+ at item CodonDistFitPhasFreq(model=@{model name@}, frequencies=@{frequencies set description@}, fitness=@{frequencies set description@} [, geneticcode=@{genetic code description@}, beta=@{real>0@}])
+
+or
+
+ at item CodonDistFitPhasFreq(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@}, fitness=@{frequencies set description@} [, geneticcode=@{genetic code description@}, beta=@{real>0@}])
+
+Substitution model on codons that takes into account the difference
+between synonymous and non-synonymous substitutions and the difference
+between synonymous codons, in the same manner as in Yang and Nielsen's
+2008 substitution model. The substitution rates are multiplied by the
+product of the frequencies of the changed nucleotides -- conditioned
+on the phase -- in the given frequencies set, and by ratios of
+fitnesses of the codons.
+
+This model should be used with nucleotidic models in which equilibrium
+distribution is fixed, and does not depend on the parameters.
+Otherwise there may be problems of identifiability of the parameters.
+The multiplicative distribution of the model is described by the
+ at var{frequencies} and @var{fitness} arguments. See the description of
+the Frequencies Set below.
+
+Optional argument @var{beta} is the ratio between non-synonymous
+substitution rate and synonymous substitution rate. Default value: 1.
+
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1CodonDistanceFitnessPhaseFrequenciesSubstitutionModel.html#_details, Bio++ description}.
+
+ at end table
+ at node Multiple, Meta, Codon, Declaration
@subsubsection General multiple site models
@table @command
@@ -900,6 +1138,9 @@ site follows a HKY85 model. Then the parameters names are
@var{Word.4_HKY85.theta1}, @var{Word.4_HKY85.theta2},
@var{Word.relrate1}, @var{Word.relrate2}, @var{Word.relrate3}.
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1WordSubstitutionModel.html#_details, Bio++ description}.
+
@item Triplet(model=@{model description@} [, relrate1=@{real>0@}, relrate2=@{real>0@}])
or
@item Triplet(model1=@{model description@}, model2=@{model description@}, model3=@{model description@}[, relrate1=@{real>0@}, relrate2=@{real>0@}])
@@ -926,7 +1167,7 @@ of each site is 1/3.
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=Word(model=T92)
+model=Triplet(model=T92)
@end example
builds a model on codons, such all sites follow the same T92 model.
The parameters names are @var{Triplet.123_T92.kappa},
@@ -942,10 +1183,28 @@ parameters names are @var{Triplet.1_T92.kappa},
@var{Triplet.2_T92.kappa}, @var{Triplet.relrate1},
@var{Triplet.relrate2}.
- at end table
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1TripletSubstitutionModel.html#_details, Bio++ description}.
+
+ at item YpR_Sym(model=@{model description@}, [rCgT=@{real>=0@}, rTgC=@{real>=0@}, rCaT=@{real>=0@}, rTaC=@{real>=0@}])
+
+substitution model on quotiented triplets to handle strand symmetric
+neighbour-dependency inside dinucleotides YpR (see Bérard and Guéguen
+2012). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YpR_SymSubstitutionModel.html#_details, Bio++ description}.
+
+ at item YpR_Gen(model=@{model description@}, [rCgT=@{real>=0@}, rcGA=@{real>=0@}, rTgC=@{real>=0@}, rtGA=@{real>=0@}, rCaT=@{real>=0@}, rcAG=@{real>=0@}, rTaC=@{real>=0@}, rtAG=@{real>=0@}])
+substitution model on quotiented triplets to handle general symmetric
+neighbour-dependency inside dinucleotides YpR (see Bérard and Guéguen
+2012). See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1YpR_GenSubstitutionModel.html#_details, Bio++ description}.
+
+ at end table
+
+ at node Meta, Mixture, Multiple, Declaration
@subsubsection Meta models
These substitution models take as argument another substitution model, and add several parameters.
@@ -953,36 +1212,72 @@ These substitution models take as argument another substitution model, and add s
@table @command
@item TS98(model=@{model description@}, s1=@{real>0@}, s2=@{real>0@} [, "equilibrium frequencies"])
-Tuffley and Steel 1998's 'covarion' model, taking a nested substitution model as argument for @var{model}.
-The nested model can be any substitution model for any alphabet.
+Tuffley and Steel 1998's 'covarion' model, taking a nested
+substitution model as argument for @var{model}. The nested model can
+be any substitution model for any alphabet. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1TS98.html#_details, Bio++ description}.
+
@item G01(model=@{model description@}, rdist=@{rate distribution description@}, mu=@{real>0@} [, "equilibrium frequencies"])
-Galtier 2001's 'covarion' model, taking a nested substitution model as argument for @var{model} and a rate distribution for parameter @var{rdist} (see below).
-The nested model can be any substitution model for any alphabet.
+Galtier 2001's 'covarion' model, taking a nested substitution model as
+argument for @var{model} and a rate distribution for parameter
+ at var{rdist} (see below). The nested model can be any substitution
+model for any alphabet. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1G01.html#_details, Bio++ description}.
+
@item RE08(model=@{model description@}, lambda=@{real>0@}, mu=@{real>0@} [, "equilibrium frequencies"])
-Rivas and Eddy 2008's substitution model with gaps, taking a nested substitution model as argument for @var{model}.
-Parameter @var{lambda} is the insertion rate, while @var{mu} is the deletion rate.
+Rivas and Eddy 2008's substitution model with gaps, taking a nested
+substitution model as argument for @var{model}. Parameter @var{lambda}
+is the insertion rate, while @var{mu} is the deletion rate. See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1RE08.html#_details, Bio++ description}.
+
@end table
+ at node Mixture, Linking, Meta, Declaration
@subsubsection Mixture of models
@table @command
-Mixed models combine any substitution models with a priori
-distribution of parameters. We call submodels all the models that are
-mixed in the mixture.
+Mixed models are sometimes called "site models".
+
+Mixed models combine substitution models with respective
+probabilities. We call submodels all the models that are mixed in the
+mixture. A Mixed model is either the mixture of several predefined
+models, or based on a "simple" model in which some parameters follow
+given distributions.
During the likelihood computation process, all the submodels of the
-mixture are successively applied on the branches, and the mean of all
-the likelihoods is computed. With nonhomogeneous reconstruction, since
-a mixed model is a random variable, affecting a mixed model to a set
-of branches means that all these branches are dependent, and in this
-case a site follows the same submodels in all the branches of the set
-that support this mixed model. Moreover, it is possible to define
-paths that define dependencies between submodels of different
-mixtures (see below).
+mixture are successively applied on the branches, and the mean (see
+below) of all the likelihoods is computed.
+
+A site can follow given paths all along the tree, with given
+probabilities.
+
+In homogeneous reconstruction, a path corresponds to the same submodel
+on all the branches, in a stationary condition. The probability of a
+path is the probability of its submodel. Given a site follows a path,
+a likelihood can be computed; and the overall likelihood on this site
+is the mean of these likelihoods (given the probabilities of the
+paths). This means that the root distribution is a mixture of the
+equilibrium distributions of the submodels.
+
+With nonhomogeneous reconstruction, several models are applied on the
+tree, some models are mixed, some are not. A path is a vector whose
+size is the number of mixed models (see below for more details and the
+declaration of paths).
+
+Since the attribution of a submodel from a mixed model to a given site
+is a unique random variable, affecting the same mixed model to a set
+of branches S means that the attribution to this site is the same on
+all the branches of S. If model M=(Ma,Mb,Mc) is defined on a set of
+branches S, a site is constrained to follow either Ma on all S, or Mb
+on all S, or Mc on all S. If we want two branches of S to be
+independent, two similar mixed models must be defined. Moreover, it is
+possible to define paths that define dependencies between submodels of
+different mixtures (see below).
+
@item MixedModel(model=@{model description@})
Mixture model from a given @var{model} in which some parameters follow
@@ -1001,6 +1296,9 @@ has parameters @var{TN93.kappa1_Gamma.alpha},
@var{TN93.theta}, @var{MixedModel.TN93.theta1},
@var{TN93.theta2}.
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1MixtureOfASubstitutionModel.html#_details, Bio++ description}.
+
@item Mixture(model1=@{model description@},..., modeln=@{model description@} [, relrate1=@{1>real>0@},..., relrate@{n-1@}=@{1>real>0@}, relproba1=@{1>real>0@}, ..., relproba@{n-1@}=@{1>real>0@}, "equilibrium frequencies"])
Mixture model built from several @var{models}: each model has its own
probability and rate.
@@ -1019,7 +1317,11 @@ has parameters at var{Mixture.relrate1}, @var{Mixture.relproba1},
@var{Mixture.2_YN98.kappa}, @var{Mixture.2_YN98.omega}.
@end table
+See the
+ at uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1MixtureOfSubstitutionModels.html#_details, Bio++ description}.
+
+ at node Linking, , Mixture, Declaration
@subsubsection Linking parameters
It is possible to reduce the parameter space by putting extra constraints on parameters, using for instance
@@ -1033,7 +1335,7 @@ In that particular case the resulting model is strictly equivalent to the HKY85
-
+ at node Non-homogeneity, FrequenciesSet, Declaration, Model
@subsection Setting up non-stationary / non-homogeneous models
You can specify a wide range of non-homogeneous models, by combining different options.
@@ -1047,10 +1349,8 @@ This option share the same parameters as the homogeneous case, since the same ki
List the names of the parameters that are shared by all branches.
In Galtier & Gouy model, that would be @var{T92.kappa}, since only the theta parameter is branch-specific.
-The '*' wildcard can be used as a suffix, as in
- at command{YN98.freq_Word.1_*} for all the parameters whose names start
-with @command{YN98.freq_Word.1_}.
-
+The '*' wildcard can be used, as in @command{*theta*} for all the
+parameters whose name has @command{theta} in it.
@end table
@@ -1114,11 +1414,6 @@ Finally, you may find useful the following options:
@table @command
- at item output.tree_ids.file = @{@{path@}|none@}
-A tree file in newick format, with node ids instead of bootstrap
-values, and leaf names with their id as suffix.
-The use of that option will cause the program to exit just after producing the tagged tree.
-
@item output.parameter_names.file = @{@{path@}|none@}
A text file listing all parameter names. This might come handy in
order to specify the parameter that should not be optimized (see
@@ -1237,6 +1532,7 @@ The Frequencies set used can be any of the ones described below
@xref{Frequencies sets}, depending on the alphabet used.
+ at node FrequenciesSet, Rates, Non-homogeneity, Model
@subsection Frequencies sets
@anchor{Frequencies sets}
@@ -1245,7 +1541,8 @@ The following frequencies distributions are available:
@table @command
@item Fixed()
-All frequencies are fixed to their initial value and are not estimated.
+All frequencies are fixed to their initial value and are not
+estimated.
@item GC(theta=@{real]0,1[@})
For nucleotides only, set the G content equal to the C content.
@@ -1281,7 +1578,7 @@ Word(frequency=GC())
@end example
builds a frequency set on 4 bases words, such that all sites
frequencies follow the same GC frequency set model. The parameter
-name is @var{Word.1234_GC.theta}.
+name is @var{1234_GC.theta}.
@example
alphabet=Word(letter=DNA,length=4)
@@ -1291,9 +1588,9 @@ Word(frequency1=GC(),frequency2=GC(),frequency3=Fixed(),\
builds a frequency set on 4 bases words, such first and second sites
follow independent GC frequency sets, third site follows a Fixed
frequency set, and fourth site follows a Full frequency set. Then the
-parameters names are @var{Word.1_GC.theta},
- at var{Word.2_GC.theta}, @var{Word.4_Full.theta_1},
- at var{Word.4_Full.theta_2}, @var{Word.4_Full.theta_3}.
+parameters names are @var{1_GC.theta},
+ at var{2_GC.theta}, @var{4_Full.theta_1},
+ at var{4_Full.theta_2}, @var{4_Full.theta_3}.
@item Codon(frequency=@{frequency set description@})
@@ -1321,7 +1618,7 @@ Codon(frequency=GC())
@end example
builds a frequency set on codons, such that all sites frequencies
follow the same GC frequency set model. The parameter name is
- at var{Codon.123_GC.theta}.
+ at var{123_GC.theta}.
@example
alphabet=Codon(letter=DNA, type=Standard)
@@ -1330,8 +1627,8 @@ Codon(frequency1=GC(),frequency2=GC(),frequency3=Fixed())
builds a frequency set on codons, such that first and second sites
follow independent GC frequency sets, third site follows a Fixed
-frequency set. Then the parameters names are @var{Codon.1_GC.theta},
- at var{Codon.2_GC.theta}.
+frequency set. Then the parameters names are @var{1_GC.theta},
+ at var{2_GC.theta}.
Predefined codon frequencies are available, with a syntax similar to
@@ -1347,7 +1644,7 @@ the parameter specification:
@item init=@{balanced,observed@}
Set all frequencies to the same value, or to their observed counts.
- at item pseudoCount=@{integer@}
+ at item observedPseudoCount=@{integer@}
If the frequencies are set from observed counts, a pseudoCount is
added to all the counts.
@@ -1358,8 +1655,7 @@ alphabetical order of states, and sum to one.
@end table
-
-
+ at node Rates, , FrequenciesSet, Model
@subsection Rate across site distribution
From version 0.4.0, BppSuite uses the keyval syntax for specifying the distributions of substitution rate across sites.
@@ -1376,7 +1672,7 @@ The following distributions are currently available:
@table @command
- at item Uniform
+ at item Constant
Uses a constant rate across sites.
@item Gamma(n=@{int>=2@}, alpha=@{float>0@})
@@ -1390,6 +1686,7 @@ with a probability @var{p}.
@end table
+ at node Distribution, Estimation, Model, Common
@section Discrete distributions
@anchor{Discrete distributions}
@@ -1421,12 +1718,14 @@ a discretized gaussian distribution, with @var{n} classes, a mean
a discretized exponential distribution, with @var{n} classes and
parameter @var{lambda}.
- at item Simple(values=@{vector<double>@}, probas=@{vector<double>@})
+ at item Simple(values=@{vector<double>@}, probas=@{vector<double>@} [, ranges=@{vector<parametername[min;max]>@}])
a discrete distribution with specific values (in @var{values}) and
their respective non-negative probabibilities (in @var{probas}). The
parameters are @var{V1}, @var{V2}, ..., @var{Vn} for all the values
and the relative probabibility parameters are @var{theta1},
- at var{theta2}, ..., @var{thetan-1}.
+@var{theta2}, ..., @var{thetan-1}. Optional argument @var{ranges} sets
+the allowed ranges of values taken by the parameters; usage is like
+ at samp{ranges=(V1[0.2;0.9],V2[1.1;999])}.
@item TruncExponential(n=@{int>=2@}, lambda=@{float>0@}, tp=@{float>0@})
a discretized truncated exponential distribution, with @var{n}
@@ -1443,10 +1742,25 @@ a uniform distribution, with @var{n} classes in interval
@table @command
- at item Mixture(probas=@{vector<double>@}, distribution1=@{distribution description@}, ..., distributionn=@{distribution description@})
+ at item Invariant(dist=@{distribution description@}, p=@{float>0@})
+
+a Mixture of a given discrete distribution and a 0 Dirac. @var{p} is
+the probability of this 0 Dirac.
+
+For example :
+ at example
+Invariant(dist=Gaussian(n=4,2,0.5),p=0.1)
+ at end example
+builds a mixture of a gaussian distribution with 4 categories (and
+probability 0.9) and a 0 Dirac with probability 0.1. Overall, there
+are 5 categories. The parameters names are
+ at var{Invariant.Gaussian.mu}, @var{Invariant.Gaussian.sigma},
+ at var{Invariant.p}.
+
+ at item Mixture(probas=@{vector<double>@}, dist1=@{distribution description@}, ..., distn=@{distribution description@})
a Mixture of discrete distributions with specific probabilities (in
- at var{probas}) and their respective desccriptions. (in @var{probas}).
+@var{probas}) and their respective descriptions (in @var{probas}).
The parameters are the relative probabibility parameters @var{theta1},
@var{theta2}, ..., @var{thetan-1}, and the parameters of the included
distributions prefixed by @var{Mixture.i_} where @var{i} is the order
@@ -1454,8 +1768,8 @@ of the distribution.
For example:
@example
-Mixture(probas=(0.3,0.7),distribution1=Beta(n=5,alpha=2,beta=3),\
- distribution2=Gamma(n=10,alpha=9,beta=2))
+Mixture(probas=(0.3,0.7),dist1=Beta(n=5,alpha=2,beta=3),\
+ dist2=Gamma(n=10,alpha=9,beta=2))
@end example
builds a mixture of a discrete beta distribution and of a discrete
gamma distribution, with a total of 15 classes. The parameters names
@@ -1468,7 +1782,7 @@ are @var{Mixture.theta1}, @var{Mixture.1_Beta.alpha},
@c ------------------------------------------------------------------------------------------------------------------
- at node Estimation, WritingSequences, Model, Common
+ at node Estimation, WritingSequences, Distribution, Common
@section Numerical parameters estimation
Some programs allow you to (re-)estimate numerical parameters, including
@@ -1554,9 +1868,8 @@ nested models, the syntax is the following:
'Ancient' will ignore all parameters in the ancestral frequency set
(non-homogeneous models), and 'BrLen' will ignore all branch lengths.
-The '*' wildcard can be used as a suffix, as in
- at command{YN98.freq_Word.1_*} for all the parameters whose names start
-with @command{YN98.freq_Word.1_}.
+The '*' wildcard can be used, as in @command{*theta*} for all the
+parameters whose name has @command{theta} in it.
@item optimization.tolerance = @{float>0@}
The precision on the log-likelihood to reach.
@@ -1628,6 +1941,7 @@ This section now details the specific options for each program in the Bio++ Prog
* bppml:: Bio++ Maximum Likelihood.
* bppseqgen:: Bio++ Sequence Generator.
* bppancestor:: Bio++ Ancestral Sequences and Rates reconstruction.
+* bppmixedlikelihoods:: Bio++ Site-Likelihoods Inside Mixed Models.
* bppdist:: Bio++ Distance Methods.
* bpppars:: Bio++ Maximum Parsimony.
* bppconsense:: Bio++ Consensus Trees.
@@ -1680,6 +1994,10 @@ The tree is finally scaled to match a given total height, which can be the origi
A value of rho=0 provides a star tree, and the greater the value of rho, the more recent the inner nodes.
@end table
+
+ at item input.tree.check_root = @{boolean@}
+Tells whether the input tree should be checked for the presence of a root. If set to yes (the default), rooted trees will be unrooted if a homogeneous model is used.
+If not, a rooted tree will be fitted, which can lead to optimization issues in most cases. Use the non-default option with care!
@end table
@subsection Topology optimization
@@ -1810,7 +2128,7 @@ The estimated site-specific rates will then be used to simulate the same number
@c ------------------------------------------------------------------------------------------------------------------
- at node bppancestor, bppdist, bppseqgen, Reference
+ at node bppancestor, bppmixedlikelihoods, bppseqgen, Reference
@section BppAncestor: Bio++ Ancestral Sequence and Rate Reconstruction
The BppAncestor program uses the common syntax introduced in the previous section for setting the alphabet, loading the sequences (@pxref{Sequences}) and tree (@pxref{Tree}), specifying the model (@pxref{Model}) and writing sequence data (@pxref{WritingSequences}).
@@ -1818,6 +2136,10 @@ The BppAncestor program uses the common syntax introduced in the previous sectio
Specific options are:
@table @command
+ at item input.tree.check_root = @{boolean@}
+Tells whether the input tree should be checked for the presence of a root. If set to yes (the default), rooted trees will be unrooted if a homogeneous model is used.
+If not, a rooted tree will be fitted, which can lead to optimization issues in most cases. Use the non-default option with care!
+
@item asr.method = @{none|marginal@}
Marginal is the only option for now. If set to "none", only nodes frequencies can be output.
@@ -1848,8 +2170,40 @@ Tell if leaf nodes should be added to the output file.
@end table
@c ------------------------------------------------------------------------------------------------------------------
+ at node bppmixedlikelihoods, bppdist, bppancestor, Reference
+ at section BppMixedLikelihoods: Bio++ Site-Likelihoods Inside Mixed Models.
+
+The BppMixedLikelihoods program uses the common syntax introduced in the previous section for setting the alphabet, loading the sequences (@pxref{Sequences}) and tree (@pxref{Tree}) and specifying the model (@pxref{Model}).
+
+Given the name of a mixed parameter of a mixed model, or a mixed model
+made of several models, the BppMixedLikelihoods program computes the
+site-per-site log-likelihoods of the several values of the parameter,
+or of the several sub-models of the mixture. If the mixed model is
+built on a parameter whose value follows a distribution, the a
+posteriori mean value of the parameter is also computed and written in
+an additional column named "mean".
+
+Specific options are:
+ at table @command
+
+ at item output.likelihoods.file = @{@{path@}@}
+Output file of the program (site specific log-likelihood, and mean of
+the mixed parameters, if any).
- at node bppdist, bpppars, bppancestor, Reference
+ at item likelihoods.model_number = @{integer@}
+In case of non-homogeneous modeling, the number of the mixed model
+whose parameter or sub-models are considered.
+
+ at item likelihoods.parameter_name = @{string@}
+If the considered mixed model is built from a distribution on a
+parameter, the name of the parameter to be considered. In this case,
+an additional column is written, containing the a posteriori mean
+value of the parameter.
+
+ at end table
+
+ at c ------------------------------------------------------------------------------------------------------------------
+ at node bppdist, bpppars, bppmixedlikelihoods, Reference
@section BppDist: Bio++ Distance Methods
The BppDist program uses the common syntax introduced in the previous section for setting the alphabet, loading the sequences (@pxref{Sequences}) and tree (@pxref{Tree}) and specifying the model (@pxref{Model}, only the section corresponding to the homogeneous case).
@@ -2078,7 +2432,7 @@ This method can therefore be used for subsetting a list of sequences, and/or rea
Examples of use:
-@table @bullet
+@itemize @bullet
@item Just change file format:
@example
@@ -2105,7 +2459,7 @@ sequence.manip=UnknownToGap,KeepComplete(maxGapAllowed=5)
sequence.manip=KeepComplete(maxGapAllowed=30%),GapToUnknown
@end example
-@end table
+@end itemize
@c ------------------------------------------------------------------------------------------------------------------
diff --git a/man/CMakeLists.txt b/man/CMakeLists.txt
index 7bf1e2f..9c009cb 100644
--- a/man/CMakeLists.txt
+++ b/man/CMakeLists.txt
@@ -3,15 +3,16 @@
# Created: 22/08/2009
IF(MAN)
- INSTALL(FILES bppml.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bppseqgen.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bppancestor.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bpppars.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bppdist.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bppconsense.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bppseqman.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bppreroot.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bppphysamp.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bpptreedraw.1.gz DESTINATION share/man/man1)
- INSTALL(FILES bppalnscore.1.gz DESTINATION share/man/man1)
+ INSTALL(FILES bppml.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppseqgen.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppancestor.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bpppars.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppdist.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppconsense.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppseqman.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppreroot.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppphysamp.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bpptreedraw.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppalnscore.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
+ INSTALL(FILES bppmixedlikelihoods.1.${DOC_COMPRESS_EXT} DESTINATION share/man/man1)
ENDIF(MAN)
diff --git a/man/bppmixedlikelihoods.1.txt b/man/bppmixedlikelihoods.1.txt
new file mode 100644
index 0000000..16d0e98
--- /dev/null
+++ b/man/bppmixedlikelihoods.1.txt
@@ -0,0 +1,42 @@
+.TH BPPMIXEDLIKELIHOODS 1 LOCAL
+
+.SH NAME
+
+bppmixedlikelihoods - Computation of site per site likelihoods of
+components of mixture models.
+
+.SH SYNOPSIS
+
+.B bppmixedlikelihoods [options]
+
+.SH AVAILABILITY
+
+All UNIX flavors
+
+.SH DESCRIPTION
+
+On the basis of a mixed model, bppmixedlikelihoods computes the
+likelihood of each site for each submodel of the mixture. This is done
+with homogeneous and non-homogeneous modelings. If the mixture is
+based on several values of a parameter, the site per site a posteriori
+probabilities and average value of this parameter are computed.
+
+.SH OPTIONS
+
+You should refer to 'info bppsuite' or to the online manual of bppsuite for a complete list of available options.
+
+.TP 5
+
+--noninteractive
+
+generates output for redirection to a file.
+
+.TP
+
+param=file
+
+reads a file for loading options
+
+.SH AUTHOR
+
+The Bio++ Development Team.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bppsuite.git
More information about the debian-med-commit
mailing list