[med-svn] [bppsuite] 07/10: Import Upstream version 2.2.0
Andreas Tille
tille at debian.org
Wed Jun 14 11:36:59 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository bppsuite.
commit 1268477d4cd658e5d0e43c09f6e6bb802bb285e7
Author: Andreas Tille <tille at debian.org>
Date: Wed Jun 14 13:23:25 2017 +0200
Import Upstream version 2.2.0
---
CMakeLists.txt | 6 +-
ChangeLog | 15 +
.../MaximumLikelihood/Codons/BranchModel/ML.bpp | 4 +-
.../Codons/{BranchModel => CladeModel}/ML.bpp | 17 +-
Examples/MaximumLikelihood/Codons/M0/ML.bpp | 3 +-
.../MaximumLikelihood/Proteins/Homogeneous/ML.bpp | 2 +-
Examples/PhylogeneticSampling/PhySamp.bpp | 6 +
.../WithSiteSpecificSettings/SeqGen.bpp | 45 +++
.../WithSiteSpecificSettings/infos.csv | 31 ++
bppSuite/bppAlnScore.cpp | 6 +-
bppSuite/bppAncestor.cpp | 24 +-
bppSuite/bppConsense.cpp | 14 +-
bppSuite/bppDist.cpp | 49 ++-
bppSuite/bppML.cpp | 96 ++---
bppSuite/bppMixedLikelihoods.cpp | 100 +++--
bppSuite/bppPars.cpp | 4 +-
bppSuite/bppPhyloSampler.cpp | 31 +-
bppSuite/bppReRoot.cpp | 16 +-
bppSuite/bppSeqGen.cpp | 275 ++++++++++++--
bppSuite/bppSeqMan.cpp | 82 +++--
bppSuite/bppTreeDraw.cpp | 8 +-
bppsuite.spec | 26 +-
buildBin.sh | 18 +
buildExample.sh | 4 +
debian/changelog | 10 +
debian/control | 6 +-
debian/copyright | 8 +-
doc/bppsuite.texi | 406 +++++++++++++--------
28 files changed, 933 insertions(+), 379 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 95081cd..91a5b51 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,9 +203,9 @@ ENDIF(NO_DEP_CHECK)
# Packager
SET(CPACK_PACKAGE_NAME "bppsuite")
SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
-SET(CPACK_PACKAGE_VERSION "0.8.0")
-SET(CPACK_PACKAGE_VERSION_MAJOR "0")
-SET(CPACK_PACKAGE_VERSION_MINOR "8")
+SET(CPACK_PACKAGE_VERSION "2.2.0")
+SET(CPACK_PACKAGE_VERSION_MAJOR "2")
+SET(CPACK_PACKAGE_VERSION_MINOR "2")
SET(CPACK_PACKAGE_VERSION_PATCH "0")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Program Suite")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
diff --git a/ChangeLog b/ChangeLog
index 3127dc8..c222707 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+28/09/14 -*- Version 2.2.0 -*-
+
+16/09/14 Julien Dutheil
+* bppSeqGen supports generic characters as ancestral states in info file (bug #87).
+
+12/08/14 Julien Dutheil
+* bppSeqGen supports simulation under Markov-Modulated models.
+
+06/06/14 Julien Dutheil
+* bppPhySamp now ouput trees as well.
+
+17/04/14 Julien Dutheil
+* bppSeqMan: Translate uses global genetic_code argument instead of local 'code' argument.
+* All programs use the --seed argument to set random seed for reproduceable results.
+
08/03/13 -*- Version 0.8.0 -*-
22/01/13 Julien Dutheil
diff --git a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp b/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
index 24eb17f..7e06e3f 100644
--- a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
@@ -9,7 +9,8 @@ DATA = lysozymeLarge
# The alphabet to use:
# DNA, RNA or Protein...
-alphabet=Codon(letter=DNA, type=Standard)
+alphabet=Codon(letter=DNA)
+genetic_code=Standard
# The sequence file to use (sequences must be aligned!)
input.sequence.file=../../../Data/$(DATA).fasta
@@ -22,6 +23,7 @@ input.sequence.format=Fasta
input.sequence.sites_to_use = all
# Specify a maximum amount of gaps: may be an absolute number or a percentage.
input.sequence.max_gap_allowed = 50%
+input.sequence.max_unresolved_allowed = 100%
input.sequence.remove_stop_codons = yes
diff --git a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp b/Examples/MaximumLikelihood/Codons/CladeModel/ML.bpp
similarity index 93%
copy from Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
copy to Examples/MaximumLikelihood/Codons/CladeModel/ML.bpp
index 24eb17f..9e8b778 100644
--- a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/CladeModel/ML.bpp
@@ -9,7 +9,8 @@ DATA = lysozymeLarge
# The alphabet to use:
# DNA, RNA or Protein...
-alphabet=Codon(letter=DNA, type=Standard)
+alphabet=Codon(letter=DNA)
+genetic_code=Standard
# The sequence file to use (sequences must be aligned!)
input.sequence.file=../../../Data/$(DATA).fasta
@@ -22,6 +23,7 @@ input.sequence.format=Fasta
input.sequence.sites_to_use = all
# Specify a maximum amount of gaps: may be an absolute number or a percentage.
input.sequence.max_gap_allowed = 50%
+input.sequence.max_unresolved_allowed = 100%
input.sequence.remove_stop_codons = yes
@@ -42,13 +44,20 @@ init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
-model = YN98(kappa=1, omega=1, frequencies=F1X4)
-nonhomogeneous=one_per_branch
+nonhomogeneous=general
+nonhomogeneous.number_of_models=2
+
+model1 = YN98(kappa=1, omega=1, frequencies=F1X4)
+model1.nodes_id = 0:20
+
+model2 = YN98(kappa=YN98.kappa_1, omega=1, frequencies=F1X4)
+model2.nodes_id = 21:32
+
#These lines are for the F1X4 option:
#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa,\
# YN98.freq_Codon.123_Full.theta, YN98.freq_Codon.123_Full.theta1, YN98.freq_Codon.123_Full.theta2
#These lines are for the F3X4 option:
-nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.123_*
+#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.123_*
nonhomogeneous.stationarity=yes
#Only if stationarity is set to false:
nonhomogeneous.root_freq=
diff --git a/Examples/MaximumLikelihood/Codons/M0/ML.bpp b/Examples/MaximumLikelihood/Codons/M0/ML.bpp
index e6130ce..31861c9 100644
--- a/Examples/MaximumLikelihood/Codons/M0/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M0/ML.bpp
@@ -10,7 +10,8 @@ DATA = lysozymeLarge
# The alphabet to use:
# DNA, RNA or Protein...
-alphabet=Codon(letter=DNA, type=Standard)
+alphabet=Codon(letter=DNA)
+genetic_code=Standard
# The sequence file to use (sequences must be aligned!)
input.sequence.file=../../../Data/$(DATA).fasta
diff --git a/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp b/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
index 369b74a..7faa66c 100644
--- a/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
+++ b/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
@@ -39,7 +39,7 @@ init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
-model = JTT92+F(initFreqs=observed, initFreqs.observedPseudoCount=0.)
+model = JTT92 //JTT92+F(initFreqs=observed, initFreqs.observedPseudoCount=0.)
rate_distribution = Gamma(n=4, alpha=0.5)
diff --git a/Examples/PhylogeneticSampling/PhySamp.bpp b/Examples/PhylogeneticSampling/PhySamp.bpp
index 21d7b62..7bca1a8 100755
--- a/Examples/PhylogeneticSampling/PhySamp.bpp
+++ b/Examples/PhylogeneticSampling/PhySamp.bpp
@@ -46,3 +46,9 @@ output.sequence.file=$(DATA).$(TYPE).fasta
# Ouput format:
output.sequence.format=Fasta()
+# Output tree file
+output.tree.file=$(DATA).$(TYPE).dnd
+
+# Ouput format:
+output.tree.format=Newick
+
diff --git a/Examples/SequenceSimulation/WithSiteSpecificSettings/SeqGen.bpp b/Examples/SequenceSimulation/WithSiteSpecificSettings/SeqGen.bpp
new file mode 100644
index 0000000..c8ac243
--- /dev/null
+++ b/Examples/SequenceSimulation/WithSiteSpecificSettings/SeqGen.bpp
@@ -0,0 +1,45 @@
+# The alphabet to use:
+# DNA, RNA or Protein
+alphabet = DNA
+
+# Input tree to use:
+input.tree.file = ../../Data/LSUrooted.dnd
+input.tree.format=Newick
+
+# Print a tree with ids as bootstrap values.
+# This is helpful when setting up complexe non-homogeneous models.
+# Setting this option will cause the program to exit after printing the tree.
+//output.tree.path = LSUrooted_wid.dnd
+
+#Info file specifying rate and/or ancestral state for each site:
+input.infos = infos.csv
+input.infos.rates = Rates //or 'none' to ignore rates
+input.infos.states = States //or 'none' to ignore states
+
+# The output file:
+output.sequence.file = LSUSim.fasta
+# The alignment format:
+# Must be one of Mase, Fasta, Phylip
+output.sequence.format = Fasta()
+
+# ----------------------------------------------------------------------------------------
+# Model specification
+# ----------------------------------------------------------------------------------------
+
+# Homogeneous model?
+# no => Homogeneous case
+# general => Specify the model by hand.
+nonhomogeneous = no
+
+# Options for homogeneous and one-per_branch models:
+
+# Available models.
+# For proteins, the DCmutt method is used for JTT92 and DSO78.
+# You can use the 'empirical' option to specify another model.
+# JCnuc, K80, T92, HKY85, F84, TN93, JCprot, DSO78, JTT92 or empirical
+# Append +G2001 or +TS98 to the model name to add a covarion model.
+model = HKY85(kappa=2.843, theta=0.7, theta1=0.4, theta2=0.6)
+
+# Rate Across Sites variation
+rate_distribution = Gamma(n=4, alpha=0.358)
+
diff --git a/Examples/SequenceSimulation/WithSiteSpecificSettings/infos.csv b/Examples/SequenceSimulation/WithSiteSpecificSettings/infos.csv
new file mode 100644
index 0000000..61b4a23
--- /dev/null
+++ b/Examples/SequenceSimulation/WithSiteSpecificSettings/infos.csv
@@ -0,0 +1,31 @@
+Rates States
+1 A
+1 A
+1 A
+1 A
+1 A
+0.1 A
+0.1 A
+0.1 A
+0.1 A
+0.1 A
+0.01 A
+0.01 A
+0.01 A
+0.01 A
+0.01 A
+0.001 A
+0.001 A
+0.001 A
+0.001 A
+0.001 A
+0.0001 A
+0.0001 A
+0.0001 A
+0.0001 A
+0.0001 A
+0. A
+0. A
+0. A
+0. A
+0. A
diff --git a/bppSuite/bppAlnScore.cpp b/bppSuite/bppAlnScore.cpp
index 4690198..956fcab 100644
--- a/bppSuite/bppAlnScore.cpp
+++ b/bppSuite/bppAlnScore.cpp
@@ -71,8 +71,8 @@ void help()
int main(int args, char** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Alignment Score, version 0.1 *" << endl;
- cout << "* Author: J. Dutheil Last Modif. 15/12/11 *" << endl;
+ cout << "* Bio++ Alignment Score, version 2.2.0 *" << endl;
+ cout << "* Author: J. Dutheil Last Modif. 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -139,7 +139,7 @@ int main(int args, char** argv)
string phaseOpt = ApplicationTools::getStringParameter("score.phase", bppalnscore.getParams(), "1");
if (TextTools::isDecimalInteger(phaseOpt))
{
- phase = TextTools::toInt(phaseOpt);
+ phase = TextTools::to<size_t>(phaseOpt);
if (phase == 0)
throw Exception("ERROR: positions are 1-based.");
phase--;
diff --git a/bppSuite/bppAncestor.cpp b/bppSuite/bppAncestor.cpp
index 40a2bee..2c8019c 100644
--- a/bppSuite/bppAncestor.cpp
+++ b/bppSuite/bppAncestor.cpp
@@ -92,9 +92,9 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Ancestral Sequence Reconstruction, version 0.5.0 *" << endl;
+ cout << "* Bio++ Ancestral Sequence Reconstruction, version 2.2.0 *" << endl;
cout << "* Authors: J. Dutheil Created on: 10/09/08 *" << endl;
- cout << "* B. Boussau Last Modif: 17/06/11 *" << endl;
+ cout << "* B. Boussau Last Modif: 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -110,6 +110,14 @@ int main(int args, char ** argv)
bppancestor.startTimer();
Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppancestor.getParams(), "", false);
+ auto_ptr<GeneticCode> gCode;
+ CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
+ if (codonAlphabet) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppancestor.getParams(), "Standard", "", true, true);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppancestor.getParams());
@@ -157,7 +165,7 @@ int main(int args, char ** argv)
if (nhOpt == "no")
{
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppancestor.getParams());
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppancestor.getParams());
if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
if (model->getNumberOfStates() > model->getAlphabet()->getSize())
{
@@ -177,7 +185,7 @@ int main(int args, char ** argv)
}
else if (nhOpt == "one_per_branch")
{
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppancestor.getParams());
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppancestor.getParams());
if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
if (model->getNumberOfStates() > model->getAlphabet()->getSize())
{
@@ -196,7 +204,7 @@ int main(int args, char ** argv)
rateFreqs = vector<double>(n, 1./(double)n); // Equal rates assumed for now, may be changed later (actually, in the most general case,
// we should assume a rate distribution for the root also!!!
}
- FrequenciesSet * rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, sites, bppancestor.getParams(), rateFreqs);
+ FrequenciesSet * rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, gCode.get(), sites, bppancestor.getParams(), rateFreqs);
vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppancestor.getParams(), ',', "");
modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, globalParameters);
model = 0;
@@ -207,7 +215,7 @@ int main(int args, char ** argv)
}
else if (nhOpt == "general")
{
- modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, sites, bppancestor.getParams());
+ modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, gCode.get(), sites, bppancestor.getParams());
if (modelSet->getModel(0)->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
if (modelSet->getNumberOfStates() > modelSet->getAlphabet()->getSize())
{
@@ -451,9 +459,9 @@ int main(int args, char ** argv)
vector<string> colNames;
colNames.push_back("Nodes");
for (unsigned int i = 0; i < tl->getNumberOfStates(); i++)
- colNames.push_back("exp" + alphabet->intToChar(i));
+ colNames.push_back("exp" + tl->getAlphabetStateAsChar(i));
for (unsigned int i = 0; i < tl->getNumberOfStates(); i++)
- colNames.push_back("eb" + alphabet->intToChar(i));
+ colNames.push_back("eb" + tl->getAlphabetStateAsChar(i));
//Now fill the table:
vector<string> row(colNames.size());
diff --git a/bppSuite/bppConsense.cpp b/bppSuite/bppConsense.cpp
index 1b74a88..7a1c3c4 100644
--- a/bppSuite/bppConsense.cpp
+++ b/bppSuite/bppConsense.cpp
@@ -70,9 +70,9 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Consensus and Bootstrap Methods, version 0.3.0 *" << endl;
+ cout << "* Bio++ Consensus and Bootstrap Methods, version 2.2.0 *" << endl;
cout << "* Authors: J. Dutheil Created 06/06/07 *" << endl;
- cout << "* N. Galtier Last Modif. 08/08/09 *" << endl;
+ cout << "* N. Galtier Last Modif. 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -90,7 +90,7 @@ int main(int args, char ** argv)
vector<Tree*> list = PhylogeneticsApplicationTools::getTrees(bppconsense.getParams());
Tree* tree = 0;
- string treeMethod = ApplicationTools::getStringParameter("tree", bppconsense.getParams(), "consensus");
+ string treeMethod = ApplicationTools::getStringParameter("tree", bppconsense.getParams(), "Consensus", "", false, 1);
string cmdName;
map<string, string> cmdArgs;
KeyvalTools::parseProcedure(treeMethod, cmdName, cmdArgs);
@@ -101,7 +101,7 @@ int main(int args, char ** argv)
}
else if(cmdName == "Consensus")
{
- double threshold = ApplicationTools::getDoubleParameter("threshold", cmdArgs, 0);
+ double threshold = ApplicationTools::getDoubleParameter("threshold", cmdArgs, 0, "", false, 1);
ApplicationTools::displayResult("Consensus threshold", TextTools::toString(threshold));
ApplicationTools::displayTask("Computing consensus tree");
tree = TreeTools::thresholdConsensus(list, threshold, true);
@@ -110,12 +110,14 @@ int main(int args, char ** argv)
else throw Exception("Unknown input tree method: " + treeMethod);
ApplicationTools::displayTask("Compute bootstrap values");
- TreeTools::computeBootstrapValues(*tree, list);
+
+ int bsformat = ApplicationTools::getIntParameter("bootstrap.format", bppconsense.getParams(), 0, "", false, 1);
+ TreeTools::computeBootstrapValues(*tree, list, true, bsformat);
ApplicationTools::displayTaskDone();
//Write resulting tree:
PhylogeneticsApplicationTools::writeTree(*tree, bppconsense.getParams());
- for (unsigned int i = 0; i < list.size(); i++)
+ for (size_t i = 0; i < list.size(); i++)
delete list[i];
delete tree;
diff --git a/bppSuite/bppDist.cpp b/bppSuite/bppDist.cpp
index a88ac87..626833f 100644
--- a/bppSuite/bppDist.cpp
+++ b/bppSuite/bppDist.cpp
@@ -58,6 +58,8 @@ using namespace std;
#include <Bpp/Seq/Container/SiteContainerTools.h>
#include <Bpp/Seq/SiteTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
+#include <Bpp/Text/KeyvalTools.h>
+
// From PhylLib:
#include <Bpp/Phyl/Tree.h>
@@ -84,9 +86,9 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Distance Methods, version 0.3.0 *" << endl;
+ cout << "* Bio++ Distance Methods, version 2.2.0 *" << endl;
cout << "* Author: J. Dutheil Created 05/05/07 *" << endl;
- cout << "* Last Modif. 08/08/09 *" << endl;
+ cout << "* Last Modif. 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -102,6 +104,14 @@ int main(int args, char ** argv)
bppdist.startTimer();
Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppdist.getParams(), "", false);
+ auto_ptr<GeneticCode> gCode;
+ CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
+ if (codonAlphabet) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppdist.getParams(), "Standard", "", true, true);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppdist.getParams());
@@ -111,7 +121,7 @@ int main(int args, char ** argv)
ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences()));
ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites()));
- SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppdist.getParams());
+ SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppdist.getParams());
DiscreteDistribution* rDist = 0;
if (model->getNumberOfStates() > model->getAlphabet()->getSize())
@@ -226,11 +236,32 @@ int main(int args, char ** argv)
if (matrixPath != "none")
{
ApplicationTools::displayResult("Output matrix file", matrixPath);
- ODistanceMatrix* odm = IODistanceMatrixFactory().createWriter(IODistanceMatrixFactory::PHYLIP_FORMAT);
+ string matrixFormat = ApplicationTools::getAFilePath("output.matrix.format", bppdist.getParams(), false, false, "", false);
+ string format = "";
+ bool extended = false;
+ std::map<std::string, std::string> unparsedArguments_;
+ KeyvalTools::parseProcedure(matrixFormat, format, unparsedArguments_);
+ if (unparsedArguments_.find("type") != unparsedArguments_.end())
+ {
+ if (unparsedArguments_["type"] == "extended")
+ {
+ extended = true;
+ }
+ else if (unparsedArguments_["type"] == "classic")
+ extended = false;
+ else
+ ApplicationTools::displayWarning("Argument '" +
+ unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " +
+ "Default used instead: not extended.");
+ }
+ else
+ ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: not extended.");
+
+
+ ODistanceMatrix* odm = IODistanceMatrixFactory().createWriter(IODistanceMatrixFactory::PHYLIP_FORMAT, extended);
odm->write(*distEstimation.getMatrix(), matrixPath, true);
delete odm;
- }
-
+ }
PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams());
//Output some parameters:
@@ -331,9 +362,9 @@ int main(int args, char ** argv)
delete distMethod;
delete tree;
- bppdist.done();
-
- }
+ bppdist.done();}
+
+
catch(exception & e)
{
cout << e.what() << endl;
diff --git a/bppSuite/bppML.cpp b/bppSuite/bppML.cpp
index a2bc90d..8c6413d 100644
--- a/bppSuite/bppML.cpp
+++ b/bppSuite/bppML.cpp
@@ -57,14 +57,14 @@ using namespace std;
#include <Bpp/Text/TextTools.h>
#include <Bpp/Text/KeyvalTools.h>
-// From SeqLib:
+// From bpp-seq:
#include <Bpp/Seq/Alphabet/Alphabet.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
#include <Bpp/Seq/Container/SiteContainerTools.h>
#include <Bpp/Seq/SiteTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
-// From PhylLib:
+// From bpp-phyl:
#include <Bpp/Phyl/Tree.h>
#include <Bpp/Phyl/Likelihood.all>
#include <Bpp/Phyl/PatternTools.h>
@@ -114,6 +114,14 @@ int main(int args, char** argv)
bppml.startTimer();
Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppml.getParams(), "", false);
+ auto_ptr<GeneticCode> gCode;
+ CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
+ if (codonAlphabet) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppml.getParams(), "Standard", "", true, true);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppml.getParams());
@@ -125,7 +133,7 @@ int main(int args, char** argv)
// Get the initial tree
Tree* tree = 0;
- string initTreeOpt = ApplicationTools::getStringParameter("init.tree", bppml.getParams(), "user", "", false, false);
+ string initTreeOpt = ApplicationTools::getStringParameter("init.tree", bppml.getParams(), "user", "", false, 1);
ApplicationTools::displayResult("Input tree", initTreeOpt);
if (initTreeOpt == "user")
{
@@ -144,7 +152,7 @@ int main(int args, char** argv)
// but allow to check file existence before running optimization!
PhylogeneticsApplicationTools::writeTree(*tree, bppml.getParams());
- bool computeLikelihood = ApplicationTools::getBooleanParameter("compute.likelihood", bppml.getParams(), true, "", false, false);
+ bool computeLikelihood = ApplicationTools::getBooleanParameter("compute.likelihood", bppml.getParams(), true, "", false, 1);
if (!computeLikelihood)
{
delete alphabet;
@@ -155,20 +163,20 @@ int main(int args, char** argv)
}
// Setting branch lengths?
- string initBrLenMethod = ApplicationTools::getStringParameter("init.brlen.method", bppml.getParams(), "Input", "", true, false);
+ string initBrLenMethod = ApplicationTools::getStringParameter("init.brlen.method", bppml.getParams(), "Input", "", true, 1);
string cmdName;
map<string, string> cmdArgs;
KeyvalTools::parseProcedure(initBrLenMethod, cmdName, cmdArgs);
if (cmdName == "Input")
{
// Is the root has to be moved to the midpoint position along the branch that contains it ? If no, do nothing!
- string midPointRootBrLengths = ApplicationTools::getStringParameter("midPointRootBrLengths", cmdArgs, "no", "", true, false);
- if(midPointRootBrLengths == "yes")
+ bool midPointRootBrLengths = ApplicationTools::getBooleanParameter("midpoint_root_branch", cmdArgs, false, "", true, 2);
+ if(midPointRootBrLengths)
TreeTools::constrainedMidPointRooting(*tree);
}
else if (cmdName == "Equal")
{
- double value = ApplicationTools::getDoubleParameter("value", cmdArgs, 0.1, "", true, false);
+ double value = ApplicationTools::getDoubleParameter("value", cmdArgs, 0.1, "", true, 2);
if (value <= 0)
throw Exception("Value for branch length must be superior to 0");
ApplicationTools::displayResult("Branch lengths set to", value);
@@ -180,7 +188,7 @@ int main(int args, char** argv)
}
else if (cmdName == "Grafen")
{
- string grafenHeight = ApplicationTools::getStringParameter("height", cmdArgs, "input", "", true, false);
+ string grafenHeight = ApplicationTools::getStringParameter("height", cmdArgs, "input", "", true, 2);
double h;
if (grafenHeight == "input")
{
@@ -193,7 +201,7 @@ int main(int args, char** argv)
}
ApplicationTools::displayResult("Total height", TextTools::toString(h));
- double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, false);
+ double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
ApplicationTools::displayResult("Grafen's rho", rho);
TreeTools::computeBranchLengthsGrafen(*tree, rho);
double nh = TreeTools::getHeight(*tree, tree->getRootId());
@@ -202,7 +210,7 @@ int main(int args, char** argv)
else throw Exception("Method '" + initBrLenMethod + "' unknown for computing branch lengths.");
ApplicationTools::displayResult("Branch lengths", cmdName);
- string treeWIdPath = ApplicationTools::getAFilePath("output.tree_ids.file", bppml.getParams(), false, false);
+ string treeWIdPath = ApplicationTools::getAFilePath("output.tree_ids.file", bppml.getParams(), false, false, "", true, "none", 1);
if (treeWIdPath != "none")
{
TreeTemplate<Node> ttree(*tree);
@@ -224,12 +232,12 @@ int main(int args, char** argv)
}
DiscreteRatesAcrossSitesTreeLikelihood* tl;
- string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppml.getParams(), "no", "", true, false);
+ string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppml.getParams(), "no", "", true, 1);
ApplicationTools::displayResult("Heterogeneous model", nhOpt);
- bool checkTree = ApplicationTools::getBooleanParameter("input.tree.check_root", bppml.getParams(), true, "", true, false);
- bool optimizeTopo = ApplicationTools::getBooleanParameter("optimization.topology", bppml.getParams(), false, "", true, false);
- unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bppml.getParams(), 0, "", true, false);
+ bool checkTree = ApplicationTools::getBooleanParameter("input.tree.check_root", bppml.getParams(), true, "", true, 2);
+ bool optimizeTopo = ApplicationTools::getBooleanParameter("optimization.topology", bppml.getParams(), false, "", true, 1);
+ unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bppml.getParams(), 0, "", true, 1);
SubstitutionModel* model = 0;
SubstitutionModelSet* modelSet = 0;
@@ -239,7 +247,7 @@ int main(int args, char** argv)
{
if (nhOpt != "no")
throw Exception("Topology estimation with NH model not supported yet, sorry :(");
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppml.getParams());
if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
{
@@ -257,7 +265,7 @@ int main(int args, char** argv)
}
else if (nhOpt == "no")
{
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppml.getParams());
if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
{
@@ -268,11 +276,11 @@ int main(int args, char** argv)
{
rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
}
- string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
+ string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, 1);
ApplicationTools::displayResult("Likelihood recursion", recursion);
if (recursion == "simple")
{
- string compression = ApplicationTools::getStringParameter("likelihood.recursion_simple.compression", bppml.getParams(), "recursive", "", true, false);
+ string compression = ApplicationTools::getStringParameter("likelihood.recursion_simple.compression", bppml.getParams(), "recursive", "", true, 2);
ApplicationTools::displayResult("Likelihood data compression", compression);
if (compression == "simple")
if (dynamic_cast<MixedSubstitutionModel*>(model))
@@ -299,7 +307,7 @@ int main(int args, char** argv)
}
else if (nhOpt == "one_per_branch")
{
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppml.getParams());
if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
{
@@ -319,13 +327,13 @@ int main(int args, char** argv)
// we should assume a rate distribution for the root also!!!
}
- bool stationarity = ApplicationTools::getBooleanParameter("nonhomogeneous.stationarity", bppml.getParams(), false, "", false, false);
+ bool stationarity = ApplicationTools::getBooleanParameter("nonhomogeneous.stationarity", bppml.getParams(), false, "", false, 1);
FrequenciesSet* rootFreqs = 0;
if (!stationarity)
{
- rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, sites, bppml.getParams(), rateFreqs);
+ rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, gCode.get(), sites, bppml.getParams(), rateFreqs);
stationarity = !rootFreqs;
- string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", bppml.getParams(), "");
+ string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", bppml.getParams(), "", "", true, 1);
if (freqDescription == "MVAprotein")
{
if (dynamic_cast<CoalaCore*>(model))
@@ -345,7 +353,7 @@ int main(int args, char** argv)
modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, globalParameters);
model = 0;
- string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
+ string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, 1);
ApplicationTools::displayResult("Likelihood recursion", recursion);
if (recursion == "simple")
{
@@ -366,7 +374,7 @@ int main(int args, char** argv)
}
else if (nhOpt == "general")
{
- modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, sites, bppml.getParams());
+ modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, gCode.get(), sites, bppml.getParams());
if (modelSet->getModel(0)->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
if (modelSet->getNumberOfStates() >= 2 * modelSet->getAlphabet()->getSize())
{
@@ -378,7 +386,7 @@ int main(int args, char** argv)
rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
}
- string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
+ string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, 1);
ApplicationTools::displayResult("Likelihood recursion", recursion);
if (recursion == "simple")
{
@@ -402,7 +410,7 @@ int main(int args, char** argv)
delete tree;
//Listing parameters
- string paramNameFile = ApplicationTools::getAFilePath("output.parameter_names.file", bppml.getParams(), false, false);
+ string paramNameFile = ApplicationTools::getAFilePath("output.parameter_names.file", bppml.getParams(), false, false, "", true, "none", 1);
if (paramNameFile != "none") {
ApplicationTools::displayResult("List parameters to", paramNameFile);
ofstream pnfile(paramNameFile.c_str(), ios::out);
@@ -435,16 +443,16 @@ int main(int args, char** argv)
if (isinf(logL))
{
ApplicationTools::displayError("!!! Unexpected initial likelihood == 0.");
- CodonAlphabet *pca = dynamic_cast<CodonAlphabet*>(alphabet);
- if (pca) {
+ if (codonAlphabet)
+ {
bool f = false;
size_t s;
for (size_t i = 0; i < sites->getNumberOfSites(); i++) {
if (isinf(tl->getLogLikelihoodForASite(i))) {
- const Site& site=sites->getSite(i);
+ const Site& site = sites->getSite(i);
s = site.size();
for (size_t j = 0; j < s; j++) {
- if (pca->isStop(site.getValue(j))) {
+ if (gCode->isStop(site.getValue(j))) {
(*ApplicationTools::error << "Stop Codon at site " << site.getPosition() << " in sequence " << sites->getSequence(j).getName()).endLine();
f = true;
}
@@ -454,12 +462,15 @@ int main(int args, char** argv)
if (f)
exit(-1);
}
- bool removeSaturated = ApplicationTools::getBooleanParameter("input.sequence.remove_saturated_sites", bppml.getParams(), false, "", true, false);
+ bool removeSaturated = ApplicationTools::getBooleanParameter("input.sequence.remove_saturated_sites", bppml.getParams(), false, "", true, 1);
if (!removeSaturated) {
- ApplicationTools::displayError("!!! Looking at each site:");
- for (unsigned int i = 0; i < sites->getNumberOfSites(); i++) {
- (*ApplicationTools::error << "Site " << sites->getSite(i).getPosition() << "\tlog likelihood = " << tl->getLogLikelihoodForASite(i)).endLine();
+ ofstream debug ("DEBUG_likelihoods.txt", ios::out);
+ for (size_t i = 0; i < sites->getNumberOfSites(); i++)
+ {
+ debug << "Position " << sites->getSite(i).getPosition() << " = " << tl->getLogLikelihoodForASite(i) << endl;
}
+ debug.close();
+ ApplicationTools::displayError("!!! Site-specific likelihood have been written in file DEBUG_likelihoods.txt .");
ApplicationTools::displayError("!!! 0 values (inf in log) may be due to computer overflow, particularily if datasets are big (>~500 sequences).");
ApplicationTools::displayError("!!! You may want to try input.sequence.remove_saturated_sites = yes to ignore positions with likelihood 0.");
exit(1);
@@ -476,9 +487,8 @@ int main(int args, char** argv)
tl->initialize();
logL = tl->getValue();
if (isinf(logL)) {
- ApplicationTools::displayError("This should not happen. Exiting now.");
- exit(1);
- }
+ throw Exception("Likelihood is still 0 after saturated sites are removed! Looks like a bug...");
+ }
ApplicationTools::displayResult("Initial log likelihood", TextTools::toString(-logL, 15));
}
}
@@ -506,7 +516,7 @@ int main(int args, char** argv)
PhylogeneticsApplicationTools::checkEstimatedParameters(tl->getParameters());
// Write parameters to file:
- string parametersFile = ApplicationTools::getAFilePath("output.estimates", bppml.getParams(), false, false);
+ string parametersFile = ApplicationTools::getAFilePath("output.estimates", bppml.getParams(), false, false, "none", 1);
ApplicationTools::displayResult("Output estimates to file", parametersFile);
if (parametersFile != "none")
{
@@ -593,7 +603,7 @@ int main(int args, char** argv)
// Bootstrap:
- string optimizeClock = ApplicationTools::getStringParameter("optimization.clock", bppml.getParams(), "None", "", true, false);
+ string optimizeClock = ApplicationTools::getStringParameter("optimization.clock", bppml.getParams(), "None", "", true, 1);
if (nbBS > 0 && optimizeClock != "None")
{
ApplicationTools::displayError("Bootstrap is not supported with clock trees.");
@@ -601,9 +611,9 @@ int main(int args, char** argv)
if (nbBS > 0 && optimizeClock == "None")
{
ApplicationTools::displayResult("Number of bootstrap samples", TextTools::toString(nbBS));
- bool approx = ApplicationTools::getBooleanParameter("bootstrap.approximate", bppml.getParams(), true);
- ApplicationTools::displayResult("Use approximate bootstrap", TextTools::toString(approx ? "yes" : "no"));
- bool bootstrapVerbose = ApplicationTools::getBooleanParameter("bootstrap.verbose", bppml.getParams(), false, "", true, false);
+ bool approx = ApplicationTools::getBooleanParameter("bootstrap.approximate", bppml.getParams(), true, "", true, 2);
+ ApplicationTools::displayBooleanResult("Use approximate bootstrap", approx);
+ bool bootstrapVerbose = ApplicationTools::getBooleanParameter("bootstrap.verbose", bppml.getParams(), false, "", true, 2);
const Tree* initTree = tree;
if (!bootstrapVerbose) bppml.getParam("optimization.verbose") = "0";
diff --git a/bppSuite/bppMixedLikelihoods.cpp b/bppSuite/bppMixedLikelihoods.cpp
index ff73b6f..572d249 100644
--- a/bppSuite/bppMixedLikelihoods.cpp
+++ b/bppSuite/bppMixedLikelihoods.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS
+ Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to estimate
phylogenies and evolutionary parameters from a dataset according to
@@ -91,7 +91,8 @@ int main(int args, char** argv)
{
cout << "******************************************************************" << endl;
cout << "* Bio++ Computation of site likelihoods inside mixed models *" << endl;
- cout << "* Author: L. Guéguen Created on: 12/11/12 *" << endl;
+ cout << "* Version 2.2.0. *" << endl;
+ cout << "* Author: L. Guéguen Last Modif.: 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -107,6 +108,14 @@ int main(int args, char** argv)
bppmixedlikelihoods.startTimer();
Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppmixedlikelihoods.getParams(), "", false);
+ auto_ptr<GeneticCode> gCode;
+ CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
+ if (codonAlphabet) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppmixedlikelihoods.getParams(), "Standard", "", true, true);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
// get the data
@@ -127,13 +136,13 @@ int main(int args, char** argv)
string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppmixedlikelihoods.getParams(), "no", "", true, false);
ApplicationTools::displayResult("Heterogeneous model", nhOpt);
- MixedSubstitutionModel* model = 0;
+ MixedSubstitutionModel* model = 0;
MixedSubstitutionModelSet* modelSet = 0;
- DiscreteDistribution* rDist = 0;
+ DiscreteDistribution* rDist = 0;
if (nhOpt == "no")
{
- model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppmixedlikelihoods.getParams()));
+ model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams()));
if (model == 0)
{
cout << "Model is not a Mixed model" << endl;
@@ -154,7 +163,7 @@ int main(int args, char** argv)
}
else if (nhOpt == "one_per_branch")
{
- model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppmixedlikelihoods.getParams()));
+ model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams()));
if (model == 0)
{
cout << "Model is not a Mixed model" << endl;
@@ -179,7 +188,7 @@ int main(int args, char** argv)
rateFreqs = vector<double>(n, 1. / (double)n); // Equal rates assumed for now, may be changed later (actually, in the most general case,
// we should assume a rate distribution for the root also!!!
}
- FrequenciesSet* rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, sites, bppmixedlikelihoods.getParams(), rateFreqs);
+ FrequenciesSet* rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams(), rateFreqs);
vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppmixedlikelihoods.getParams(), ',', "");
modelSet = dynamic_cast<MixedSubstitutionModelSet*>(SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, globalParameters));
model = 0;
@@ -187,7 +196,7 @@ int main(int args, char** argv)
}
else if (nhOpt == "general")
{
- modelSet = dynamic_cast<MixedSubstitutionModelSet*>(PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, sites, bppmixedlikelihoods.getParams()));
+ modelSet = dynamic_cast<MixedSubstitutionModelSet*>(PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams()));
if (modelSet == 0)
{
cout << "Missing a Mixed model" << endl;
@@ -283,14 +292,19 @@ int main(int args, char** argv)
bool fromBiblio=false;
- //this is an uglly fix because getMixedModel is private... can't we use clone instead or const everywhere?
const AbstractBiblioMixedSubstitutionModel* ptmp = dynamic_cast<const AbstractBiblioMixedSubstitutionModel*>(p0);
if (ptmp) {
p0 = ptmp->getMixedModel().clone();
+ if (nhOpt == "no")
+ model = p0;
+ else {
+ modelSet->replaceModel(nummodel-1, p0);
+ modelSet->isFullySetUpFor(*tree);
+ }
fromBiblio=true;
}
-
+ //////////////////////////////////////////////////
// Case of a MixtureOfSubstitutionModels
MixtureOfSubstitutionModels* pMSM = dynamic_cast<MixtureOfSubstitutionModels*>(p0);
@@ -350,29 +364,38 @@ int main(int args, char** argv)
DataTable::write(*rates, out, "\t");
}
+ //////////////////////////////////////////////////
// Case of a MixtureOfASubstitutionModel
else
{
- if (fromBiblio)
- {
- ApplicationTools::displayError("!!! Not available for models parametrized upon bibliography.");
- ApplicationTools::displayError("!!! Please convert into MixedModel declaration.");
- exit(-1);
- }
-
MixtureOfASubstitutionModel* pMSM2 = dynamic_cast<MixtureOfASubstitutionModel*>(p0);
if (pMSM2 != NULL)
{
+ size_t nummod = pMSM2->getNumberOfModels();
+ if (fromBiblio && (parname == ""))
+ {
+ ParameterList pl=pMSM2->getParameters();
+
+ for (size_t i2 = 0; i2 < pl.size(); i2++)
+ {
+ string pl2n = pl[i2].getName();
+ string par2 = pl2n.substr(0,pl2n.find("_")) + "_1";
+ Vint vnmod = pMSM2->getSubmodelNumbers(par2);
+ if (vnmod.size() == 1) {
+ parname=pl2n.substr(0,pl2n.find("_"));
+ break;
+ }
+ }
+ }
+
if (parname == "")
{
ApplicationTools::displayError("Argument likelihoods.parameter_name is required.");
exit(-1);
}
- size_t nummod = pMSM2->getNumberOfModels();
-
- vector<vector<int> > vvnmod;
+ vector< Vint > vvnmod;
size_t i2 = 0;
while (i2 < nummod)
{
@@ -394,9 +417,9 @@ int main(int args, char** argv)
for (size_t i = 0; i < nbcl; i++)
{
vector<double> vprob2;
- for (unsigned int j = 0; j < vvnmod[i].size(); j++)
+ for (size_t j = 0; j < vvnmod[i].size(); j++)
{
- vprob2.push_back(vprob[vvnmod[i][j]]);
+ vprob2.push_back(vprob[static_cast<size_t>(vvnmod[i][j])]);
}
vvprob.push_back(vprob2);
@@ -408,18 +431,15 @@ int main(int args, char** argv)
Vdouble dval;
for (unsigned int i = 0; i < nbcl; i++)
- {
- SubstitutionModel* pSM = pMSM2->getNModel(vvnmod[i][0]);
- double valPar = pSM->getParameterValue(pSM->getParameterNameWithoutNamespace(parname));
- dval.push_back(valPar);
- colNames.push_back("Ll_" + parname + "=" + TextTools::toString(valPar));
- }
+ {
+ SubstitutionModel* pSM = pMSM2->getNModel(static_cast<size_t>(vvnmod[i][0]));
+ double valPar = pSM->getParameterValue(pSM->getParameterNameWithoutNamespace(parname));
+ dval.push_back(valPar);
+ colNames.push_back("Ll_" + parname + "=" + TextTools::toString(valPar));
+ }
for (unsigned int i = 0; i < nbcl; i++)
- {
- SubstitutionModel* pSM = pMSM2->getNModel(vvnmod[i][0]);
- double valPar = pSM->getParameterValue(pSM->getParameterNameWithoutNamespace(parname));
- colNames.push_back("Pr_" + parname + "=" + TextTools::toString(valPar));
- }
+ colNames.push_back("Pr_" + parname + "=" + TextTools::toString(dval[i]));
+
colNames.push_back("mean");
DataTable* rates = new DataTable(nSites, colNames.size());
@@ -434,14 +454,18 @@ int main(int args, char** argv)
VVdouble vvd;
- for (unsigned int i = 0; i < nbcl; i++)
+
+ vector<double> vRates = pMSM2->getVRates();
+
+ for (size_t i = 0; i < nbcl; ++i)
{
string par2 = parname + "_" + TextTools::toString(i + 1);
- for (unsigned int j = 0; j < nummod; j++)
+
+ for (unsigned int j = 0; j < nummod; ++j)
pMSM2->setNProbability(j, 0);
- for (unsigned int j = 0; j < vvprob[i].size(); j++)
- pMSM2->setNProbability(vvnmod[i][j], vvprob[i][j] / vsprob[i]);
+ for (size_t j = 0; j < vvprob[i].size(); ++j)
+ pMSM2->setNProbability(static_cast<size_t>(vvnmod[i][j]), vvprob[i][j] / vsprob[i]);
if (tl)
delete tl;
@@ -461,7 +485,7 @@ int main(int args, char** argv)
vvd.push_back(vd);
ApplicationTools::displayMessage("\n");
- ApplicationTools::displayMessage("Parameter " + par2 + ":");
+ ApplicationTools::displayMessage("Parameter " + par2 + "=" + TextTools::toString(dval[i]) + " with rate=" + TextTools::toString(vRates[i]));
ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15));
ApplicationTools::displayResult("Probability", TextTools::toString(vsprob[i], 15));
diff --git a/bppSuite/bppPars.cpp b/bppSuite/bppPars.cpp
index 4065f48..29621f0 100644
--- a/bppSuite/bppPars.cpp
+++ b/bppSuite/bppPars.cpp
@@ -80,9 +80,9 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Parsimony Methods, version 0.2.0 *" << endl;
+ cout << "* Bio++ Parsimony Methods, version 2.2.0 *" << endl;
cout << "* Author: J. Dutheil Created 05/05/07 *" << endl;
- cout << "* Last Modif. 13/06/12 *" << endl;
+ cout << "* Last Modif. 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
diff --git a/bppSuite/bppPhyloSampler.cpp b/bppSuite/bppPhyloSampler.cpp
index 9737a4b..d31085d 100755
--- a/bppSuite/bppPhyloSampler.cpp
+++ b/bppSuite/bppPhyloSampler.cpp
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS
+Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to estimate
phylogenies and evolutionary parameters from a dataset according to
@@ -51,7 +51,7 @@ using namespace std;
#include <Bpp/Numeric/DataTable.h>
#include <Bpp/Numeric/Random/RandomTools.h>
-// From SeqLib:
+// From bpp-seq:
#include <Bpp/Seq/Alphabet.all>
#include <Bpp/Seq/Container.all>
#include <Bpp/Seq/Io.all>
@@ -59,7 +59,7 @@ using namespace std;
#include <Bpp/Seq/SequenceTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
-// From PhylLib:
+// From bpp-phyl:
#include <Bpp/Phyl/Tree.h>
#include <Bpp/Phyl/App/PhylogeneticsApplicationTools.h>
#include <Bpp/Phyl/Io/PhylipDistanceMatrixFormat.h>
@@ -103,8 +103,8 @@ class Test {
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Phylogenetic Sampler, version 0.2 *" << endl;
- cout << "* Author: J. Dutheil Last Modif. 03/06/10 *" << endl;
+ cout << "* Bio++ Phylogenetic Sampler, version 2.2.0. *" << endl;
+ cout << "* Author: J. Dutheil Last Modif. 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -126,17 +126,18 @@ int main(int args, char ** argv)
string inputMethod = ApplicationTools::getStringParameter("input.method", bppphysamp.getParams(), "tree");
ApplicationTools::displayResult("Input method", inputMethod);
- DistanceMatrix* dist = 0;
+ auto_ptr< DistanceMatrix > dist;
+ auto_ptr< TreeTemplate<Node> > tree;
if(inputMethod == "tree")
{
- Tree* tree = PhylogeneticsApplicationTools::getTree(bppphysamp.getParams());
- dist = TreeTemplateTools::getDistanceMatrix(*tree);
+ tree.reset(dynamic_cast<TreeTemplate<Node> *>(PhylogeneticsApplicationTools::getTree(bppphysamp.getParams())));
+ dist.reset(TreeTemplateTools::getDistanceMatrix(*tree));
}
else if(inputMethod == "matrix")
{
string distPath = ApplicationTools::getAFilePath("input.matrix", bppphysamp.getParams(), true, true);
PhylipDistanceMatrixFormat matIO;
- dist = matIO.read(distPath);
+ dist.reset(matIO.read(distPath));
}
else throw Exception("Unknown input method: " + inputMethod);
@@ -208,7 +209,7 @@ int main(int args, char ** argv)
//Remove sequence in list:
size_t pos = VectorTools::which(seqNames, dist->getName(rm));
ApplicationTools::displayResult("Remove sequence", seqNames[pos]);
- seqNames.erase(seqNames.begin() + pos);
+ seqNames.erase(seqNames.begin() + static_cast<ptrdiff_t>(pos));
//Ignore all distances from this sequence:
remove_if(distances.begin(), distances.end(), Test(rm));
@@ -241,7 +242,7 @@ int main(int args, char ** argv)
//Remove sequence in list:
size_t pos = VectorTools::which(seqNames, dist->getName(rm));
ApplicationTools::displayResult("Remove sequence", seqNames[pos]);
- seqNames.erase(seqNames.begin() + pos);
+ seqNames.erase(seqNames.begin() + static_cast<ptrdiff_t>(pos));
//Ignore all distances from this sequence:
remove_if(distances.begin(), distances.end(), Test(rm));
@@ -257,6 +258,14 @@ int main(int args, char ** argv)
SequenceApplicationTools::writeAlignmentFile(asc, bppphysamp.getParams());
+ //Write tree file:
+ if (ApplicationTools::getStringParameter("output.tree.file", bppphysamp.getParams(), "None") != "None") {
+ for (size_t i = 0; i < seqNames.size(); ++i) {
+ TreeTemplateTools::dropLeaf(*tree, seqNames[i]);
+ }
+ PhylogeneticsApplicationTools::writeTree(*tree, bppphysamp.getParams(), "output.", "", true, true, false);
+ }
+
bppphysamp.done();
}
catch (exception& e)
diff --git a/bppSuite/bppReRoot.cpp b/bppSuite/bppReRoot.cpp
index a2acf93..5d1cdb3 100644
--- a/bppSuite/bppReRoot.cpp
+++ b/bppSuite/bppReRoot.cpp
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS
+Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to estimate
phylogenies and evolutionary parameters from a dataset according to
@@ -78,9 +78,9 @@ int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ ReRoot, version 0.1.3 *" << endl;
+ cout << "* Bio++ ReRoot, version 2.2.0 *" << endl;
cout << "* Author: C. Scornavacca Created 15/01/08 *" << endl;
- cout << "* Last Modif. 08/08/09 *" << endl;
+ cout << "* Last Modif. 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -261,10 +261,10 @@ int main(int args, char ** argv)
{
sonUpper = (tree->getRootNode())->getSon(0);
}
- int ident = TreeTools::getMaxId(* low, low->getRootId());
+ int ident = TreeTools::getMaxId(*low, low->getRootId());
vector <Node *> nodesTemp= TreeTemplateTools::getNodes( * sonUpper);
- for(unsigned int F = 0; F < nodesTemp.size(); F++)
- ( * nodesTemp[F]).setId(ident + F + 1);
+ for(size_t F = 0; F < nodesTemp.size(); F++)
+ nodesTemp[F]->setId(ident + static_cast<int>(F + 1));
low->getRootNode()->addSon(sonUpper);
tree = low;
}
@@ -306,9 +306,7 @@ int main(int args, char ** argv)
ApplicationTools::displayTaskDone();
//Write rooted trees:
-
-
- for (unsigned int i = 0; i < trees.size(); i++) delete trees[i];
+ for (size_t i = 0; i < trees.size(); i++) delete trees[i];
bppreroot.done();
}
diff --git a/bppSuite/bppSeqGen.cpp b/bppSuite/bppSeqGen.cpp
index 9e22698..8a888d8 100644
--- a/bppSuite/bppSeqGen.cpp
+++ b/bppSuite/bppSeqGen.cpp
@@ -44,6 +44,7 @@ knowledge of the CeCILL license and that you accept its terms.
using namespace std;
+// From bpp-core:
#include <Bpp/App/BppApplication.h>
#include <Bpp/App/ApplicationTools.h>
#include <Bpp/Io/FileTools.h>
@@ -51,14 +52,18 @@ using namespace std;
#include <Bpp/Numeric/Prob/DiscreteDistribution.h>
#include <Bpp/Numeric/Prob/ConstantDistribution.h>
#include <Bpp/Numeric/DataTable.h>
+#include <Bpp/Numeric/Random/RandomTools.h>
+#include <Bpp/Text/KeyvalTools.h>
+#include <Bpp/App/NumCalcApplicationTools.h>
-// From SeqLib:
+// From bpp-seq:
#include <Bpp/Seq/Alphabet/Alphabet.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
#include <Bpp/Seq/Container/SequenceContainerTools.h>
#include <Bpp/Seq/App/SequenceApplicationTools.h>
+#include <Bpp/Seq/SequenceTools.h>
-// From PhylLib:
+// From bpp-phyl:
#include <Bpp/Phyl/TreeTemplate.h>
#include <Bpp/Phyl/App/PhylogeneticsApplicationTools.h>
#include <Bpp/Phyl/Simulation.all>
@@ -129,11 +134,11 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Sequence Generator, version 1.2.0 *" << endl;
+ cout << "* Bio++ Sequence Generator, version 2.2.0 *" << endl;
cout << "* *" << endl;
cout << "* Authors: J. Dutheil *" << endl;
- cout << "* B. Boussau Last Modif. 29/01/13 *" << endl;
- cout << "* L. Gu�guen *" << endl;
+ cout << "* B. Boussau Last Modif. 25/09/14 *" << endl;
+ cout << "* L. Gueguen *" << endl;
cout << "* M. Groussin *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -150,7 +155,21 @@ int main(int args, char ** argv)
bppseqgen.startTimer();
Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppseqgen.getParams(), "", false);
+ auto_ptr<GeneticCode> gCode;
+ CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
+ if (codonAlphabet) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppseqgen.getParams(), "Standard", "", true, true);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
+
+
+ /**************************/
+ /* Trees */
+ /**************************/
+
vector<Tree*> trees;
vector<double> positions;
string inputTrees = ApplicationTools::getStringParameter("input.tree.method", bppseqgen.getParams(), "single", "", true, false);
@@ -190,9 +209,12 @@ int main(int args, char ** argv)
}
else throw Exception("Unknown input.tree.method option: " + inputTrees);
- string infosFile = ApplicationTools::getAFilePath("input.infos", bppseqgen.getParams(), false, true);
- ApplicationTools::displayResult("Site information", infosFile);
+ /**********************************/
+ /* Models */
+ /**********************************/
+
+
string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppseqgen.getParams(), "no", "", true, false);
ApplicationTools::displayResult("Heterogeneous model", nhOpt);
@@ -201,8 +223,8 @@ int main(int args, char ** argv)
//Homogeneous case:
if (nhOpt == "no")
{
- SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, 0, bppseqgen.getParams());
- FrequenciesSet* fSet = new FixedFrequenciesSet(model->getAlphabet(), model->getFrequencies());
+ SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), 0, bppseqgen.getParams());
+ FrequenciesSet* fSet = new FixedFrequenciesSet(model->getStateMap().clone(), model->getFrequencies());
modelSet = SubstitutionModelSetTools::createHomogeneousModelSet(model, fSet, trees[0]);
}
//Galtier-Gouy case:
@@ -213,13 +235,13 @@ int main(int args, char ** argv)
SubstitutionModel* model = 0;
string modelName = ApplicationTools::getStringParameter("model", bppseqgen.getParams(), "");
if (!TextTools::hasSubstring(modelName,"COaLA"))
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, 0, bppseqgen.getParams());
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), 0, bppseqgen.getParams());
else
{
//COaLA model
VectorSiteContainer* allSitesAln = 0;
allSitesAln = SequenceApplicationTools::getSiteContainer(alphabet, bppseqgen.getParams());
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, allSitesAln, bppseqgen.getParams());
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), allSitesAln, bppseqgen.getParams());
}
vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppseqgen.getParams(), ',', "");
@@ -231,7 +253,7 @@ int main(int args, char ** argv)
rateFreqs = vector<double>(n, 1./static_cast<double>(n)); // Equal rates assumed for now, may be changed later (actually, in the most general case,
// we should assume a rate distribution for the root also!!!
}
- FrequenciesSet* rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, 0, bppseqgen.getParams(), rateFreqs);
+ FrequenciesSet* rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, gCode.get(), 0, bppseqgen.getParams(), rateFreqs);
string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", bppseqgen.getParams(), "Full(init=observed)");
if (freqDescription.substr(0,10) == "MVAprotein")
{
@@ -247,13 +269,13 @@ int main(int args, char ** argv)
throw Exception("Multiple input trees cannot be used with non-homogeneous simulations.");
string modelName = ApplicationTools::getStringParameter("model1",bppseqgen.getParams(),"");
if (!TextTools::hasSubstring(modelName,"COaLA"))
- modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, 0, bppseqgen.getParams());
+ modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, gCode.get(), 0, bppseqgen.getParams());
else
{
//COaLA model
VectorSiteContainer* allSitesAln = 0;
allSitesAln = SequenceApplicationTools::getSiteContainer(alphabet, bppseqgen.getParams());
- modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, allSitesAln, bppseqgen.getParams());
+ modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, gCode.get(), allSitesAln, bppseqgen.getParams());
}
}
else throw Exception("Unknown non-homogeneous option: " + nhOpt);
@@ -261,28 +283,174 @@ int main(int args, char ** argv)
if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet))
throw Exception("Non-homogeneous mixed substitution sequence generation not implemented, sorry!");
+ /*******************************************/
+ /* Starting sequence */
+ /*******************************************/
+
DiscreteDistribution* rDist = 0;
NonHomogeneousSequenceSimulator* seqsim = 0;
SiteContainer* sites = 0;
+ size_t nbSites = 0;
+
+ string infosFile = ApplicationTools::getAFilePath("input.infos", bppseqgen.getParams(), false, true);
+
+ bool withStates = false;
+ bool withRates = false;
+ vector<size_t> states;
+ vector<double> rates;
+
if (infosFile != "none")
{
+ ApplicationTools::displayResult("Site information", infosFile);
ifstream in(infosFile.c_str());
DataTable* infos = DataTable::read(in, "\t");
- rDist = new ConstantRateDistribution();
- size_t nbSites = infos->getNumberOfRows();
+ nbSites = infos->getNumberOfRows();
ApplicationTools::displayResult("Number of sites", TextTools::toString(nbSites));
- vector<double> rates(nbSites);
- vector<string> ratesStrings = infos->getColumn(string("pr"));
- for (size_t i = 0; i < nbSites; i++)
+ string rateCol = ApplicationTools::getStringParameter("input.infos.rates", bppseqgen.getParams(), "pr", "", true, true);
+ string stateCol = ApplicationTools::getStringParameter("input.infos.states", bppseqgen.getParams(), "none", "", true, true);
+ withRates = rateCol != "none";
+ withStates = stateCol != "none";
+
+ if (withRates)
+ {
+ rDist = new ConstantRateDistribution();
+ rates.resize(nbSites);
+ vector<string> ratesStrings = infos->getColumn(rateCol);
+ for (size_t i = 0; i < nbSites; i++)
+ {
+ rates[i] = TextTools::toDouble(ratesStrings[i]);
+ }
+ }
+ if (withStates)
+ {
+ vector<string> ancestralStates = infos->getColumn(stateCol);
+ states.resize(nbSites);
+ for (size_t i = 0; i < nbSites; i++)
+ {
+ int alphabetState = alphabet->charToInt(ancestralStates[i]);
+ //If a generic character is provided, we pick one state randomly from the possible ones:
+ if (alphabet->isUnresolved(alphabetState))
+ alphabetState = RandomTools::pickOne<int>(alphabet->getAlias(alphabetState));
+ states[i] = RandomTools::pickOne<size_t>(modelSet->getModelStates(alphabetState));
+ }
+
+ string siteSet = ApplicationTools::getStringParameter("input.site.selection", bppseqgen.getParams(), "none", "", true, 1);
+
+ if (siteSet != "none")
+ {
+ vector<size_t> vSite;
+ try {
+ vector<int> vSite1 = NumCalcApplicationTools::seqFromString(siteSet);
+ for (size_t i = 0; i < vSite1.size(); ++i){
+ int x = (vSite1[i] >= 0 ? vSite1[i] : static_cast<int>(nbSites) + vSite1[i]);
+ if (x >= 0)
+ vSite.push_back(static_cast<size_t>(x));
+ else
+ throw Exception("SequenceApplicationTools::getSiteContainer(). Incorrect negative index: " + TextTools::toString(x));
+ }
+ }
+ catch (Exception& e)
+ {
+ string seln;
+ map<string, string> selArgs;
+ KeyvalTools::parseProcedure(siteSet, seln, selArgs);
+ if (seln == "Sample")
+ {
+ size_t n = ApplicationTools::getParameter<size_t>("n", selArgs, nbSites, "", true, 1);
+ bool replace = ApplicationTools::getBooleanParameter("replace", selArgs, false, "", true, 1);
+
+ vSite.resize(n);
+ vector<size_t> vPos;
+ for (size_t p = 0; p < nbSites; ++p)
+ vPos.push_back(p);
+
+ RandomTools::getSample(vPos, vSite, replace);
+ }
+ }
+
+ nbSites = vSite.size();
+
+ vector<size_t> newStates(nbSites);
+ vector<double> newRates(nbSites);
+
+ for (size_t ni = 0; ni < nbSites; ++ni)
+ {
+ newStates[ni] = states[vSite[ni]];
+ newRates[ni] = rates[vSite[ni]];
+ }
+
+ states = newStates;
+ rates = newRates;
+ }
+ }
+ }
+ else
+ {
+ try {
+ VectorSiteContainer* allSeq = 0;
+ allSeq = SequenceApplicationTools::getSiteContainer(alphabet, bppseqgen.getParams());
+
+ if (allSeq->getNumberOfSequences() > 0)
+ {
+ Sequence* pseq = SequenceTools::getSequenceWithCompleteSites(allSeq->getSequence(0));
+
+ nbSites = pseq->size();
+ states.resize(nbSites);
+ withStates = true;
+
+ for (size_t i = 0; i < nbSites; ++i) {
+ states[i] = RandomTools::pickOne(modelSet->getModelStates((*pseq)[i]));
+ }
+ ApplicationTools::displayResult("Number of sites", TextTools::toString(nbSites));
+
+ delete pseq;
+ }
+ }
+ catch (Exception& e)
{
- rates[i] = TextTools::toDouble(ratesStrings[i]);
}
+
+ }
+
+ if (rDist == 0)
+ {
+ if (modelSet->getNumberOfStates() > modelSet->getAlphabet()->getSize())
+ {
+ //Markov-modulated Markov model!
+ rDist = new ConstantRateDistribution();
+ }
+ else
+ {
+ rDist = PhylogeneticsApplicationTools::getRateDistribution(bppseqgen.getParams());
+ }
+ }
+
+ if (nbSites == 0)
+ nbSites = ApplicationTools::getParameter<size_t>("number_of_sites", bppseqgen.getParams(), 100);
+
+ /*******************/
+ /* Simulations */
+ /*******************/
+
+ if (withStates)
+ {
if (trees.size() == 1)
{
seqsim = new NonHomogeneousSequenceSimulator(modelSet, rDist, trees[0]);
ApplicationTools::displayTask("Perform simulations");
- sites = SequenceSimulationTools::simulateSites(*seqsim, rates);
+ if (withRates)
+ if (withStates)
+ sites = SequenceSimulationTools::simulateSites(*seqsim, rates, states);
+ else
+ sites = SequenceSimulationTools::simulateSites(*seqsim, rates);
+ else
+ if (withStates){
+ sites = SequenceSimulationTools::simulateSites(*seqsim, states);
+ }
+ else
+ throw Exception("Error! Info file should contain either site specific rates of ancestral states or both.");
+
delete seqsim;
}
else
@@ -290,24 +458,52 @@ int main(int args, char ** argv)
ApplicationTools::displayTask("Perform simulations", true);
ApplicationTools::displayGauge(0, trees.size() - 1, '=');
seqsim = new NonHomogeneousSequenceSimulator(modelSet, rDist, trees[0]);
- unsigned int previousPos = 0;
- unsigned int currentPos = static_cast<unsigned int>(round(positions[1]*static_cast<double>(nbSites)));
- vector<double> tmpRates(rates.begin() + previousPos, rates.begin() + currentPos);
- SequenceContainer* tmpCont1 = SequenceSimulationTools::simulateSites(*seqsim, tmpRates);
+ ptrdiff_t previousPos = 0;
+ ptrdiff_t currentPos = static_cast<ptrdiff_t>(round(positions[1]*static_cast<double>(nbSites)));
+ vector<double> tmpRates;
+ if (withRates)
+ tmpRates = vector<double>(rates.begin() + previousPos, rates.begin() + currentPos);
+ vector<size_t> tmpStates;
+ if (withStates)
+ tmpStates = vector<size_t>(states.begin() + previousPos, states.begin() + currentPos);
+ SequenceContainer* tmpCont1 = 0;
+ if (withRates)
+ if (withStates)
+ tmpCont1 = SequenceSimulationTools::simulateSites(*seqsim, tmpRates, tmpStates);
+ else
+ tmpCont1 = SequenceSimulationTools::simulateSites(*seqsim, tmpRates);
+ else
+ if (withStates)
+ tmpCont1 = SequenceSimulationTools::simulateSites(*seqsim, tmpStates);
+ else
+ throw Exception("Error! Info file should contain either site specific rates of ancestral states or both.");
previousPos = currentPos;
delete seqsim;
-
- for(unsigned int i = 1; i < trees.size(); i++)
+
+ for(size_t i = 1; i < trees.size(); i++)
{
ApplicationTools::displayGauge(i, trees.size() - 1, '=');
seqsim = new NonHomogeneousSequenceSimulator(modelSet, rDist, trees[i]);
- currentPos = static_cast<unsigned int>(round(positions[i+1]) * static_cast<double>(nbSites));
- tmpRates = vector<double>(rates.begin() + previousPos + 1, rates.begin() + currentPos);
- SequenceContainer* tmpCont2 = SequenceSimulationTools::simulateSites(*seqsim, tmpRates);
+ currentPos = static_cast<ptrdiff_t>(round(positions[i+1]) * static_cast<double>(nbSites));
+ if (withRates)
+ tmpRates = vector<double>(rates.begin() + previousPos + 1, rates.begin() + currentPos);
+ if (withStates)
+ tmpStates = vector<size_t>(states.begin() + previousPos + 1, states.begin() + currentPos);
+ SequenceContainer* tmpCont2 = 0;
+ if (withRates)
+ if (withStates)
+ tmpCont2 = SequenceSimulationTools::simulateSites(*seqsim, tmpRates, tmpStates);
+ else
+ tmpCont2 = SequenceSimulationTools::simulateSites(*seqsim, tmpRates);
+ else
+ if (withStates)
+ tmpCont2 = SequenceSimulationTools::simulateSites(*seqsim, tmpStates);
+ else
+ throw Exception("Error! Info file should contain either site specific rates of ancestral states or both.");
previousPos = currentPos;
delete seqsim;
VectorSequenceContainer* mergedCont = new VectorSequenceContainer(alphabet);
- SequenceContainerTools::merge(*tmpCont1, *tmpCont2, *reinterpret_cast<SequenceContainer*>(mergedCont));
+ SequenceContainerTools::merge(*tmpCont1, *tmpCont2, *mergedCont);
delete tmpCont1;
delete tmpCont2;
tmpCont1 = mergedCont;
@@ -328,8 +524,7 @@ int main(int args, char ** argv)
{
rDist = PhylogeneticsApplicationTools::getRateDistribution(bppseqgen.getParams());
}
-
- size_t nbSites = ApplicationTools::getParameter<size_t>("number_of_sites", bppseqgen.getParams(), 100);
+
if (trees.size() == 1)
{
seqsim = new NonHomogeneousSequenceSimulator(modelSet, rDist, trees[0]);
@@ -344,12 +539,12 @@ int main(int args, char ** argv)
ApplicationTools::displayGauge(0, trees.size() - 1, '=');
seqsim = new NonHomogeneousSequenceSimulator(modelSet, rDist, trees[0]);
size_t previousPos = 0;
- size_t currentPos = static_cast<unsigned int>(round(positions[1]*static_cast<double>(nbSites)));
+ size_t currentPos = static_cast<unsigned int>(round(positions[1] * static_cast<double>(nbSites)));
SequenceContainer* tmpCont1 = seqsim->simulate(currentPos - previousPos);
previousPos = currentPos;
delete seqsim;
- for (unsigned int i = 1; i < trees.size(); i++)
+ for (size_t i = 1; i < trees.size(); i++)
{
ApplicationTools::displayGauge(i, trees.size() - 1, '=');
seqsim = new NonHomogeneousSequenceSimulator(modelSet, rDist, trees[i]);
@@ -358,7 +553,7 @@ int main(int args, char ** argv)
previousPos = currentPos;
delete seqsim;
VectorSequenceContainer* mergedCont = new VectorSequenceContainer(alphabet);
- SequenceContainerTools::merge(*tmpCont1, *tmpCont2, *reinterpret_cast<SequenceContainer*>(mergedCont));
+ SequenceContainerTools::merge(*tmpCont1, *tmpCont2, *mergedCont);
delete tmpCont1;
delete tmpCont2;
tmpCont1 = mergedCont;
@@ -371,15 +566,15 @@ int main(int args, char ** argv)
// Write to file:
SequenceApplicationTools::writeAlignmentFile(*sites, bppseqgen.getParams());
-
+
delete alphabet;
- for (unsigned int i = 0; i < trees.size(); i++)
+ for (size_t i = 0; i < trees.size(); i++)
delete trees[i];
delete rDist;
bppseqgen.done();
-
- }
+
+}
catch (exception& e)
{
cout << e.what() << endl;
diff --git a/bppSuite/bppSeqMan.cpp b/bppSuite/bppSeqMan.cpp
index ea5c6af..7cc52e1 100644
--- a/bppSuite/bppSeqMan.cpp
+++ b/bppSuite/bppSeqMan.cpp
@@ -49,8 +49,9 @@ using namespace std;
#include <Bpp/Io/FileTools.h>
#include <Bpp/Text/KeyvalTools.h>
-// From SeqLib:
+// From bpp-seq:
#include <Bpp/Seq/SiteTools.h>
+#include <Bpp/Seq/CodonSiteTools.h>
#include <Bpp/Seq/Alphabet/Alphabet.h>
#include <Bpp/Seq/Alphabet/AlphabetTools.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
@@ -60,7 +61,7 @@ using namespace std;
#include <Bpp/Seq/SequenceTools.h>
#include <Bpp/Seq/GeneticCode.all>
-//From PhylLib:
+//From bpp-phyl:
#include <Bpp/Phyl/Tree.h>
#include <Bpp/Phyl/App/PhylogeneticsApplicationTools.h>
@@ -79,8 +80,8 @@ void help()
int main(int args, char** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Sequence Manipulator, version 0.6 *" << endl;
- cout << "* Author: J. Dutheil Last Modif. 21/12/11 *" << endl;
+ cout << "* Bio++ Sequence Manipulator, version 2.2.0. *" << endl;
+ cout << "* Author: J. Dutheil Last Modif. 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -97,6 +98,8 @@ int main(int args, char** argv)
// Get alphabet
Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppseqman.getParams(), "", false, true, true);
+ auto_ptr<GeneticCode> gCode;
+ CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
// Get sequences:
SequenceContainer* tmp = SequenceApplicationTools::getSequenceContainer(alphabet, bppseqman.getParams(), "", true, true);
@@ -106,7 +109,7 @@ int main(int args, char** argv)
// Perform manipulations
- vector<string> actions = ApplicationTools::getVectorParameter<string>("sequence.manip", bppseqman.getParams(), ',', "", "", false, false);
+ vector<string> actions = ApplicationTools::getVectorParameter<string>("sequence.manip", bppseqman.getParams(), ',', "", "", false, 1);
bool aligned = false;
@@ -124,7 +127,7 @@ int main(int args, char** argv)
{
OrderedSequenceContainer* sc = 0;
if (aligned) sc = new VectorSiteContainer(sequences->getAlphabet());
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(sequences->getAlphabet()));
+ else sc = new VectorSequenceContainer(sequences->getAlphabet());
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
Sequence* seq = SequenceTools::getComplement(sequences->getSequence(i));
@@ -143,7 +146,7 @@ int main(int args, char** argv)
{
OrderedSequenceContainer* sc = 0;
if (aligned) sc = new VectorSiteContainer(&AlphabetTools::RNA_ALPHABET);
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(&AlphabetTools::RNA_ALPHABET));
+ else sc = new VectorSequenceContainer(&AlphabetTools::RNA_ALPHABET);
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
Sequence* seq = SequenceTools::transcript(sequences->getSequence(i));
@@ -157,7 +160,7 @@ int main(int args, char** argv)
{
OrderedSequenceContainer* sc = 0;
if (aligned) sc = new VectorSiteContainer(&AlphabetTools::DNA_ALPHABET);
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(&AlphabetTools::DNA_ALPHABET));
+ else sc = new VectorSequenceContainer(&AlphabetTools::DNA_ALPHABET);
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
Sequence* seq = SequenceTools::reverseTranscript(sequences->getSequence(i));
@@ -186,7 +189,7 @@ int main(int args, char** argv)
else throw Exception("Cannot switch alphabet type, alphabet is not of type 'nucleic'.");
OrderedSequenceContainer* sc = 0;
if (aligned) sc = new VectorSiteContainer(alpha);
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(alpha));
+ else sc = new VectorSequenceContainer(alpha);
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
const Sequence* old = &sequences->getSequence(i);
@@ -204,16 +207,20 @@ int main(int args, char** argv)
{
if (!AlphabetTools::isCodonAlphabet(sequences->getAlphabet()))
throw Exception("Error in translation: alphabet is not of type 'codon'.");
- GeneticCode* gc = NULL;
- string gcstr = ApplicationTools::getStringParameter("code", cmdArgs, "Standard");
- gc = SequenceApplicationTools::getGeneticCode(dynamic_cast<const CodonAlphabet*>(sequences->getAlphabet())->getNucleicAlphabet(), gcstr);
+ if (cmdArgs["code"] != "")
+ throw Exception("ERROR: 'code' argument is deprecated. The genetic code to use for translation is now set by the top-level argument 'genetic_code'.");
+ if (!gCode.get()) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppseqman.getParams(), "Standard", "", true, 1);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
OrderedSequenceContainer* sc = 0;
if (aligned) sc = new VectorSiteContainer(&AlphabetTools::PROTEIN_ALPHABET);
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(&AlphabetTools::PROTEIN_ALPHABET));
- for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
+ else sc = new VectorSequenceContainer(&AlphabetTools::PROTEIN_ALPHABET);
+ for (size_t i = 0; i < sequences->getNumberOfSequences(); ++i)
{
- Sequence* seq = gc->translate(sequences->getSequence(i));
+ Sequence* seq = gCode->translate(sequences->getSequence(i));
sc->addSequence(*seq, false);
delete seq;
}
@@ -243,7 +250,7 @@ int main(int args, char** argv)
{
OrderedSequenceContainer* sc = 0;
if (aligned) sc = new VectorSiteContainer(sequences->getAlphabet());
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(sequences->getAlphabet()));
+ else sc = new VectorSequenceContainer(sequences->getAlphabet());
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
Sequence* seq = new BasicSequence(sequences->getSequence(i));
@@ -278,24 +285,29 @@ int main(int args, char** argv)
// +--------------+
else if (cmdName == "RemoveStops")
{
+ if (!gCode.get()) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppseqman.getParams(), "Standard", "", true, 1);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
SiteContainer* sites = dynamic_cast<SiteContainer*>(sequences);
if (!sites)
{
VectorSequenceContainer* sc = new VectorSequenceContainer(sequences->getAlphabet());
- for (unsigned int i = 0; i < sequences->getNumberOfSequences(); ++i)
+ for (size_t i = 0; i < sequences->getNumberOfSequences(); ++i)
{
auto_ptr<Sequence> seq(sequences->getSequence(i).clone());
- SequenceTools::removeStops(*seq);
+ SequenceTools::removeStops(*seq, *gCode);
sc->addSequence(*seq);
}
delete sequences;
sequences = sc;
} else {
VectorSiteContainer* sc = new VectorSiteContainer(sequences->getAlphabet());
- for (unsigned int i = 0; i < sequences->getNumberOfSequences(); ++i)
+ for (size_t i = 0; i < sequences->getNumberOfSequences(); ++i)
{
auto_ptr<Sequence> seq(sequences->getSequence(i).clone());
- SequenceTools::replaceStopsWithGaps(*seq);
+ SequenceTools::replaceStopsWithGaps(*seq, *gCode);
sc->addSequence(*seq);
}
delete sequences;
@@ -313,10 +325,15 @@ int main(int args, char** argv)
{
throw Exception("'RemoveColumnsWithStop' can only be used on alignment. You may consider using the 'CoerceToAlignment' command.");
}
+ if (!gCode.get()) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppseqman.getParams(), "Standard", "", true, 1);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
for (size_t i = sites->getNumberOfSites(); i > 0; i--)
{
- if (SiteTools::hasStopCodon(sites->getSite(i-1)))
+ if (CodonSiteTools::hasStop(sites->getSite(i-1), *gCode))
sites->deleteSite(i - 1);
}
}
@@ -326,14 +343,19 @@ int main(int args, char** argv)
// +---------+
else if (cmdName == "GetCDS")
{
+ if (!gCode.get()) {
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppseqman.getParams(), "Standard", "", true, 1);
+ ApplicationTools::displayResult("Genetic Code", codeDesc);
+ gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
+ }
OrderedSequenceContainer* sc = 0;
if (aligned) sc = new VectorSiteContainer(sequences->getAlphabet());
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(sequences->getAlphabet()));
- for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
+ else sc = new VectorSequenceContainer(sequences->getAlphabet());
+ for (size_t i = 0; i < sequences->getNumberOfSequences(); ++i)
{
BasicSequence seq = sequences->getSequence(i);
size_t len = seq.size();
- SequenceTools::getCDS(seq, false, true, true, false);
+ SequenceTools::getCDS(seq, *gCode, false, true, true, false);
if (aligned) {
for (size_t c = seq.size(); c < len; ++c)
seq.addElement(seq.getAlphabet()->getGapCharacterCode());
@@ -367,7 +389,7 @@ int main(int args, char** argv)
}
const Alphabet* alpha = 0;
- string alphastr = ApplicationTools::getStringParameter("alphabet", cmdArgs, "DNA");
+ string alphastr = ApplicationTools::getStringParameter("alphabet", cmdArgs, "DNA", "", false, 1);
if (alphastr == "DNA") alpha = &AlphabetTools::DNA_ALPHABET;
else if (alphastr == "RNA") alpha = &AlphabetTools::RNA_ALPHABET;
else if (alphastr == "Protein") alpha = &AlphabetTools::PROTEIN_ALPHABET;
@@ -387,7 +409,7 @@ int main(int args, char** argv)
throw Exception("'KeepComplete' can only be used on alignment. You may consider using the 'CoerceToAlignment' command.");
}
- string maxGapOption = ApplicationTools::getStringParameter("maxGapAllowed", cmdArgs, "100%");
+ string maxGapOption = ApplicationTools::getStringParameter("maxGapAllowed", cmdArgs, "100%", "", false, 1);
if (maxGapOption[maxGapOption.size()-1] == '%')
{
double gapFreq = TextTools::toDouble(maxGapOption.substr(0, maxGapOption.size()-1)) / 100.;
@@ -417,7 +439,7 @@ int main(int args, char** argv)
{
OrderedSequenceContainer* sc = 0;
if (aligned) sc = new VectorSiteContainer(sequences->getAlphabet());
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(sequences->getAlphabet()));
+ else sc = new VectorSequenceContainer(sequences->getAlphabet());
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
const Sequence* old = &sequences->getSequence(i);
@@ -433,7 +455,7 @@ int main(int args, char** argv)
// +------------------+
else if (cmdName == "GetCodonPosition")
{
- unsigned int pos = ApplicationTools::getParameter<unsigned int>("position", cmdArgs, 3);
+ unsigned int pos = ApplicationTools::getParameter<unsigned int>("position", cmdArgs, 3, "", false, 1);
OrderedSequenceContainer* sc = dynamic_cast<OrderedSequenceContainer*>(SequenceContainerTools::getCodonPosition(*sequences, pos - 1));
delete sequences;
if (aligned) {
@@ -470,11 +492,11 @@ int main(int args, char** argv)
ApplicationTools::displayBooleanResult("Final sequences are aligned", aligned);
if (aligned)
{
- SequenceApplicationTools::writeAlignmentFile(*dynamic_cast<SiteContainer*>(sequences), bppseqman.getParams(), "", true);
+ SequenceApplicationTools::writeAlignmentFile(*dynamic_cast<SiteContainer*>(sequences), bppseqman.getParams(), "", true, 1);
}
else
{
- SequenceApplicationTools::writeSequenceFile(*sequences, bppseqman.getParams(), "", true);
+ SequenceApplicationTools::writeSequenceFile(*sequences, bppseqman.getParams(), "", true, 1);
}
delete alphabet;
diff --git a/bppSuite/bppTreeDraw.cpp b/bppSuite/bppTreeDraw.cpp
index 873c12f..556f3a7 100644
--- a/bppSuite/bppTreeDraw.cpp
+++ b/bppSuite/bppTreeDraw.cpp
@@ -73,9 +73,9 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Tree Drawing program, version 0.1.0 *" << endl;
+ cout << "* Bio++ Tree Drawing program, version 2.2.0. *" << endl;
cout << "* *" << endl;
- cout << "* Authors: J. Dutheil Last Modif. 18/05/10 *" << endl;
+ cout << "* Authors: J. Dutheil Last Modif. 25/09/14 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -129,11 +129,11 @@ int main(int args, char ** argv)
KeyvalTools::parseProcedure(plotTypeCmd, plotType, plotTypeArgs);
if (plotType == "Cladogram")
{
- td = reinterpret_cast<TreeDrawing*>(new CladogramPlot());
+ td = new CladogramPlot();
}
else if (plotType == "Phylogram")
{
- td = reinterpret_cast<TreeDrawing*>(new PhylogramPlot());
+ td = new PhylogramPlot();
}
else throw Exception("Unknown output format: " + plotType);
td->setTree(tree);
diff --git a/bppsuite.spec b/bppsuite.spec
index 18933f0..d4255d9 100644
--- a/bppsuite.spec
+++ b/bppsuite.spec
@@ -1,5 +1,5 @@
%define _basename bppsuite
-%define _version 0.8.0
+%define _version 2.2.0
%define _release 1
%define _prefix /usr
@@ -14,21 +14,21 @@ Source: http://biopp.univ-montp2.fr/repos/sources/%{_basename}-%{_version}.tar.g
Summary: The Bio++ Program Suite
Group: Productivity/Scientific/Other
-Requires: libbpp-phyl9 = 2.1.0
-Requires: libbpp-seq9 = 2.1.0
-Requires: libbpp-core2 = 2.1.0
+Requires: libbpp-phyl9 = %{_version}
+Requires: libbpp-seq9 = %{_version}
+Requires: libbpp-core2 = %{_version}
BuildRoot: %{_builddir}/%{_basename}-root
BuildRequires: cmake >= 2.6.0
BuildRequires: gcc-c++ >= 4.0.0
BuildRequires: groff
BuildRequires: texinfo >= 4.0.0
-BuildRequires: libbpp-core2 = 2.1.0
-BuildRequires: libbpp-core-devel = 2.1.0
-BuildRequires: libbpp-seq9 = 2.1.0
-BuildRequires: libbpp-seq-devel = 2.1.0
-BuildRequires: libbpp-phyl9 = 2.1.0
-BuildRequires: libbpp-phyl-devel = 2.1.0
+BuildRequires: libbpp-core2 = %{_version}
+BuildRequires: libbpp-core-devel = %{_version}
+BuildRequires: libbpp-seq9 = %{_version}
+BuildRequires: libbpp-seq-devel = %{_version}
+BuildRequires: libbpp-phyl9 = %{_version}
+BuildRequires: libbpp-phyl-devel = %{_version}
AutoReq: yes
@@ -121,6 +121,12 @@ rm -rf $RPM_BUILD_ROOT
%{_prefix}/share/man/man1/bppmixedlikelihoods.1.%{zipext}
%changelog
+* Mon Sep 28 2014 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.2.0-1
+- Compatibility update. Bio++ Program Suite version number is now indexed
+ on Bio++'s version.
+- Programs support the --seed argument for setting the random seed.
+- bppSeqGen suport generic characters as input.
+- bppPhySamp outputs sampled trees.
* Fri Mar 08 2013 Julien Dutheil <julien.dutheil at univ-montp2.fr> 0.8.0-1
- New models for proteins (COaLA)
- New program bppMixedLikelihoods
diff --git a/buildBin.sh b/buildBin.sh
new file mode 100755
index 0000000..5b71638
--- /dev/null
+++ b/buildBin.sh
@@ -0,0 +1,18 @@
+#! /bin/sh
+arch=x86_64 #i686
+version=0.8.0-1
+
+strip bppSuite/bppdist
+strip bppSuite/bpppars
+strip bppSuite/bppml
+strip bppSuite/bppseqgen
+strip bppSuite/bppconsense
+strip bppSuite/bppseqman
+strip bppSuite/bppphysamp
+strip bppSuite/bppreroot
+strip bppSuite/bppancestor
+strip bppSuite/bpptreedraw
+strip bppSuite/bppalnscore
+strip bppSuite/bppmixedlikelihoods
+tar cvzf bppsuite-${arch}-bin-static-${version}.tar.gz bppSuite/bppdist bppSuite/bpppars bppSuite/bppml bppSuite/bppseqgen bppSuite/bppconsense bppSuite/bppseqman bppSuite/bppphysamp bppSuite/bppreroot bppSuite/bppancestor bppSuite/bpptreedraw bppSuite/bppalnscore bppSuite/bppmixedlikelihoods
+
diff --git a/buildExample.sh b/buildExample.sh
new file mode 100755
index 0000000..4733d32
--- /dev/null
+++ b/buildExample.sh
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+zip -r bppsuite-examples-0.8.0.zip Examples -x \*.svn\*
+
diff --git a/debian/changelog b/debian/changelog
index 999f8e9..9ac4650 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,13 @@
+bppsuite (2.2.0-1) unstable; urgency=low
+
+ * Compatibility update. Bio++ Program Suite version number is now indexed
+ on Bio++'s version.
+ * Programs support the --seed argument for setting the random seed.
+ * bppSeqGen suport generic characters as input.
+ * bppPhySamp outputs sampled trees.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Mon, 28 Sep 2014 14:00:00 +0100
+
bppsuite (0.8.0-1) unstable; urgency=low
* New models for proteins (COaLA)
diff --git a/debian/control b/debian/control
index a26ceba..52236d8 100644
--- a/debian/control
+++ b/debian/control
@@ -4,12 +4,12 @@ Priority: optional
Maintainer: Loic Dachary <loic at dachary.org>
Uploaders: Julien Dutheil <julien.dutheil at univ-montp2.fr>
Build-Depends: debhelper (>= 5), cmake (>= 2.6), dpkg (>= 1.15.4) | install-info, texinfo,
- libbpp-phyl-dev (>= 2.0.3)
-Standards-Version: 3.9.1
+ libbpp-phyl-dev (>= 2.2.0)
+Standards-Version: 3.9.4
Package: bppsuite
Architecture: any
-Depends: ${shlibs:Depends}, ${misc:Depends}, libbpp-phyl9 (>= 2.0.3)
+Depends: ${shlibs:Depends}, ${misc:Depends}, libbpp-phyl9 (>= 2.2.0)
Description: Bio++ program suite
Includes programs:
- BppML for maximum likelihood analysis,
diff --git a/debian/copyright b/debian/copyright
index 3bdf7c9..1428d5b 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -1,7 +1,7 @@
This package was debianized by Julien Dutheil <julien.dutheil at univ-montp2.fr> on
-Fri, 08 Mar 2013 11:41:00 +0100
+Mon, 28 Sep 2014 14:00:00 +0100
-It was downloaded from <http://download.gna.org/bppsuite/source>
+It was downloaded from <http://biopp.univ-montp2.fr/repos/sources/bppsuite>
Upstream Author:
@@ -9,7 +9,7 @@ Upstream Author:
Copyright:
- Copyright (C) 2013 Bio++ Development Team
+ Copyright (C) 2014 Bio++ Development Team
License:
@@ -27,7 +27,7 @@ License:
along with this package; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-The Debian packaging is (C) 2013, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
+The Debian packaging is (C) 2014, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
is licensed under the GPL, see `/usr/share/common-licenses/GPL'.
The provided software is distributed under the CeCILL license:
diff --git a/doc/bppsuite.texi b/doc/bppsuite.texi
index b8f527b..012f62f 100644
--- a/doc/bppsuite.texi
+++ b/doc/bppsuite.texi
@@ -1,7 +1,7 @@
\input texinfo @c -*-texinfo-*-
@c %**start of header
@setfilename bppsuite.info
- at settitle BppSuite Manual 0.7.0
+ at settitle BppSuite Manual 2.2.0
@documentencoding UTF-8
@afourpaper
@dircategory Science Biology Genetics
@@ -21,9 +21,9 @@
@c %**end of header
@copying
-This is the manual of the Bio++ Program Suite, version 0.7.0.
+This is the manual of the Bio++ Program Suite, version 2.2.0.
-Copyright @copyright{} 2007, 2008, 2009, 2010, 2011, 2012 Bio++ development team
+Copyright @copyright{} 2007-2014 Bio++ development team
@end copying
@titlepage
@@ -58,21 +58,23 @@ Copyright @copyright{} 2007, 2008, 2009, 2010, 2011, 2012 Bio++ development team
Common options encountered in several programs.
+* Alphabet:: Alphabets and genetic codes.
* Sequences:: Loading sequences/alignments.
* Tree:: Loading trees.
-* AlphabetIndex:: Setting alphabet indexes.
-* Model:: Setting up a substitution model.
+* AlphabetIndex:: Setting biochemical properties and distances.
+* Process::
* Distribution:: Setting of the discrete distributions.
* Estimation:: Estimating parameters by maximizing a likelihood function.
* WritingSequences:: Writing sequences/alignments to files.
* WritingTrees:: Writing trees to files.
-Model specification
+Process specification
-* Declaration:: Numerous declarations of models.
+* Model::
* Non-homogeneity:: Specific declaration of non-homogeneous modelling.
* FrequenciesSet:: Frequencies
* Rates:: Rates across sites
+* Linking::
Setting up the substitution model
@@ -83,7 +85,6 @@ Setting up the substitution model
* Multiple:: General multiple site models
* Meta:: Meta models
* Mixture:: Mixture of models
-* Linking:: Linking parameters
Bio++ Program Suite Reference
@@ -147,9 +148,12 @@ or
@{program@} param=option_file
@end example
@end cartouche
-where @{program@} is the name of the program to use (bppml, bppseqgen, etc.).
-Option files contain @command{parameter=value} lines, with only one parameter per line.
-They can be written from scratch using a regular text editor, but since these files can potentially turn to be quite complex, it is probably wiser to start with a sample provided along with the program (if any!).
+where @{program@} is the name of the program to use (bppml, bppseqgen,
+etc.). Option files contain @command{parameter=value} lines, with only
+one parameter per line. They can be written from scratch using a
+regular text editor, but since these files can potentially turn to be
+quite complex, it is probably wiser to start with a sample provided
+along with the program (if any!).
Extra-space may be included between parameter names, equal sign and value:
@cartouche
@@ -192,11 +196,15 @@ Command line and file options may be combined:
@{program@} param=option_file parameterX=valueX
@end example
@end cartouche
-In case @command{parameterX} is specified in both option file and command line, the command line value will be used.
-This allows to run the programs several times by changing a single option, like the name of the data set for instance.
+In case @command{parameterX} is specified in both option file and
+command line, the command line value will be used. This allows to run
+the programs several times by changing a single option, like the name
+of the data set for instance.
-Option files can be nested, by using @command{param=nestedoptionfile} within an option file, as with the command line.
-It is possible to use this option as often as needed, this will load all the required option files.
+Option files can be nested, by using @command{param=nestedoptionfile}
+within an option file, as with the command line. It is possible to use
+this option as often as needed, this will load all the required option
+files.
@section Different types of options
@@ -303,18 +311,19 @@ data=LSU
@c ------------------------------------------------------------------------------------------------------------------
@menu
+* Alphabet:: Alphabets and genetic codes.
* Sequences:: Loading sequences/alignments.
* Tree:: Loading trees.
-* AlphabetIndex::
-* Model:: Setting up a substitution model.
+* AlphabetIndex:: Setting biochemical properties and distances.
+* Process::
* Distribution:: Setting of the discrete distributions.
* Estimation:: Estimating parameters by maximizing a likelihood function.
* WritingSequences:: Writing sequences/alignments to files.
* WritingTrees:: Writing trees to files.
@end menu
- at node Sequences, Tree, Common, Common
- at section Setting alphabet and reading sequences
+ at node Alphabet, Sequences, Common, Common
+ at section Setting alphabet and genetic code
@table @command
@item alphabet =
@@ -328,52 +337,97 @@ The alphabet to use when reading sequences. DNA and RNA alphabet can in addition
Tell is exclamation mark should be considered as a gap character. The default is to consider it as an unknown character such as 'N' or '?'.
@end table
+ at item genetic_code = @{translation table@}
+Where 'translation table' specifies the code to use, either as a text description, or as the NCBI number.
+The following table give the currently implemented codes with their corresponding names:
+
+ at multitable @columnfractions 0.5 0.5
+ at item Standard @tab 1
+ at item VertebrateMitochondrial @tab 2
+ at item YeastMitochondrial @tab 3
+ at item MoldMitochondrial @tab 4
+ at item InvertebrateMitochondrial @tab 5
+ at item EchinodermMitochondrial @tab 9
+ at item AscidianMitochondrial @tab 13
+ at end multitable
+ at end table
+
+ at c ------------------------------------------------------------------------------------------------------------------
+
+ at node Sequences, Tree, Alphabet, Common
+ at section Reading sequences
+
+ at table @command
@item input.sequence.file=@{path@}
The sequence file to use. Depending on the program, these sequences have or do not have to be aligned.
@item input.sequence.format = @{sequence format description@}
The sequence file format.
+ at item input.site.selection = @{list of integers@}
+Will only consider sites in the given list of positions, in extended
+format : positions separated with ",", and "i-j" for all positions
+between i and j, included.
+
+ at item input.site.selection = @{Sample(n=@{integer@} [, replace=@{true@}])@}
+Will consider @{n@} random sites, with optional replacement.
+
@end table
-Since Bio++ Program Suite version 0.4.0, the format description uses the keyval syntax.
-The format is a function, with optional parameters:
+Since Bio++ Program Suite version 0.4.0, the format description uses
+the keyval syntax. The format is a function, with optional parameters:
@table @command
@item Fasta(extended=@{bool@}, strictNames=@{bool@})
The fasta format.
-The argument @command{extended}, default to 'no', allows to enable the HUPO-PSI extension of the format.
-The argument @command{strict_names}, default to 'no', specifies that only the first word in the fasta header is used as a sequence names, the rest of the header being considered as comments.
+The argument @command{extended}, default to 'no', allows to enable the
+HUPO-PSI extension of the format.
+The argument @command{strict_names}, default to 'no', specifies that
+only the first word in the fasta header is used as a sequence names,
+the rest of the header being considered as comments.
@item Mase(siteSelection=@{chars@})
-The Mase format (as read by Seaview and Phylo_win for instance), with an optional site selection name.
+The Mase format (as read by Seaview and Phylo_win for instance), with
+an optional site selection name.
@item Phylip(order=@{interleaved|sequential@}, type=@{classic|extended@}, split=@{spaces|tab@})
The Phylip format, with several variations.
-The argument @command{order} distinguishes between sequential and interleaved format, while the option @command{type} distinguished between the plain old Phylip format and the more recent extension allowing for sequence names longer than 10 characters, as understood by PAML and PhyML.
-Finally, the @command{split} argument specifies the type of character that separates the sequence name from the sequence content.
-The conventional option is to use one (classic) or more (extended) spaces, but tabs can also be used instead.
+The argument @command{order} distinguishes between sequential and
+interleaved format, while the option @command{type} distinguished
+between the plain old Phylip format and the more recent extension
+allowing for sequence names longer than 10 characters, as understood
+by PAML and PhyML.
+Finally, the @command{split} argument specifies the type of character
+that separates the sequence name from the sequence content. The
+conventional option is to use one (classic) or more (extended) spaces,
+but tabs can also be used instead.
@item Clustal(extraSpaces=@{int@})
-The Clustal format.
-In its basic set up, sequence names do not have space characters, and one space splits the sequence content from its name.
-The parser can however be configured to allow for spaces in the sequence names, providing a minimum number of space characters is used to split the content from the name.
-Setting @command{extraSpaces} to 5 for instance, the sequences are expected to be at least 6 spaces away for their names.
+The Clustal format.
+In its basic set up, sequence names do not have space characters, and
+one space splits the sequence content from its name. The parser can
+however be configured to allow for spaces in the sequence names,
+providing a minimum number of space characters is used to split the
+content from the name. Setting @command{extraSpaces} to 5 for
+instance, the sequences are expected to be at least 6 spaces away for
+their names.
@item Dcse()
-The DCSE alignment format. The secondary structure annotation will be ignored.
+The DCSE alignment format. The secondary structure annotation will be
+ignored.
@item Nexus()
-The Nexus alignment format.
-Only very basic support is provided.
+The Nexus alignment format. Only very basic support is provided.
@end table
-For programs that do not require the sequences to be aligned, the following formats are also available:
+For programs that do not require the sequences to be aligned, the
+following formats are also available:
@table @command
@item GenBank()
-Very basic support: only retrieves the sequence content for now, all features are ignored.
+Very basic support: only retrieves the sequence content for now, all
+features are ignored.
@end table
@@ -424,17 +478,19 @@ The format of the input tree file.
@end table
-In case the input tree does not specify node identifiers, some will be generated automatically.
-Nodes identifiers can be outputed using the following option:
+In case the input tree does not specify node identifiers, some will be
+generated automatically. Nodes identifiers can be outputed using the
+following option:
@table @command
@item output.tree_ids.file = @{@{path@}|none@}
A tree file in newick format, with node ids instead of bootstrap
values, and leaf names with their id as suffix.
@end table
-In case it is supported by the program, the use of that option will cause the program to exit just after producing the tagged tree.
+In case it is supported by the program, the use of that option will
+cause the program to exit just after producing the tagged tree.
-Some programs may require that your file contains several trees.
-The corresponding options are then:
+Some programs may require that your file contains several trees. The
+corresponding options are then:
@table @command
@item input.trees.file = @{path@}
@@ -447,12 +503,13 @@ The format of the input tree file.
@c ------------------------------------------------------------------------------------------------------------------
- at node AlphabetIndex, Model, Tree, Common
- at section Specifying alphabet indexes
+ at node AlphabetIndex, Process, Tree, Common
+ at section Specifying biochemical properties and distances
-Some methods require an "alphabet index" to be specified.
-Alphabet indexes associate a value with each alphabet state (Index1, e.g. a biochemical property) or for a pair of states (Index2, e.g. a biochemical distance).
-This section describes the supported indexes:
+Some methods require an "alphabet index" to be specified. Alphabet
+indexes associate a value with each alphabet state (Index1, e.g. a
+biochemical property) or for a pair of states (Index2, e.g. a
+biochemical distance). This section describes the supported indexes:
@subsection Index1
@@ -470,9 +527,11 @@ Chou and Fasmani score for secondary structure prediction.
@item ChenGuHuangHydrophobicity @{AA@}
Hydrophobicity according to Chen, Gu and Huang.
@item SEALow, SEAMedium, SEAHigh @{AA@}
-Solvent Exposed Area, percent of amino acids having a SEA below 10, between 10 and 30, or higher than 30, respectively.
+Solvent Exposed Area, percent of amino acids having a SEA below 10,
+between 10 and 30, or higher than 30, respectively.
@item User
-A user defined Index1, from a file in the AAIndex1 syntax. The input file is specified using the @command{file=@{path@}} argument.
+A user defined Index1, from a file in the AAIndex1 syntax. The input
+file is specified using the @command{file=@{path@}} argument.
@command{file}
@end table
@@ -486,27 +545,38 @@ If no index should be used.
@item Blosum50 @{AA@}
The BLOSUM 50 amino acid distance matrix.
@item Grantham, Miyata @{AA@}
-Two biochemical distance matrices. Both accept an optional argument @command{symmetrical=@{boolean@}} allowing to specify if the matrix should be symmetric or not. If not, the distance measure will be signed.
+Two biochemical distance matrices. Both accept an optional argument
+ at command{symmetrical=@{boolean@}} allowing to specify if the matrix
+should be symmetric or not. If not, the distance measure will be
+signed.
@item Diff
-Allow to compute a distance matrix by taking the difference for, each pair of state, of an Index1 value.
-The Index1 to use is specified using the @command{index1=@{Index1 description@}} argument. An additional argument allow to specify whether the resulting matrix should be symetric (@command{symmetrical=@{boolean@}}):
- if so, the absolute difference will be used. Alternatively, the distance will be signed and d[i,j] = - d[j,i].
+Allow to compute a distance matrix by taking the difference for, each
+pair of state, of an Index1 value. The Index1 to use is specified
+using the @command{index1=@{Index1 description@}} argument. An
+additional argument allow to specify whether the resulting matrix
+should be symetric (@command{symmetrical=@{boolean@}}): if so, the
+absolute difference will be used. Alternatively, the distance will be
+signed and d[i,j] = - d[j,i].
@item User
-A user defined Index2, from a file in the AAIndex2 syntax. The input file is specified using the @command{file=@{path@}} argument.
-The @command{symmetrical=@{boolean@}} argument can be used to specify whether distances should be signed or not.
+A user defined Index2, from a file in the AAIndex2 syntax. The input
+file is specified using the @command{file=@{path@}} argument. The
+ at command{symmetrical=@{boolean@}} argument can be used to specify
+whether distances should be signed or not.
@end table
@c ------------------------------------------------------------------------------------------------------------------
- at node Model, Distribution, AlphabetIndex, Common
- at section Model specification
+
+ at node Process, Distribution, AlphabetIndex, Common
+ at section Process specification
@menu
-* Declaration:: Numerous declarations of models.
+* Model::
* Non-homogeneity:: Specific declaration of non-homogeneous modelling.
* FrequenciesSet:: Frequencies
* Rates:: Rates across sites
+* Linking::
@end menu
The substitution model specification over the tree is set up in different parts.
@@ -514,17 +584,19 @@ The substitution model specification over the tree is set up in different parts.
@item nonhomogeneous = @{no|one_per_branch|general@}
Set the type of model. The @option{no} option is used for homogeneous
models. The @option{one_per_branch} option is used as a short cut for
-setting branch models (for instance Galtier and Gouy 97 for branch GC content, or PAML branch model), and the
- at option{general} option for the more general case, including PAML clade models.
-In either of the last two cases, the model is potentially non-stationary, that is, possibly not at the equilibrium and hence
-includes the root frequencies as additional parameters. If the
-substitution model is not the same across the tree, then the model is
-also non-homogeneous.
+setting branch models (for instance Galtier and Gouy 97 for branch GC
+content, or PAML branch model), and the @option{general} option for
+the more general case, including PAML clade models. In either of the
+last two cases, the model is potentially non-stationary, that is,
+possibly not at the equilibrium and hence includes the root
+frequencies as additional parameters. If the substitution model is not
+the same across the tree, then the model is also non-homogeneous.
@end table
-In combination with those models, one can also specify a distribution of site-specific rate.
+In combination with those models, one can also specify a distribution
+of site-specific rate.
- at node Declaration, Non-homogeneity, Model, Model
+ at node Model, Non-homogeneity, Process, Process
@subsection Setting up the substitution model
@menu
@@ -535,7 +607,6 @@ In combination with those models, one can also specify a distribution of site-sp
* Multiple:: General multiple site models
* Meta:: Meta models
* Mixture:: Mixture of models
-* Linking:: Linking parameters
@end menu
@table @command
@@ -575,7 +646,7 @@ the frequencies are computed from observed data.
@end table
- at node Nucleotide, Protein, Declaration, Declaration
+ at node Nucleotide, Protein, Model, Model
@subsubsection Nucleotide models
@table @command
@@ -633,7 +704,7 @@ The strand-symmetric model of Lobry 1995, for nucleotides. See the
@end table
- at node Protein, Miscellaneous, Nucleotide, Declaration
+ at node Protein, Miscellaneous, Nucleotide, Model
@subsubsection Protein models
@table @command
@@ -733,7 +804,7 @@ namespace, including for frequencies.
@end table
- at node Miscellaneous, Codon, Protein, Declaration
+ at node Miscellaneous, Codon, Protein, Model
@subsubsection Miscellaneous models
@table @command
@@ -746,23 +817,13 @@ proportion of 1 over 0 in the equilibrium distribution. Default:
@end table
- at node Codon, Multiple, Miscellaneous, Declaration
+ at node Codon, Multiple, Miscellaneous, Model
@subsubsection Codon models
-Standard codon models: the optional @var{genetic_code} argument
-describes the genetic code. If it is not given, the one related with
-the alphabet is used. The several values available are described
-below.
-
- at itemize
- at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1EchinodermMitochondrialGeneticCode.html#_details, EchinodermMitochondrialGeneticCode}
- at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1InvertebrateMitochondrialGeneticCode.html#_details,InvertebrateMitochondrialGeneticCode}
- at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1StandardGeneticCode.html#_details, StandardGeneticCode}
- at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1VertebrateMitochondrialGeneticCode.html#_details, VertebrateMitochondrialGeneticCode}
- at item @uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-seq/html/classbpp_1_1YeastMitochondrialGeneticCode.html#_details, YeastMitochondrialGeneticCode}
- at end itemize
+Standard codon models: the global @var{genetic_code} argument
+describes the genetic code and has to be specified.
-The next codon models also take as argument a @var{frequencies} option
+Codon models also take as argument a @var{frequencies} option
specifying the equilibrium frequencies of the model. Any frequencies
description can be used here, but the syntax also supports options
similar to the ones used in the PAML software:
@@ -1087,7 +1148,7 @@ See the
@end table
- at node Multiple, Meta, Codon, Declaration
+ at node Multiple, Meta, Codon, Model
@subsubsection General multiple site models
@table @command
@@ -1204,7 +1265,7 @@ neighbour-dependency inside dinucleotides YpR (see Bérard and Guéguen
@end table
- at node Meta, Mixture, Multiple, Declaration
+ at node Meta, Mixture, Multiple, Model
@subsubsection Meta models
These substitution models take as argument another substitution model, and add several parameters.
@@ -1235,7 +1296,7 @@ is the insertion rate, while @var{mu} is the deletion rate. See the
@end table
- at node Mixture, Linking, Meta, Declaration
+ at node Mixture, , Meta, Model
@subsubsection Mixture of models
@table @command
@@ -1315,27 +1376,17 @@ model=Mixture(model1=GY94(), model2=YN98(), relrate1=0.1)
has parameters at var{Mixture.relrate1}, @var{Mixture.relproba1},
@var{Mixture.1_GY94.kappa}, @var{Mixture.1_GY94.V},
@var{Mixture.2_YN98.kappa}, @var{Mixture.2_YN98.omega}.
- at end table
See the
@uref{http://biopp.univ-montp2.fr/Documents/ClassDocumentation/bpp-phyl/html/classbpp_1_1MixtureOfSubstitutionModels.html#_details, Bio++ description}.
-
- at node Linking, , Mixture, Declaration
- at subsubsection Linking parameters
-
-It is possible to reduce the parameter space by putting extra constraints on parameters, using for instance
- at example
-model=TN93(kappa1=1.0, kappa2=kappa1, theta=0.5)
- at end example
-In that particular case the resulting model is strictly equivalent to the HKY85 model. This syntax however allows to define a larger set of models.
-
+ at end table
- at node Non-homogeneity, FrequenciesSet, Declaration, Model
+ at node Non-homogeneity, FrequenciesSet, Model, Process
@subsection Setting up non-stationary / non-homogeneous models
You can specify a wide range of non-homogeneous models, by combining different options.
@@ -1381,35 +1432,6 @@ Id ranges can be specified using the @option{begin:end} syntax.
@end table
-You can also make a given model share parameters with another one by writing for instance:
- at example
-model2 = T92(theta=0.39, kappa=model1.T92.kappa)
- at end example
-Please note the syntax, parameters are referred to as [model name].[parameter name] in that case. Only parameter from identical models can be aliased in this manner.
-To link parameters from different models, you have to use the more general option (warning, currently beta feature!)
-
- at table @command
-
- at item nonhomogeneous.alias = @{list of aliases@}
-
-where each alias is described as `param1->param2'. The full name of the parameters have to be used, see for example:
-
- at example
-model1 = T92(theta=0.4, kappa=4)
-model2 = GTR(theta=0.4, a = 1.1, b=0.4, c=0.4, d=0.25, e=0.1)
-nonhomogeneous.alias=GTR.theta1->T92.theta1
- at end example
-
-This option can be used to link parameters of the root frequencies if the model is non-stationary:
- at example
-nonhomogeneous.root_freq=Full(init=balanced)
-nonhomogeneous.alias=Full.theta1->GTR.theta1_1
- at end example
-
-Note that this option is only available with the 'general' nonhomogeneous substitution models and will be ignored if used with "one_per_branch".
-
- at end table
-
Finally, you may find useful the following options:
@table @command
@@ -1532,7 +1554,7 @@ The Frequencies set used can be any of the ones described below
@xref{Frequencies sets}, depending on the alphabet used.
- at node FrequenciesSet, Rates, Non-homogeneity, Model
+ at node FrequenciesSet, Rates, Non-homogeneity, Process
@subsection Frequencies sets
@anchor{Frequencies sets}
@@ -1655,11 +1677,9 @@ alphabetical order of states, and sum to one.
@end table
- at node Rates, , FrequenciesSet, Model
+ at node Rates, Linking, FrequenciesSet, Process
@subsection Rate across site distribution
-From version 0.4.0, BppSuite uses the keyval syntax for specifying the distributions of substitution rate across sites.
-
@table @command
@item rate_distribution = @{rate distribution description@}
@@ -1685,8 +1705,56 @@ with a probability @var{p}.
@end table
+ at node Linking, , Rates, Process
+ at subsection Linking parameters
+
+It is possible to reduce the parameter space by putting extra
+constraints on parameters, using for instance
+
+ at example
+model=TN93(kappa1=1.0, kappa2=kappa1, theta=0.5)
+ at end example
+
+In that particular case the resulting model is strictly equivalent to
+the HKY85 model. This syntax however allows to define a larger set of
+models.
- at node Distribution, Estimation, Model, Common
+As long as their range match, parameters of several objects (models,
+root frequencies, rates, etc) can be linked.
+
+For instance:
+ at example
+model1 = T92(theta=GC.theta, kappa=3)
+model2 = T92(theta=0.39, kappa=T92.kappa_1)
+
+nonhomogeneous.root_freq=GC
+ at end example
+
+In the case of nonhomogeneous modelling, a specific syntax is available:
+
+ at example
+nonhomogeneous.alias = @{list of aliases@}
+ at end example
+
+where each alias is described as `param1->param2'. The full name of the parameters have to be used, see for example:
+
+ at example
+model1 = T92(theta=0.4, kappa=4)
+model2 = GTR(theta=0.4, a = 1.1, b=0.4, c=0.4, d=0.25, e=0.1)
+nonhomogeneous.alias=GTR.theta1->T92.theta1
+ at end example
+
+This option can be used to link parameters of the root frequencies if the model is non-stationary:
+ at example
+model1=GTR(theta1=0.7)
+nonhomogeneous.root_freq=Full(init=balanced)
+nonhomogeneous.alias=Full.theta1->GTR.theta1_1
+ at end example
+
+Note that this option is only available with the 'general' nonhomogeneous substitution models and will be ignored if used with "one_per_branch".
+
+
+ at node Distribution, Estimation, Process, Common
@section Discrete distributions
@anchor{Discrete distributions}
@@ -1866,11 +1934,20 @@ nested models, the syntax is the following:
@command{G01.rdist_Gamma.alpha}, @command{TS98.model_T92.kappa},
@command{RE08.lamba}, @command{RE08.model_G01.model_GTR.a}, etc.
'Ancient' will ignore all parameters in the ancestral frequency set
-(non-homogeneous models), and 'BrLen' will ignore all branch lengths.
+(non-homogeneous models), 'BrLen' will ignore all branch lengths and
+'Model' will ignore all model parameters.
The '*' wildcard can be used, as in @command{*theta*} for all the
parameters whose name has @command{theta} in it.
+ at item optimization.constrain_parameter = @{list<chars=interval>@}
+A list of parameters on which the authorized values are limited to a
+given interval.
+
+ at example
+optimization.constrain_parameter = YN98.omega = [-inf;1.9[, *theta* = [0.1;0.7[, BrLen*=[0.01;inf]
+ at end example
+
@item optimization.tolerance = @{float>0@}
The precision on the log-likelihood to reach.
@@ -1976,11 +2053,11 @@ Available methods include:
@table @command
- at item Input
-Keep initial branch lengths as is.
+ at item Input(midpoint_root_branch=@{boolean@})
+Keep initial branch lengths as is. Additional argument specifies if the root position should be moved to the midpoint position of the branch containing it.
@item Equal(value=@{float>0@})
-Set all branch lengths to the same value, provided as argumet.
+Set all branch lengths to the same value, provided as argumemt.
@item Clock
Coerce to a clock tree.
@@ -2115,16 +2192,48 @@ This is usually the best option, particularly for nucleotide data sets.
The BppSeqGen program uses the common syntax introduced in the previous section for setting the alphabet, loading the sequences (@pxref{Sequences}) and tree (@pxref{Tree}), specifying the model (@pxref{Model}) and writing sequence data (@pxref{WritingSequences}).
- at table @command
+The root sequence can be sampled from the model specification, with
+additional argument:
+
+ at table @command
@item number_of_sites = @{int>0@}
The number of site positions to simulate.
+ at end table
+
+
+Or the root sequence can be built from a file of a sequence:
+
+ at table @command
+
+ at item input.sequence.file=@{path@}
+A sequence is be loaded, from which the simulation will be performed,
+or from which a root sequence can be sampled.
+(@pxref{Sequences}).
+
@item input.infos = @{path@}
A info file like the one output by bppML.
The estimated site-specific rates will then be used to simulate the same number of sites as found in the info file, with the corresponding rates.
+In this case, additional options are possible:
+
+ at table @command
+
+ at item input.infos.rates = @{string@}
+Name of the column on which the rates are described (default: pr).
+
+ at item input.infos.states = @{string@}
+Name of the column on which the states are read (default: none, which
+means a random sequence).
+
+ at item input.site.selection = @{string@}
+used to sample from the given sequence (@pxref{Sequences}).
+
+ at end table
+
@end table
+
@c ------------------------------------------------------------------------------------------------------------------
@@ -2162,7 +2271,8 @@ then the sequence with unknown characters will be used.
Alignment information log file (site specific rates, probabilities, etc).
@item output.nodes.file = @{@{path@}|none@}
-Ancestral nodes information: expected frequencies of ancestral states.
+Ancestral nodes information: expected frequencies (prefix exp) (see Galtier & Gouy
+1998) and a posteriori probabilities of ancestral states (prefix eb).
@item output.nodes.add_extant = @{boolean@}
Tell if leaf nodes should be added to the output file.
@@ -2295,7 +2405,7 @@ Build a consensus tree according to a given threshold.
The Bio++ Phylogenetic Sampler samples sequences from a file according to phylogenetic information.
The goal is to clean a big data set by removing redundant sequences, bringing only few additional information for evolutionary analyses.
-The BppPhySamp programs uses the common options for setting the alphabet, loading the sequences (@pxref{Sequences}) and (@pxref{Tree}) and writing the resulting data set (@pxref{WritingSequences}).
+The BppPhySamp programs uses the common options for setting the alphabet, loading the sequences (@pxref{Sequences}) and (@pxref{Tree}) and writing the resulting data set (@pxref{WritingSequences}, @pxref{WritingTrees}).
@table @command
@@ -2375,16 +2485,10 @@ Convert to the complementary sequence, switching the type of alphabet (DNA<->RNA
@item Switch [[alphabet = DNA or RNA]]
Change the alphabet type (DNA<->RNA).
- at item Translate(code = @{genetic code@}) [[alphabet = DNA or RNA]]
-Convert to proteins. You have to specify a genetic code, see specific options.
- at option{code}: The genetic code to use for the translation, one of
- at itemize
- at item EchinodermMitochondrialGeneticCode
- at item InvertebrateMitochondrialGeneticCode
- at item StandardGeneticCode
- at item VertebrateMitochondrialGeneticCode
- at item YeastMitochondrialGeneticCode
- at end itemize
+ at item Translate [[alphabet = DNA or RNA]]
+Convert to proteins.
+The genetic code used for translation is set via the genetic_code option.
+Genetic code is set once for all sequences.
@item Invert
Invert the sequence 5' <-> 3' or N <-> C
@@ -2400,9 +2504,13 @@ Change (partially) unresolved characters to gaps.
@item RemoveStops
Remove all stop codons in sequences. If sequences are aligned, stop codons will be replaced by gaps.
+The genetic code used for translation is set via the genetic_code option.
+Genetic code is set once for all sequences.
@item RemoveColumnsWithStops
Remove all sites with at least one stop codon.
+The genetic code used for translation is set via the genetic_code option.
+Genetic code is set once for all sequences.
@item GetCDS
Remove the first stop codon and everything after in codon sequences.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bppsuite.git
More information about the debian-med-commit
mailing list