[med-svn] [Git][med-team/bppsuite][upstream] New upstream version 2.4.0
Julien Y. Dutheil
gitlab at salsa.debian.org
Sun Apr 8 10:30:12 BST 2018
Julien Y. Dutheil pushed to branch upstream at Debian Med / bppsuite
Commits:
27c31817 by Julien Y. Dutheil at 2018-04-08T08:31:08+02:00
New upstream version 2.4.0
- - - - -
20 changed files:
- CMakeLists.txt
- ChangeLog
- Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
- Examples/MaximumLikelihood/Codons/CladeModel/ML.bpp
- Examples/MaximumLikelihood/Codons/M0/ML.bpp
- Examples/MaximumLikelihood/Codons/M1/ML.bpp
- Examples/MaximumLikelihood/Codons/M2/ML.bpp
- Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
- Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGG/MLNHGG.bpp
- Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGeneral/MLNH.bpp
- Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
- README.md
- bppSuite/CMakeLists.txt
- bppSuite/bppDist.cpp
- bppSuite/bppPopStats.cpp
- bppSuite/bppSeqGen.cpp
- bppSuite/bppSeqMan.cpp
- bppsuite.spec
- doc/bppsuite.texi
- man/bppseqman.1
Changes:
=====================================
CMakeLists.txt
=====================================
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,7 +61,8 @@ else ()
# Generate command line args (always add -c to output compressed file to stdout)
if (${COMPRESS_PROGRAM} STREQUAL "gzip")
# -n for no timestamp in files (reproducible builds)
- set (COMPRESS_ARGS -c -n)
+ # -9 for maximum compression (lintian error)
+ set (COMPRESS_ARGS -c -n -9)
else ()
set (COMPRESS_ARGS -c)
endif ()
@@ -74,8 +75,8 @@ if (CMAKE_INSTALL_PREFIX)
endif (CMAKE_INSTALL_PREFIX)
include (GNUInstallDirs)
-find_package (bpp-phyl 11.0.0 REQUIRED)
-find_package (bpp-popgen 7.0.0 REQUIRED)
+find_package (bpp-phyl 12.0.0 REQUIRED)
+find_package (bpp-popgen 8.0.0 REQUIRED)
# Subdirectories
add_subdirectory (bppSuite)
@@ -87,10 +88,10 @@ ENDIF(NO_DEP_CHECK)
# Packager
SET(CPACK_PACKAGE_NAME "bppsuite")
SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
-SET(CPACK_PACKAGE_VERSION "2.3.1")
+SET(CPACK_PACKAGE_VERSION "2.4.0")
SET(CPACK_PACKAGE_VERSION_MAJOR "2")
-SET(CPACK_PACKAGE_VERSION_MINOR "3")
-SET(CPACK_PACKAGE_VERSION_PATCH "1")
+SET(CPACK_PACKAGE_VERSION_MINOR "4")
+SET(CPACK_PACKAGE_VERSION_PATCH "0")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Program Suite")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
SET(CPACK_RESOURCE_FILE_AUTHORS "${CMAKE_SOURCE_DIR}/AUTHORS.txt")
=====================================
ChangeLog
=====================================
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+19/03/18 -*- Version 2.4.0 -*-
+
+19/02/18 Julien Dutheil
+* Bug fixed in bppPopStats: dN and dS were inverted!
+
+10/12/17 -*- Version 2.3.2 -*-
+
06/11/17 Julien Dutheil
* Added estimation of kappa to bppPopStats + bugs fixed
=====================================
Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
=====================================
--- a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
@@ -95,7 +95,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter =
+optimization.ignore_parameters =
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
=====================================
Examples/MaximumLikelihood/Codons/CladeModel/ML.bpp
=====================================
--- a/Examples/MaximumLikelihood/Codons/CladeModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/CladeModel/ML.bpp
@@ -102,7 +102,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter =
+optimization.ignore_parameters =
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
=====================================
Examples/MaximumLikelihood/Codons/M0/ML.bpp
=====================================
--- a/Examples/MaximumLikelihood/Codons/M0/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M0/ML.bpp
@@ -87,7 +87,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter =
+optimization.ignore_parameters =
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
=====================================
Examples/MaximumLikelihood/Codons/M1/ML.bpp
=====================================
--- a/Examples/MaximumLikelihood/Codons/M1/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M1/ML.bpp
@@ -85,7 +85,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter = YNGP_M1.*Full*
+optimization.ignore_parameters = YNGP_M1.*Full*
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
=====================================
Examples/MaximumLikelihood/Codons/M2/ML.bpp
=====================================
--- a/Examples/MaximumLikelihood/Codons/M2/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M2/ML.bpp
@@ -85,7 +85,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter = YNGP_M1.*Full*
+optimization.ignore_parameters = YNGP_M1.*Full*
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
=====================================
Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
=====================================
--- a/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
+++ b/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
@@ -86,7 +86,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter =
+optimization.ignore_parameters =
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
=====================================
Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGG/MLNHGG.bpp
=====================================
--- a/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGG/MLNHGG.bpp
+++ b/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGG/MLNHGG.bpp
@@ -100,7 +100,7 @@ optimization.topology.tolerance.before=100
optimization.topology.tolerance.during=100
optimization.scale_first=no
optimization.verbose=3
-optimization.ignore_parameter=
+optimization.ignore_parameters=
# Should we write the resulting tree? none or file name.
output.tree.file = $(DATA).ML.dnd
=====================================
Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGeneral/MLNH.bpp
=====================================
--- a/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGeneral/MLNH.bpp
+++ b/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGeneral/MLNH.bpp
@@ -102,7 +102,7 @@ likelihood.recursion_simple.compression = recursive
optimization=FullD(derivatives=Newton)
optimization.reparametrization=no
optimization.verbose = 1
-optimization.ignore_parameter =
+optimization.ignore_parameters =
optimization.max_number_f_eval = 10000
optimization.tolerance = 0.000001
optimization.message_handler = $(DATA).messages
=====================================
Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
=====================================
--- a/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
+++ b/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
@@ -85,7 +85,7 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter =
+optimization.ignore_parameters =
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
=====================================
README.md
=====================================
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ BppSuite is a suite of ready-to-use programs for phylogenetic and sequence analy
### Standalone executables
-Standalone executables are available for [linux64](http://biopp.univ-montp2.fr/repos/exe/lin64/).
+Standalone executables are available for [linux64](https://github.com/BioPP/bppsuite/releases/tag/v2.3.2)
[//]: [win32](http://biopp.univ-montp2.fr/repos/exe/win32/), [win64](http://biopp.univ-montp2.fr/repos/exe/win64/) and [Mac](http://biopp.univ-montp2.fr/repos/exe/mac/)
@@ -78,5 +78,5 @@ Many examples are available in the subdirectory of <tt>Examples</tt>.
### Documentation
-Documentation can be found at http://github.com/bppsuite/releases in pdf or html.
+Documentation can be found at https://github.com/BioPP/bppsuite/releases/tag/v2.3.2 in pdf or html.
\ No newline at end of file
=====================================
bppSuite/CMakeLists.txt
=====================================
--- a/bppSuite/CMakeLists.txt
+++ b/bppSuite/CMakeLists.txt
@@ -41,6 +41,7 @@ foreach (target ${bppsuite-targets})
set_target_properties (${target} PROPERTIES LINK_SEARCH_END_STATIC TRUE)
else (BUILD_STATIC)
target_link_libraries (${target} ${BPP_LIBS_SHARED})
+ set_target_properties (${target} PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
endif (BUILD_STATIC)
endforeach (target)
=====================================
bppSuite/bppDist.cpp
=====================================
--- a/bppSuite/bppDist.cpp
+++ b/bppSuite/bppDist.cpp
@@ -182,14 +182,14 @@ int main(int args, char ** argv)
string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", bppdist.getParams(), false, false);
OutputStream* messenger =
(mhPath == "none") ? 0 :
- (mhPath == "std") ? ApplicationTools::message :
+ (mhPath == "std") ? ApplicationTools::message.get() :
new StlOutputStream(new ofstream(mhPath.c_str(), ios::out));
ApplicationTools::displayResult("Message handler", mhPath);
string prPath = ApplicationTools::getAFilePath("optimization.profiler", bppdist.getParams(), false, false);
OutputStream* profiler =
(prPath == "none") ? 0 :
- (prPath == "std") ? ApplicationTools::message :
+ (prPath == "std") ? ApplicationTools::message.get() :
new StlOutputStream(new ofstream(prPath.c_str(), ios::out));
if(profiler) profiler->setPrecision(20);
ApplicationTools::displayResult("Profiler", prPath);
@@ -232,11 +232,11 @@ int main(int args, char ** argv)
//Here it is:
ofstream warn("warnings", ios::out);
- ApplicationTools::warning = new StlOutputStreamWrapper(&warn);
+ shared_ptr<OutputStream> wout = ApplicationTools::warning;
+ ApplicationTools::warning.reset(new StlOutputStreamWrapper(&warn));
tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore, !ignoreBrLen, type, tolerance, nbEvalMax, profiler, messenger, optVerbose);
warn.close();
- delete ApplicationTools::warning;
- ApplicationTools::warning = ApplicationTools::message;
+ ApplicationTools::warning = wout;
string matrixPath = ApplicationTools::getAFilePath("output.matrix.file", bppdist.getParams(), false, false, "", false);
if (matrixPath != "none")
=====================================
bppSuite/bppPopStats.cpp
=====================================
--- a/bppSuite/bppPopStats.cpp
+++ b/bppSuite/bppPopStats.cpp
@@ -42,6 +42,7 @@
#include <fstream>
#include <iomanip>
#include <memory>
+#include <cmath>
using namespace std;
@@ -152,6 +153,7 @@ int main(int args, char** argv)
if (ApplicationTools::parameterExists("input.sequence.outgroup.name", bpppopstats.getParams())) {
vector<string> outgroups = ApplicationTools::getVectorParameter<string>("input.sequence.outgroup.name", bpppopstats.getParams(), ',', "");
for (auto g : outgroups) {
+ ApplicationTools::displayResult("Sequence from outgroup", g);
psc->setAsOutgroupMember(g);
}
}
@@ -270,7 +272,7 @@ int main(int args, char** argv)
// Create a likelihood object:
treeLik = new DRHomogeneousTreeLikelihood(*tree, *aln, model.get(), rDist.get());
treeLik->initialize();
- if (isinf(treeLik->getValue()))
+ if (std::isinf(treeLik->getValue()))
throw Exception("Error: null likelihood. Possible cause: stop codon or numerical underflow (too many sequences).");
// Optimize parameters:
treeLik = dynamic_cast<DRTreeLikelihood*>(PhylogeneticsApplicationTools::optimizeParameters(treeLik, treeLik->getParameters(), bpppopstats.getParams(), "", true, true, 2));
@@ -431,8 +433,8 @@ int main(int args, char** argv)
}
double piS = SequenceStatistics::piSynonymous(*pscIn, *gCode);
double piN = SequenceStatistics::piNonSynonymous(*pscIn, *gCode);
- double nbS = SequenceStatistics::meanNumberOfSynonymousSites(*pscIn, *gCode);
- double nbN = SequenceStatistics::meanNumberOfNonSynonymousSites(*pscIn, *gCode);
+ double nbS = SequenceStatistics::meanNumberOfSynonymousSites(*pscIn, *gCode, kappa);
+ double nbN = SequenceStatistics::meanNumberOfNonSynonymousSites(*pscIn, *gCode, kappa);
double r = (piN / nbN) / (piS / nbS);
ApplicationTools::displayResult("PiN:", piN);
ApplicationTools::displayResult("PiS:", piS);
@@ -597,27 +599,22 @@ int main(int args, char** argv)
if (codonAlphabet->isUnresolved(outgroupState) || codonAlphabet->isGap(outgroupState)) {
out << "\tNA\tNA\tNA";
} else {
- if (estimateAncestor) {
- //This is the same value as for polymorphism, we add it for having consistent output format
- out << "\t" << CodonSiteTools::numberOfSynonymousPositions(ancestralSequence->getValue(i), *gCode, kappa);
- } else {
- //Also average over outgroup (Note: minState and maxState are identical in this case)
- out << "\t" << (CodonSiteTools::numberOfSynonymousPositions(outgroupState, *gCode, kappa) +
- CodonSiteTools::numberOfSynonymousPositions(minState, *gCode, kappa)) / 2.;
- }
+ //Average over outgroup (Note: minState and maxState are identical in this case)
+ out << "\t" << (CodonSiteTools::numberOfSynonymousPositions(outgroupState, *gCode, kappa) +
+ CodonSiteTools::numberOfSynonymousPositions(minState, *gCode, kappa)) / 2.;
if (nbAlleles == 1) {
//Compare with outgroup:
if (site[0] == outgroupState) {
- out << "\t0\t0" << endl;
+ out << "\t0\t0";
} else {
//This is a real substitution:
- int nt = (int)CodonSiteTools::numberOfDifferences(outgroupState, minState, *codonAlphabet);
+ double nt = static_cast<double>(CodonSiteTools::numberOfDifferences(outgroupState, minState, *codonAlphabet));
double ns = CodonSiteTools::numberOfSynonymousDifferences(outgroupState, minState, *gCode);
- out << "\t" << ns << "\t" << (nt - ns);
+ out << "\t" << (nt - ns) << "\t" << ns;
}
} else {
//Site is polymorphic, this is not a substitution
- out << "\t0\t0" << endl;
+ out << "\t0\t0";
}
}
}
=====================================
bppSuite/bppSeqGen.cpp
=====================================
--- a/bppSuite/bppSeqGen.cpp
+++ b/bppSuite/bppSeqGen.cpp
@@ -79,7 +79,7 @@ using namespace bpp;
/**
* @brief Read trees from an input file, with segment annotations. Hudson's MS format.
*/
-void readTreesMs(ifstream& file, vector<Tree*>& trees, vector<double>& pos, unsigned int totPos) throw (Exception)
+void readTreesMs(ifstream& file, vector<Tree*>& trees, vector<double>& pos, unsigned int totPos)
{
string line = "";
unsigned int segsize;
@@ -131,7 +131,7 @@ void readTreesMs(ifstream& file, vector<Tree*>& trees, vector<double>& pos, unsi
/**
* @brief Read trees from an input file, with segment annotations. Mailund's CoaSim format.
*/
-void readTreesCoaSim(ifstream& file, vector<Tree*>& trees, vector<double>& pos) throw (Exception)
+void readTreesCoaSim(ifstream& file, vector<Tree*>& trees, vector<double>& pos)
{
string line = "";
double begin, end;
=====================================
bppSuite/bppSeqMan.cpp
=====================================
--- a/bppSuite/bppSeqMan.cpp
+++ b/bppSuite/bppSeqMan.cpp
@@ -298,6 +298,8 @@ int main(int args, char** argv)
// +--------------+
else if (cmdName == "RemoveStops")
{
+ if (!codonAlphabet)
+ throw Exception("RemoveStops: requires a codon alphabet.");
if (!gCode.get()) {
string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppseqman.getParams(), "Standard", "", true, 1);
ApplicationTools::displayResult("Genetic Code", codeDesc);
@@ -338,6 +340,8 @@ int main(int args, char** argv)
{
throw Exception("'RemoveColumnsWithStops' can only be used on alignment. You may consider using the 'CoerceToAlignment' command.");
}
+ if (!codonAlphabet)
+ throw Exception("RemoveColumnsWithStops: requires a codon alphabet.");
if (!gCode.get()) {
string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppseqman.getParams(), "Standard", "", true, 1);
ApplicationTools::displayResult("Genetic Code", codeDesc);
@@ -356,6 +360,8 @@ int main(int args, char** argv)
// +---------+
else if (cmdName == "GetCDS")
{
+ if (!codonAlphabet)
+ throw Exception("GetCDS: requires a codon alphabet.");
if (!gCode.get()) {
string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppseqman.getParams(), "Standard", "", true, 1);
ApplicationTools::displayResult("Genetic Code", codeDesc);
=====================================
bppsuite.spec
=====================================
--- a/bppsuite.spec
+++ b/bppsuite.spec
@@ -1,36 +1,34 @@
-%define _basename bppsuite
-%define _version 2.3.2
-%define _release 1
%define _prefix /usr
URL: https://github.com/BioPP
-Name: %{_basename}
-Version: %{_version}
-Release: %{_release}
+Name: bppsuite
+Version: 2.4.0
+Release: 1%{?dist}
License: CECILL-2.0
Vendor: The Bio++ Project
-Source: %{_basename}-%{_version}.tar.gz
+Source: %{name}-%{version}.tar.gz
Summary: The Bio++ Program Suite
Group: Productivity/Scientific/Other
-Requires: libbpp-phyl11 = %{_version}
-Requires: libbpp-seq11 = %{_version}
-Requires: libbpp-core3 = %{_version}
+Requires: libbpp-popgen8 = %{version}
+Requires: libbpp-phyl12 = %{version}
+Requires: libbpp-seq12 = %{version}
+Requires: libbpp-core4 = %{version}
-BuildRoot: %{_builddir}/%{_basename}-root
+BuildRoot: %{_builddir}/%{name}-root
BuildRequires: cmake >= 2.8.11
BuildRequires: gcc-c++ >= 4.7.0
BuildRequires: groff
BuildRequires: texinfo >= 4.0.0
-BuildRequires: libbpp-core3 = %{_version}
-BuildRequires: libbpp-core-devel = %{_version}
-BuildRequires: libbpp-seq11 = %{_version}
-BuildRequires: libbpp-seq-devel = %{_version}
-BuildRequires: libbpp-phyl11 = %{_version}
-BuildRequires: libbpp-phyl-devel = %{_version}
-BuildRequires: libbpp-popgen7 = %{_version}
-BuildRequires: libbpp-popgen-devel = %{_version}
+BuildRequires: libbpp-core4 = %{version}
+BuildRequires: libbpp-core-devel = %{version}
+BuildRequires: libbpp-seq12 = %{version}
+BuildRequires: libbpp-seq-devel = %{version}
+BuildRequires: libbpp-phyl12 = %{version}
+BuildRequires: libbpp-phyl-devel = %{version}
+BuildRequires: libbpp-popgen8 = %{version}
+BuildRequires: libbpp-popgen-devel = %{version}
AutoReq: yes
@@ -90,12 +88,13 @@ rm -rf $RPM_BUILD_ROOT
%{_prefix}/share/man/man1/*.1*
%changelog
+* Mon Mar 12 2018 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.4.0-1
* Tue Jun 06 2017 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.3.1-1
* Wed May 10 2017 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.3.0-1
- New BppPopStats program
- BppPhySamp is now distributed separately
- Several bugs fixed and improvements
-* Mon Sep 28 2014 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.2.0-1
+* Mon Sep 29 2014 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.2.0-1
- Compatibility update. Bio++ Program Suite version number is now indexed
on Bio++'s version.
- Programs support the --seed argument for setting the random seed.
@@ -112,5 +111,5 @@ rm -rf $RPM_BUILD_ROOT
* Thu Mar 25 2010 Julien Dutheil <julien.dutheil at univ-montp2.fr> 0.5.0-1
* Wed Jun 10 2009 Julien Dutheil <jdutheil at birc.au.dk> 0.4.0-1
* Thu Dec 11 2008 Julien Dutheil <jdutheil at birc.au.dk> 0.3.1-1
-* Thu Sep 23 2008 Julien Dutheil <jdutheil at birc.au.dk> 0.3.0-1
+* Tue Sep 23 2008 Julien Dutheil <jdutheil at birc.au.dk> 0.3.0-1
- Initial spec file.
=====================================
doc/bppsuite.texi
=====================================
--- a/doc/bppsuite.texi
+++ b/doc/bppsuite.texi
@@ -1,7 +1,7 @@
\input texinfo @c -*-texinfo-*-
@c %**start of header
@setfilename bppsuite.info
-@settitle BppSuite Manual 2.3.2
+@settitle BppSuite Manual 2.4.0
@documentencoding UTF-8
@afourpaper
@dircategory Science Biology Genetics
@@ -21,15 +21,15 @@
@c %**end of header
@copying
-This is the manual of the Bio++ Program Suite, version 2.3.2.
+This is the manual of the Bio++ Program Suite, version 2.4.0.
-Copyright @copyright{} 2007-2017 Bio++ development team
+Copyright @copyright{} 2007-2018 Bio++ development team
@end copying
@titlepage
@title BppSuite Manual
@author Julien Dutheil, Laurent Guéguen
-@author @email{julien.dutheil@@univ-montp2.fr}
+@author @email{dutheil@@evolbio.mpg.de}
@c The following two commands start the copyright page.
@page
@@ -97,8 +97,8 @@ Bio++ Program Suite Reference
* bppconsense:: Bio++ Consensus Trees.
* bppreroot:: Bio++ Serial Tree Re-rooting.
* bppseqman:: Bio++ Sequences Manipulation.
-* bppalnscore:: Bio++ Alignment Scoring
-* bpppopstats::
+* bppalnscore:: Bio++ Alignment Scoring.
+* bpppopstats:: Bio++ Population Genetics Statistics.
* bpptreedraw:: Bio++ Tree Drawing.
@end detailmenu
@@ -1000,30 +1000,38 @@ Arguments @var{relrate@{i@}} stands for the relative substitution rates
of the sites. Default: @var{relrate@{i@}=1/@{4-i@}}, such that the rate
of each site is 1/3.
+@cartouche
@example
alphabet=Codon(letter=DNA)
genetic_code=Standard
model=CodonRate(model=T92)
@end example
+@end cartouche
+
builds a model on codons, such that all sites follow the same T92 model.
The parameters names are @var{CodonRate.123_T92.kappa},
@var{CodonRate.relrate1}, @var{CodonRate.relrate2}.
+@cartouche
@example
alphabet=Codon(letter=DNA)
genetic_code=Standard
model=CodonRate(model1=T92, model2=T92, model3=JC69)
@end example
+@end cartouche
+
builds a model on codons, such that first and second sites follow
independent T92 models, and third site follows a JC69 model. Then the
parameters names are @var{CodonRate.1_T92.kappa},
@var{CodonRate.2_T92.kappa}, @var{CodonRate.relrate1},
@var{CodonRate.relrate2}, and can be initialized as is:
+@cartouche
@example
model=CodonRate(model1=T92(theta=0.5, kappa=2), \
model2=T92(theta=0.4, kappa=2), model3=JC69)
@end example
+@end cartouche
See the
@uref{http://bioweb.me/bpp-phyl-doc/classbpp_1_1CodonRateSubstitutionModel.html#details, Bio++ description, Bio++ description}.
@@ -1037,19 +1045,24 @@ between synonymous and non-synonymous substitutions.
Optional argument @var{beta} is the ratio between non-synonymous
substitution rate and synonymous substitution rate. Default value: 1.
+@cartouche
@example
alphabet=Codon(letter=DNA)
-
model=CodonDist(model=T92)
@end example
+@end cartouche
+
builds a model on codons, such that all sites follow the same T92 model.
The parameters names are @var{CodonDist.123_T92.kappa} and
@var{CodonDist.beta}.
+@cartouche
@example
alphabet=Codon(letter=DNA, type=Standard)
model=CodonDist(model1=T92, model2=T92, model3=JC69)
@end example
+@end cartouche
+
builds a model on codons, such that first and second sites follow
independent T92 models, and third site follows a JC69 model. Then the
parameters names are @var{CodonDist.1_T92.kappa},
@@ -1072,11 +1085,14 @@ Optional argument @var{beta} is the ratio between average substitution
rate between amino-acids and synonymous substitution rate. Default
value: 1.
+@cartouche
@example
alphabet=Codon(letter=DNA)
genetic_code=Standard
model=CodonProt(model=T92, protmodel=LG08)
@end example
+@end cartouche
+
builds a model on codons, such that all sites follow the same T92 model,
and amino-acid rates are proportional to the LG08 substitution matrix.
The parameters names are @var{CodonProt.123_T92.kappa} and
@@ -1098,11 +1114,14 @@ in the given frequencies set. This factor is described by the
below.
+@cartouche
@example
alphabet=Codon(letter=DNA)
genetic_code=Standard
model=CodonDistFreq(frequencies=Full())
@end example
+@end cartouche
+
has parameters @var{CodonDistFreq.012_T92.kappa},
@var{CodonDistFreq.Full.theta_1}, ...,
@var{CodonDistFreq.Full.theta_60},
@@ -1195,19 +1214,25 @@ Arguments @var{relrate@{i@}} stands for the relative substitution rates
of the sites. Default: @var{relrate@{i@}=1/@{n-i+1@}}, such that the rate
of each site is 1/n.
+@cartouche
@example
alphabet=Word(letter=DNA,length=4)
model=Word(model=T92())
@end example
+@end cartouche
+
builds a model on 4 bases words, such that all sites follow the same T92
model. The parameters names are @var{Word.1234_T92.kappa},
@var{Word.relrate1}, @var{Word.relrate2}, @var{Word.relrate3}.
+@cartouche
@example
alphabet=Word(letter=DNA,length=4)
model=Word(model1=T92(), model2=T92(), model3=JC69(), \
model4=HKY85())
@end example
+@end cartouche
+
builds a model on 4 bases words, such that first and second sites follow
independent T92 models, third site follows a JC69 model, and fourth
site follows a HKY85 model. Then the parameters names are
@@ -1253,25 +1278,33 @@ allowed).
As examples, on a DNA word with 3 positions:
+@cartouche
@example
model=Kron(model=K80(), positions=1*2*3)
@end example
+@end cartouche
allows only substitutions that change the 3 positions.
+@cartouche
@example
model=Kron(model=K80(), positions=1*2+3)
@end example
+@end cartouche
allows only substitutions that change the positions 1 and 2, and the
ones that change position 3 alone.
+@cartouche
@example
model=Kron(model=K80(), positions=1*2+2*3)
@end example
+@end cartouche
allows only substitutions that change two neighbor positions.
+@cartouche
@example
model=Kron(model=K80(), positions=1+2+3)
@end example
+@end cartouche
allows only substitutions that change one position, i.e. @var{Word}
model.
@@ -1304,19 +1337,23 @@ Arguments @var{relrate@{i@}} stands for the relative substitution rates
of the sites. Default: @var{relrate@{i@}=1/@{4-i@}}, such that the rate
of each site is 1/3.
+@cartouche
@example
alphabet=Codon(letter=DNA)
genetic_code=Standard
model=Triplet(model=T92)
@end example
+@end cartouche
builds a model on codons, such that all sites follow the same T92 model.
The parameters names are @var{Triplet.123_T92.kappa},
@var{Triplet.relrate1}, @var{Triplet.relrate2}.
+@cartouche
@example
alphabet=Word(letter=DNA, length=3)
model=Triplet(model1=T92, model2=T92, model3=JC69)
@end example
+@end cartouche
builds a model on 3 bases words, such that first and second sites follow
independent T92 models, and third site follows a JC69 model. Then the
parameters names are @var{Triplet.1_T92.kappa},
@@ -1427,11 +1464,13 @@ parameters distributions is described below. In case the range of a
parameter is limited, the domain of the corresponding distribution is
truncated accordingly.
+@cartouche
@example
model=MixedModel(model=TN93(kappa1=Gamma(n=4,alpha=3,beta=1),\
kappa2=Exponential(lambda=2),\
theta=0.5,theta1=0.2,theta2=0.1))
@end example
+@end cartouche
has parameters @var{TN93.kappa1_Gamma.alpha},
@var{TN93.kappa1_Gamma.beta},
@var{TN93.kappa2_Exponential.lambda},
@@ -1451,9 +1490,11 @@ order the models are given). Default: @var{relproba@{i@}=1/@{n-i+1@}},
such that the probability of each site is 1/n, and
@var{relrate@{i@}=1/@{n-i+1@}} such that the rate of each site is 1.
+@cartouche
@example
model=Mixture(model1=GY94(), model2=YN98(), relrate1=0.1)
@end example
+@end cartouche
has parameters @var{Mixture.relrate1}, @var{Mixture.relproba1},
@var{Mixture.1_GY94.kappa}, @var{Mixture.1_GY94.V},
@var{Mixture.2_YN98.kappa}, @var{Mixture.2_YN98.omega}.
@@ -1520,9 +1561,11 @@ Set the number of distinct models to use.
You now have to configure each model individually, using the syntax introduced for the homogeneous case, except that models will be numbered, for instance:
+@cartouche
@example
model1 = T92(theta=0.39, kappa=2.79)
@end example
+@end cartouche
The additional option is available to attach the model to branches in the tree, specified by the id of the upper node in the tree:
@@ -1552,18 +1595,22 @@ file.
To define constraints for sites between submodels, we can set "paths"
that any site must follow. For example, in the following description:
+@cartouche
@example
nonhomogeneous = general
nonhomogeneous.number_of_models = 3
model1=T92()
-model2=MixedModel(model=T92(kappa=Simple(values=(4,10,20),probas=(0.1,0.5,0.4))))
-model3=MixedModel(model=TN93(theta1=Simple(values=(0.1,0.5,0.9),probas=(0.3,0.2,0.5))))
+model2=MixedModel(model=T92(kappa=Simple(values=(4,10,20),\
+ probas=(0.1,0.5,0.4))))
+model3=MixedModel(model=TN93(theta1=Simple(values=(0.1,0.5,0.9),\
+ probas=(0.3,0.2,0.5))))
model1.nodes_id=0:1
model2.nodes_id=2:3
model3.nodes_id=4:5
@end example
+@end cartouche
In this case, on branches 2 & 3 a site follows any submodel of model 2
(but the same submodel on both branches), and on branches 4 & 5, a
@@ -1579,20 +1626,25 @@ are free (in this case it means that @var{T92.kappa=20} in model 2 is
linked with @var{TN93.theta1=0.5} in model 3), then we can use the
declarations:
+@cartouche
@example
site.number_of_paths=2
site.path1=model2[T92.kappa_1] & model3[TN93.theta1_2]
site.path2=model2[T92.kappa_2] & model3[TN93.theta1_3]
@end example
+@end cartouche
The third path (for the remaining submodels) is automatically
computed.
It is possible to link mixtures of submodels. For example,
+@cartouche
@example
-site.path1=model2[T92.kappa_1] & model3[TN93.theta1_2] & model3[TN93.theta1_3]
+site.path1=model2[T92.kappa_1] & model3[TN93.theta1_2]\
+ & model3[TN93.theta1_3]
@end example
+@end cartouche
means that a site that has @var{T92.kappa=4} in model2 has either
@var{TN93.theta1=0.5} or @var{TN93.theta1=0.9} in model3.
@@ -1618,6 +1670,7 @@ identifiability of several parameters (here the probabilities in model
Another example in the case of mixtures of mixed models, where the
submodels are defined by their names;
+@cartouche
@example
nonhomogeneous = general
nonhomogeneous.number_of_models = 2
@@ -1627,8 +1680,10 @@ model2=LLG08_UL3()
site.number_of_paths=2
site.path1=model1[LLG08_UL2.M2] & model2[LLG08_UL3.Q1]
-site.path2=model1[LLG08_UL2.M1] & model2[LLG08_UL3.Q2] & model2[LLG08_UL3.Q3]
+site.path2=model1[LLG08_UL2.M1] & model2[LLG08_UL3.Q2] \
+ & model2[LLG08_UL3.Q3]
@end example
+@end cartouche
When nonhomogeneity option is @option{one_per_branch}, each site is
constrained to follow the same submodel from leaves to root.
@@ -1696,19 +1751,23 @@ length of the words in the alphabet must be @var{n}, and all single
site frequency sets are independent. In that case, all single site
frequency set parameters are position dependent.
+@cartouche
@example
alphabet=Word(letter=DNA,length=4)
nonhomogeneous.root_freq=Word(frequency=GC())
@end example
+@end cartouche
builds a root frequency set on 4 bases words, such that all sites
frequencies follow the same GC frequency set model. The parameter
name is @var{1234_GC.theta}.
+@cartouche
@example
alphabet=Word(letter=DNA,length=4)
nonhomogeneous.root_freq=Word(frequency1=GC(),frequency2=GC(),\
frequency3=Fixed(),frequency4=Full())
@end example
+@end cartouche
builds a root frequency set on 4 bases words, such first and second sites
follow independent GC frequency sets, third site follows a Fixed
frequency set, and fourth site follows a Full frequency set. Then the
@@ -1736,20 +1795,25 @@ If the arguments are @var{frequency1}, @var{frequency2},
that case, all single site frequency set parameters are position
dependent.
+@cartouche
@example
alphabet=Codon(letter=DNA)
genetic_code=Standard
nonhomogeneous.root_freq=Codon(frequency=GC())
@end example
+@end cartouche
builds a frequency set on codons, such that all sites frequencies
follow the same GC frequency set model. The parameter name is
@var{123_GC.theta}.
+@cartouche
@example
alphabet=Codon(letter=DNA)
genetic_code=Standard
-nonhomogeneous.root_freq=Codon(frequency1=GC(),frequency2=GC(),frequency3=Fixed())
+nonhomogeneous.root_freq=Codon(frequency1=GC(),frequency2=GC(),\
+ frequency3=Fixed())
@end example
+@end cartouche
builds a frequency set on codons, such that first and second sites
follow independent GC frequency sets, third site follows a Fixed
@@ -1815,9 +1879,11 @@ with a probability @var{p}.
It is possible to reduce the parameter space by putting extra
constraints on parameters, using for instance
+ at cartouche
@example
model=TN93(kappa1=1.0, kappa2=kappa1, theta=0.5)
@end example
+ at end cartouche
In that particular case the resulting model is strictly equivalent to
the HKY85 model. This syntax however allows to define a larger set of
@@ -1827,33 +1893,41 @@ As long as their range match, parameters of several objects (models,
root frequencies, rates, etc) can be linked.
For instance:
+ at cartouche
@example
model1 = T92(theta=GC.theta, kappa=3)
model2 = T92(theta=0.39, kappa=T92.kappa_1)
nonhomogeneous.root_freq=GC
@end example
+ at end cartouche
In the case of nonhomogeneous modelling, a specific syntax is available:
+ at cartouche
@example
nonhomogeneous.alias = @{list of aliases@}
@end example
+ at end cartouche
where each alias is described as `param1->param2'. The full names of the parameters have to be used, see for example:
+ at cartouche
@example
model1 = T92(theta=0.4, kappa=4)
model2 = GTR(theta=0.4, a = 1.1, b=0.4, c=0.4, d=0.25, e=0.1)
nonhomogeneous.alias=GTR.theta1->T92.theta1
@end example
+ at end cartouche
This option can be used to link parameters of the root frequencies if the model is non-stationary:
+ at cartouche
@example
model1=GTR(theta1=0.7)
nonhomogeneous.root_freq=Full(init=balanced)
nonhomogeneous.alias=Full.theta1->GTR.theta1_1
@end example
+ at end cartouche
Note that this option is only available with the 'general' nonhomogeneous substitution models and will be ignored if used with "one_per_branch".
@@ -1920,9 +1994,11 @@ a Mixture of a given discrete distribution and a 0 Dirac. @var{p} is
the probability of this 0 Dirac.
For example :
+ at cartouche
@example
Invariant(dist=Gaussian(n=4,2,0.5),p=0.1)
@end example
+ at end cartouche
builds a mixture of a gaussian distribution with 4 categories (and
probability 0.9) and a 0 Dirac with probability 0.1. Overall, there
are 5 categories. The parameters names are
@@ -1939,10 +2015,12 @@ distributions prefixed by @var{Mixture.i_} where @var{i} is the order
of the distribution.
For example:
+ at cartouche
@example
Mixture(probas=(0.3,0.7),dist1=Beta(n=5,alpha=2,beta=3),\
dist2=Gamma(n=10,alpha=9,beta=2))
@end example
+ at end cartouche
builds a mixture of a discrete beta distribution and of a discrete
gamma distribution, with a total of 15 classes. The parameters names
are @var{Mixture.theta1}, @var{Mixture.1_Beta.alpha},
@@ -2048,9 +2126,12 @@ parameters whose name has @command{theta} in it.
A list of parameters on which the authorized values are limited to a
given interval.
+ at cartouche
@example
-optimization.constrain_parameter = YN98.omega = [-inf;1.9[, *theta* = [0.1;0.7[, BrLen*=[0.01;inf]
- at end example
+optimization.constrain_parameter = YN98.omega = [-inf;1.9[,\
+ *theta* = [0.1;0.7[, BrLen*=[0.01;inf]
+ at end example
+ at end cartouche
@item optimization.tolerance = @{float>0@}
The precision on the log-likelihood to reach.
@@ -2128,8 +2209,8 @@ This section now details the specific options for each program in the Bio++ Prog
* bppconsense:: Bio++ Consensus Trees.
* bppreroot:: Bio++ Serial Tree Re-rooting.
* bppseqman:: Bio++ Sequences Manipulation.
-* bppalnscore:: Bio++ Alignment Scoring
-* bpppopstats::
+* bppalnscore:: Bio++ Alignment Scoring.
+* bpppopstats:: Bio++ Population Genetics Statistics.
* bpptreedraw:: Bio++ Tree Drawing.
@end menu
@@ -2640,29 +2721,39 @@ Examples of use:
@itemize @bullet
@item Just change file format:
+ at cartouche
@example
sequence.manip=
@end example
+ at end cartouche
@item Change DNA to RNA:
+ at cartouche
@example
sequence.manip=Switch
@end example
+ at end cartouche
@item Unalign sequences, perform transcription and translate to proteins:
+ at cartouche
@example
sequence.manip=RemoveGaps,Transcript,Translate
@end example
+ at end cartouche
@item Change all unresolved characters to gaps and keep only positions with less than 5 gaps:
+ at cartouche
@example
sequence.manip=UnknownToGap,KeepComplete(maxGapAllowed=5)
@end example
+ at end cartouche
@item Keep only positions with less than 30% of gaps, and change them to unresolved characters:
+ at cartouche
@example
sequence.manip=KeepComplete(maxGapAllowed=30%),GapToUnknown
@end example
+ at end cartouche
@end itemize
@@ -2767,6 +2858,19 @@ This option is only recognized if @command{input.sequence.file.ingroup} was not
@item input.sequence.stop_codons_policy = Keep|RemoveIfLast|RemoveAll
Tells what to do with positions containing at least one stop codon: keep them, remove them only if they are at the end of the alignment, or remove them all.
+ at item estimate.kappa = @{[boolean]@}
+Tells if the ratio of transitions / transversions should be estimated from the data and used for further analyses. If yes, kappa will be estimated by maximum likelihood using a model of (codon) sequence evolution.
+
+ at item estimate.ancestor = @{[boolean]@}
+If an outgroup sequence is present, it will be used to estimate the ancestral allele for each polymorphic position.
+A model of (codon) sequence evolution will be used with a marginal ancestral state reconstruction method.
+
+ at item estimate.sample_ingroup = @{[boolean]@}
+Tells if a random subset of ingroup sequences should be used to fit the model (speeds up calculations in case of large data sets).
+
+ at item estimate.sample_ingroup.size = @{[integer]@}
+Number of ingroup sequences to sample.
+
@item pop.stats = @{[string]@}
The list of statistics to compute. The next section describes all available statistics.
@@ -2802,6 +2906,10 @@ then the total number of mutations is used in the calculation, instead of the nu
For codon sequences only, obviously. Compute nucleotide diversity at synonymous and non-synonymous sites,
the number of synonymous and non-synonymous sites, as well as the weighted ratio (PiN / NbN) / (PiS / NbS).
+ at item dN_dS
+For codon sequences only. Build the consensus sequence of both ingroup and outgroup alignments and fit a Yang and Nielsen model of codon sequence evolution with a maximum likelihood approach.
+Reports the estimated parameters omega (dN / dS ratio) and kappa (transitions / transversions ratio), as well as the divergence between the two sequences.
+
@item MKT
Compute the McDonald-Kreitman table, for codon sequences with outgroup.
@@ -2824,7 +2932,9 @@ Generate a table with codon-site specific statistics, including:
@item State in the first outgroup sequence, if any
- at item Mean number of synonymous positions
+ at item Ancestral state, if computed
+
+ at item Mean number of synonymous positions for polymorphism
@item Whether the site is synonymous polymorphic
@@ -2834,6 +2944,12 @@ Generate a table with codon-site specifics statistics, including:
@item Synonymous diversity (piS)
+ at item Mean number of synonymous positions for divergence
+
+ at item dN, if an outgroup is available
+
+ at item dS, if an outgroup is available
+
@end itemize
The @command{output.file} argument allows to specify the output file (mandatory).
=====================================
man/bppseqman.1
=====================================
--- a/man/bppseqman.1
+++ b/man/bppseqman.1
@@ -14,7 +14,7 @@ All UNIX flavors
.SH DESCRIPTION
-bppseqman performs several in silico molecular biology operations like transcription, translation or traduction. It also allows conversion between various file format, and bioinformatics tasks like removing of gaps, unknow characters or stop codons.
+bppseqman performs several in silico molecular biology operations like transcription, translation or traduction. It also allows conversion between various file formats, and bioinformatics tasks like removing of gaps, unknown characters or stop codons.
.SH OPTIONS
View it on GitLab: https://salsa.debian.org/med-team/bppsuite/commit/27c3181764d3a792f4f4cb0b2ab51519fed5ab07
---
View it on GitLab: https://salsa.debian.org/med-team/bppsuite/commit/27c3181764d3a792f4f4cb0b2ab51519fed5ab07
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.alioth.debian.org/pipermail/debian-med-commit/attachments/20180408/d994ffce/attachment-0001.html>
More information about the debian-med-commit
mailing list