[med-svn] [fastml2] 01/02: Imported Upstream version 2.0.3
Andreas Tille
tille at debian.org
Mon Jun 22 11:24:38 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository fastml2.
commit baab54398cad06c4f44d3b9b50475011a4fc43c3
Author: Andreas Tille <tille at debian.org>
Date: Mon Jun 22 10:14:06 2015 +0200
Imported Upstream version 2.0.3
---
.gitignore | 5 +
AUTHORS | 2 +
Makefile | 68 ++
Readme.md | 39 +
Vagrantfile | 122 +++
debian/changelog | 19 +
debian/compat | 1 +
debian/control | 13 +
debian/copyright | 9 +
debian/rules | 19 +
libs/phylogeny/AddLog.cpp | 25 +
libs/phylogeny/AddLog.h | 67 ++
libs/phylogeny/C_evalParamUSSRV.cpp | 112 ++
libs/phylogeny/C_evalParamUSSRV.h | 177 +++
libs/phylogeny/ConversionUtils.cpp | 52 +
libs/phylogeny/ConversionUtils.h | 51 +
libs/phylogeny/GLaguer.cpp | 178 +++
libs/phylogeny/GLaguer.h | 30 +
libs/phylogeny/GamMixtureOptimizer.cpp | 156 +++
libs/phylogeny/GamMixtureOptimizer.h | 52 +
libs/phylogeny/HIVb.dat.q | 24 +
libs/phylogeny/HIVw.dat.q | 23 +
libs/phylogeny/LG.dat.q | 23 +
libs/phylogeny/Makefile | 231 ++++
libs/phylogeny/NNiProp.cpp | 139 +++
libs/phylogeny/NNiProp.h | 39 +
libs/phylogeny/NNiSep.cpp | 174 +++
libs/phylogeny/NNiSep.h | 40 +
libs/phylogeny/Nni.cpp | 119 ++
libs/phylogeny/Nni.h | 32 +
libs/phylogeny/Parameters.cpp | 361 ++++++
libs/phylogeny/Parameters.h | 249 +++++
libs/phylogeny/aaJC.cpp | 7 +
libs/phylogeny/aaJC.h | 52 +
libs/phylogeny/adrianCodon.dat.q | 72 ++
libs/phylogeny/allTrees.cpp | 134 +++
libs/phylogeny/allTrees.h | 68 ++
libs/phylogeny/allTreesSeparateModel.cpp | 83 ++
libs/phylogeny/allTreesSeparateModel.h | 76 ++
libs/phylogeny/alphaTrivialAccelerator.h | 56 +
libs/phylogeny/alphabet.cpp | 7 +
libs/phylogeny/alphabet.h | 32 +
libs/phylogeny/amino.cpp | 152 +++
libs/phylogeny/amino.h | 46 +
libs/phylogeny/bblEM.cpp | 156 +++
libs/phylogeny/bblEM.h | 56 +
libs/phylogeny/bblEM2USSRV.cpp | 181 +++
libs/phylogeny/bblEM2USSRV.h | 73 ++
libs/phylogeny/bblEM2codon.cpp | 165 +++
libs/phylogeny/bblEM2codon.h | 54 +
libs/phylogeny/bblEMProportional.h | 50 +
libs/phylogeny/bblEMProprtional.cpp | 142 +++
libs/phylogeny/bblEMSeperate.cpp | 28 +
libs/phylogeny/bblEMSeperate.h | 30 +
libs/phylogeny/bblEMfixRoot.cpp | 175 +++
libs/phylogeny/bblEMfixRoot.h | 60 +
libs/phylogeny/bestAlpha.cpp | 301 +++++
libs/phylogeny/bestAlpha.h | 155 +++
libs/phylogeny/bestAlphaAndK.cpp | 262 +++++
libs/phylogeny/bestAlphaAndK.h | 84 ++
libs/phylogeny/bestAlphaAndNu.cpp | 177 +++
libs/phylogeny/bestAlphaAndNu.h | 215 ++++
libs/phylogeny/bestAlphaManyTrees.cpp | 270 +++++
libs/phylogeny/bestAlphaManyTrees.h | 127 +++
libs/phylogeny/bestGtrModelParams.cpp | 174 +++
libs/phylogeny/bestGtrModelParams.h | 111 ++
libs/phylogeny/bestHKYparam.cpp | 158 +++
libs/phylogeny/bestHKYparam.h | 109 ++
libs/phylogeny/bestParamUSSRV.cpp | 474 ++++++++
libs/phylogeny/bestParamUSSRV.h | 130 +++
libs/phylogeny/bestTamura92param.cpp | 205 ++++
libs/phylogeny/bestTamura92param.h | 137 +++
libs/phylogeny/betaDistribution.cpp | 139 +++
libs/phylogeny/betaDistribution.h | 61 ++
libs/phylogeny/betaDistributionFixedCategories.cpp | 158 +++
libs/phylogeny/betaDistributionFixedCategories.h | 37 +
...DistributionFixedCategoriesWithOmegaUniform.cpp | 52 +
...taDistributionFixedCategoriesWithOmegaUniform.h | 53 +
libs/phylogeny/betaOmegaDistribution.cpp | 61 ++
libs/phylogeny/betaOmegaDistribution.h | 56 +
libs/phylogeny/betaUtilities.cpp | 174 +++
libs/phylogeny/betaUtilities.h | 21 +
libs/phylogeny/bootstrap.cpp | 227 ++++
libs/phylogeny/bootstrap.h | 82 ++
libs/phylogeny/chebyshevAccelerator.cpp | 212 ++++
libs/phylogeny/chebyshevAccelerator.h | 48 +
libs/phylogeny/checkcovFanctors.h | 104 ++
libs/phylogeny/checkcovFanctorsWithFactors.h | 47 +
libs/phylogeny/clustalFormat.cpp | 158 +++
libs/phylogeny/clustalFormat.h | 47 +
libs/phylogeny/cmdline.ggo | 83 ++
libs/phylogeny/cmdline2EvolObjs.cpp | 2 +
libs/phylogeny/cmdline2EvolObjs.h | 578 ++++++++++
libs/phylogeny/codon.cpp | 560 ++++++++++
libs/phylogeny/codon.h | 107 ++
libs/phylogeny/codonJC.cpp | 6 +
libs/phylogeny/codonJC.h | 47 +
libs/phylogeny/codonUtils.cpp | 340 ++++++
libs/phylogeny/codonUtils.h | 36 +
libs/phylogeny/computeCounts.cpp | 142 +++
libs/phylogeny/computeCounts.h | 68 ++
libs/phylogeny/computeDownAlg.cpp | 221 ++++
libs/phylogeny/computeDownAlg.h | 49 +
libs/phylogeny/computeJumps.cpp | 166 +++
libs/phylogeny/computeJumps.h | 103 ++
libs/phylogeny/computeMarginalAlg.cpp | 100 ++
libs/phylogeny/computeMarginalAlg.h | 29 +
libs/phylogeny/computePijComponent.cpp | 109 ++
libs/phylogeny/computePijComponent.h | 54 +
.../computePosteriorExpectationOfSubstitutions.cpp | 202 ++++
.../computePosteriorExpectationOfSubstitutions.h | 60 +
...rExpectationOfSubstitutions_nonReversibleSp.cpp | 91 ++
...iorExpectationOfSubstitutions_nonReversibleSp.h | 22 +
libs/phylogeny/computeSubstitutionCounts.cpp | 378 +++++++
libs/phylogeny/computeSubstitutionCounts.h | 71 ++
libs/phylogeny/computeUpAlg.cpp | 157 +++
libs/phylogeny/computeUpAlg.h | 67 ++
libs/phylogeny/computeUpAlgFactors.cpp | 190 ++++
libs/phylogeny/countTableComponent.cpp | 35 +
libs/phylogeny/countTableComponent.h | 84 ++
libs/phylogeny/cpREV45.dat.q | 22 +
libs/phylogeny/datMatrixHolder.cpp | 32 +
libs/phylogeny/datMatrixHolder.h | 31 +
libs/phylogeny/dayhoff.dat.q | 79 ++
libs/phylogeny/definitions.h | 83 ++
libs/phylogeny/distanceBasedSeqs2Tree.cpp | 554 ++++++++++
libs/phylogeny/distanceBasedSeqs2Tree.h | 195 ++++
libs/phylogeny/distanceMethod.h | 24 +
libs/phylogeny/distanceTable.cpp | 21 +
libs/phylogeny/distanceTable.h | 17 +
libs/phylogeny/distances2Tree.h | 18 +
libs/phylogeny/distribution.cpp | 13 +
libs/phylogeny/distribution.h | 31 +
libs/phylogeny/distributionPlusCategory.cpp | 100 ++
libs/phylogeny/distributionPlusCategory.h | 43 +
libs/phylogeny/distributionPlusInvariant.cpp | 77 ++
libs/phylogeny/distributionPlusInvariant.h | 41 +
libs/phylogeny/doubleRep.cpp | 73 ++
libs/phylogeny/doubleRep.h | 316 ++++++
libs/phylogeny/errorMsg.cpp | 45 +
libs/phylogeny/errorMsg.h | 33 +
libs/phylogeny/evaluateCharacterFreq.cpp | 151 +++
libs/phylogeny/evaluateCharacterFreq.h | 26 +
libs/phylogeny/fastStartTree.cpp | 145 +++
libs/phylogeny/fastStartTree.h | 24 +
libs/phylogeny/fastaFormat.cpp | 75 ++
libs/phylogeny/fastaFormat.h | 35 +
libs/phylogeny/findRateOfGene.cpp | 81 ++
libs/phylogeny/findRateOfGene.h | 24 +
.../fromCountTableComponentToDistance.cpp | 23 +
libs/phylogeny/fromCountTableComponentToDistance.h | 37 +
.../fromCountTableComponentToDistance2Codon.cpp | 22 +
.../fromCountTableComponentToDistance2Codon.h | 34 +
.../fromCountTableComponentToDistance2USSRV.cpp | 22 +
.../fromCountTableComponentToDistance2USSRV.h | 39 +
.../fromCountTableComponentToDistanceProp.cpp | 18 +
.../fromCountTableComponentToDistanceProp.h | 33 +
.../fromCountTableComponentToDistancefixRoot.cpp | 26 +
.../fromCountTableComponentToDistancefixRoot.h | 39 +
libs/phylogeny/fromInstructionFile.cpp | 555 ++++++++++
libs/phylogeny/fromInstructionFile.h | 60 +
libs/phylogeny/fromQtoPt.cpp | 303 ++++++
libs/phylogeny/fromQtoPt.h | 67 ++
libs/phylogeny/gainLossAlphabet.cpp | 59 +
libs/phylogeny/gainLossAlphabet.h | 25 +
libs/phylogeny/gammaDistribution.cpp | 36 +
libs/phylogeny/gammaDistribution.h | 33 +
.../phylogeny/gammaDistributionFixedCategories.cpp | 35 +
libs/phylogeny/gammaDistributionFixedCategories.h | 38 +
libs/phylogeny/gammaDistributionLaguerre.cpp | 42 +
libs/phylogeny/gammaDistributionLaguerre.h | 34 +
libs/phylogeny/gammaDistributionPlusInvariant.cpp | 13 +
libs/phylogeny/gammaDistributionPlusInvariant.h | 35 +
libs/phylogeny/gammaUtilities.cpp | 170 +++
libs/phylogeny/gammaUtilities.h | 48 +
libs/phylogeny/generalGammaDistribution.cpp | 115 ++
libs/phylogeny/generalGammaDistribution.h | 61 ++
.../generalGammaDistributionFixedCategories.cpp | 360 ++++++
.../generalGammaDistributionFixedCategories.h | 36 +
.../phylogeny/generalGammaDistributionLaguerre.cpp | 113 ++
libs/phylogeny/generalGammaDistributionLaguerre.h | 47 +
.../generalGammaDistributionPlusInvariant.cpp | 13 +
.../generalGammaDistributionPlusInvariant.h | 51 +
libs/phylogeny/geneticCodeHolder.cpp | 49 +
libs/phylogeny/geneticCodeHolder.h | 33 +
libs/phylogeny/getRandomWeights.cpp | 53 +
libs/phylogeny/getRandomWeights.h | 31 +
libs/phylogeny/givenRatesMLDistance.cpp | 139 +++
libs/phylogeny/givenRatesMLDistance.h | 61 ++
libs/phylogeny/goldmanYangModel.cpp | 144 +++
libs/phylogeny/goldmanYangModel.h | 56 +
libs/phylogeny/granthamChemicalDistances.cpp | 187 ++++
libs/phylogeny/granthamChemicalDistances.h | 32 +
libs/phylogeny/gtrModel.cpp | 210 ++++
libs/phylogeny/gtrModel.h | 62 ++
libs/phylogeny/hky.cpp | 593 ++++++++++
libs/phylogeny/hky.h | 46 +
libs/phylogeny/indel.cpp | 58 +
libs/phylogeny/indel.h | 28 +
libs/phylogeny/indelModel.cpp | 15 +
libs/phylogeny/indelModel.h | 61 ++
libs/phylogeny/integerAlphabet.cpp | 61 ++
libs/phylogeny/integerAlphabet.h | 29 +
libs/phylogeny/jcDistance.h | 141 +++
libs/phylogeny/jones.dat.q | 131 +++
libs/phylogeny/khTest.cpp | 56 +
libs/phylogeny/khTest.h | 10 +
libs/phylogeny/likeDist.cpp | 379 +++++++
libs/phylogeny/likeDist.h | 203 ++++
libs/phylogeny/likeDist2Codon.cpp | 25 +
libs/phylogeny/likeDist2Codon.h | 110 ++
libs/phylogeny/likeDist2USSRV.cpp | 65 ++
libs/phylogeny/likeDist2USSRV.h | 152 +++
libs/phylogeny/likeDistProp.cpp | 21 +
libs/phylogeny/likeDistProp.h | 91 ++
libs/phylogeny/likeDistfixRoot.cpp | 378 +++++++
libs/phylogeny/likeDistfixRoot.h | 211 ++++
libs/phylogeny/likelihoodComputation.cpp | 440 ++++++++
libs/phylogeny/likelihoodComputation.h | 166 +++
libs/phylogeny/likelihoodComputation2Codon.cpp | 94 ++
libs/phylogeny/likelihoodComputation2Codon.h | 35 +
libs/phylogeny/likelihoodComputation2USSRV.cpp | 82 ++
libs/phylogeny/likelihoodComputation2USSRV.h | 36 +
libs/phylogeny/likelihoodComputationFactors.cpp | 33 +
libs/phylogeny/likelihoodComputationFactors.h | 28 +
libs/phylogeny/likelihoodComputationGL.cpp | 326 ++++++
libs/phylogeny/likelihoodComputationGL.h | 97 ++
libs/phylogeny/logFile.cpp | 48 +
libs/phylogeny/logFile.h | 50 +
libs/phylogeny/logRep.cpp | 30 +
libs/phylogeny/logRep.h | 162 +++
libs/phylogeny/make.dep | 715 ++++++++++++
libs/phylogeny/maseFormat.cpp | 86 ++
libs/phylogeny/maseFormat.h | 42 +
libs/phylogeny/matrixUtils.cpp | 331 ++++++
libs/phylogeny/matrixUtils.h | 148 +++
libs/phylogeny/mixtureDistribution.cpp | 311 ++++++
libs/phylogeny/mixtureDistribution.h | 67 ++
libs/phylogeny/molphyFormat.cpp | 85 ++
libs/phylogeny/molphyFormat.h | 47 +
libs/phylogeny/mtREV24.dat.q | 35 +
libs/phylogeny/mulAlphabet.cpp | 175 +++
libs/phylogeny/mulAlphabet.h | 51 +
libs/phylogeny/multipleStochasticProcess.cpp | 38 +
libs/phylogeny/multipleStochasticProcess.h | 23 +
libs/phylogeny/nexusFormat.cpp | 152 +++
libs/phylogeny/nexusFormat.h | 43 +
libs/phylogeny/nj.cpp | 410 +++++++
libs/phylogeny/nj.h | 90 ++
libs/phylogeny/njConstrain.cpp | 130 +++
libs/phylogeny/njConstrain.h | 29 +
libs/phylogeny/normalDist.cpp | 67 ++
libs/phylogeny/normalDist.h | 35 +
libs/phylogeny/nucJC.cpp | 5 +
libs/phylogeny/nucJC.h | 53 +
libs/phylogeny/nucleotide.cpp | 122 +++
libs/phylogeny/nucleotide.h | 110 ++
libs/phylogeny/nucleotide_amir.cpp | 139 +++
libs/phylogeny/nucleotide_amir.h | 111 ++
libs/phylogeny/numRec.cpp | 498 +++++++++
libs/phylogeny/numRec.h | 275 +++++
libs/phylogeny/nyCodonModel.cpp | 0
libs/phylogeny/nyCodonModel.h | 65 ++
libs/phylogeny/optGammaMixtureEM.cpp | 291 +++++
libs/phylogeny/optGammaMixtureEM.h | 102 ++
libs/phylogeny/optGammaMixtureLS.cpp | 261 +++++
libs/phylogeny/optGammaMixtureLS.h | 275 +++++
libs/phylogeny/pDistance.h | 37 +
libs/phylogeny/pairwiseGammaDistance.cpp | 158 +++
libs/phylogeny/pairwiseGammaDistance.h | 63 ++
libs/phylogeny/pgetopt.h | 180 +++
libs/phylogeny/phylipFormat.cpp | 138 +++
libs/phylogeny/phylipFormat.h | 47 +
libs/phylogeny/phylipSequentialFormat.cpp | 130 +++
libs/phylogeny/phylipSequentialFormat.h | 35 +
libs/phylogeny/pijAccelerator.cpp | 9 +
libs/phylogeny/pijAccelerator.h | 26 +
libs/phylogeny/posteriorDistance.cpp | 420 +++++++
libs/phylogeny/posteriorDistance.h | 72 ++
libs/phylogeny/readDatMatrix.cpp | 284 +++++
libs/phylogeny/readDatMatrix.h | 68 ++
libs/phylogeny/readTree.cpp | 178 +++
libs/phylogeny/readTree.h | 40 +
libs/phylogeny/recognizeFormat.cpp | 86 ++
libs/phylogeny/recognizeFormat.h | 19 +
.../replacementMatrixSource/HIVBetween.dat | 46 +
.../replacementMatrixSource/HIVWithin.dat | 46 +
libs/phylogeny/replacementMatrixSource/cpREV45.dat | 24 +
libs/phylogeny/replacementMatrixSource/dayhoff.dat | 93 ++
libs/phylogeny/replacementMatrixSource/jones.dat | 150 +++
.../mitochondriaAscidian.code | 24 +
.../mitochondriaEchinoderm.code | 24 +
.../mitochondriaFlatworm.code | 24 +
.../mitochondriaInvertebrate.code | 24 +
.../mitochondriaProtozoan.code | 24 +
.../mitochondriaVertebrate.code | 24 +
.../replacementMatrixSource/mitochondriaYeast.code | 24 +
libs/phylogeny/replacementMatrixSource/mtREV24.dat | 41 +
.../nuclearBlepharisma.code | 24 +
.../replacementMatrixSource/nuclearCiliate.code | 24 +
.../replacementMatrixSource/nuclearEuplotid.code | 24 +
.../replacementMatrixSource/nuclearStandard.code | 24 +
libs/phylogeny/replacementMatrixSource/wag.dat | 47 +
libs/phylogeny/replacementModel.cpp | 9 +
libs/phylogeny/replacementModel.h | 26 +
libs/phylogeny/replacementModelSSRV.cpp | 198 ++++
libs/phylogeny/replacementModelSSRV.h | 73 ++
libs/phylogeny/samplingSequences.cpp | 193 ++++
libs/phylogeny/samplingSequences.h | 33 +
libs/phylogeny/searchStatus.cpp | 9 +
libs/phylogeny/searchStatus.h | 30 +
libs/phylogeny/seqContainerTreeMap.cpp | 63 ++
libs/phylogeny/seqContainerTreeMap.h | 36 +
libs/phylogeny/seqeuncesFilter.cpp | 233 ++++
libs/phylogeny/seqeuncesFilter.h | 35 +
libs/phylogeny/sequence.cpp | 178 +++
libs/phylogeny/sequence.h | 141 +++
libs/phylogeny/sequenceContainer.cpp | 389 +++++++
libs/phylogeny/sequenceContainer.h | 169 +++
libs/phylogeny/simulateCodonsJumps.cpp | 210 ++++
libs/phylogeny/simulateCodonsJumps.h | 49 +
libs/phylogeny/simulateJumps.cpp | 188 ++++
libs/phylogeny/simulateJumps.h | 48 +
libs/phylogeny/simulateJumpsAbstract.cpp | 44 +
libs/phylogeny/simulateJumpsAbstract.h | 73 ++
libs/phylogeny/simulateTree.cpp | 225 ++++
libs/phylogeny/simulateTree.h | 49 +
libs/phylogeny/siteSpecificRate.cpp | 334 ++++++
libs/phylogeny/siteSpecificRate.h | 138 +++
libs/phylogeny/siteSpecificRateGL.cpp | 299 +++++
libs/phylogeny/siteSpecificRateGL.h | 141 +++
libs/phylogeny/someUtil.cpp | 822 ++++++++++++++
libs/phylogeny/someUtil.h | 161 +++
libs/phylogeny/split.cpp | 84 ++
libs/phylogeny/split.h | 75 ++
libs/phylogeny/splitMap.cpp | 50 +
libs/phylogeny/splitMap.h | 37 +
libs/phylogeny/splitTreeUtil.cpp | 109 ++
libs/phylogeny/splitTreeUtil.h | 25 +
libs/phylogeny/ssrvDistanceSeqs2Tree.cpp | 149 +++
libs/phylogeny/ssrvDistanceSeqs2Tree.h | 63 ++
libs/phylogeny/stochasticProcess.cpp | 57 +
libs/phylogeny/stochasticProcess.h | 58 +
libs/phylogeny/stochasticProcessSSRV.cpp | 19 +
libs/phylogeny/stochasticProcessSSRV.h | 48 +
libs/phylogeny/suffStatComponent.cpp | 6 +
libs/phylogeny/suffStatComponent.h | 204 ++++
libs/phylogeny/suffStatGammaMixture.cpp | 236 ++++
libs/phylogeny/suffStatGammaMixture.h | 58 +
libs/phylogeny/talRandom.cpp | 73 ++
libs/phylogeny/talRandom.h | 98 ++
libs/phylogeny/tamura92.cpp | 167 +++
libs/phylogeny/tamura92.h | 36 +
libs/phylogeny/threeStateAlphabet.cpp | 58 +
libs/phylogeny/threeStateAlphabet.h | 26 +
libs/phylogeny/threeStateModel.cpp | 254 +++++
libs/phylogeny/threeStateModel.h | 131 +++
libs/phylogeny/tree.cpp | 1150 ++++++++++++++++++++
libs/phylogeny/tree.h | 208 ++++
libs/phylogeny/treeInference.cpp | 16 +
libs/phylogeny/treeInference.h | 26 +
libs/phylogeny/treeIt.cpp | 6 +
libs/phylogeny/treeIt.h | 128 +++
libs/phylogeny/treeUtil.cpp | 348 ++++++
libs/phylogeny/treeUtil.h | 49 +
libs/phylogeny/trivialAccelerator.h | 32 +
libs/phylogeny/unObservableData.cpp | 82 ++
libs/phylogeny/unObservableData.h | 45 +
libs/phylogeny/uniDistribution.cpp | 11 +
libs/phylogeny/uniDistribution.h | 37 +
libs/phylogeny/uniformDistribution.cpp | 64 ++
libs/phylogeny/uniformDistribution.h | 66 ++
libs/phylogeny/ussrvModel.cpp | 125 +++
libs/phylogeny/ussrvModel.h | 41 +
libs/phylogeny/wYangModel.cpp | 96 ++
libs/phylogeny/wYangModel.h | 59 +
libs/phylogeny/wag.dat.q | 42 +
manifests/trustyvm.pp | 11 +
programs/Makefile.generic | 244 +++++
programs/fastml/Makefile | 18 +
programs/fastml/bbAlg.cpp | 258 +++++
programs/fastml/bbAlg.h | 67 ++
programs/fastml/bbComputeDownAlg.cpp | 191 ++++
programs/fastml/bbComputeDownAlg.h | 23 +
programs/fastml/bbComputeUpAlg.cpp | 46 +
programs/fastml/bbComputeUpAlg.h | 26 +
programs/fastml/bbEvaluateSpecificAV.cpp | 113 ++
programs/fastml/bbEvaluateSpecificAV.h | 51 +
programs/fastml/bbNodeOrderAlg.cpp | 134 +++
programs/fastml/bbNodeOrderAlg.h | 54 +
programs/fastml/bbReport.cpp | 75 ++
programs/fastml/bbReport.h | 58 +
programs/fastml/bb_options.cpp | 159 +++
programs/fastml/bb_options.h | 69 ++
programs/fastml/bb_options_list.h | 47 +
programs/fastml/bbfindBestAVDynProg.cpp | 116 ++
programs/fastml/bbfindBestAVDynProg.h | 44 +
programs/fastml/computeMarginalReconstruction.cpp | 152 +++
programs/fastml/computeMarginalReconstruction.h | 39 +
programs/fastml/fastml.cpp | 361 ++++++
programs/fastml/jointNoGamma.cpp | 140 +++
programs/fastml/jointNoGamma.h | 44 +
programs/fastml/mainbb.cpp | 562 ++++++++++
programs/fastml/mainbb.h | 74 ++
programs/fastml/make.dep | 254 +++++
programs/fastml/sequenceDataDiff.cpp | 49 +
programs/fastml/sequenceDataDiff.h | 45 +
programs/fastml/suffStatComponentJointNoGamma.cpp | 1 +
programs/fastml/suffStatComponentJointNoGamma.h | 50 +
409 files changed, 45229 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5cda45e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.o
+.DS_Store
+*.lo
+*~
+*.a
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..c6a2444
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,2 @@
+Andrew J. Page (ap13 at sanger.ac.uk)
+Aidan Delaney (aidan at phoric.eu)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..54d6024
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,68 @@
+.PHONY: all
+
+# Copying autoconf style
+PACKAGE_NAME=fastml2
+PACKAGE_VERSION=2.2~trusty1
+
+# Default build: the phylogeny library first, then the fastml programs.
+all: libs programs
+
+debug: libs.debug
+
+# Catch-all: forward any other goal to both sub-builds.
+%: libs.% programs.%
+	echo $@
+
+libs: libs.all
+
+programs: programs.all
+
+# The programs link against the library, so build libs first.
+programs.all: libs
+programs.debug: libs.debug
+
+semphy: programs.semphy
+
+install: programs.install
+
+programs.install programs.all semphy: libs
+
+clean: libs.clean programs.clean
+
+# NOTE(review): the pattern stem is ignored in the two recipes below —
+# e.g. `libs.clean` and `libs.debug` both run a plain `make` in the
+# subdirectory, not `make clean`/`make debug`.  Confirm whether
+# `$(MAKE) $*` was intended.  The leading '+' on the recipe lines tells
+# make to run them even under -n/-t/-q.
+libs.%:
+	+cd libs/phylogeny;make
+
+programs.%:
+	+cd programs/fastml;make
+
+# Emacs TAGS table over all library and program sources.
+tags: libs/*/*.cpp libs/*/*.h programs/*/*.h programs/*/*.cpp
+	etags --members --language=c++ $^
+
+# Roll a clean source tarball (objects and built binaries removed first).
+dist:
+	rm -rf ${PACKAGE_NAME}-${PACKAGE_VERSION}
+	mkdir ${PACKAGE_NAME}-${PACKAGE_VERSION}
+	rm -rf libs/phylogeny/*.o libs/phylogeny/*.a programs/fastml/*.o programs/fastml/*.a programs/fastml/fastml
+	cp -R debian libs programs Makefile Readme.md ${PACKAGE_NAME}-${PACKAGE_VERSION}
+	tar czvf ${PACKAGE_NAME}-${PACKAGE_VERSION}.tar.gz ${PACKAGE_NAME}-${PACKAGE_VERSION}
+	rm -rf ${PACKAGE_NAME}-${PACKAGE_VERSION}
+
+# Build a Debian source package inside the Vagrant VM (see Vagrantfile).
+source: dist
+	vagrant up
+	vagrant ssh -c "sudo apt-get update"
+	vagrant provision
+	vagrant ssh -c "tar xzvf /vagrant/${PACKAGE_NAME}-${PACKAGE_VERSION}.tar.gz"
+	vagrant ssh -c "dpkg-source -rfakeroot -b ${PACKAGE_NAME}-${PACKAGE_VERSION}"
+	vagrant ssh -c "cd ${PACKAGE_NAME}-${PACKAGE_VERSION} && dpkg-genchanges -S > ../${PACKAGE_NAME}_${PACKAGE_VERSION}_amd64.changes"
+	vagrant ssh -c "cp ${PACKAGE_NAME}_${PACKAGE_VERSION}.dsc /vagrant"
+	vagrant ssh -c "cp ${PACKAGE_NAME}_${PACKAGE_VERSION}_amd64.changes /vagrant"
+	vagrant ssh -c "cp ${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.gz /vagrant"
+	vagrant halt
+
+# Build binary .deb packages inside the Vagrant VM.
+release: dist
+	vagrant up
+	vagrant ssh -c "sudo apt-get update"
+	vagrant provision
+	vagrant ssh -c "tar xzvf /vagrant/${PACKAGE_NAME}-${PACKAGE_VERSION}.tar.gz"
+	vagrant ssh -c "cd ${PACKAGE_NAME}-${PACKAGE_VERSION} && dpkg-buildpackage -uc -us -rfakeroot"
+	vagrant ssh -c "cp ${PACKAGE_NAME}_${PACKAGE_VERSION}_amd64.deb /vagrant"
+	vagrant ssh -c "cp ${PACKAGE_NAME}_${PACKAGE_VERSION}_amd64.changes /vagrant"
+	vagrant ssh -c "cp ${PACKAGE_NAME}_${PACKAGE_VERSION}.tar.gz /vagrant"
+	vagrant halt
+	# You need to sign the changes files with gpg before uploading to ubuntu ppa
diff --git a/Readme.md b/Readme.md
new file mode 100644
index 0000000..1e3f6db
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,39 @@
+This repository provides some additional functionality to fastml to allow it to work with Gubbins.
+The original code is available from http://fastml.tau.ac.il/source.html and should be your first port of call.
+To modify the original code, or use parts of it for other purposes, permission should be requested. Please contact Tal Pupko: talp at post.tau.ac.il
+
+In citing the FASTML server please refer to:
+
+Ashkenazy H, Penn O, Doron-Faigenboim A, Cohen O, Cannarozzi G, Zomer O, Pupko T. 2012
+FastML: a web server for probabilistic reconstruction of ancestral sequences
+Nucleic Acids Res. 40(Web Server issue):W580-4. [pdf] [abs]
+
+
+Pupko T, Pe'er I, Hasegawa M, Graur D, Friedman N. 2002
+A branch-and-bound algorithm for the inference of ancestral amino-acid sequences when the replacement rate varies among sites: Application to the evolution of five gene families.
+Bioinformatics 18(8): 1116-1123. [pdf] [abs]
+
+
+Pupko T, Pe'er I, Shamir R, Graur D. 2000.
+A fast algorithm for joint reconstruction of ancestral amino-acid sequences.
+Mol. Biol. Evol. 17(6): 890-896. [pdf] [abs]
+
+
+Pupko, T. and Pe'er I. 2000.
+Maximum likelihood reconstruction of ancestral amino-acid sequences.
+Currents in Computational Molecular Biology. Ed. Miyano, S., Shamir, R, and Takagi, T. pp. 184-185. Universal Academy Press, Tokyo, Japan. [pdf]
+
+#To install from source:
+```
+autoreconf -i
+./configure
+make
+make install
+```
+
+#To install on Ubuntu (trusty):
+```
+sudo add-apt-repository ppa:ap13/gubbins
+sudo apt-get update
+sudo apt-get install fastml2
+```
diff --git a/Vagrantfile b/Vagrantfile
new file mode 100644
index 0000000..1bd28d1
--- /dev/null
+++ b/Vagrantfile
@@ -0,0 +1,122 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
+VAGRANTFILE_API_VERSION = "2"
+
+Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
+ # All Vagrant configuration is done here. The most common configuration
+ # options are documented and commented below. For a complete reference,
+ # please see the online documentation at vagrantup.com.
+
+ # Every Vagrant virtual environment requires a box to build off of.
+ config.vm.box = "trustyvm"
+
+ # The url from where the 'config.vm.box' box will be fetched if it
+ # doesn't already exist on the user's system.
+ config.vm.box_url = "https://cloud-images.ubuntu.com/vagrant/trusty/current/trusty-server-cloudimg-amd64-vagrant-disk1.box"
+
+ # Create a forwarded port mapping which allows access to a specific port
+ # within the machine from a port on the host machine. In the example below,
+ # accessing "localhost:8080" will access port 80 on the guest machine.
+ # config.vm.network "forwarded_port", guest: 80, host: 8080
+
+ # Create a private network, which allows host-only access to the machine
+ # using a specific IP.
+ # config.vm.network "private_network", ip: "192.168.33.10"
+
+ # Create a public network, which generally matched to bridged network.
+ # Bridged networks make the machine appear as another physical device on
+ # your network.
+ # config.vm.network "public_network"
+
+ # If true, then any SSH connections made will enable agent forwarding.
+ # Default value: false
+ # config.ssh.forward_agent = true
+
+ # Share an additional folder to the guest VM. The first argument is
+ # the path on the host to the actual folder. The second argument is
+ # the path on the guest to mount the folder. And the optional third
+ # argument is a set of non-required options.
+
+ # Note from <a.j.delaney at brighton.ac.uk>: this does not work well where
+ # the host VirtualBox and the downloaded image disagree on the specific
+ # version of guest tools.
+ # config.vm.synced_folder "..", "/vagrant_data"
+
+ # Provider-specific configuration so you can fine-tune various
+ # backing providers for Vagrant. These expose provider-specific options.
+ # Example for VirtualBox:
+ #
+ # config.vm.provider "virtualbox" do |vb|
+ # # Don't boot with headless mode
+ # vb.gui = true
+ #
+ # # Use VBoxManage to customize the VM. For example to change memory:
+ # vb.customize ["modifyvm", :id, "--memory", "1024"]
+ # end
+ #
+ # View the documentation for the provider you're using for more
+ # information on available options.
+
+ # Enable provisioning with Puppet stand alone. Puppet manifests
+ # are contained in a directory path relative to this Vagrantfile.
+ # You will need to create the manifests directory and a manifest in
+ # the file trustyvm.pp in the manifests_path directory.
+ #
+ # An example Puppet manifest to provision the message of the day:
+ #
+ # # group { "puppet":
+ # # ensure => "present",
+ # # }
+ # #
+ # # File { owner => 0, group => 0, mode => 0644 }
+ # #
+ # # file { '/etc/motd':
+ # # content => "Welcome to your Vagrant-built virtual machine!
+ # # Managed by Puppet.\n"
+ # # }
+ #
+ config.vm.provision "puppet" do |puppet|
+ puppet.manifests_path = "manifests"
+ puppet.manifest_file = "trustyvm.pp"
+ end
+
+ # Enable provisioning with chef solo, specifying a cookbooks path, roles
+ # path, and data_bags path (all relative to this Vagrantfile), and adding
+ # some recipes and/or roles.
+ #
+ # config.vm.provision "chef_solo" do |chef|
+ # chef.cookbooks_path = "../my-recipes/cookbooks"
+ # chef.roles_path = "../my-recipes/roles"
+ # chef.data_bags_path = "../my-recipes/data_bags"
+ # chef.add_recipe "mysql"
+ # chef.add_role "web"
+ #
+ # # You may also specify custom JSON attributes:
+ # chef.json = { :mysql_password => "foo" }
+ # end
+
+ # Enable provisioning with chef server, specifying the chef server URL,
+ # and the path to the validation key (relative to this Vagrantfile).
+ #
+ # The Opscode Platform uses HTTPS. Substitute your organization for
+ # ORGNAME in the URL and validation key.
+ #
+ # If you have your own Chef Server, use the appropriate URL, which may be
+ # HTTP instead of HTTPS depending on your configuration. Also change the
+ # validation key to validation.pem.
+ #
+ # config.vm.provision "chef_client" do |chef|
+ # chef.chef_server_url = "https://api.opscode.com/organizations/ORGNAME"
+ # chef.validation_key_path = "ORGNAME-validator.pem"
+ # end
+ #
+ # If you're using the Opscode platform, your validator client is
+ # ORGNAME-validator, replacing ORGNAME with your organization name.
+ #
+ # If you have your own Chef Server, the default validation client name is
+ # chef-validator, unless you changed the configuration.
+ #
+ # chef.validation_client_name = "ORGNAME-validator"
+end
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..8aac9a5
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,19 @@
+fastml2 (2.2~trusty1) trusty; urgency=medium
+
+ * Packaging tweaks
+
+ -- Andrew Page <ap13 at sanger.ac.uk> Wed, 13 Aug 2014 15:29:49 +0100
+
+fastml2 (2.2) saucy; urgency=medium
+
+ * Proper Debian package.
+ * Incorporates some of AndrewJPage's source-level fixes.
+
+ -- Aidan Delaney (Packaging Key) <aidan at ontologyengineering.org> Wed, 23 Apr 2014 13:16:49 +0100
+
+fastml2 (2.2) unstable; urgency=low
+
+ * Initial package
+
+ -- Aidan Delaney <aidan at ontologyengineering.org> Tue, 01 Apr 2014 14:46:07 +0000
+
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..ec63514
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..90ec6ba
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,13 @@
+Source: fastml2
+Section: misc
+Priority: extra
+Maintainer: Aidan Delaney <aidan at ontologyengineering.org>
+Build-Depends: debhelper (>= 8.0.0), make
+Standards-Version: 3.9.4
+Vcs-Git: https://github.com/sanger-pathogens/fastml
+
+Package: fastml2
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: Maximum likelihood reconstruction of ancestral amino-acid sequences.
+ A branch-and-bound algorithm for the inference of ancestral amino-acid sequences when the replacement rate varies among sites.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..4d29b17
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,9 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: fastml
+Source: https://github.com/sanger-pathogens/fastml
+
+Files: *
+Copyright: 2012-2014 bioSequence at tauex.tau.ac.il
+
+License: All Rights Reserved
+ This code is not open-source, see http://fastml.tau.ac.il/source.html.
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..63fc6d6
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,19 @@
+#!/usr/bin/make -f
+# -*- makefile -*-
+# Sample debian/rules that uses debhelper.
+# This file was originally written by Joey Hess and Craig Small.
+# As a special exception, when this file is copied by dh-make into a
+# dh-make output file, you may use that output file without restriction.
+# This special exception was added by Craig Small in version 0.37 of dh-make.
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+# Delegate every target to debhelper's dh sequencer.
+%:
+	dh $@
+
+# We need to do this horrible stuff as the fastml install target
+# doesn't do a system install.
+# Copies the built binary into the package staging tree
+# debian/fastml2/usr/bin ($$ escapes the shell's $(pwd) from make).
+override_dh_auto_install:
+	mkdir -p $$(pwd)/debian/fastml2/usr/bin
+	install programs/fastml/fastml $$(pwd)/debian/fastml2/usr/bin
diff --git a/libs/phylogeny/AddLog.cpp b/libs/phylogeny/AddLog.cpp
new file mode 100644
index 0000000..12c976a
--- /dev/null
+++ b/libs/phylogeny/AddLog.cpp
@@ -0,0 +1,25 @@
+// $Id: AddLog.cpp 962 2006-11-07 15:13:34Z privmane $
+
+// version 1.00
+// last modified 3 Nov 2002
+
+// Precomputed lookup table used by AddLog() (see AddLog.h) to evaluate
+// log(a+b) quickly given x = log(a) and y = log(b), via the identity
+// log(a+b) = x + log(1 + exp(-(x-y))).  The table samples
+// log(1 + exp(-d)) on a grid of step 1/G_LOGADD, covering differences
+// d up to D_LOGADD decades (D_LOGADD * ln(10)).
+
+#include "AddLog.h"
+#include <cmath>
+
+// Grid resolution: table entries are spaced 1/G_LOGADD apart in d = x-y.
+const int tAddLog_Precompute::G_LOGADD = 500;
+// Range cutoff in decades: once y is more than D_LOGADD decades below x,
+// the smaller term is negligible and is dropped.
+const int tAddLog_Precompute::D_LOGADD = 50;
+
+// Single global instance; its constructor fills the table at start-up.
+tAddLog_Precompute AddLogData;
+
+// Last valid table index (number of grid points - 1), set in the ctor.
+int tAddLog_Precompute::d_logadd;
+
+// Build the lookup table: d_logadd+1 samples of log(1+exp(-i/G_LOGADD)).
+// NOTE(review): logaddf is a raw new[] with no copy ctor/assignment
+// (rule of three); copying this class would double-delete.  Presumably
+// the global AddLogData is the only instance ever created — confirm.
+tAddLog_Precompute::tAddLog_Precompute(){
+  d_logadd = int(D_LOGADD*log(10.0)*G_LOGADD);
+  logaddf = new double [d_logadd+1];
+  for (int i=0; i<= d_logadd; i++)
+    logaddf[i] = log(1.0+exp(-static_cast<double>(i)/G_LOGADD));
+}
+
+// Release the precomputed table.
+tAddLog_Precompute::~tAddLog_Precompute(){
+  delete [] logaddf;
+}
diff --git a/libs/phylogeny/AddLog.h b/libs/phylogeny/AddLog.h
new file mode 100644
index 0000000..e845e7a
--- /dev/null
+++ b/libs/phylogeny/AddLog.h
@@ -0,0 +1,67 @@
+// $Id: AddLog.h 962 2006-11-07 15:13:34Z privmane $
+
+// version 1.00
+// last modified 2 Nov 2002
+
+#ifndef __AddLog_h
+#define __AddLog_h
+
+#include <iostream>
+using namespace std;
+
+// Fast evaluation of log(a+b) given x = log(a) and y = log(b), using the
+// identity log(a+b) = x + log(1 + exp(-(x-y))).  The correction term is
+// read from a table precomputed once (in AddLog.cpp) and linearly
+// interpolated between adjacent grid points.
+class tAddLog_Precompute {
+ public:
+
+  tAddLog_Precompute();
+  ~tAddLog_Precompute();
+
+  // Returns log(exp(x) + exp(y)) via the lookup table.
+  double AddLog( double x, double y );
+
+private:
+  static const int D_LOGADD; // = 50; // y/x < 1e-D discard
+  static const int G_LOGADD;// = 500; // step function look-up every 1/G
+  static int d_logadd;       // last valid table index (set in the ctor)
+
+  double *logaddf;           // logaddf[i] = log(1 + exp(-i/G_LOGADD))
+};
+
+// Global precomputed table shared by all callers (defined in AddLog.cpp).
+extern tAddLog_Precompute AddLogData;
+
+// Free-function convenience wrapper around the global table.
+inline
+double
+AddLog(double x, double y ){
+  return AddLogData.AddLog(x, y);
+}
+
+inline double
+tAddLog_Precompute::AddLog(double x, double y ){
+  // Ensure x is the larger argument so the difference x-y is >= 0.
+  if (x < y) {
+    double dummy = x;
+    x = y;
+    y = dummy;
+  }
+
+#ifdef notdef
+  // Exact (slower) form, disabled but kept for reference.
+  return x + log(1 + exp(y-x));
+#endif
+
+  // z: the difference scaled to table units; i: the lower grid index.
+  double z = (x-y)*G_LOGADD;
+  int i = int(z);
+  // Linearly interpolate between adjacent table entries.  Beyond the end
+  // of the table the smaller term is negligible, so x is returned as-is.
+  if( i < d_logadd ) x += ((i+1-z)*logaddf[i] + (z-i)*logaddf[i+1]);
+  return x;
+}
+
+#endif
+
+
+/*
+Folks,
+
+In many of our program we use the AddLog procedure that compute the sum of
+two numbers in log form. Gill spent some time investigating faster versions
+of this procedure, which gave him 3-4 fold speedup on his program. Attached
+is my re-packaging of his solution. I think it will be useful in some of the
+code we use.
+
+-Nir
+*/
diff --git a/libs/phylogeny/C_evalParamUSSRV.cpp b/libs/phylogeny/C_evalParamUSSRV.cpp
new file mode 100644
index 0000000..d4b97b0
--- /dev/null
+++ b/libs/phylogeny/C_evalParamUSSRV.cpp
@@ -0,0 +1,112 @@
+// $Id: C_evalParamUSSRV.cpp 1915 2007-04-04 15:56:24Z privmane $
+#include "C_evalParamUSSRV.h"
+
+// *********************
+// * USSRV *
+// *********************
+
+MDOUBLE C_evalParamUSSRV::operator() (MDOUBLE param) {
+
+ setParam(param);
+ MDOUBLE res = likelihoodComputation2USSRV::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_baseSc,*_pModel,_weights);
+ print(param,res);
+ return -res;
+}
+
+void C_evalAlphaUSSRV::setParam(MDOUBLE alpha)
+{
+ if (_pModel->noOfCategor() == 1)
+ errorMsg::reportError(" one category when trying to optimize alpha");
+ _pModel->updateAlpha(alpha);
+}
+
+void C_evalAlphaUSSRV::print(MDOUBLE alpha,MDOUBLE res) {
+ LOG(5,<<" with Alpha = "<<alpha<<" logL = " <<res<<endl);
+}
+
+
+void C_evalNuUSSRV::setParam(MDOUBLE Nu)
+{
+ _pModel->updateNu(Nu);
+}
+
+void C_evalNuUSSRV::print(MDOUBLE nu,MDOUBLE res) {
+ LOG(5,<<" with Nu = "<<nu<<" logL = " <<res<<endl);
+}
+
+void C_evalFUSSRV::setParam(MDOUBLE f)
+{
+ _pModel->updateF(f);
+}
+
+void C_evalFUSSRV::print(MDOUBLE f,MDOUBLE res) {
+ LOG(5,<<" with F = "<<f<<" logL = " <<res<<endl);
+}
+
+
+// *********************
+// * SSRV *
+// *********************
+
+MDOUBLE C_evalParamSSRV::operator() (MDOUBLE param) {
+
+ setParam(param);
+ MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_ssrvSp,_weights);
+ print(param,res);
+ return -res;
+}
+
+void C_evalAlphaSSRV::setParam(MDOUBLE alpha)
+{
+ if (alpha<0)
+ errorMsg::reportError("ERROR in C_evalAlphaSSRV::setParam, alpha is < 0 ");
+
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp.getPijAccelerator()->getReplacementModel());
+ gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
+ gammaDist->setAlpha(alpha);
+ pMulRM->updateQ();
+}
+
+void C_evalAlphaSSRV::print(MDOUBLE alpha,MDOUBLE res) {
+ LOG(5,<<" with Alpha = "<<alpha<<" logL = " <<res<<endl);
+}
+
+
+void C_evalNuSSRV::setParam(MDOUBLE Nu)
+{
+ if (Nu<0)
+ errorMsg::reportError("C_evalNuSSRV::setParam, nu is < 0 ");
+
+ static_cast<replacementModelSSRV*>(_ssrvSp.getPijAccelerator()->getReplacementModel())->setRateOfRate(Nu);
+}
+
+void C_evalNuSSRV::print(MDOUBLE nu,MDOUBLE res) {
+ LOG(5,<<" with Nu = "<<nu<<" logL = " <<res<<endl);
+}
+
+void C_evalTrTvSSRV::setParam(MDOUBLE TrTv)
+{
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp.getPijAccelerator()->getReplacementModel());
+ static_cast<tamura92*>(pMulRM->getBaseRM())->changeTrTv(TrTv);
+ pMulRM->updateQ();
+}
+
+void C_evalTrTvSSRV::print(MDOUBLE TrTv,MDOUBLE res) {
+ LOG(5,<<" with TrTv = "<<TrTv<<" logL = " <<res<<endl);
+}
+
+void C_evalThetaSSRV::setParam(MDOUBLE Theta)
+{
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp.getPijAccelerator()->getReplacementModel());
+ static_cast<tamura92*>(pMulRM->getBaseRM())->changeTheta(Theta);
+ pMulRM->updateFreq();
+ pMulRM->updateQ();
+}
+
+void C_evalThetaSSRV::print(MDOUBLE Theta,MDOUBLE res) {
+ LOG(5,<<" with Theta = "<<Theta<<" logL = " <<res<<endl);
+}
+
+
+
+
diff --git a/libs/phylogeny/C_evalParamUSSRV.h b/libs/phylogeny/C_evalParamUSSRV.h
new file mode 100644
index 0000000..4b5e025
--- /dev/null
+++ b/libs/phylogeny/C_evalParamUSSRV.h
@@ -0,0 +1,177 @@
+// $Id: C_evalParamUSSRV.h 1915 2007-04-04 15:56:24Z privmane $
+#ifndef ___C_EVAL_PARAM_USSRV
+#define ___C_EVAL_PARAM_USSRV
+
+#include "definitions.h"
+
+#include "likelihoodComputation.h"
+#include "likelihoodComputation2USSRV.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "gammaDistribution.h"
+#include "tree.h"
+#include "replacementModelSSRV.h"
+#include "tamura92.h"
+#include "stochasticProcessSSRV.h"
+#include "ussrvModel.h"
+#include "logFile.h"
+
+// *********************
+// * USSRV *
+// *********************
+
+class C_evalParamUSSRV {
+public:
+ C_evalParamUSSRV(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel* pModel,
+ const Vdouble* weights = NULL)
+ : _et(et),_sc(sc),_baseSc(baseSc),_pModel(pModel),_weights(weights){}
+
+ MDOUBLE operator() (MDOUBLE param) ;
+ virtual ~C_evalParamUSSRV(){}
+
+protected:
+ const tree& _et;
+ const sequenceContainer& _sc;
+ const sequenceContainer& _baseSc;
+ ussrvModel* _pModel;
+ const Vdouble * _weights;
+
+
+protected:
+ virtual void setParam(MDOUBLE param) = 0;
+ virtual void print(MDOUBLE param,MDOUBLE res) =0;
+};
+
+
+class C_evalAlphaUSSRV : public C_evalParamUSSRV {
+public:
+ C_evalAlphaUSSRV(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel* pModel,
+ const Vdouble *weights = NULL)
+ : C_evalParamUSSRV(et,sc,baseSc,pModel,weights)
+ {}
+
+protected:
+ virtual void setParam(MDOUBLE alpha);
+ virtual void print(MDOUBLE alpha,MDOUBLE res);
+};
+
+
+
+class C_evalNuUSSRV : public C_evalParamUSSRV{
+public:
+ C_evalNuUSSRV( const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel* pModel,
+ const Vdouble * weights = NULL)
+ : C_evalParamUSSRV(et,sc,baseSc,pModel,weights){}
+
+protected:
+ virtual void setParam(MDOUBLE Nu);
+ virtual void print(MDOUBLE nu,MDOUBLE res);
+};
+
+class C_evalFUSSRV : public C_evalParamUSSRV{
+public:
+ C_evalFUSSRV( const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel* pModel,
+ const Vdouble * weights = NULL)
+ : C_evalParamUSSRV(et,sc,baseSc,pModel,weights){}
+
+protected:
+ virtual void setParam(MDOUBLE F);
+ virtual void print(MDOUBLE f,MDOUBLE res);
+};
+
+// *********************
+// * SSRV *
+// *********************
+
+class C_evalParamSSRV {
+public:
+ C_evalParamSSRV(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble* weights = NULL)
+ : _et(et),_sc(sc),_ssrvSp(ssrvSp),_weights(weights){}
+
+ MDOUBLE operator() (MDOUBLE param) ;
+ virtual ~C_evalParamSSRV(){}
+
+protected:
+ const tree& _et;
+ const sequenceContainer& _sc;
+ stochasticProcessSSRV& _ssrvSp;
+ const Vdouble * _weights;
+
+
+protected:
+ virtual void setParam(MDOUBLE param) = 0;
+ virtual void print(MDOUBLE param,MDOUBLE res) =0;
+};
+
+
+class C_evalAlphaSSRV : public C_evalParamSSRV {
+public:
+ C_evalAlphaSSRV(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble *weights = NULL)
+ : C_evalParamSSRV(et,sc,ssrvSp,weights)
+ {}
+
+protected:
+ virtual void setParam(MDOUBLE alpha);
+ virtual void print(MDOUBLE alpha,MDOUBLE res);
+};
+
+
+
+class C_evalNuSSRV : public C_evalParamSSRV{
+public:
+ C_evalNuSSRV( const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble * weights = NULL)
+ : C_evalParamSSRV(et,sc,ssrvSp,weights){}
+
+protected:
+ virtual void setParam(MDOUBLE Nu);
+ virtual void print(MDOUBLE nu,MDOUBLE res);
+};
+
+class C_evalTrTvSSRV : public C_evalParamSSRV{
+public:
+ C_evalTrTvSSRV(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble * weights = NULL)
+ : C_evalParamSSRV(et,sc,ssrvSp,weights){}
+
+protected:
+ virtual void setParam(MDOUBLE TrTv);
+ virtual void print(MDOUBLE TrTv,MDOUBLE res);
+};
+
+class C_evalThetaSSRV : public C_evalParamSSRV{
+public:
+ C_evalThetaSSRV(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble * weights = NULL)
+ : C_evalParamSSRV(et,sc,ssrvSp,weights){}
+
+protected:
+ virtual void setParam(MDOUBLE Theta);
+ virtual void print(MDOUBLE Theta,MDOUBLE res);
+};
+
+#endif
diff --git a/libs/phylogeny/ConversionUtils.cpp b/libs/phylogeny/ConversionUtils.cpp
new file mode 100644
index 0000000..bb9061c
--- /dev/null
+++ b/libs/phylogeny/ConversionUtils.cpp
@@ -0,0 +1,52 @@
+#include "ConversionUtils.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+
+#include <cmath>
+
+using namespace std;
+
+void appendIntToString (string& ioString, const int inValue) {
+ std::ostringstream o;
+ o << ioString<< inValue;
+ ioString = o.str();
+}
+
+string appendInt2string(const int x)
+{
+ string res;
+ appendIntToString(res, x);
+ return res;
+}
+
+string appendDouble2string(const double x, const int lenght){
+
+ // first getting the integer part:
+ int theIntegerPart = static_cast<int>(x);
+ double theRemainingPart = fabs(x-theIntegerPart);
+ int integerRepresentingTheRemainingPart = static_cast<int>(theRemainingPart*pow(10.0,lenght));
+ string part1, part2;
+ appendIntToString(part1, theIntegerPart);
+ appendIntToString(part2, integerRepresentingTheRemainingPart);
+ while (part2.length()<lenght){
+ part2.insert(0, "0");
+ }
+
+ string result = part1;
+ result += ".";
+ result += part2;
+
+ // removing 0 from the end
+ int i = result.length()-1;
+ while (result[i]!='.' && i>0 && result[i]=='0'){
+ result.erase(i);
+ i--;
+ }
+
+ // removing "." if this is the last character in the string.
+ if (result[result.length()-1]=='.')
+ result.erase(result.length()-1);
+
+ return result;
+}
+
diff --git a/libs/phylogeny/ConversionUtils.h b/libs/phylogeny/ConversionUtils.h
new file mode 100644
index 0000000..f46a2a0
--- /dev/null
+++ b/libs/phylogeny/ConversionUtils.h
@@ -0,0 +1,51 @@
+//utility class that converts between data types
+#ifndef ___ConversionUtils_h
+#define ___ConversionUtils_h
+
+#include <sstream>
+#include <string>
+#include "definitions.h"
+
+using namespace std;
+
+//a function that turns an integer to string
+
+void appendIntToString (string& ioString, const int inValue);
+string appendDouble2string(const double x, int const howManyDigitsAfterTheDot=5);
+string appendInt2string(const int x);
+
+
+// Trims spaces at the left side of a string
+static inline string trim_left(const string& str )
+{
+ int i=str.find_first_not_of(" \t");
+ if(str.size()==0 || i >= str.size())
+ return str;
+ return str.substr( i ) ;
+}
+
+
+////
+// Trims spaces at the right side of a string
+static inline string trim_right(const string& str )
+{
+ int i=str.find_last_not_of(" \t");
+ if(str.size()==0 || i >= str.size())
+ return str;
+ return str.substr(0, i + 1);
+}
+
+////
+// Trims spaces at both sides of a string
+static inline string trim(const string& str )
+{
+ return trim_left(trim_right(str));
+}
+
+
+#endif
+
+
+
+
+
diff --git a/libs/phylogeny/GLaguer.cpp b/libs/phylogeny/GLaguer.cpp
new file mode 100644
index 0000000..0f72f23
--- /dev/null
+++ b/libs/phylogeny/GLaguer.cpp
@@ -0,0 +1,178 @@
+// $Id: GLaguer.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "definitions.h"
+#include "GLaguer.h"
+
+#include "errorMsg.h"
+#include "gammaUtilities.h"
+
+
+
+GLaguer::GLaguer(const int pointsNum, const MDOUBLE alf, Vdouble & points, Vdouble & weights)
+{
+ gaulag(_points, _weights, alf, pointsNum);
+
+ weights = _weights;
+ points = _points;
+}
+
+
+//Input: alf = the alpha parameter of the Laguerre polynomials
+// pointsNum = the polynom order
+//Output: the abscissas and weights are stored in the vectors x and w, respectively.
+//Description: given alf, the alpha parameter of the Laguerre polynomials, the function returns the abscissas and weights
+// of the n-point Gauss-Laguerre quadrature formula.
+// The smallest abscissa is stored in x[0], the largest in x[pointsNum - 1].
+void GLaguer::gaulag(Vdouble &x, Vdouble &w, const MDOUBLE alf, const int pointsNum)
+{
+ x.resize(pointsNum, 0.0);
+ w.resize(pointsNum, 0.0);
+ const int MAXIT=10000;
+ const MDOUBLE EPS=1.0e-6;
+ int i,its,j;
+ MDOUBLE ai,p1,p2,p3,pp,z=0.0,z1;
+
+ int n= x.size();
+ for (i=0;i<n;i++) {
+ //loops over the desired roots
+ if (i == 0) { //initial guess for the smallest root
+ z=(1.0+alf)*(3.0+0.92*alf)/(1.0+2.4*n+1.8*alf);
+ } else if (i == 1) {//initial guess for the second smallest root
+ z += (15.0+6.25*alf)/(1.0+0.9*alf+2.5*n);
+ } else { //initial guess for the other roots
+ ai=i-1;
+ z += ((1.0+2.55*ai)/(1.9*ai)+1.26*ai*alf/
+ (1.0+3.5*ai))*(z-x[i-2])/(1.0+0.3*alf);
+ }
+ for (its=0;its<MAXIT;its++) { //refinement by Newton's method
+ p1=1.0;
+ p2=0.0;
+ for (j=0;j<n;j++) { //Loop up the recurrence relation to get the Laguerre polynomial evaluated at z.
+ p3=p2;
+ p2=p1;
+ p1=((2*j+1+alf-z)*p2-(j+alf)*p3)/(j+1);
+ }
+ //p1 is now the desired Laguerre polynomial. We next compute pp, its derivative,
+ //by a standard relation involving also p2, the polynomial of one lower order.
+ pp=(n*p1-(n+alf)*p2)/z;
+ z1=z;
+ z=z1-p1/pp; //Newton's formula
+ if (fabs(z-z1) <= EPS)
+ break;
+ }
+ if (its >= MAXIT)
+ errorMsg::reportError("too many iterations in gaulag");
+ x[i]=z;
+ w[i] = -exp(gammln(alf+n)-gammln(MDOUBLE(n)))/(pp*n*p2);
+ }
+}
+
+
+void GLaguer::GetPhylipLaguer(const int categs, MDOUBLE alpha, Vdouble & points, Vdouble & weights)
+{
+ /* calculate rates and probabilities to approximate Gamma distribution
+ of rates with "categs" categories and shape parameter "alpha" using
+ rates and weights from Generalized Laguerre quadrature */
+
+ points.resize(categs, 0.0);
+ weights.resize(categs, 0.0);
+ long i;
+ raterootarray lgroot; /* roots of GLaguerre polynomials */
+ double f, x, xi, y;
+
+ alpha = alpha - 1.0;
+ lgroot[1][1] = 1.0+alpha;
+ for (i = 2; i <= categs; i++)
+ {
+ cerr<<lgroot[i][1]<<"\t";
+ lgr(i, alpha, lgroot); /* get roots for L^(a)_n */
+ cerr<<lgroot[i][1]<<endl;
+ }
+ /* here get weights */
+ /* Gamma weights are (1+a)(1+a/2) ... (1+a/n)*x_i/((n+1)^2 [L_{n+1}^a(x_i)]^2) */
+ f = 1;
+ for (i = 1; i <= categs; i++)
+ f *= (1.0+alpha/i);
+ for (i = 1; i <= categs; i++) {
+ xi = lgroot[categs][i];
+ y = glaguerre(categs+1, alpha, xi);
+ x = f*xi/((categs+1)*(categs+1)*y*y);
+ points[i-1] = xi/(1.0+alpha);
+ weights[i-1] = x;
+ }
+}
+
+
+void GLaguer::lgr(long m, double alpha, raterootarray lgroot)
+{ /* For use by initgammacat. Get roots of m-th Generalized Laguerre
+ polynomial, given roots of (m-1)-th, these are to be
+ stored in lgroot[m][] */
+ long i;
+ double upper, lower, x, y;
+ bool dwn; /* is function declining in this interval? */
+
+ if (m == 1) {
+ lgroot[1][1] = 1.0+alpha;
+ } else {
+ dwn = true;
+ for (i=1; i<=m; i++) {
+ if (i < m) {
+ if (i == 1)
+ lower = 0.0;
+ else
+ lower = lgroot[m-1][i-1];
+ upper = lgroot[m-1][i];
+ }
+ else { /* i == m, must search above */
+ lower = lgroot[m-1][i-1];
+ x = lgroot[m-1][m-1];
+ do {
+ x = 2.0*x;
+ y = glaguerre(m, alpha,x);
+ } while ((dwn && (y > 0.0)) || ((!dwn) && (y < 0.0)));
+ upper = x;
+ }
+ while (upper-lower > 0.000000001) {
+ x = (upper+lower)/2.0;
+ if (glaguerre(m, alpha, x) > 0.0) {
+ if (dwn)
+ lower = x;
+ else
+ upper = x;
+ }
+ else {
+ if (dwn)
+ upper = x;
+ else
+ lower = x;
+ }
+ }
+ lgroot[m][i] = (lower+upper)/2.0;
+ dwn = !dwn; // switch for next one
+ }
+ }
+} /* lgr */
+
+
+double GLaguer::glaguerre(long m, double b, double x)
+{ /* Generalized Laguerre polynomial computed recursively.
+ For use by initgammacat */
+ long i;
+ double gln, glnm1, glnp1; /* L_n, L_(n-1), L_(n+1) */
+
+ if (m == 0)
+ return 1.0;
+ else {
+ if (m == 1)
+ return 1.0 + b - x;
+ else {
+ gln = 1.0+b-x;
+ glnm1 = 1.0;
+ for (i=2; i <= m; i++) {
+ glnp1 = ((2*(i-1)+b+1.0-x)*gln - (i-1+b)*glnm1)/i;
+ glnm1 = gln;
+ gln = glnp1;
+ }
+ return gln;
+ }
+ }
+} /* glaguerre */
diff --git a/libs/phylogeny/GLaguer.h b/libs/phylogeny/GLaguer.h
new file mode 100644
index 0000000..6c6e465
--- /dev/null
+++ b/libs/phylogeny/GLaguer.h
@@ -0,0 +1,30 @@
+// $Id: GLaguer.h 962 2006-11-07 15:13:34Z privmane $
+#ifndef ___GLAGUER
+#define ___GLAGUER
+
+#include "definitions.h"
+#include <vector>
+using namespace std;
+
+typedef double raterootarray[35][35];
+
+class GLaguer
+{
+public:
+ explicit GLaguer(const int pointsNum, const MDOUBLE alpha, Vdouble & points, Vdouble & weights);
+
+ void GetPhylipLaguer(const int pointsNum, MDOUBLE alf, Vdouble & points, Vdouble & weights);
+
+private:
+ void gaulag(Vdouble &x, Vdouble &w, const MDOUBLE alf, const int pointsNum);
+
+ void lgr(long m, double alpha, raterootarray lgroot);
+ double glaguerre(long m, double b, double x);
+
+
+private:
+ Vdouble _points;
+ Vdouble _weights;
+};
+
+#endif
diff --git a/libs/phylogeny/GamMixtureOptimizer.cpp b/libs/phylogeny/GamMixtureOptimizer.cpp
new file mode 100644
index 0000000..065fec4
--- /dev/null
+++ b/libs/phylogeny/GamMixtureOptimizer.cpp
@@ -0,0 +1,156 @@
+#include "GamMixtureOptimizer.h"
+#include "someUtil.h"
+#include "optGammaMixtureEM.h"
+#include "optGammaMixtureLS.h"
+
+#include <fstream>
+#include <algorithm>
+#include <ctime>
+using namespace std;
+
+
+
+GamMixtureOptimizer::GamMixtureOptimizer(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, unObservableData* unObservableData_p)
+{
+ _pSc = ≻
+ _pTree = &inTree;
+ _pSp = pSp;
+ _unObservableData_p = unObservableData_p;
+ _tolOptSpecific = 0.001;
+
+}
+
+
+GamMixtureOptimizer::~GamMixtureOptimizer()
+{
+}
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+//findBestParamManyStarts: Finds the best gammaMixture from many starting points.
+//The function starts form few starting points.
+//For each point it tries to optimize the likelihood doing only a small number of iterations.
+//It then picks the best points (highest likelihood) and continue the maximization for these points only.
+//This can be repeated a number of times, each cycle with a different optimization algorithm.
+//The best gammaMixture is stored in _sp and the best likelihood is returned.
+//input Parameters:
+//pointsNum: a vector with the number of points to perform in the current cycle of optimization.
+//iterNum: the number of iterations to perform in each cycle.
+//OptAlgs: the optimization algorithm to be performed in each cycle.
+//tol = for determining convergence in the maximization process.
+MDOUBLE GamMixtureOptimizer::findBestParamManyStarts(const Vint pointsNum, const Vint iterNum, const vector<OptimAlg> OptAlgs, const Vdouble tols, const Vdouble * pWeights, ofstream* pOutF/*= NULL*/)
+{
+ //make sure that the number of points in each cycle is not bigger than the previous cycle.
+ int i;
+ for (i = 0; i < pointsNum.size()-1; ++i)
+ {
+ if (pointsNum[i] < pointsNum[i+1])
+ errorMsg::reportError("input error in GamMixtureOptimizer::findBestParamManyStarts()");
+ }
+
+ //create starting distributions
+ vector<mixtureDistribution*> distVec;
+ const mixtureDistribution * pMixture = getMixtureDist();
+ for (i = 0; i < pointsNum[0]; ++i)
+ {
+ //the first distribution will be the current one
+ if (i == 0)
+ distVec.push_back(new mixtureDistribution(*pMixture));
+ else
+ distVec.push_back(new mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15));
+ }
+
+ //make a small number of iterations for all random starts
+ int numOfOptCycles = pointsNum.size();
+ Vdouble likelihoodVec;
+ for (i = 0; i < numOfOptCycles; ++i)
+ {
+ if (i != 0)
+ {
+ vector<mixtureDistribution*> tmpDistVec(0);
+ //sort results and continue optimization only with the best (pointsNum[i]) points
+ Vdouble sortedL = likelihoodVec;
+ sort(sortedL.begin(),sortedL.end());
+ MDOUBLE threshold = sortedL[sortedL.size()- pointsNum[i]];
+ for (int j = 0; j < likelihoodVec.size(); ++j)
+ {
+ if (likelihoodVec[j] >= threshold)
+ tmpDistVec.push_back(distVec[j]);
+ else
+ delete distVec[j];
+ }
+ distVec.clear();
+ distVec = tmpDistVec;
+ }
+
+ likelihoodVec.clear();
+ likelihoodVec.resize(pointsNum[i]);
+ int c;
+ for (c = 0; c < pointsNum[i]; ++c)
+ {
+ cerr <<"optimizing point " <<c<<endl;
+ MDOUBLE ll = optimizeParam(distVec[c], iterNum[i], OptAlgs[i], tols[i], pWeights, pOutF);
+ cerr<<"pointi: "<<c<<" likelihood = "<<ll<<endl;
+ likelihoodVec[c] = ll;
+ }
+ }
+
+ Vdouble sortedL = likelihoodVec;
+ sort(sortedL.begin(),sortedL.end());
+ MDOUBLE bestL = sortedL[likelihoodVec.size() - 1];
+ for (i = 0; i < likelihoodVec.size(); ++i)
+ {
+ if (bestL == likelihoodVec[i])
+ {
+ _pSp->setDistribution(distVec[i]);
+ }
+ delete distVec[i];
+ }
+ distVec.clear();
+ return bestL;
+}
+
+MDOUBLE GamMixtureOptimizer::findBestParam(const OptimAlg alg, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
+{
+ mixtureDistribution* pInDistribution = static_cast<mixtureDistribution*>(_pSp->distr());
+ return optimizeParam(pInDistribution, maxIterations, alg, tol, pWeights, pOutF);
+}
+
+
+MDOUBLE GamMixtureOptimizer::optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const OptimAlg alg, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
+{
+ MDOUBLE res = 0.0;
+ switch (alg)
+ {
+ case EM: {
+ optGammaMixtureEM emOpt(*_pSp, *_pSc, *_pTree);
+ res = emOpt.optimizeParam(pInDistribution, maxIterations, tol, _tolOptSpecific, pOutF);
+ break;
+ }
+ case ONE_DIM: {
+ optGammaMixtureLS lsOpt(_pSp, *_pSc, *_pTree,MAXIMUM_ALPHA_PARAM,MAXIMUM_BETA_PARAM,_unObservableData_p);
+ res = lsOpt.optimizeParam(pInDistribution, maxIterations, tol, pWeights, optGammaMixtureLS::ONE_DIM);
+ MDOUBLE resRecompute = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree,*_pSc,*_pSp,pWeights,_unObservableData_p);
+ if(!DEQUAL(res,resRecompute)){
+ LOGnOUT(3,<<"--- error: different likelihood after GamMixtureOptimizer::optimizeParam,diff= "<<res-resRecompute <<"\n");
+ }
+ break;
+ }
+ //case TX_CONJUGATE_DERIVATIVES:
+ // {
+ // txGamMixtureOptimizer txOpt(_pSp, *_pSc, *_pTree);
+ // txOpt.setOptimizationParameters(tol, _tolOptSpecific, _tolOptSpecific, _tolOptSpecific);
+ // res = txOpt.optimizeParam(pInDistribution, maxIterations, pWeights, alg, pOutF);
+ // break;
+ // }
+ //case NR_CONJUGATE_DERIVATIVES:
+ // {
+ // optGammaMixtureLS opt(_pSp, *_pSc, *_pTree);
+ // res = opt.optimizeParam(pInDistribution, maxIterations, tol, pWeights, optGammaMixtureLS::CONJUGATE_DERIVATIVES, pOutF);
+ // break;
+ // }
+ default:
+ errorMsg::reportError("unknown optimization algorithm in GamMixtureOptimizer::optimizeParam()");
+ }
+ return res;
+}
diff --git a/libs/phylogeny/GamMixtureOptimizer.h b/libs/phylogeny/GamMixtureOptimizer.h
new file mode 100644
index 0000000..ddae901
--- /dev/null
+++ b/libs/phylogeny/GamMixtureOptimizer.h
@@ -0,0 +1,52 @@
+#ifndef __GAMMIXTURE_OPTIMIZER
+#define __GAMMIXTURE_OPTIMIZER
+/************************************************************
+GamMixtureOptimizer class is used to find the best Gamma mixture parameters.
+The parameters to be optimized are the alpha and beta of each component and the components probabilities.
+The optimizer can choose between several optimization algorithms (EM, ConjugateDerivatives, etc).
+The interface to the optimizer is the functions:
+1. findBestParam() = given a gammaMixture - finds the best parameters.
+2. findBestParamManyStarts() - finds the best parameters but starts from many initial points.
+3. SetOptAlg() - choose the optimization algorithm to be used.
+************************************************************/
+#include "definitions.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "tree.h"
+#include "mixtureDistribution.h"
+#include "unObservableData.h"
+
+
+
+class GamMixtureOptimizer{
+public:
+ enum OptimAlg {EM, ONE_DIM, TX_CONJUGATE_DERIVATIVES, NR_CONJUGATE_DERIVATIVES};
+public:
+
+ explicit GamMixtureOptimizer(stochasticProcess* cur_sp, const sequenceContainer& sc, const tree& inTree, unObservableData* unObservableData_p = NULL);
+ virtual ~GamMixtureOptimizer();
+
+ const stochasticProcess* getSp() const {return _pSp;}
+ const mixtureDistribution* getMixtureDist() const {return static_cast<mixtureDistribution*>(_pSp->distr());}
+
+ MDOUBLE findBestParamManyStarts(const Vint pointsNum, const Vint iterNum, const vector<OptimAlg> OptAlgs, const Vdouble tols, const Vdouble * pWeights, ofstream* pOutF = NULL);
+ //return the logLikelihood. the final distribution is stored in the stochasticProcess
+ MDOUBLE findBestParam(const OptimAlg alg, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL);
+
+ void setTolOptSpecific(const MDOUBLE tol) {_tolOptSpecific = tol;}
+
+private:
+ MDOUBLE optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const OptimAlg alg, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF);
+
+
+private:
+ stochasticProcess* _pSp;
+ const sequenceContainer* _pSc;
+ const tree* _pTree;
+ unObservableData* _unObservableData_p;
+
+ MDOUBLE _tolOptSpecific; //tolerance specific to the optimization algorithm
+};
+
+#endif
+
diff --git a/libs/phylogeny/HIVb.dat.q b/libs/phylogeny/HIVb.dat.q
new file mode 100644
index 0000000..e7cebf0
--- /dev/null
+++ b/libs/phylogeny/HIVb.dat.q
@@ -0,0 +1,24 @@
+""
+"0.16315391 "
+"0.0026528488 0.15680618 "
+"0.77200021 0.0026528488 9.3704985 "
+"0.065662251 0.18661252 0.045663061 0.0026528488 "
+"0.029241185 1.8153444 0.35657046 0.0026528488 0.0026528488 "
+"0.7859595 0.039751241 0.042054709 5.6172481 0.0026528488 1.3583647 "
+"1.1329574 1.9384101 0.17158679 1.5057888 0.47638319 0.032849536 2.0839453 "
+"0.044971782 4.796584 4.0566567 1.0170492 0.12737547 3.7434084 0.063530422 0.0026528488 "
+"0.0026528488 0.35934906 0.3610872 0.0093800488 0.0026528488 0.0026528488 0.0032315889 0.0026528488 0.054707578 "
+"0.11420832 0.37215595 0.0026528488 0.0046480457 0.068855751 0.79296833 0.0026528488 0.0026528488 0.92409864 3.1615537 "
+"0.0026528488 10.850151 4.1938515 0.0026528488 0.0026528488 3.4738365 2.4484839 0.27680089 0.0026528488 0.17101271 0.04324117 "
+"0.009902713 1.3338205 0.0026528488 0.0026528488 0.0026528488 0.1611213 0.093268326 0.0026528488 0.0026528488 5.9458299 2.8224242 0.68043448 "
+"0.0074953058 0.0026528488 0.0026528488 0.0026528488 4.9333171 0.0026528488 0.0026528488 0.15469345 0.077228672 1.803067 4.5230222 0.018180397 0.099760378 "
+"1.1259592 0.68101281 0.0039239772 0.018180397 0.0026528488 2.3727663 0.0063788279 0.0026528488 1.3015831 0.021784823 1.1022958 0.016652568 0.0026528488 0.0026528488 "
+"1.3085601 1.8459052 6.9741802 0.28026286 2.4900381 0.061711098 0.0026528488 2.324113 0.20307398 0.64624988 0.49218621 0.26746605 0.0026528488 0.50747511 2.8532025 "
+"8.4457685 1.5220348 3.6538588 0.14576024 0.39260517 0.12924096 0.15374532 0.19610654 0.37755025 4.5693569 0.023221606 2.4785142 2.6211525 0.0074953058 1.0686577 4.7385556 "
+"0.0026528488 0.52597396 0.0026528488 0.0026528488 1.3968681 0.014142867 0.0026528488 0.64556544 0.036884095 0.0026528488 0.39731344 0.0026528488 0.047262092 0.44002431 0.023584144 0.013196755 0.0026528488 "
+"0.0026528488 0.0052623288 0.93601524 0.35795048 4.0213579 0.059971891 0.042054709 0.0026528488 9.9186301 0.078613459 0.059416384 0.0026528488 0.0026528488 8.13894 0.016149535 0.34382193 0.056055755 0.67924601 "
+"4.0399067 0.043106352 0.014142867 0.55599996 0.22285362 0.011097026 0.54567507 0.50571521 0.0026528488 9.4117238 0.74829436 0.14104083 3.6361006 0.38374731 0.0026528488 0.039751241 0.37629386 0.0026528488 0.021784823 "
+
+
+
+"0.060490222 0.066039665 0.044127815 0.042109048 0.020075899 0.053606488 0.071567447 0.072308239 0.022293943 0.069730629 0.098851122 0.056968211 0.019768318 0.028809447 0.046025282 0.05060433 0.053636813 0.033011601 0.028350243 0.061625237 "
diff --git a/libs/phylogeny/HIVw.dat.q b/libs/phylogeny/HIVw.dat.q
new file mode 100644
index 0000000..3af617c
--- /dev/null
+++ b/libs/phylogeny/HIVw.dat.q
@@ -0,0 +1,23 @@
+""
+"0.021810606 "
+"0.18082842 0.046923924 "
+"1.2987859 0.019752881 8.6119047 "
+"0.049094712 0.83857481 0.017714543 0.0014641764 "
+"0.0014641764 3.1258994 0.10016958 0.0014641764 0.0014641764 "
+"1.6291158 0.0073686726 0.059013922 3.5501299 0.0014641764 0.93899388 "
+"0.54716271 3.9350911 0.017714543 3.0445791 0.014343013 0.017714543 4.3281346 "
+"0.0014641764 2.0041793 2.5180202 0.67873067 0.0014641764 5.4310694 0.0014641764 0.0014641764 "
+"0.0014641764 0.39260132 0.28903662 0.042497426 0.0014641764 0.010022346 0.011435569 0.0014641764 0.0014641764 "
+"0.046923924 0.17182315 0.0014641764 0.0014641764 0.0014641764 0.8464345 0.038021439 0.014343013 0.51650871 2.6655214 "
+"0.17358807 11.681111 3.1232346 0.26188639 0.0014641764 3.8275035 7.0170946 0.081825497 0.065612672 0.23938727 0.0014641764 "
+"0.0014641764 0.96240899 0.059013922 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.014343013 0.0014641764 5.0679244 3.3336075 1.1993479 "
+"0.17509295 0.0014641764 0.0014641764 0.0014641764 0.1062872 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.43423957 2.1926949 0.0014641764 0.0014641764 "
+"0.29570799 0.11851717 0.10098366 0.0014641764 0.0014641764 0.89168927 0.0014641764 0.0014641764 4.0834122 0.0014641764 2.8788489 0.032776467 0.0014641764 0.010022346 "
+"2.5166849 2.4452448 4.2665807 0.12529865 0.32854654 0.046923924 0.0014641764 1.838906 0.21235155 0.21672475 1.7991682 0.0014641764 0.11495981 1.2531563 4.1726098 "
+"7.0696878 0.27181058 1.3300754 0.18460189 0.0014641764 0.059472209 0.13433613 0.014343013 0.28099302 2.7419485 0.0014641764 1.185403 2.170826 0.033533153 1.2700295 1.856807 "
+"0.0014641764 1.7469498 0.0014641764 0.0014641764 1.6102836 0.012981329 0.0014641764 0.82749392 0.0014641764 0.0014641764 0.40127511 0.0014641764 0.0014641764 0.0014641764 0.0014641764 0.32257563 0.0014641764 "
+"0.0014641764 0.0014641764 1.4831375 0.66811539 2.4446914 0.0014641764 0.0014641764 0.0014641764 13.906425 0.033533153 0.0014641764 0.0014641764 0.16960961 1.2086132 0.0014641764 0.27325689 0.14366733 0.0014641764 "
+"7.2650675 0.081825497 0.021810606 0.85445233 0.0014641764 0.0014641764 0.64409704 0.81883185 0.24231504 7.2690793 0.86487141 0.037501949 4.3246792 0.66766443 0.0014641764 0.25261054 0.0014641764 0.0014641764 0.39673909 "
+
+
+"0.0377494 0.057321 0.0891129 0.0342034 0.0240105 0.0437824 0.0618606 0.0838496 0.0156076 0.0983641 0.0577867 0.0641682 0.0158419 0.0422741 0.0458601 0.0550846 0.0813774 0.019597 0.0205847 0.0515639 "
diff --git a/libs/phylogeny/LG.dat.q b/libs/phylogeny/LG.dat.q
new file mode 100644
index 0000000..e92e039
--- /dev/null
+++ b/libs/phylogeny/LG.dat.q
@@ -0,0 +1,23 @@
+" 0.425093 "
+" 0.276818 0.751878 "
+" 0.395144 0.123954 5.076149 "
+" 2.489084 0.534551 0.528768 0.062556 "
+" 0.969894 2.807908 1.695752 0.523386 0.084808 "
+" 1.038545 0.363970 0.541712 5.243870 0.003499 4.128591 "
+" 2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847 "
+" 0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484 "
+" 0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882 "
+" 0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067 "
+" 0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500 "
+" 1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604 "
+" 0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853 "
+" 1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464 "
+" 4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132 "
+" 2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279 "
+" 0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825 "
+" 0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815 "
+" 2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313 "
+
+" 0.079066 0.055941 0.041977 0.053052 0.012937 0.040767 0.071586 0.057337 0.022355 0.062157 "
+" 0.099081 0.064600 0.022951 0.042302 0.044040 0.061197 0.053287 0.012066 0.034155 0.069147 "
+" Si Quang Le and Olivier Gascuel (LG) matrix "
diff --git a/libs/phylogeny/Makefile b/libs/phylogeny/Makefile
new file mode 100644
index 0000000..3c844dc
--- /dev/null
+++ b/libs/phylogeny/Makefile
@@ -0,0 +1,231 @@
#! /usr/local/bin/gmake
# $Id: Makefile 6111 2009-04-26 14:08:30Z cohenofi $
# makefile for yaep5



# use LOGREP=t or DOUBLEREP=t to activate logRep or doubleRep respectively (or setenv DOUBLEREP in the shell)
#DOUBLEREP=t
#LOGREP=t


Libsources= AddLog.cpp NNiProp.cpp NNiSep.cpp Nni.cpp aaJC.cpp \
	allTrees.cpp allTreesSeparateModel.cpp alphabet.cpp amino.cpp \
	bestAlpha.cpp bestAlphaManyTrees.cpp bestHKYparam.cpp bootstrap.cpp \
	bblEM.cpp bblEMfixRoot.cpp bblEMProprtional.cpp bblEMSeperate.cpp \
	chebyshevAccelerator.cpp clustalFormat.cpp codon.cpp codonJC.cpp \
	computeCounts.cpp computeDownAlg.cpp computeMarginalAlg.cpp \
	computePijComponent.cpp computeUpAlg.cpp computeUpAlgFactors.cpp \
	computeSubstitutionCounts.cpp \
	computePosteriorExpectationOfSubstitutions.cpp \
	computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp \
	ConversionUtils.cpp countTableComponent.cpp datMatrixHolder.cpp distanceTable.cpp \
	distribution.cpp errorMsg.cpp evaluateCharacterFreq.cpp \
	fastStartTree.cpp fastaFormat.cpp findRateOfGene.cpp \
	fromCountTableComponentToDistance.cpp fromCountTableComponentToDistancefixRoot.cpp \
	fromCountTableComponentToDistanceProp.cpp fromQtoPt.cpp \
	generalGammaDistributionFixedCategories.cpp gammaDistribution.cpp gammaUtilities.cpp \
	generalGammaDistribution.cpp getRandomWeights.cpp goldmanYangModel.cpp \
	granthamChemicalDistances.cpp hky.cpp khTest.cpp likeDist.cpp likeDistfixRoot.cpp \
	likeDistProp.cpp likelihoodComputation.cpp \
	likelihoodComputationFactors.cpp logFile.cpp maseFormat.cpp \
	molphyFormat.cpp nexusFormat.cpp nj.cpp njConstrain.cpp \
	nucJC.cpp nucleotide.cpp numRec.cpp Parameters.cpp phylipFormat.cpp \
	pijAccelerator.cpp readDatMatrix.cpp readTree.cpp recognizeFormat.cpp \
	replacementModel.cpp searchStatus.cpp seqContainerTreeMap.cpp \
	sequence.cpp sequenceContainer.cpp simulateTree.cpp \
	siteSpecificRate.cpp someUtil.cpp split.cpp splitMap.cpp \
	splitTreeUtil.cpp stochasticProcess.cpp suffStatComponent.cpp \
	talRandom.cpp tree.cpp treeIt.cpp treeUtil.cpp uniDistribution.cpp \
	uniformDistribution.cpp cmdline2EvolObjs.cpp \
	generalGammaDistributionLaguerre.cpp gammaDistributionLaguerre.cpp GLaguer.cpp \
	givenRatesMLDistance.cpp distanceBasedSeqs2Tree.cpp \
	posteriorDistance.cpp pairwiseGammaDistance.cpp doubleRep.cpp \
	logRep.cpp indel.cpp indelModel.cpp mulAlphabet.cpp \
	replacementModelSSRV.cpp stochasticProcessSSRV.cpp bestAlphaAndNu.cpp \
	C_evalParamUSSRV.cpp matrixUtils.cpp betaOmegaDistribution.cpp \
	betaUtilities.cpp betaDistribution.cpp geneticCodeHolder.cpp \
	samplingSequences.cpp bblEM2USSRV.cpp bestParamUSSRV.cpp \
	likeDist2USSRV.cpp ussrvModel.cpp likelihoodComputation2USSRV.cpp \
	fromCountTableComponentToDistance2USSRV.cpp normalDist.cpp \
	tamura92.cpp bestTamura92param.cpp phylipSequentialFormat.cpp \
	simulateCodonsJumps.cpp \
	simulateJumpsAbstract.cpp \
	ssrvDistanceSeqs2Tree.cpp multipleStochasticProcess.cpp distributionPlusInvariant.cpp\
	gammaDistributionFixedCategories.cpp generalGammaDistributionPlusInvariant.cpp gammaDistributionPlusInvariant.cpp \
	distributionPlusCategory.cpp simulateJumps.cpp computeJumps.cpp seqeuncesFilter.cpp \
	optGammaMixtureLS.cpp mixtureDistribution.cpp suffStatGammaMixture.cpp GamMixtureOptimizer.cpp optGammaMixtureEM.cpp gainLossAlphabet.cpp \
	wYangModel.cpp codonUtils.cpp likelihoodComputation2Codon.cpp likeDist2Codon.cpp unObservableData.cpp likelihoodComputationGL.cpp \
	threeStateModel.cpp threeStateAlphabet.cpp betaDistributionFixedCategories.cpp betaDistributionFixedCategoriesWithOmegaUniform.cpp \
	bblEM2codon.cpp bestAlphaAndK.cpp fromCountTableComponentToDistance2Codon.cpp\
	gtrModel.cpp bestGtrModelParams.cpp

# do not use: fromInstructionFile.cpp, simulateSequnce.cpp split.save.cpp


# LibCsources= cmdline.c
# LibCsources += getopt.c getopt1.c

EXEC =
#TEST_EXEC_SUB = split_test splitMap_test bootstrap_test
TEST_EXEC = $(addprefix tests/,$(TEST_EXEC_SUB))
LIB = libEvolTree.a
DEBUGLIB = $(LIB:.a=Debug.a)
DOUBLEREPLIB = $(LIB:.a=DoubleRep.a)


#CC=g++
CXX=g++
CC=$(CXX)

#requires 2.13, but may work with 2.11
GENGETOPT = gengetopt
# osX/tiger
#GENGETOPT = /opt/local/bin/gengetopt

.SECONDARY: semphy_cmdline.c semphy_cmdline.h

#LDFLAGS=

CPPFLAGS= -O3 -Wall -Wno-sign-compare -I. -DLOG -ftemplate-depth-32
CPPFLAGSDEBUG= -g -Wall -Wno-sign-compare -I. -DLOG -ftemplate-depth-32 -DVERBOS
#CPPFLAGSDOU= $(CPPFLAGS)
#-pg


#CPPFLAGS+= -I/usr/include/g++-v3
#CPPFLAGS+= -DLOG -DLOGCLS -DMEMCHK

# sources
sources= $(Libsources) $(LibCsources) $(addsuffix .cpp,$(EXEC) $(TEST_EXEC))

.PHONY: tests lib test debug %.debug
.PHONY: dat DOUBLEREP doubleRep

all: lib $(EXEC)

test: all tests
	+cd tests; make -k test

# NOTE(review): setting DOUBLEREP adds -DLOGREP (identical to the LOGREP
# path) rather than -DDOUBLEREP; per the comment at the top of this file
# the two options are meant to be distinct -- confirm against doubleRep.h
# before relying on DOUBLEREP.
ifdef DOUBLEREP
CPPFLAGS+= -DLOGREP
CPPFLAGSDEBUG += -DLOGREP
LDFLAGSDEBUG += -DLOGREP
endif


debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -DLOG -ftemplate-depth-32
debug: $(DEBUGLIB)
pl:
	@echo "lib ="$(LIB)
	@echo "debug="$(DEBUGLIB)
#debug: all
#	cp libEvolTree.a libEvolTreeDebug.a

# <<<<<<< Makefile
# %.debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -DLOG -ftemplate-depth-25
# % debug: LIB = libEvolTreeDebug.a
# %.debug: %
#	@echo "made \""$(*)"\" in debug mode"

# =======
#>>>>>>> 2.34

lib: $(LIB)

$(LIB): $(Libsources:.cpp=.o) $(LibCsources:.c=.o)
	ar rv $@ $?
	ranlib $@

tags: *.cpp *.h
	etags --members --language=c++ $^

$(EXEC) $(TEST_EXEC): $(LIB)
tests: $(TEST_EXEC)

-include make.dep

install:
	cd ../fast; make -f Makefile.lib install_do


clean:
	-rm -f $(LIB) $(DEBUGLIB) $(DOUBLEREPLIB) $(EXEC) $(TEST_EXEC) *.o


ifneq ($(wildcard make.dep), make.dep)
  make.dep: depend
endif

depend makedep: _make.dep
	@mv -f _make.dep make.dep

_make.dep: $(sources)
	@echo making depend
#	$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed '\''s/\($*\)\.o[ :]*/\1.o $@ : /g'\'' > $@ ; [ -s $@ ] || rm -f $@'
	@$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed "s/\(^[^.]*\)\.o/\1.o \1.debug.o/g" > $@'
_fast:
	cd ../fast;make -f Makefile.lib -k all

fast.% _fast.%:
	cd ../fast;make -f Makefile.lib -k $(*)


simulateSequnce: simulateSequnce_cmdline.o


evolObjsTest.ggo: evolObjs.header evolObjs.args
	cat $^ > $@


# commandline (gengetopts)
%_cmdline.h %_cmdline.c: %.ggo
	$(GENGETOPT) -i$< -F$(*)_cmdline

%.dat.q: %.dat
	awk 'BEGIN{RS="[\n\r]+";};{print "\" "$$0" \"\r"}' $< > $@
#	cat $@

DAT = cpREV45.dat.q dayhoff.dat.q jones.dat.q mtREV24.dat.q wag.dat.q HIVb.dat.q HIVw.dat.q

dat: $(DAT)

cleandat:
	rm $(DAT)

datMatrixHolder.o: $(DAT)

.PRECIOUS: $(DAT)

debug: LIB = $(DEBUGLIB)

%.debug: CPPFLAGS = $(CPPFLAGSDEBUG)
%.debug: %
	@echo "made \""$(*)"\" in debug mode"


%.debug.o: %.c
	$(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@

%.debug.o: %.cpp
	$(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@

$(DEBUGLIB): $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o)
	ar rv $@ $?
	ranlib $@

doubleRep: LOGREP=t
doubleRep: CPPFLAGS+= -DLOGREP
doubleRep: $(DOUBLEREPLIB)

%.doubleRep.o: %.c
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@

%.doubleRep.o: %.cpp
	$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@

$(DOUBLEREPLIB): $(Libsources:.cpp=.doubleRep.o) $(LibCsources:.c=.doubleRep.o)
	ar rv $@ $?
	ranlib $@

# DO NOT DELETE
diff --git a/libs/phylogeny/NNiProp.cpp b/libs/phylogeny/NNiProp.cpp
new file mode 100644
index 0000000..3657d42
--- /dev/null
+++ b/libs/phylogeny/NNiProp.cpp
@@ -0,0 +1,139 @@
+// $Id: NNiProp.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "treeIt.h"
+#include "treeUtil.h"
+#include "NNiProp.h"
+#include "bblEM.h"
+#include "bblEMProportional.h"
+#include "logFile.h"
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+using namespace std;
+
+NNiProp::NNiProp(vector<sequenceContainer>& sc,
+ vector<stochasticProcess>& sp,
+ const vector<Vdouble *> * weights,
+ vector<char>* nodeNotToSwap):_nodeNotToSwap(nodeNotToSwap),
+ _sc(sc),_sp(sp),_weights(weights) {
+ _bestScore = VERYSMALL;
+ _treeEvaluated =-1;
+ _out = NULL;
+
+}
+
+void NNiProp::setOfstream(ostream* out) {
+ _out = out;
+}
+
+tree NNiProp::NNIstep(tree et) {
+ et.create_names_to_internal_nodes();
+ _bestScore = evalTree(et);
+ _bestTree = et;
+ treeIterTopDown tIt(et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ if (mynode->isLeaf() || mynode->isRoot()) continue; // swaping only internal nodes
+
+ if (_nodeNotToSwap) {
+ if ((*_nodeNotToSwap)[mynode->id()]) {
+ continue;
+ }
+ }
+ tree newT1 = NNIswap1(et,mynode);
+ tree newT2 = NNIswap2(et,mynode);
+ MDOUBLE treeScore1 = evalTree(newT1);
+ if (treeScore1 > _bestScore) {
+ _bestTree = newT1;
+ _bestScore = treeScore1;
+ LOG(5,<<"new Best Tree: "<<_bestScore<<endl);
+ if (_out) (*_out)<<"new Best Tree: "<<_bestScore<<endl;
+ _bestTree.output(*_out);
+
+ }
+ MDOUBLE treeScore2 = evalTree(newT2);
+ if (treeScore2 > _bestScore) {
+ _bestTree = newT2;
+ _bestScore = treeScore2;
+ LOG(5,<<"new Best Tree: "<<_bestScore<<endl);
+ if (_out) (*_out)<<"new Best Tree: "<<_bestScore<<endl;
+ _bestTree.output(*_out);
+ }
+ }
+ return _bestTree;
+}
+
+tree NNiProp::NNIswap1(tree et,tree::nodeP mynode) {
+ tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
+#ifdef VERBOS
+ LOG(5,<<"b4 swap1"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+
+ tree::nodeP fatherNode = mynodeInNewTree->father();
+ tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
+ // it might be me
+ if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
+ tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0);
+
+ et.removeNodeFromSonListOfItsFather(nodeToSwap1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap2);
+ nodeToSwap2->setFather(fatherNode);
+ fatherNode->setSon(nodeToSwap2);
+ nodeToSwap1->setFather(mynodeInNewTree);
+ mynodeInNewTree->setSon(nodeToSwap1);
+#ifdef VERBOS
+ LOG(5,<<"after swap1"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+
+ return et;
+}
+
+tree NNiProp::NNIswap2(tree et,tree::nodeP mynode) {
+#ifdef VERBOS
+ LOG(5,<<"b4 swap2"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+ tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
+
+
+ tree::nodeP fatherNode = mynodeInNewTree->father();
+ tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
+ // it might be me
+ if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
+ tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap2);
+ nodeToSwap2->setFather(fatherNode);
+ fatherNode->setSon(nodeToSwap2);
+ nodeToSwap1->setFather(mynodeInNewTree);
+ mynodeInNewTree->setSon(nodeToSwap1);
+#ifdef VERBOS
+ LOG(5,<<"after swap2"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+ return et;
+
+}
+
+MDOUBLE NNiProp::evalTree(tree& et) {
+#ifdef VERBOS
+ LOG(5,<<"b4 bbl in alltrees"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+ bblEMProportional bblEMprop1(et,_sc,_sp,_weights);
+ MDOUBLE res = bblEMprop1.getTreeLikelihood();
+// MDOUBLE res = 12;
+ _treeEvaluated++;
+ // cerr.precision(5);
+ _out->precision(5);
+
+ if (_treeEvaluated) LOG(5,<<"tree: "<<_treeEvaluated<< "score = "<<res<<endl);
+ if ((_out)&&(_treeEvaluated)) (*_out)<<"tree: "<<_treeEvaluated<< "score = "<<res<<endl;
+ return res;
+}
+
+
+
+
diff --git a/libs/phylogeny/NNiProp.h b/libs/phylogeny/NNiProp.h
new file mode 100644
index 0000000..497f417
--- /dev/null
+++ b/libs/phylogeny/NNiProp.h
@@ -0,0 +1,39 @@
+// $Id: NNiProp.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___NNI_PROP
+#define ___NNI_PROP
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "definitions.h"
+#include "stochasticProcess.h"
+#include <vector>
+using namespace std;
+
// Nearest-neighbour-interchange (NNI) tree search for several genes that
// share one topology, scored with bblEMProportional (see NNiProp.cpp).
class NNiProp {
public:
	// References are stored, not copied; 'nodeNotToSwap' (may be NULL)
	// marks node ids whose surrounding topology must not be changed.
	explicit NNiProp(vector<sequenceContainer>& sc,
			   vector<stochasticProcess>& sp,
			   const vector<Vdouble *> * weights,
			   vector<char>* nodeNotToSwap);

	// One NNI round starting from 'et'; returns the best tree found.
	tree NNIstep(tree et);
	// Likelihood of the tree returned by the last NNIstep() call.
	MDOUBLE bestScore(){ return _bestScore;}
	// Optional progress stream (NULL disables progress output).
	void setOfstream(ostream* out);
private:
	ostream* _out;
	vector<char> * _nodeNotToSwap;
private:
	tree _bestTree;
	MDOUBLE _bestScore;
	vector<sequenceContainer>& _sc;
	vector<stochasticProcess>& _sp;
	const vector<Vdouble *> * _weights;

	// Scores a topology via branch-length EM (see NNiProp.cpp).
	MDOUBLE evalTree(tree& et);
	// The two alternative swaps around an internal node.
	tree NNIswap1(tree et,tree::nodeP mynode);
	tree NNIswap2(tree et,tree::nodeP mynode);
	// Number of candidate trees evaluated so far (starts at -1).
	int _treeEvaluated;

};
+#endif
diff --git a/libs/phylogeny/NNiSep.cpp b/libs/phylogeny/NNiSep.cpp
new file mode 100644
index 0000000..5985425
--- /dev/null
+++ b/libs/phylogeny/NNiSep.cpp
@@ -0,0 +1,174 @@
+// $Id: NNiSep.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "treeIt.h"
+#include "treeUtil.h"
+#include "NNiSep.h"
+#include "bblEM.h"
+#include "logFile.h"
+#include "bblEMSeperate.h"
+
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+
+using namespace std;
+
+NNiSep::NNiSep(vector<sequenceContainer>& sc,
+ vector<stochasticProcess>& sp,
+ const vector<Vdouble *> * weights,
+ vector<char>* nodeNotToSwap): _nodeNotToSwap(nodeNotToSwap),
+ _sc(sc),_sp(sp),_weights(weights) {
+ _bestTrees.resize(sc.size());
+ _bestScore=VERYSMALL;
+ _treeEvaluated =-1;
+
+}
+
+void NNiSep::setOfstream(ostream* out) {
+ _out = out;
+}
+
+
+vector<tree> NNiSep::NNIstep(vector<tree> et) {
+ const int nGene = et.size();
+ int z;
+ for (z=0; z < nGene; ++z) {
+ et[z].create_names_to_internal_nodes();
+ }
+ _bestTrees = et;
+ _bestScore = evalTrees(_bestTrees);
+
+ treeIterTopDown tIt(et[0]);
+
+ vector<tree::nodeP> mynode(nGene);
+ mynode[0] = tIt.first();
+ for (z=1; z < nGene; ++z ) {
+ mynode[z] = et[z].findNodeByName(mynode[0]->name());
+ }
+
+ while (mynode[0] != tIt.end()) {
+ bool haveToBeChecked = true;
+ if ((mynode[0]->isLeaf() || mynode[0]->isRoot())) haveToBeChecked = false;
+ if (_nodeNotToSwap) {
+ if ((*_nodeNotToSwap)[mynode[0]->id()]) {
+ haveToBeChecked = false;
+ }
+ }
+
+ if (haveToBeChecked) { // swaping only internal nodes that are not "fixed"
+ for (z=1; z < nGene; ++z ) {
+ mynode[z] = et[z].findNodeByName(mynode[0]->name());
+ }
+
+ vector<tree> newT1;
+ vector<tree> newT2;
+
+ for (z=0; z < nGene; ++z ) {
+ newT1.push_back(NNIswap1(et[z],mynode[z]));
+ newT2.push_back(NNIswap2(et[z],mynode[z]));
+ }
+ MDOUBLE treeScore1 = evalTrees(newT1);
+ if (treeScore1 > _bestScore) {
+ _bestTrees = newT1;
+ _bestScore = treeScore1;
+ LOG(5,<<"new Best Trees: "<<_bestScore<<endl);
+ if (_out) (*_out)<<"new Best Tree: "<<_bestScore<<endl;
+ if (_out) (*_out)<<"tree topology (of gene 1 in case of many genes): "<<endl;
+ _bestTrees[0].output(*_out);
+ }
+ MDOUBLE treeScore2 = evalTrees(newT2);
+ if (treeScore2 > _bestScore) {
+ _bestTrees = newT2;
+ _bestScore = treeScore2;
+ LOG(5,<<"new Best Trees: "<<_bestScore<<endl);
+ if (_out) (*_out)<<"new Best Tree: "<<_bestScore<<endl;
+ if (_out) (*_out)<<"tree topology (of gene 1 in case of many genes): "<<endl;
+ _bestTrees[0].output(*_out);
+ }
+ }
+ //nextloop:
+ mynode[0] = tIt.next();
+ }
+ return _bestTrees;
+}
+
+tree NNiSep::NNIswap1(tree et,tree::nodeP mynode) {
+ tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
+#ifdef VERBOS
+ LOG(5,<<"b4 swap1"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+
+ tree::nodeP fatherNode = mynodeInNewTree->father();
+ tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
+ // it might be me
+ if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
+ tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0);
+
+ et.removeNodeFromSonListOfItsFather(nodeToSwap1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap2);
+ nodeToSwap2->setFather(fatherNode);
+ fatherNode->setSon(nodeToSwap2);
+ nodeToSwap1->setFather(mynodeInNewTree);
+ mynodeInNewTree->setSon(nodeToSwap1);
+#ifdef VERBOS
+ LOG(5,<<"after swap1"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+
+ return et;
+}
+
+tree NNiSep::NNIswap2(tree et,tree::nodeP mynode) {
+#ifdef VERBOS
+ LOG(5,<<"b4 swap2"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+ tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
+
+
+ tree::nodeP fatherNode = mynodeInNewTree->father();
+ tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
+ // it might be me
+ if (nodeToSwap1 == mynodeInNewTree) nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
+ tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap2);
+ nodeToSwap2->setFather(fatherNode);
+ fatherNode->setSon(nodeToSwap2);
+ nodeToSwap1->setFather(mynodeInNewTree);
+ mynodeInNewTree->setSon(nodeToSwap1);
+#ifdef VERBOS
+ LOG(5,<<"after swap2"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+ return et;
+
+}
+
+
+
+
+
+MDOUBLE NNiSep::evalTrees(vector<tree>& et) {
+#ifdef VERBOS
+ LOG(5,<<"b4 bbl in alltrees"<<endl);
+ for (vector<tree>::const_iterator i=et.begin();i!=et.end();++i)
+ LOGDO(5,i->output(myLog::LogFile()));
+#endif
+ bblEMSeperate bblemsep1(et,_sc,_sp,_weights);
+ MDOUBLE res = bblemsep1.getTreeLikelihood();
+ _treeEvaluated++;
+ LOG(5,.precision(5));
+ _out->precision(5);
+
+
+ if (_treeEvaluated) LOG(5,<<"tree: "<<_treeEvaluated<< "score = "<<res<<endl);
+ if ((_out)&&(_treeEvaluated)) (*_out)<<"tree: "<<_treeEvaluated<< "score = "<<res<<endl;
+ return res;
+}
+
+
+
+
diff --git a/libs/phylogeny/NNiSep.h b/libs/phylogeny/NNiSep.h
new file mode 100644
index 0000000..0ef1e6f
--- /dev/null
+++ b/libs/phylogeny/NNiSep.h
@@ -0,0 +1,40 @@
+// $Id: NNiSep.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___NNI_SEP
+#define ___NNI_SEP
+
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "definitions.h"
+#include "stochasticProcess.h"
+#include <vector>
+using namespace std;
+
// Nearest-neighbour-interchange (NNI) search over several genes with
// separate per-gene trees of a shared topology, scored with
// bblEMSeperate (see NNiSep.cpp).
class NNiSep {
public:
	// References are stored, not copied; 'nodeNotToSwap' (may be NULL)
	// marks node ids whose surrounding topology must not be changed.
	explicit NNiSep(vector<sequenceContainer>& sc,
			   vector<stochasticProcess>& sp,
			   const vector<Vdouble *> * weights,
			   vector<char>* nodeNotToSwap);

	// One NNI round over the tree set; returns the best set found.
	vector<tree> NNIstep(vector<tree> et);
	// Likelihood of the tree set returned by the last NNIstep() call.
	MDOUBLE bestScore(){ return _bestScore;}
	// Optional progress stream (NULL disables progress output).
	void setOfstream(ostream* out);

private:
	vector<char>* _nodeNotToSwap;
	vector<tree> _bestTrees;
	MDOUBLE _bestScore;
	vector<sequenceContainer>& _sc;
	vector<stochasticProcess>& _sp;
	const vector<Vdouble *> * _weights;

	// Scores a tree set via branch-length EM (see NNiSep.cpp).
	MDOUBLE evalTrees(vector<tree>& et);
	// The two alternative swaps around an internal node.
	tree NNIswap1(tree et,tree::nodeP mynode);
	tree NNIswap2(tree et,tree::nodeP mynode);
	// Number of candidate tree sets evaluated so far (starts at -1).
	int _treeEvaluated;
	ostream* _out;

};
+#endif
diff --git a/libs/phylogeny/Nni.cpp b/libs/phylogeny/Nni.cpp
new file mode 100644
index 0000000..1127336
--- /dev/null
+++ b/libs/phylogeny/Nni.cpp
@@ -0,0 +1,119 @@
+// $Id: Nni.cpp 962 2006-11-07 15:13:34Z privmane $
+
+// version 1.00
+// last modified 3 Nov 2002
+#include "definitions.h"
+#include "treeUtil.h"
+#include "treeIt.h"
+#include "Nni.h"
+#include "bblEM.h"
+#include "logFile.h"
+#include <algorithm>
+#include <iostream>
+using namespace std;
+
+NNI::NNI(const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const Vdouble * weights): _sc(sc),_sp(sp),_weights(weights) {
+ _bestScore = VERYSMALL;
+}
+
+
+tree NNI::NNIstep(tree et) {
+ et.create_names_to_internal_nodes();
+ treeIterTopDown tIt(et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ if (mynode->isLeaf() || mynode->isRoot()) continue; // swaping only internal nodes
+ tree newT1 = NNIswap1(et,mynode);
+ tree newT2 = NNIswap2(et,mynode);
+ MDOUBLE treeScore1 = evalTree(newT1,_sc);
+ MDOUBLE treeScore2 = evalTree(newT2,_sc);
+ if (treeScore1 > _bestScore) {
+ _bestTree = newT1;
+ _bestScore = treeScore1;
+ LOG(5,<<"new Best Tree: "<<_bestScore<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+ }
+ if (treeScore2 > _bestScore) {
+ _bestTree = newT2;
+ _bestScore = treeScore2;
+ LOG(5,<<"new Best Tree: "<<_bestScore<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+ }
+ }
+ return _bestTree;
+}
+
+tree NNI::NNIswap1(tree et,tree::nodeP mynode) {
+ tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
+#ifdef VERBOS
+ LOG(5,<<"b4 swap1"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+
+ tree::nodeP fatherNode = mynodeInNewTree->father();
+ tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
+ // it might be me
+ if (nodeToSwap1 == mynodeInNewTree)
+ nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
+ tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(0);
+
+ et.removeNodeFromSonListOfItsFather(nodeToSwap1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap2);
+ nodeToSwap2->setFather(fatherNode);
+ fatherNode->setSon(nodeToSwap2);
+ nodeToSwap1->setFather(mynodeInNewTree);
+ mynodeInNewTree->setSon(nodeToSwap1);
+#ifdef VERBOS
+ LOG(5,<<"after swap1"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+
+ return et;
+}
+
+tree NNI::NNIswap2(tree et,tree::nodeP mynode) {
+#ifdef VERBOS
+ LOG(5,<<"b4 swap2"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+ tree::nodeP mynodeInNewTree = et.findNodeByName(mynode->name());
+
+
+ tree::nodeP fatherNode = mynodeInNewTree->father();
+ tree::nodeP nodeToSwap1 = mynodeInNewTree->father()->getSon(0);
+ // it might be me
+ if (nodeToSwap1 == mynodeInNewTree)
+ nodeToSwap1 = mynodeInNewTree->father()->getSon(1);
+ tree::nodeP nodeToSwap2 = mynodeInNewTree->getSon(1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap1);
+ et.removeNodeFromSonListOfItsFather(nodeToSwap2);
+ nodeToSwap2->setFather(fatherNode);
+ fatherNode->setSon(nodeToSwap2);
+ nodeToSwap1->setFather(mynodeInNewTree);
+ mynodeInNewTree->setSon(nodeToSwap1);
+#ifdef VERBOS
+ LOG(5,<<"after swap2"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif //VERBOS
+ return et;
+
+}
+
+
+
+
+
+MDOUBLE NNI::evalTree(tree& et,const sequenceContainer& sc) {
+#ifdef VERBOS
+ LOG(5,<<"b4 bbl in alltrees"<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+ bblEM bblEM1(et,sc,_sp,_weights);
+ MDOUBLE res = bblEM1.getTreeLikelihood();
+ return res;
+}
+
+
+
+
diff --git a/libs/phylogeny/Nni.h b/libs/phylogeny/Nni.h
new file mode 100644
index 0000000..17746d9
--- /dev/null
+++ b/libs/phylogeny/Nni.h
@@ -0,0 +1,32 @@
+// $Id: Nni.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___NNI
+#define ___NNI
+
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include <vector>
+using namespace std;
+
// Nearest-neighbour-interchange (NNI) local tree search for a single
// gene/partition, scored with bblEM (see Nni.cpp).
class NNI {
public:
	// References are stored, not copied; 'weights' may be NULL.
	explicit NNI(const sequenceContainer& sc,
		const stochasticProcess& sp,
		const Vdouble * weights);

	// One NNI round starting from 'et'; returns the best tree found.
	tree NNIstep(tree et);
	// Likelihood of the tree returned by the last NNIstep() call.
	MDOUBLE bestScore(){ return _bestScore;}

private:
	tree _bestTree;
	MDOUBLE _bestScore;
	const sequenceContainer& _sc;
	const stochasticProcess& _sp;
	const Vdouble * _weights;
	// Scores a topology via branch-length EM (see Nni.cpp).
	MDOUBLE evalTree(tree& et,const sequenceContainer& sd);
	// The two alternative swaps around an internal node.
	tree NNIswap1(tree et,tree::nodeP mynode);
	tree NNIswap2(tree et,tree::nodeP mynode);
};
+#endif
diff --git a/libs/phylogeny/Parameters.cpp b/libs/phylogeny/Parameters.cpp
new file mode 100644
index 0000000..1a263a2
--- /dev/null
+++ b/libs/phylogeny/Parameters.cpp
@@ -0,0 +1,361 @@
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include "Parameters.h"
+#include "ConversionUtils.h"
+#include <cstdio>
+#include <cstdlib>
+
+using namespace std;
+
+typedef Parameters::ParamType ParamType;
+
typedef Parameters::ParamType ParamType;

// A single named program parameter.  The value lives either in the
// int/float union or in the string member 's', according to 'type'
// (Parameters::Int / Float / Str / Undef).
class Parameter
{
public:

	Parameter();
	Parameter(const string& name, const int val);
	Parameter(const string& name, const float val);
	Parameter(const string& name, const string& val);
	Parameter(const Parameter& param);

	// Write "name = value" to a C stdio stream.
	void dump(FILE* outputFile) const;

	~Parameter() {}
	const string& paramLabel() const;
	ParamType paramType() const;

	// Value accessors; meaningful only for the matching paramType().
	int intValue() const;
	float floatValue() const;
	const string& stringValue() const;

	Parameter& operator=(const Parameter& param);

	// Parameters are ordered by name (the global list is kept sorted).
	friend bool operator<(const Parameter& p, const Parameter& q);
	friend ostream& operator<<(ostream& out, const Parameter& p);

private:
	string paramName;
	ParamType type;
	union {
		int i;
		float f;
	};
	string s;
};

typedef vector<Parameter> ParamList;

// Global, sorted-by-name registry behind the static Parameters API.
static ParamList paramList;
+
+Parameter::Parameter() : paramName(), type(Parameters::Undef)
+{}
+
+Parameter::Parameter(const string& name, const int val)
+{
+ paramName = name;
+ i = val;
+ type = Parameters::Int;
+}
+
+Parameter::Parameter(const string& name, const float val)
+{
+ paramName = name;
+ f = val;
+ type = Parameters::Float;
+}
+
+Parameter::Parameter(const string& name, const string& val)
+{
+ paramName = name;
+ s = val;
+ type = Parameters::Str;
+}
+Parameter::Parameter(const Parameter& param)
+{
+ paramName = param.paramName;
+ type = param.type;
+ if (type == Parameters::Int)
+ i = param.i;
+ else
+ f = param.f;
+ s = param.s;
+}
+
+
+const string& Parameter::paramLabel() const
+{
+ return paramName;
+}
+
+ParamType Parameter::paramType() const
+{
+ return type;
+}
+
+int Parameter::intValue() const
+{
+ return i;
+}
+
+float Parameter::floatValue() const
+{
+ return f;
+}
+
+const string& Parameter::stringValue() const
+{
+ return s;
+}
+
+Parameter& Parameter::operator=(const Parameter& param)
+{
+ paramName = param.paramName;
+ type = param.type;
+ if (type == Parameters::Int)
+ i = param.i;
+ else
+ f = param.f;
+ s = param.s;
+ return *this;
+}
+
+bool operator<(const Parameter& p, const Parameter& q)
+{
+ return (p.paramName < q.paramName);
+}
+
+ostream& operator<<(ostream& out, const Parameter& p) {
+ switch(p.type) {
+ case Parameters::Int:
+ return out << p.paramName << '\t' << "(Int)" << '\t' << p.i;
+ case Parameters::Float:
+ return out << p.paramName << '\t' << "(Float)" << '\t' << p.f;
+ case Parameters::Str:
+ return out << p.paramName << '\t' << "(Str)" << '\t' << p.s;
+ case Parameters::Undef:
+ break;
+ }
+ return out << '\n';
+}
+
+
+void Parameter::dump(FILE* outputFile) const {
+ switch(type) {
+ case Parameters::Int:
+ fprintf(outputFile, "%s = %d", paramName.c_str(), i);
+ case Parameters::Float:
+ fprintf(outputFile, "%s = %f", paramName.c_str(), f);
+ case Parameters::Str:
+ fprintf(outputFile, "%s = %s", paramName.c_str(), s.c_str());
+ case Parameters::Undef:
+ break;
+ }
+}
+
+
+ParamList::iterator findInsertionPoint(ParamList& paramList,
+ const string& paramName)
+{
+ unsigned short start = 0;
+ unsigned short stop = paramList.size();
+ while (stop != start) {
+ unsigned short pos = start + (stop-start)/2;
+ int comp = paramName.compare(paramList[pos].paramLabel());
+ if (comp == 0)
+ stop = start = pos;
+ else if (comp > 0)
+ start = pos + 1;
+ else
+ stop = pos;
+ }
+
+ ParamList::iterator it=paramList.begin();
+ it+=stop;
+ return it;
+}
+
+Parameters::Parameters()
+{}
+
+void Parameters::readParameters(istream& paramStream)
+{
+ while (!paramStream.eof()) {
+ string param;
+ getline(paramStream, param);
+ trim(param);
+ string paramName = nextToken(param);
+
+ if (paramName.length() == 0) continue;
+
+ if (*(paramName.data()) == '#') continue;
+
+ updateParameter(paramName, param.c_str());
+ }
+}
+
+
+bool Parameters::empty() {
+ return paramList.empty();
+}
+
+void Parameters::addParameter(const string& paramName, const int value)
+{
+ ParamList::iterator pos = findInsertionPoint(paramList, paramName);
+ if (pos != paramList.end() && (*pos).paramLabel() == paramName)
+ (*pos) = Parameter(paramName, value);
+ else
+ paramList.insert(pos, Parameter(paramName, value));
+}
+
+void Parameters::addParameter(const string& paramName, const double value)
+{
+ ParamList::iterator pos = findInsertionPoint(paramList, paramName);
+ if (pos != paramList.end() && (*pos).paramLabel() == paramName)
+ (*pos) = Parameter(paramName, (float)value);
+ else
+ paramList.insert(pos, Parameter(paramName, (float)value));
+}
+
+void Parameters::addParameter(const string& paramName, const string& value)
+{
+ ParamList::iterator pos = findInsertionPoint(paramList, paramName);
+ if (pos != paramList.end() && (*pos).paramLabel() == paramName)
+ (*pos) = Parameter(paramName, value);
+ else
+ paramList.insert(pos, Parameter(paramName, value));
+}
+
+void Parameters::updateParameter(const string& paramName,
+ const char* const value)
+{
+ ParamList::iterator pos = findInsertionPoint(paramList, paramName);
+ if (pos != paramList.end() && (*pos).paramLabel() == paramName)
+ switch ((*pos).paramType()) {
+ case Int:
+ (*pos) = Parameter(paramName, atoi(value));
+ break;
+ case Float:
+ (*pos) = Parameter(paramName, (float)atof(value));
+ break;
+ case Str:
+ (*pos) = Parameter(paramName, string(value));
+ case Undef:
+ (*pos) = Parameter(paramName, string(value));
+ }
+ else
+ paramList.insert(pos, Parameter(paramName, string(value)));
+}
+
+
+ParamType Parameters::paramType(const string& paramName)
+{
+ ParamList::iterator pos = findInsertionPoint(paramList, paramName);
+ if (pos != paramList.end() && (*pos).paramLabel() == paramName)
+ return (*pos).paramType();
+ else
+ return Undef;
+}
+
+
+int Parameters::getInt(const string& paramName, const int& defaultValue)
+{
+ ParamList::iterator pos = findInsertionPoint(paramList, paramName);
+ if (pos != paramList.end() && (*pos).paramLabel() == paramName)
+ switch ((*pos).paramType()) {
+ case Int:
+ return (*pos).intValue();
+ case Float:
+ return (int)(*pos).floatValue();
+ case Str:
+ return atoi((*pos).stringValue().data());
+ case Undef:
+ break;
+ }
+ return defaultValue;
+}
+
+float Parameters::getFloat(const string& paramName, const float& defaultValue)
+{
+ ParamList::iterator pos = findInsertionPoint(paramList, paramName);
+ if (pos != paramList.end() && (*pos).paramLabel() == paramName)
+ switch ((*pos).paramType()) {
+ case Float:
+ return (*pos).floatValue();
+ case Int:
+ return (float)(*pos).intValue();
+ case Str:
+ return (float) atof((*pos).stringValue().data());
+ case Undef:
+ break;
+ }
+ return defaultValue;
+}
+
+string Parameters::getString(const string& paramName,const string& defaultValue)
+{
+ ParamList::iterator pos = findInsertionPoint(paramList, paramName);
+ if (pos != paramList.end() && (*pos).paramLabel() == paramName)
+ switch ((*pos).paramType()) {
+ case Str:
+ return (*pos).stringValue();
+ case Float: {
+ return appendDouble2string((*pos).floatValue());
+ }
+ case Int: {
+ return appendInt2string((*pos).intValue());
+ }
+ case Undef:
+ break;
+ }
+ return defaultValue;
+}
+
+void Parameters::dump(ostream& out)
+{
+ for (ParamList::iterator i=paramList.begin(); i != paramList.end(); ++i)
+ out << *i << '\n';
+}
+
+//void Parameters::dump(DebugStream& out, const unsigned int msgLevel)
+//{
+// for (ParamList::iterator i=paramList.begin(); i != paramList.end(); ++i)
+// out(msgLevel) << *i;
+//}
+
+void Parameters::dump(FILE* outputFile) {
+ for (ParamList::iterator i = paramList.begin() ; i != paramList.end() ; i++) {
+ i->dump(outputFile);
+ fprintf(outputFile, "\n");
+ }
+
+ fprintf(outputFile, "\n");
+}
+
+string Parameters::nextToken(string& str)
+{
+ unsigned int start = 0;
+ while (start < str.length() &&
+ (str[start] == ' ' || str[start] == '\t' || str[start] == '\n'))
+ ++start;
+
+ if (start >= str.length()) {
+ str = "";
+ return "";
+ }
+
+ unsigned int stop = start+1;
+ while (stop < str.length() &&
+ str[stop] != ' ' && str[stop] != '\t' && str[stop] != '\n')
+ ++stop;
+
+ unsigned int next = stop;
+ while (next < str.length() &&
+ (str[next] == ' ' || str[next] == '\t' || str[next] == '\n'))
+ ++next;
+
+ string result = str.substr((int)start, stop-start);
+ str = str.substr((int)next);
+ return result;
+}
diff --git a/libs/phylogeny/Parameters.h b/libs/phylogeny/Parameters.h
new file mode 100644
index 0000000..e132942
--- /dev/null
+++ b/libs/phylogeny/Parameters.h
@@ -0,0 +1,249 @@
+#ifndef _Parameters_h
+#define _Parameters_h
+
+#include <iostream>
+#include <ostream>
+#include <string>
+//#include "macros.h"
+//#include "DebugStream.h"
+//#include "StringUtils.h"
+
+using std::string;
+using std::istream;
+using namespace std;
+
+/*
+CLASS
+ Parameters
+
+ A utility class used to manage program parameters. The class supports
+ setting default values for parameters, reading values from a parameters
+ file and accessing parameters values from other parts of the program.
+
+KEYWORDS
+ parameters
+
+AUTHORS
+ Meir Fuchs (mailto: meirfux at math.tau.ac.il)
+
+ Copyright: SAMBA group, Tel-Aviv Univ. Israel, 1997.
+
+CHANGES LOG
+<UL>
+<LI>9.01.05 Dina:
+Bug fix: adding check to iterator end() to findInsertionPoint result
+to paramType, getInt, getString, getFloat functions
+</LI>
+<LI>17.05.04 Oranit Dror:
+Adding new methods: dump() and empty()
+</LI>
+</UL>
+
+GOALS
+ Aid in managing program parameters. The Parameters class's main goal is to
+ relieve programmers from the need to rewrite specialized parameters reading
+ code sections for each of the programs. The Parameters class holds integer,
+ floating point or string values in static storage indexed using the
+ parameter's name. Class also supplies method for parsing strings.
+
+USAGE
+ The following section covers several issues regarding the Parameters class
+ and its usage. Users should understand the issues covered below before
+ using the class.
+
+USAGE: SETTING DEFAULT PARAMETERS
+ Default parameters are set using the addParameter methods. Note that the
+ type of the parameter is set according to the addParameter arguments. If
+ a parameter is set using addParameter with an integer argument then
+ subsequent updates (using updateParameter) to the same parameter will all
+ be stored as integers. Therefore the following code should output a 0:
+ EXAMPLE
+ Parameters::addParameter("Dummy", 3);
+ Parameters::updateParameter("Dummy", "This should set it to zero");
+ cout << Parameters::getstring("Dummy");
+ END
+
+ Note also that when setting defuault values of float parameters always use
+ a decimal point or else these parameters will be added as intgers. For
+ example:
+ EXAMPLE
+ Parameters::addParameter("CubeSize", 1.0); OK
+ Parameters::addParameter("CubeSize", 1); Not OK. Integer parameter
+ END
+
+USAGE: READING PARAMETERS FROM FILE
+ The readParameters method recieves an input stream from which parameters are
+ to be read. Files are structured so that each line specifies the value of a
+ parameter. Each line gives the parameter name, a white space and then the
+ parameter value. Lines whose first non white-space charachter is # are
+ ignored. A basic schema for using the Parameters class is to set the default
+ values using addParameter calls and then calling readParameters to read in
+ parameters with other values or new parameters. The following example works
+ as such using the Parameters::dump method to print all the parameters
+ and their values:
+ EXAMPLE
+ Parameters::addParameter("CubeSize", 1.0);
+ Parameters::addParameter("MinVote", 8);
+ ifstream params("params");
+ Parameters::readParameters(params);
+ params.close();
+ Parameters::dump(cout);
+ END
+ With the following parameters file:
+ EXAMPLE
+ CubeSize 0.5
+ File pdb4hhb.ent
+ END
+ The following output should result:
+ EXAMPLE
+ CubeSize (Float) 0.5
+ File (Str) pdb4hhb.ent
+ MinVote (Int) 8
+ END
+
+USAGE: ACCESSING PARAMETERS VALUES
+ using the getInt, getFloat and getstring methods one may access the
+ parameters values. Note that a value will always be returned even if the
+ parameter is not stored as the same type. The get methods attempt to
+ convert the parameter type to the requested return type of the method.
+ The follwing code should produce 3 1's as its output:
+ EXAMPLE:
+ Parameters::addParameter("MaxMix", 1); OK added an integer parameter
+ cout << Parameters::getInt("MaxMix");
+ cout << Parameters::getFloat("MaxMix");
+ cout << Parameters::getstring("MaxMix");
+ END
+ Also note that parameters names are case sensitive.
+
+USAGE: SUBCLASSING AND PERFORMANCE
+ The Parameters engine keeps the parameters in a sorted list. Although
+ finding a parameter and its value in this list is considerably fast most
+ users will not want this overhead of searching for the parameter using
+ string comparisons inside their main loops, as part of a code which can be
+ executed a great number of times.
+ The idea is to subclass the Parameters class and hold the values which
+ require direct and fast access in seperate static variables. All parameters
+ are accessed not throguh the getParameter methods but rather through
+ specialized methods of the subclass. The following is an example of such an
+ implementation. Notice the readParameters method.
+ EXAMPLE:
+ static int min_vote = 8; // Default values
+ static float cube_size = 1.0;
+
+ class ProgParams : protected Parameters
+ {
+ int minVote() { return min_vote };
+
+ float cubeSize() { return cube_size };
+
+ // file name is not held in static variable. Don't care about parameter
+ // access time.
+ string fileName() { return getstring("FileName"); }
+
+ int readParameters(char* paramsfile) {
+ addParameter("MinVote", min_vote);
+ addParameter("CubeSize", cube_size);
+
+ ifstream params(paramsfile);
+ Parameters::readParameters(params);
+ params.close();
+
+ min_vote = getInt("MinVote");
+ cube_size = getFloat("CubeSize");
+ }
+ }
+ END
+*/
+class Parameters
+{
+public:
+ //// Used by the paramType method. See below.
+ enum ParamType { Undef, Int, Float, Str };
+
+ //// readParameters recieves an input stream and reads parameters off this
+ // input stream. See the usage section for details of how a parameters
+ // file may be structured.
+ static void readParameters(istream& paramStream);
+
+ ////
+ // Returns true if no parameters are defined. <br>
+ // Author: Oranit Dror (oranit at tau.ac.il)
+ static bool empty();
+
+ // GROUP: Setting parameters
+
+ //// Adds an integer parameter. The integer value added will actually be
+ // stored as an integer. Subsequent updates to the same parameter using
+ // updateParameter will all be stored as integers.
+ static void addParameter(const string& paramName, const int value);
+
+ //// Adds a float parameter. The float value added will actually be
+ // stored as a float. Subsequent updates to the same parameter using
+ // updateParameter will all be stored as floats.
+ static void addParameter(const string& paramName, const double value);
+
+ //// Adds a string parameter. The string value added will actually be
+ // stored as a string. Subsequent updates to the same parameter using
+ // updateParameter will all be stored as strings.
+ static void addParameter(const string& paramName, const string& value);
+
+ //// Update the parameter value without changing the parameter type. The
+ // value parameter is converted to the parameter's type if this parameter
+ // already exists. If the parameter is not yet listed then updateParameter
+ // adds a new parameter of string type.
+ static void updateParameter(const string& paramName,
+ const char* const value);
+
+ // GROUP: Getting parameters values.
+
+ //// Returns the storage type of the given parameter. If a parameter
+ // of the given name does not exist then Undef is returned. See enum
+ // ParamType above for possible return values.
+ static ParamType paramType(const string& paramName);
+
+ //// Gets the integer value of a given parameter. If parameter is not of
+ // integer type then its value is converted to integer. If parameter does
+ // not exist a 0 is returned.
+ static int getInt(const string& paramName, const int& defaultValue=0);
+
+ //// Gets the float value of a given parameter. If parameter is not of
+ // float type then its value is converted to float. If parameter does
+ // not exist a 0 is returned.
+ static float getFloat(const string& paramName, const float& defaultValue=0.0);
+
+ //// Gets the string value of a given parameter. If parameter is not of
+ // string type then its value is converted to string. If parameter does
+ // not exist an empty string is returned.
+ static string getString(const string& paramName, const string& defaultValue=string());
+
+ // GROUP: Other methods
+
+
+
+ //// Output all listed parameters. Used for debugging.
+ static void dump(ostream& out);
+
+ //// Output all listed parameters. Used for debugging.
+ //static void dump(DebugStream& out, const unsigned int msgLevel);
+
+ ////
+ // Output all listed parameters. <br>
+ // Author: Oranit Dror (oranit at tau.ac.il)
+ static void dump(FILE* outputFile);
+
+ //// A utility method. nextToken recieves an argument string, finds the first
+ // white-space delimited token in this string and returns it while cutting
+ // this token off of the argument string (It it passed by reference). Tokens
+ // are returned without any spaces. This method may be used repetitively to
+ // tokenize a string.
+ static string nextToken(string& str);
+
+protected:
+ //// Constructor is protected since all methods are static. No need to
+ // actually form an instance of this class.
+ Parameters();
+};
+
+#endif
+
+
diff --git a/libs/phylogeny/aaJC.cpp b/libs/phylogeny/aaJC.cpp
new file mode 100644
index 0000000..6fc9aea
--- /dev/null
+++ b/libs/phylogeny/aaJC.cpp
@@ -0,0 +1,7 @@
+// $Id: aaJC.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "aaJC.h"
+#include "errorMsg.h"
+
+
+
diff --git a/libs/phylogeny/aaJC.h b/libs/phylogeny/aaJC.h
new file mode 100644
index 0000000..e826a76
--- /dev/null
+++ b/libs/phylogeny/aaJC.h
@@ -0,0 +1,52 @@
+// $Id: aaJC.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___AA_JC
+#define ___AA_JC
+
+#include "replacementModel.h"
+#include <cmath>
+using namespace std;
+
+namespace aaDef {
+ const MDOUBLE Alp = 20.0;
+ const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet
+ const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl;
+ const MDOUBLE alDiv_omalp = Alp/(Alp-1.0);
+ const MDOUBLE m_alDiv_omalp = -alDiv_omalp;
+}
+
+class aaJC : public replacementModel {
+public:
+
+ virtual replacementModel* clone() const { return new aaJC(*this); }// see note down:
+// virtual aaJC* clone() const { return new aaJC(*this); }
+ const int alphabetSize() const {return 20;}
+
+ explicit aaJC(){};
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
+//(wrong!) return ((i==j) ? 0.05+0.95*exp(-20.0*d): 0.05-0.05*exp(-20.0*d));
+ return ((i==j) ? aaDef::odAl+aaDef::om_odAl*exp(aaDef::m_alDiv_omalp*d): aaDef::odAl-aaDef::odAl*exp(aaDef::m_alDiv_omalp*d));
+
+ }
+
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+ //(worng!)return ((i==j) ? -19.0*exp(-20.0*d): exp(-20.0*d));
+ return ((i==j) ? -exp(aaDef::m_alDiv_omalp*d): exp(aaDef::m_alDiv_omalp*d)/(aaDef::Alp-1));
+ }
+ const MDOUBLE freq(const int i) const {return aaDef::odAl;};
+
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+ //(wrong!) return ((i==j) ? 19.0*20.0*exp(-20.0*d): 0.0-20.0*exp(-20.0*d));
+ return ((i==j) ? aaDef::alDiv_omalp*exp(aaDef::m_alDiv_omalp*d): aaDef::m_alDiv_omalp*exp(aaDef::m_alDiv_omalp*d));
+ }
+
+};
+
+#endif
+
+// note: according to the new C++ rules, the clone function should be like this:
+// virtual aaJC* clone() const { return new aaJC(*this); }
+// however, not all compiler support it yet. look at More Effective C++ page 126.
+
+
+
diff --git a/libs/phylogeny/adrianCodon.dat.q b/libs/phylogeny/adrianCodon.dat.q
new file mode 100644
index 0000000..b69ded8
--- /dev/null
+++ b/libs/phylogeny/adrianCodon.dat.q
@@ -0,0 +1,72 @@
+" 634 "
+" 25105 560 "
+" 1209 37271 620 "
+" 1353 344 196 494 "
+" 112 2048 176 34 21460 "
+" 0 140 1656 380 71026 41523 "
+" 238 255 56 2967 35040 33972 43340 "
+" 8628 295 812 370 1546 65 0 23 "
+" 328 7142 272 370 715 4680 1286 876 707 "
+" 1192 289 7588 303 103 124 1929 82 52300 924 "
+" 509 0 304 10057 836 0 806 6124 1328 45060 1132 "
+" 607 43 47 105 5067 0 0 0 863 56 221 189 "
+" 0 301 43 0 0 2141 279 0 0 475 32 0 27331 "
+" 167 88 393 141 1487 366 3364 545 193 140 538 162 5087 1030 "
+" 34 0 42 421 0 0 346 3233 0 0 61 718 31469 35230 1626 "
+" 2841 308 69 647 711 76 0 346 1297 278 124 413 193 49 200 0 "
+" 195 2491 229 114 57 356 73 12 114 945 197 0 8 74 42 9 2449 "
+" 286 295 1514 350 199 128 640 63 66 257 565 175 42 15 241 41 31892 2201 "
+" 352 19 175 3379 195 32 0 441 246 85 129 1259 106 0 126 176 4155 62775 2262 "
+" 190 36 58 114 2112 0 0 0 0 51 81 158 201 0 114 51 2926 203 490 116 "
+" 37 204 30 71 0 1701 355 109 35 444 1 0 27 114 56 21 205 1284 335 79 21842 "
+" 81 99 218 95 183 0 4067 30 94 182 10 76 164 61 192 0 617 512 2569 361 57041 44793 "
+" 54 30 30 239 134 158 0 2062 10 30 35 370 101 0 70 141 263 0 183 1574 32490 33996 32457 "
+" 1891 0 623 93 0 147 671 0 46674 151 12628 0 11 0 0 134 8237 543 0 277 818 47 0 0 "
+" 701 549 1184 0 0 246 241 87 5836 1540 12311 0 6 41 48 0 452 5598 739 0 16 841 253 0 40388 "
+" 854 120 2602 57 54 69 359 0 13337 47 37725 91 0 31 105 0 0 660 5014 399 118 0 2656 0 82443 40802 "
+" 695 0 735 893 81 28 0 661 12916 0 6008 2384 89 35 60 56 1344 0 484 9142 0 0 0 1483 85032 87710 53112 "
+" 208 39 0 46 600 0 0 0 19 0 0 55 7884 0 1512 386 2427 200 95 0 3069 0 0 0 2011 0 15 0 "
+" 35 133 6 0 0 387 59 0 0 142 42 0 365 3634 769 272 79 813 191 114 0 1470 0 70 95 1012 0 0 17551 "
+" 0 15 74 0 97 91 378 52 27 44 46 8 876 732 2298 588 106 83 604 90 286 0 1947 0 0 70 707 0 33878 14863 "
+" 63 0 14 229 8 0 114 484 67 48 0 147 280 278 720 3849 349 0 160 1407 0 0 0 1951 0 3 43 1427 22703 32337 15002 "
+" 1304 155 0 389 408 75 0 79 444 170 0 236 197 11 45 0 2595 59 234 256 149 35 74 60 51 0 0 143 109 12 0 27 "
+" 120 2602 73 69 0 258 160 112 46 821 22 78 0 43 18 0 158 647 151 46 14 149 84 17 1 119 23 2 0 42 7 20 2320 "
+" 0 168 893 221 158 73 415 109 0 180 336 209 35 1 131 44 138 148 1538 143 107 83 168 39 1 91 217 0 0 26 55 14 23280 3052 "
+" 117 9 91 3406 173 5 0 311 55 62 40 1017 39 0 16 75 274 0 113 787 57 14 76 93 0 16 26 138 10 0 1 44 3660 28072 2533 "
+" 450 59 100 310 7741 0 0 0 225 220 182 557 1008 0 588 153 639 41 127 145 2469 39 211 190 150 0 48 78 625 0 97 61 1324 82 245 122 "
+" 28 466 94 52 0 6013 75 0 50 1265 106 0 0 452 240 0 47 248 183 0 14 2010 303 164 55 277 0 61 0 333 64 62 86 670 189 0 17008 "
+" 130 336 356 168 0 401 16072 0 103 537 357 370 656 161 817 0 379 93 512 228 0 428 10166 0 0 195 789 0 0 83 543 33 0 379 1907 42 47381 29661 "
+" 89 80 65 525 0 0 0 7268 98 211 0 1307 86 67 226 484 125 11 108 230 58 130 0 2312 0 0 130 252 29 17 64 381 133 0 145 799 30850 26704 28871 "
+" 285 65 23 253 446 24 106 6 2230 278 0 315 177 0 65 33 468 34 17 54 163 23 55 33 366 0 0 0 145 26 0 28 1661 180 0 104 2231 92 0 278 "
+" 28 1227 58 189 12 521 129 103 53 5470 0 0 37 87 33 21 31 344 83 26 64 236 268 30 0 941 162 1 0 80 13 28 0 1655 105 0 0 2186 744 149 19297 "
+" 27 356 299 139 176 0 843 160 0 684 3262 829 29 45 241 0 110 65 309 60 76 115 522 18 0 0 1073 0 0 2 82 0 0 67 1559 183 750 315 5134 73 44365 23295 "
+" 92 205 66 1727 96 190 0 728 0 13 0 7147 0 0 47 96 0 0 89 555 60 34 0 335 244 0 0 1432 18 0 18 105 59 0 63 2203 356 0 0 2632 28434 37047 23095 "
+" 318 54 33 115 3527 41 76 0 518 181 0 64 23970 0 1303 260 576 75 64 47 821 131 0 0 179 0 0 0 4505 11 14 274 764 39 80 51 6746 0 0 30 1310 0 179 0 "
+" 27 179 23 44 3 2249 0 308 11 354 78 34 330 12669 395 164 61 157 53 32 75 413 144 0 0 108 75 38 251 3338 87 38 51 294 54 0 0 4666 0 0 0 797 0 0 22326 "
+" 20 26 113 25 429 137 2071 322 0 22 220 58 3262 1931 2537 1548 21 38 128 48 121 44 321 57 0 41 121 25 34 50 1723 0 34 0 336 11 1230 167 5933 77 0 0 790 43 45141 19340 "
+" 76 42 6 207 135 150 294 2554 64 143 0 486 810 110 539 13791 171 2 57 142 0 0 134 537 0 31 5 200 0 58 22 3459 129 0 7 388 0 0 0 5346 0 31 0 1160 31707 35610 22203 "
+" 18 407 23 0 0 68 19 36 42 165 0 0 2 88 44 2 117 3381 122 0 0 99 45 6 0 290 36 0 0 266 30 38 18 159 13 0 1 58 142 23 0 80 5 0 3 144 35 0 "
+" 33 0 23 658 24 44 108 126 0 20 64 327 60 14 66 133 254 286 87 4548 15 0 8 90 141 9 0 754 142 1 81 288 33 0 33 255 39 24 0 112 18 0 29 130 37 14 20 166 53555 "
+" 277 164 108 290 6514 235 482 1018 165 446 8 1100 435 12 319 0 838 111 227 157 5890 0 507 0 340 0 64 3 320 0 0 0 245 73 90 109 6631 419 0 627 412 59 338 125 825 102 176 201 47 59 "
+" 51 577 50 66 169 4821 1421 355 54 2047 24 106 112 72 80 62 188 439 166 46 0 5279 0 0 0 368 104 23 0 445 72 0 30 239 87 0 264 4869 738 374 122 466 103 38 4 415 40 126 541 37 22923 "
+" 110 82 145 163 1203 0 14459 754 24 1451 151 763 183 30 477 38 0 233 599 0 273 0 12183 0 111 219 802 0 707 0 0 0 110 158 176 42 520 675 20335 0 0 499 1107 178 0 0 564 0 146 0 76141 40261 "
+" 112 181 54 602 1180 581 0 5578 112 651 68 1954 0 31 157 150 297 90 115 657 0 135 0 5714 0 0 0 679 0 0 41 578 87 3 74 288 631 521 937 5109 167 107 21 611 147 31 96 454 0 834 31553 32600 44414 "
+" 31 241 33 0 45 319 0 86 16 1649 42 219 33 130 46 2 125 576 73 12 0 102 64 24 110 1890 226 0 0 317 18 43 0 51 24 0 56 410 279 66 0 774 82 0 50 219 33 124 1297 0 172 1595 327 77 "
+" 20 14 49 22 39 18 92 0 48 39 549 56 30 14 95 20 79 42 201 130 28 0 142 1 270 84 1199 29 74 19 105 19 10 3 27 6 35 31 67 0 8 14 446 0 13 9 56 41 166 229 174 47 576 33 341 "
+" 43 8 28 397 156 20 280 403 108 352 75 2043 29 21 59 187 145 43 89 989 22 0 91 211 106 0 104 2711 123 20 35 305 27 19 7 86 180 46 200 423 72 80 88 1072 108 58 112 224 0 2135 495 0 187 2090 61046 387 "
+" 123 9 9 101 615 6 102 140 180 69 106 0 6752 231 1116 418 193 9 104 167 205 38 23 15 0 83 4 103 54777 7485 8703 8464 67 34 41 0 611 88 0 90 49 0 18 92 6666 153 0 364 159 265 4644 80 0 186 96 51 168 "
+" 12 70 0 0 23 155 11 48 3 70 0 0 70 742 186 61 38 346 27 46 25 170 6 0 0 117 17 14 220 2693 284 0 5 27 8 0 0 162 124 20 3 48 23 0 28 957 87 0 3979 750 23 924 0 0 574 154 61 1268 "
+" 59 45 80 75 192 81 637 163 28 0 74 99 1733 57 3345 832 131 42 198 55 181 6 226 149 53 0 94 204 14044 5603 27723 9664 97 0 37 34 172 50 1000 141 1 32 254 0 523 72 2117 491 102 84 1377 107 5207 276 111 744 201 47609 814 "
+" 9 17 12 85 47 14 107 170 21 21 26 101 326 48 262 910 73 74 23 359 0 0 55 201 17 0 20 146 234 0 387 2714 16 0 5 30 82 2 39 186 30 18 5 66 172 13 125 928 625 4904 160 0 206 991 125 212 787 1638 32469 1494 "
+" 0.0282483 0.0206292 0.0319075 0.0182494 0.0168831 0.0159757 0.0058938 0.0144022 0.0135116 0.0190724 0.0118542 0.0136325 0.0093705 0.0199714 0.0218874 0.0174818 "
+" 0.0136792 0.0143825 0.0337043 0.0116006 0.0177685 0.0150006 0.0058835 0.0176118 0.0061893 0.0087184 0.0084944 0.0054224 0.0080368 0.0173529 0.0373569 0.0150280 "
+" 0.0311168 0.0246045 0.0388972 0.0251865 0.0179100 0.0212765 0.0059683 0.0199671 0.0184506 0.0176209 0.0132786 0.0115579 0.0083782 0.0137699 0.0265260 0.0136025 "
+" 0.0159995 0.0132055 0.0133496 0.0159777 0.0043280 0.0171276 0.0119089 0.0124708 0.0109899 0.0085271 0.0195872 0.0141357 0.0190797 "
+" AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT "
+" CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT "
+" GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT "
+" TAC TAT TCA TCC TCG TCT TGC TGG TGT TTA TTC TTG TTT "
+" S_ij = S_ji and PI_i based on the empirical codon matrix: "
+" A Schneider, GM Cannarozzi and GH Gonnet. Empirical codon "
+" substitution matrix. BMC Bioinformatics 6:134. 2005. "
+
diff --git a/libs/phylogeny/allTrees.cpp b/libs/phylogeny/allTrees.cpp
new file mode 100644
index 0000000..38611ee
--- /dev/null
+++ b/libs/phylogeny/allTrees.cpp
@@ -0,0 +1,134 @@
+// $Id: allTrees.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "allTrees.h"
+#include "treeUtil.h"
+#include "treeIt.h"
+#include "bblEM.h"
+#include <algorithm>
+#include <iostream>
+
+#include "someUtil.h"
+
+using namespace std;
+#ifndef VERBOS
+#define VERBOS
+#endif
+
+
+allTrees::allTrees(bool keepAllTrees) : _keepAllTrees(keepAllTrees) {
+ _bestScore = VERYSMALL;
+}
+
+void get3seqTreeAndIdLeftVec(const sequenceContainer* sc,
+ tree& starT,
+ vector<int>& idList){
+ sequenceContainer::constTaxaIterator tIt;
+ sequenceContainer::constTaxaIterator tItEnd;
+ tIt.begin(*sc);
+ tItEnd.end(*sc);
+ while(tIt != tItEnd) {
+ idList.push_back(tIt->id());
+ ++tIt;
+ }
+ if (sc->numberOfSeqs()<3) errorMsg::reportError(" searching a tree for number of sequences < 3 ");
+ starT.createRootNode();
+ starT.createNode(starT.getRoot(),1);
+ starT.createNode(starT.getRoot(),2);
+ starT.createNode(starT.getRoot(),3);
+
+ const string nameOfSeq1 = (*sc)[idList[idList.size()-1]].name();
+ const string nameOfSeq2 = (*sc)[idList[idList.size()-2]].name();
+ const string nameOfSeq3 = (*sc)[idList[idList.size()-3]].name();
+ idList.pop_back();
+ idList.pop_back();
+ idList.pop_back();
+
+ starT.getRoot()->getSon(0)->setName(nameOfSeq1);
+ starT.getRoot()->getSon(1)->setName(nameOfSeq2);
+ starT.getRoot()->getSon(2)->setName(nameOfSeq3);
+ starT.createFlatLengthMatrix();
+}
+
+void allTrees::recursiveFind( const sequenceContainer* sc,
+ const stochasticProcess* sp,
+ const Vdouble * weights,
+ const int maxIterations,
+ const MDOUBLE epsilon){
+ tree starT;
+ vector<int> ids;
+ get3seqTreeAndIdLeftVec(sc,starT,ids);
+ recursiveFind(starT,*sp,*sc,ids,weights,maxIterations,epsilon);
+}
+
+tree getAnewTreeFrom(const tree& et, tree::nodeP & mynode,
+ vector<int> & idLeft, const string& nameToAdd) {
+ tree newT = et;
+ tree::nodeP mynodeInNewTree = newT.findNodeByName(mynode->name());
+// int NameToAdd = idLeft[idLeft.size()-1];
+ idLeft.pop_back();
+ tree::nodeP fatherNode = mynodeInNewTree->father();
+ tree::nodeP newInternalNode = newT.createNode(fatherNode, newT.getNodesNum());
+ mynodeInNewTree->setFather(newInternalNode);
+ newInternalNode->setSon(mynodeInNewTree);
+
+ fatherNode->removeSon(mynodeInNewTree);
+ tree::nodeP newOTU= newT.createNode(newInternalNode, newT.getNodesNum());;
+ //string nameX = (*sc)[NameToAdd].name();
+ newOTU->setName(nameToAdd);
+ newOTU->setDisToFather(tree::FLAT_LENGTH_VALUE);
+ newInternalNode->setDisToFather(tree::FLAT_LENGTH_VALUE);
+ newT.create_names_to_internal_nodes();
+
+ return newT;
+}
+
+void allTrees::recursiveFind(tree et,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ vector<int> idLeft,
+ const Vdouble * weights,
+ const int maxIterations,
+ const MDOUBLE epsilon) {
+
+ if (idLeft.empty()) {
+ //static int k=1; k++;
+ MDOUBLE treeScore = evalTree(et,sp,sc,maxIterations,epsilon,weights);
+ if (_keepAllTrees) {
+ _allPossibleTrees.push_back(et);
+ _allPossibleScores.push_back(treeScore);
+ }
+ LOG(5,<<".");
+ //LOG(5,<<"tree: "<<k<<" l= "<<treeScore<<endl);
+ if (treeScore > _bestScore) {
+ //LOG(5,<<"new Best score!"<<endl);
+ _bestTree = et;
+ _bestScore = treeScore;
+ }
+ } else {
+ treeIterTopDown tIt(et);
+ tree::nodeP mynode = tIt.first();
+ mynode = tIt.next(); // skipping the root
+ for (; mynode != tIt.end(); mynode = tIt.next()) {
+ int NameToAdd = idLeft[idLeft.size()-1];
+ tree newT = getAnewTreeFrom(et,mynode,idLeft,sc[NameToAdd].name());
+ recursiveFind(newT,sp,sc,idLeft,weights,maxIterations,epsilon);
+ idLeft.push_back(NameToAdd);
+ }
+ }
+}
+
+MDOUBLE allTrees::evalTree( tree& et,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const Vdouble * weights) {
+ bblEM bblEM1(et,sc,sp,weights,maxIterations,epsilon);
+ MDOUBLE res =bblEM1.getTreeLikelihood();
+ return res;
+}
+
+
+
+
diff --git a/libs/phylogeny/allTrees.h b/libs/phylogeny/allTrees.h
new file mode 100644
index 0000000..86e6e21
--- /dev/null
+++ b/libs/phylogeny/allTrees.h
@@ -0,0 +1,68 @@
+// $Id: allTrees.h 1731 2007-02-26 13:45:23Z itaymay $
+
+#ifndef ___ALL_TREES
+#define ___ALL_TREES
+
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include <vector>
+using namespace std;
+
+void get3seqTreeAndIdLeftVec(const sequenceContainer* sc,
+ tree& starT,
+ vector<int>& idList);
+
+tree getAnewTreeFrom( const tree& et,
+ tree::nodeP & mynode,
+ vector<int> & idLeft,
+ const string& nameToAdd);
+class allTrees {
+public:
+ explicit allTrees(bool keepAllTrees = false);
+ MDOUBLE getBestScore() {return _bestScore;}
+ tree getBestTree() {return _bestTree;}
+
+ void getAllTreesAndLikelihoods(vector<tree>& resTree,VdoubleRep & scores) {
+ resTree = _allPossibleTrees;
+ scores = _allPossibleScores;
+ }
+
+ void recursiveFind( tree et,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ vector<int> idLeft,
+ const Vdouble * weights = NULL,
+ const int maxIterations=1000,
+ const MDOUBLE epsilon=0.05);
+
+ void recursiveFind( const sequenceContainer* sc,
+ const stochasticProcess* sp,
+ const Vdouble * weights = NULL,
+ const int maxIterations=1000,
+ const MDOUBLE epsilon=0.05); // one tree.
+
+
+
+private:
+ tree _bestTree;
+ MDOUBLE _bestScore;
+ vector<tree> _allPossibleTrees;
+ vector<doubleRep> _allPossibleScores;
+ const bool _keepAllTrees;
+
+
+ MDOUBLE evalTree(tree& et,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const Vdouble * weights = NULL);
+
+
+
+
+};
+#endif
+
diff --git a/libs/phylogeny/allTreesSeparateModel.cpp b/libs/phylogeny/allTreesSeparateModel.cpp
new file mode 100644
index 0000000..acb7407
--- /dev/null
+++ b/libs/phylogeny/allTreesSeparateModel.cpp
@@ -0,0 +1,83 @@
+// $Id: allTreesSeparateModel.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "treeIt.h"
+#include "allTreesSeparateModel.h"
+#include "bblEMSeperate.h"
+#include <algorithm>
+#include <iostream>
+
+#include "someUtil.h"
+
+using namespace std;
+#ifndef VERBOS
+#define VERBOS
+#endif
+
+
+allTreesSeparateModel::allTreesSeparateModel(){
+ _bestScore = VERYSMALL;
+}
+
+void allTreesSeparateModel::recursiveFind( const vector<sequenceContainer>* sc,
+ const vector<stochasticProcess>* sp,
+ const vector<Vdouble* > * weights,
+ const int maxIterations,
+ const MDOUBLE epsilon){
+ tree starT;
+ vector<int> ids;
+ get3seqTreeAndIdLeftVec(&(*sc)[0],starT,ids);
+ recursiveFind(starT,*sp,*sc,ids,weights,maxIterations,epsilon);
+}
+
+void allTreesSeparateModel::recursiveFind(tree et,
+ const vector<stochasticProcess>& sp,
+ const vector<sequenceContainer>& sc,
+ vector<int> idLeft,
+ const vector<Vdouble* > * weights,
+ const int maxIterations,
+ const MDOUBLE epsilon) {
+
+ if (idLeft.empty()) {
+ //static int k=1; k++;
+ MDOUBLE treeScore = evalTree(et,sp,sc,maxIterations,epsilon,weights);
+ //LOG(5,<<"tree: "<<k<<" l= "<<treeScore<<endl);
+ LOG(5,<<".");
+ if (treeScore > _bestScore) {
+ //LOG(5,<<"new Best score!"<<endl);
+ _bestTree = et;
+ _bestScore = treeScore;
+ _treeVecBest = _treeVecTmp; // keep the seperate trees too.
+ }
+ } else {
+ et.create_names_to_internal_nodes();
+ treeIterTopDown tIt(et);
+ tree::nodeP mynode = tIt.first();
+ mynode = tIt.next(); // skipping the root
+ for (; mynode != tIt.end(); mynode = tIt.next()) {
+ int NameToAdd = idLeft[idLeft.size()-1];
+ tree newT = getAnewTreeFrom(et,mynode,idLeft,sc[0][NameToAdd].name());
+ recursiveFind(newT,sp,sc,idLeft,weights,maxIterations,epsilon);
+ idLeft.push_back(NameToAdd);
+ }
+ }
+}
+
+MDOUBLE allTreesSeparateModel::evalTree( tree& et,
+ const vector<stochasticProcess>& sp,
+ const vector<sequenceContainer>& sc,
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const vector<Vdouble* > * weights) {
+ MDOUBLE res = 0;
+ vector<tree> tVec;
+ for (int k=0; k < sc.size(); ++k ) tVec.push_back(et);
+ bblEMSeperate bblemsep1(tVec,sc,sp,weights,maxIterations,epsilon);
+ res = bblemsep1.getTreeLikelihood();
+ _treeVecTmp = tVec;
+ return res;
+}
+
+
+
+
diff --git a/libs/phylogeny/allTreesSeparateModel.h b/libs/phylogeny/allTreesSeparateModel.h
new file mode 100644
index 0000000..a59744f
--- /dev/null
+++ b/libs/phylogeny/allTreesSeparateModel.h
@@ -0,0 +1,76 @@
+// $Id: allTreesSeparateModel.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___ALL_TREES_SEPARATE_MODEL
+#define ___ALL_TREES_SEPARATE_MODEL
+
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include <vector>
+using namespace std;
+
+void get3seqTreeAndIdLeftVec(const sequenceContainer* sc,
+ tree& starT,
+ vector<int>& idList);
+
+ tree getAnewTreeFrom( const tree& et,
+ tree::nodeP & mynode,
+ vector<int> & idLeft,
+ const string& nameToAdd);
+
+
+class allTreesSeparateModel {
+public:
+ explicit allTreesSeparateModel();
+ MDOUBLE getBestScore() {return _bestScore;}
+ tree getBestTree() {return _bestTree;}
+
+ void recursiveFind(tree et,
+ const vector<stochasticProcess>& sp,
+ const vector<sequenceContainer>& sc,
+ vector<int> idLeft,
+ const vector<Vdouble* > * weights=NULL,
+ const int maxIterations=1000,
+ const MDOUBLE epsilon=0.05);
+
+ void recursiveFind( const vector<sequenceContainer>* sc,
+ const vector<stochasticProcess>* sp,
+ const vector<Vdouble* > * weights= NULL,
+ const int maxIterations=1000,
+ const MDOUBLE epsilon=0.05); // one tree.
+
+ vector<tree> getTreeVecBest() {return _treeVecBest;}
+
+private:
+ tree _bestTree;
+ MDOUBLE _bestScore;
+ vector<tree> _treeVecTmp; // same tree topologies, diff branch lengths
+ vector<tree> _treeVecBest;// same tree topologies, diff branch lengths
+
+
+ MDOUBLE evalTree( tree& et,
+ const vector<stochasticProcess>& sp,
+ const vector<sequenceContainer>& sc,
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const vector<Vdouble* > * weights = NULL);
+
+};
+#endif
+
+ // const stochasticProcess* _sp;
+ //const sequenceContainer* _sc;
+ //const Vdouble * _weights;
+
+ //vector<tree> getBestTreesSep() {return _bestSepTrees;}
+ //vector<tree> _bestSepTrees;
+ //vector<tree> _tmpSepTrees;
+ //vector<tree> recursiveFindSep(const vector<sequenceContainer>* sc,
+ // const vector<stochasticProcess>* sp,
+ // const vector<Vdouble *> * weights,
+ // const int maxIterations=1000,
+ // const MDOUBLE epsilon=0.05); // sep model
+ //const vector<sequenceContainer>* _scVec;
+ //vector<stochasticProcess>* _spVec; // not const, so in proportional for example it can be changed.
+ //const vector<Vdouble *> * _weightsVec;
diff --git a/libs/phylogeny/alphaTrivialAccelerator.h b/libs/phylogeny/alphaTrivialAccelerator.h
new file mode 100644
index 0000000..fdfe74a
--- /dev/null
+++ b/libs/phylogeny/alphaTrivialAccelerator.h
@@ -0,0 +1,56 @@
+// $Id: alphaTrivialAccelerator.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___ALPHA_TRIVIAL_ACCELERATOR
+#define ___ALPHA_TRIVIAL_ACCELERATOR
+
+#include "pijAccelerator.h"
+#include "readDatMatrix.h"
+class alphaTrivialAccelerator : public pijAccelerator {
+public:
+
+ explicit alphaTrivialAccelerator(pupAll* pb, const MDOUBLE alpha) :
+ _pb(static_cast<pupAll *> (pb->clone())),
+ _alpha(alpha)
+ {};
+
+ alphaTrivialAccelerator(const alphaTrivialAccelerator& other):
+ _pb(NULL),
+ _alpha(other._alpha) {
+ if (other._pb != NULL)
+ _pb = static_cast<pupAll *>(other._pb->clone());
+ }
+
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _pb->Pij_tAlpha(i,j,d,_alpha);}
+
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _pb->Pij_tAlpha_dt(i,j,d,_alpha);};
+
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _pb->Pij_tAlpha_dt2(i,j,d,_alpha);};
+
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const {return _pb->Pij_tAlpha(i,j,d,alpha);}
+
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const{return _pb->Pij_tAlpha_dt(i,j,d,alpha);};
+
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d, const MDOUBLE alpha) const{return _pb->Pij_tAlpha_dt2(i,j,d,alpha);};
+
+ const MDOUBLE freq(const int i) const{return _pb->freq(i);}
+
+ virtual pijAccelerator* clone() const { return new alphaTrivialAccelerator(*this);}
+
+ virtual ~alphaTrivialAccelerator() {delete _pb;}
+
+ virtual const int alphabetSize() const {return _pb->alphabetSize();}
+
+ virtual replacementModel* getReplacementModel() const {
+ return (static_cast<replacementModel * const>(_pb));
+ }
+
+ const MDOUBLE alpha(void) const {return _alpha;}
+ void setAlpha(const MDOUBLE alpha) {_alpha=alpha;}
+
+private:
+ pupAll* _pb;
+ MDOUBLE _alpha;
+};
+
+#endif
+
diff --git a/libs/phylogeny/alphabet.cpp b/libs/phylogeny/alphabet.cpp
new file mode 100644
index 0000000..5729b63
--- /dev/null
+++ b/libs/phylogeny/alphabet.cpp
@@ -0,0 +1,7 @@
+// $Id: alphabet.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "alphabet.h"
+
+// Out-of-line definition of the pure virtual destructor: a pure virtual
+// dtor must still have a body because derived destructors call it.
+alphabet::~alphabet(){}
+// this must be here. see Effective c++ page 63 (item 14, constructors, destructors,
+// assignment
diff --git a/libs/phylogeny/alphabet.h b/libs/phylogeny/alphabet.h
new file mode 100644
index 0000000..036fb99
--- /dev/null
+++ b/libs/phylogeny/alphabet.h
@@ -0,0 +1,32 @@
+// $Id: alphabet.h 1901 2007-03-15 13:21:06Z nimrodru $
+
+// version 1.01
+// last modified 1 Jan 2004
+
+#ifndef ___ALPHABET_H
+#define ___ALPHABET_H
+
+#include <string>
+#include <vector>
+using namespace std;
+
+// Abstract base class for biological alphabets (amino acids, nucleotides,
+// codons, ...). A concrete alphabet maps characters to integer ids and
+// back, and defines which ids are compatible with each other.
+class alphabet {
+public:
+ // 1 if charToCheck is compatible with the (possibly ambiguous) charInSeq, else 0.
+ virtual int relations(const int charInSeq, const int charToCheck) const =0;
+ // id of the character starting at position pos of seq (stringSize() letters long).
+ virtual int fromChar(const string& seq,const int pos) const =0;
+ // textual representation of the given id.
+ virtual string fromInt(const int in_id) const =0;
+ // number of unambiguous characters in the alphabet.
+ virtual int size() const =0;
+ virtual ~alphabet()=0; // pure virtual; body lives in alphabet.cpp
+ virtual int unknown() const =0; // id used for unknown characters
+ virtual int gap() const =0; // id used for gap characters
+ virtual alphabet* clone() const = 0;
+ virtual int stringSize() const =0; // letters per character (e.g. 1 for amino, 3 for codon)
+ virtual vector<int> fromString(const string& str) const =0;
+
+ // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
+ virtual bool isSpecific(const int in_id) const =0;
+
+};
+
+#endif
+
diff --git a/libs/phylogeny/amino.cpp b/libs/phylogeny/amino.cpp
new file mode 100644
index 0000000..959c3fa
--- /dev/null
+++ b/libs/phylogeny/amino.cpp
@@ -0,0 +1,152 @@
+// $Id: amino.cpp 2414 2007-10-08 14:34:42Z adist $
+
+#include "amino.h"
+
+//VVint amino::_relation;
+
+// Precompute the compatibility table _relation[24][20]: rows are character
+// ids shifted by +2 (ids run from -2, unknown, up to 21, 'Z'), columns are
+// the 20 specific residue ids. After this, relations() is a table lookup.
+amino::amino() {
+ _relation.resize(24); // relation should realy be an allocted, two dimentional array, not a vector.
+ for (int i=0; i < _relation.size(); ++i) { // this implementation would be much faster. with some c-tricks, this checkup could be done with one access only.
+ _relation[i].resize(20);
+ }
+
+ // k spans every legal character id: -2 (unknown) .. 21 ('Z')
+ for (int k=-2;k<=21;++k){
+ for (int j=0;j<20;++j){
+ _relation[k+2][j]=relations_internal(k,j);
+ }
+ }
+}
+
+// Map a one-letter amino-acid code (either case) to its integer id:
+// 0-19 standard residues, 20/21 the ambiguity codes B/Z, -1 gap,
+// -2 unknown ('?','*','x','X'), -3 '.'. Any other character reports
+// an error (reportError does not return normally; -99 is unreachable).
+int amino::fromChar(const char s) const{
+ switch (s) {
+ case 'A' : case'a' : return 0 ; break;
+ case 'R' : case'r' : return 1 ; break;
+ case 'N' : case'n' : return 2 ; break;
+ case 'D' : case'd' : return 3 ; break;
+ case 'C' : case'c' : return 4 ; break;
+ case 'Q' : case'q' : return 5 ; break;
+ case 'E' : case'e' : return 6 ; break;
+ case 'G' : case'g' : return 7 ; break;
+ case 'H' : case'h' : return 8 ; break;
+ case 'I' : case'i' : return 9 ; break;
+ case 'L' : case'l' : return 10; break;
+ case 'K' : case'k' : return 11; break;
+ case 'M' : case'm' : return 12; break;
+ case 'F' : case'f' : return 13; break;
+ case 'P' : case'p' : return 14; break;
+ case 'S' : case's' : return 15; break;
+ case 'T' : case't' : return 16; break;
+ case 'W' : case'w' : return 17; break;
+ case 'Y' : case'y' : return 18; break;
+ case 'V' : case'v' : return 19; break;
+ case 'B' : case'b' : return 20 ; break; // aspartate(D) or asparagine(N)
+ case 'Z' : case'z' : return 21 ; break; // glutamate (E) or glutamine(Q)
+ case '-' : case'_' : return -1; break;
+ case '?' : case'*' : return -2; break;
+ case 'x' : case'X' : return -2; break;
+ case '.' : return -3; break;
+ default:
+ vector<string> err;
+ err.push_back(" The amino-acid sequences contained the character: ");
+ err[0]+=s;
+ err.push_back(" Amino acid was not one of the following: ");
+ err.push_back(" A, B, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V, X, Z, -, ?");
+ err.push_back(" a, b, r, n, d, c, q, e, g, h, i, l, k, m, f, p, s, t, w, y, v, x, z, _, *");
+ errorMsg::reportError(err);
+ }// end of switch
+ return -99; // never suppose to be here.
+}// end of function
+
+// Convert a whole sequence string to its vector of character ids.
+vector<int> amino::fromString(const string &str) const {
+ vector<int> ids;
+ ids.reserve(str.size()); // one id per input character
+ for (string::size_type pos = 0; pos < str.size(); ++pos)
+ ids.push_back(fromChar(str[pos]));
+ return ids;
+}
+
+// Inverse of fromChar: map an id back to its one-letter (upper-case) code.
+// Ids 0..21 come from a lookup table; -1/-2/-3 are gap/unknown/'.'.
+string amino::fromInt(const int in_id) const{
+ static const char aminoTable[] = "ARNDCQEGHILKMFPSTWYVBZ"; // ids 0..21 in order
+ char res = 0;
+ if ((in_id >= 0) && (in_id <= 21)) {
+ res = aminoTable[in_id];
+ } else if (in_id == -1) {
+ res = '-';
+ } else if (in_id == -2) {
+ res = 'X';
+ } else if (in_id == -3) {
+ res = '.';
+ } else {
+ vector<string> err;
+ err.push_back(" unable to print amino ac_id. amino ac_id was not one of the following: ");
+ err.push_back("A, B, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V, Z, -, ?");
+ err.push_back("a, b, r, n, d, c, q, e, g, h, i, l, k, m, f, p, s, t, w, y, v, z, _, *");
+ errorMsg::reportError(err);
+ }
+ string vRes;
+ vRes.append(1,res);
+ return vRes;
+}// end of function
+
+// 1 if the specific residue charToCheck (0..19) is compatible with the
+// (possibly ambiguous) character charInSeq, else 0. Gaps are rejected.
+int amino::relations(const int charInSeq, const int charToCheck) const{
+ if (charInSeq == -1) {
+ errorMsg::reportError("gaps in the sequences. Either change gaps to ? or remove gap positions");
+ }
+ // BUGFIX: guard the table lookup. _relation rows cover ids -2..21 only;
+ // e.g. '.' (id -3) would previously index _relation[-1] (undefined behavior).
+ if ((charInSeq < -2) || (charInSeq > 21)) {
+ errorMsg::reportError("amino::relations: character id out of range");
+ }
+ return _relation[charInSeq+2][charToCheck];// <-MATAN, HERE YOU SWITHCED THE ORDER...
+}
+
+// alphabet interface adapter: amino characters are single letters,
+// so just translate the character at the given position.
+int amino::fromChar(const string& str, const int pos) const{
+ const char letter = str[pos];
+ return fromChar(letter);
+}
+
+// Compatibility rule used to build the lookup table: a character matches
+// itself, '?' matches everything, and the ambiguity codes B/Z match the
+// residues they stand for.
+int amino::relations_internal(const int charInSeq, const int charToCheck) const{
+ if ((charInSeq == charToCheck) || (charInSeq == fromChar('?')))
+ return 1;
+ // B is either N or D
+ if (charInSeq == fromChar('B') &&
+ (charToCheck == fromChar('N') || charToCheck == fromChar('D')))
+ return 1;
+ // Z is either E or Q
+ if (charInSeq == fromChar('Z') &&
+ (charToCheck == fromChar('Q') || charToCheck == fromChar('E')))
+ return 1;
+ return 0;
+}
+
+
+// Return the ids of all codons that translate to amino acid a under the
+// genetic code carried by cod. Reports an error if none is found
+// (e.g. the id maps to a character absent from the code table).
+vector<int> aminoUtility::codonOf(const int a, codon &cod){
+ vector<int> codons;
+ amino amin;
+ const string strAmino = amin.fromInt(a);
+ map<string, string> genCode = cod.geneticCode();
+ // scan the whole codon->amino table and collect every matching codon.
+ // (removed a dead local copy of (*it).second that was never read)
+ for (map<string, string>::const_iterator it = genCode.begin(); it != genCode.end(); ++it) {
+ if (it->second == strAmino) {
+ codons.push_back(cod.fromChar(it->first, 0));
+ }
+ }
+ if (codons.empty()){
+ cout<<genCode.size()<<" amino is = "<<a<<endl; // diagnostic before aborting
+ errorMsg::reportError("error in function aminoUtility::codonOf: no codon found for amino acid");
+ }
+ return codons;
+}
diff --git a/libs/phylogeny/amino.h b/libs/phylogeny/amino.h
new file mode 100644
index 0000000..085de81
--- /dev/null
+++ b/libs/phylogeny/amino.h
@@ -0,0 +1,46 @@
+// $Id: amino.h 1901 2007-03-15 13:21:06Z nimrodru $
+
+#ifndef ____AMINO
+#define ____AMINO
+
+#include "definitions.h"
+#include "errorMsg.h"
+#include "alphabet.h"
+#include "geneticCodeHolder.h"
+#include "codon.h"
+
+
+//utility of amino acid
+//utility of amino acid
+class aminoUtility {
+public:
+
+ static vector<int> codonOf(const int a, codon &cod); //returns vector of codons that code to a under a specific genetic code.
+
+};
+
+//based on the amino-acid list found in http://www.dur.ac.uk/~dbl0www/Bioinformatics/aminoacids.htm
+//based on the amino-acid list found in http://www.dur.ac.uk/~dbl0www/Bioinformatics/aminoacids.htm
+// Concrete 20-letter amino-acid alphabet. Ids: 0..19 residues,
+// 20/21 the ambiguity codes B/Z, -1 gap, -2 unknown, -3 '.'.
+class amino : public alphabet {
+public:
+ explicit amino();
+ virtual ~amino() {}
+ virtual alphabet* clone() const { return new amino(*this); }
+ int unknown() const {return -2;}
+ int gap() const {return -1;}
+ int size() const {return 20;} // ambiguity codes are not counted
+ int stringSize() const {return 1;} // one letter code.
+ int relations(const int charInSeq, const int charToCheck) const;
+ int fromChar(const string& str, const int pos) const;
+ int fromChar(const char s) const;
+ string fromInt(const int in_id) const;
+ vector<int> fromString(const string& str) const;
+ // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
+ bool isSpecific(const int id) const {return (id>=0 && id < size());}
+
+private:
+ int relations_internal(const int charInSeq, const int charToCheck) const;
+ VVint _relation; // precomputed compatibility table, built by the ctor
+};//end of class
+
+#endif
+
+
diff --git a/libs/phylogeny/bblEM.cpp b/libs/phylogeny/bblEM.cpp
new file mode 100644
index 0000000..b188f7b
--- /dev/null
+++ b/libs/phylogeny/bblEM.cpp
@@ -0,0 +1,156 @@
+// $Id: bblEM.cpp 6110 2009-04-26 14:06:02Z cohenofi $
+#include "bblEM.h"
+#include "likelihoodComputation.h"
+using namespace likelihoodComputation;
+#include "computeUpAlg.h"
+#include "computeDownAlg.h"
+#include "computeCounts.h"
+#include "treeIt.h"
+#include "fromCountTableComponentToDistance.h"
+#include <ctime>
+
+// Construct and immediately run EM branch-length optimization.
+// The optimized branch lengths are written back into et; the final
+// log-likelihood is available via getTreeLikelihood().
+bblEM::bblEM(tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const Vdouble * weights,
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const MDOUBLE tollForPairwiseDist,
+ unObservableData* _unObservableData_p) :
+_et(et),_sc(sc),_sp(sp),_weights(weights),_unObservableData_p(_unObservableData_p)
+{
+ _treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist);
+}
+
+
+// Main EM loop: repeat (computeUp -> likelihood -> bblEM_it) until the
+// improvement drops below epsilon, the likelihood decreases, or
+// maxIterations is hit. Returns the best log-likelihood; _et holds the
+// corresponding tree.
+MDOUBLE bblEM::compute_bblEM(
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const MDOUBLE tollForPairwiseDist){
+ allocatePlace();
+ MDOUBLE oldL=VERYSMALL;
+ MDOUBLE currL = VERYSMALL;
+ tree oldT = _et;
+ for (int i=0; i < maxIterations; ++i) {
+ computeUp();
+ currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights,_unObservableData_p);
+ if (currL < oldL + epsilon) { // need to break
+ if (currL<oldL) {
+ _et = oldT; // revert to the tree from before the last bblEM_it
+ if(_unObservableData_p)
+ _unObservableData_p->setLforMissingData(_et,&_sp);
+ return oldL; // keep the old tree, and old likelihood
+ } else {
+ //update the tree and likelihood and return
+ return currL;
+ }
+ }
+ // BUGFIX: snapshot the tree only now, just before bblEM_it modifies it.
+ // It was previously taken before the convergence test above, which made
+ // the "_et = oldT" restore a no-op (oldT already equaled the new tree).
+ // This matches the sibling implementations bblEM2codon / bblEM2USSRV.
+ oldT = _et;
+ bblEM_it(tollForPairwiseDist);
+ oldL = currL;
+ }
+ // in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
+ computeUp();
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(_et,&_sp);
+ }
+ currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights,_unObservableData_p);
+ if (currL<oldL) {
+ _et = oldT;
+ if(_unObservableData_p)
+ _unObservableData_p->setLforMissingData(_et,&_sp);
+ return oldL; // keep the old tree, and old likelihood
+ }
+ else
+ return currL;
+}
+
+// Allocate one count table per tree node (rate-categories x alphabet)
+// plus the up/down sufficient-statistic buffers.
+void bblEM::allocatePlace() {
+ const int nNodes = _et.getNodesNum();
+ _computeCountsV.resize(nNodes); //initiateTablesOfCounts
+ for (size_t tbl = 0; tbl < _computeCountsV.size(); ++tbl) {
+ _computeCountsV[tbl].countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories());
+ }
+ _cup.allocatePlace(_sc.seqLen(),_sp.categories(), nNodes, _sc.alphabetSize());
+ _cdown.allocatePlace(_sp.categories(), nNodes, _sc.alphabetSize());
+}
+
+// One EM iteration: zero the count tables, accumulate expected counts for
+// every alignment position, then re-estimate all branch lengths.
+void bblEM::bblEM_it(const MDOUBLE tollForPairwiseDist){
+ for (size_t tbl = 0; tbl < _computeCountsV.size(); ++tbl)
+ _computeCountsV[tbl].zero();
+ for (int pos = 0; pos < _sc.seqLen(); ++pos) {
+ computeDown(pos);
+ addCounts(pos); // computes the counts and adds to the table.
+ }
+ optimizeBranches(tollForPairwiseDist);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(_et,&_sp);
+ }
+}
+
+// Re-estimate the branch above every non-root node from its count table.
+void bblEM::optimizeBranches(const MDOUBLE tollForPairwiseDist){
+ treeIterDownTopConst treeIter(_et);
+ for (tree::nodeP node = treeIter.first(); node != treeIter.end(); node = treeIter.next()) {
+ if (treeIter->isRoot()) continue; // the root has no branch above it
+ fromCountTableComponentToDistance distEstimator(_computeCountsV[node->id()],_sp,tollForPairwiseDist,node->dis2father(),_unObservableData_p);
+ distEstimator.computeDistance();
+ node->setDisToFather(distEstimator.getDistance());
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(_et,&_sp);
+ }
+ }
+}
+
+// Fill the per-category Pij tables and the "up" partial likelihoods
+// for every position and rate category.
+void bblEM::computeUp(){
+ _pij.fillPij(_et,_sp,0); // 0: we want Pij(t) itself, not a derivative
+ computeUpAlg upAlg;
+ for (int pos=0; pos < _sc.seqLen(); ++pos) {
+ for (int cat = 0; cat < _sp.categories(); ++cat)
+ upAlg.fillComputeUp(_et,_sc,pos,_pij[cat],_cup[pos][cat]);
+ }
+}
+
+// Fill the "down" partial likelihoods for one position, per rate category.
+void bblEM::computeDown(const int pos){
+ computeDownAlg downAlg;
+ for (int cat = 0; cat < _sp.categories(); ++cat)
+ downAlg.fillComputeDown(_et,_sc,pos,_pij[cat],_cdown[cat],_cup[pos][cat]);
+}
+
+// Accumulate the expected transition counts of one position into every
+// non-root node's table, weighted by the position weight.
+void bblEM::addCounts(const int pos){
+ const MDOUBLE posWeight = (_weights ? (*_weights)[pos] : 1.0);
+ if (posWeight == 0) return; // weighted out - nothing to accumulate
+ treeIterDownTopConst treeIter(_et);
+ for (tree::nodeP node = treeIter.first(); node != treeIter.end(); node = treeIter.next()) {
+ if (!treeIter->isRoot())
+ addCounts(pos,node,_posLike[pos],posWeight);
+ }
+}
+
+// Accumulate, for every rate category, the expected father->son transition
+// counts along the branch above mynode at position pos into this node's
+// table, weighted by weig and normalized by the position likelihood posProb.
+void bblEM::addCounts(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig){
+
+ computeCounts cc;
+ for (int categor =0; categor< _sp.categories(); ++ categor) {
+ cc.computeCountsNodeFatherNodeSonHomPos(_sc,
+ _pij[categor],
+ _sp,
+ _cup[pos][categor],
+ _cdown[categor],
+ weig,
+ posProb,
+ mynode,
+ _computeCountsV[mynode->id()][categor],
+ _sp.ratesProb(categor));
+ }
+}
+
diff --git a/libs/phylogeny/bblEM.h b/libs/phylogeny/bblEM.h
new file mode 100644
index 0000000..e88315e
--- /dev/null
+++ b/libs/phylogeny/bblEM.h
@@ -0,0 +1,56 @@
+// $Id: bblEM.h 4742 2008-08-19 17:40:56Z cohenofi $
+#ifndef ___BBL_EM_H
+#define ___BBL_EM_H
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "countTableComponent.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include "unObservableData.h"
+
+#include <vector>
+
+using namespace std;
+
+// EM-based branch-length optimization (BBL-EM) for a single stochastic
+// process over one sequence container. Construction runs the whole
+// optimization; query the result with getTreeLikelihood().
+class bblEM {
+public:
+ explicit bblEM(tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const Vdouble * weights = NULL,
+ const int maxIterations=50,
+ const MDOUBLE epsilon=0.05,
+ const MDOUBLE tollForPairwiseDist=0.001,
+ unObservableData* unObservableData_p=NULL);
+ MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
+
+private:
+ MDOUBLE compute_bblEM(const int maxIterations,
+ const MDOUBLE epsilon,
+ const MDOUBLE tollForPairwiseDist);
+ void bblEM_it(const MDOUBLE tollForPairwiseDist); // one EM iteration
+ void computeDown(const int pos);
+ void computeUp();
+ void addCounts(const int pos);
+ void addCounts(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig);
+ void optimizeBranches(const MDOUBLE tollForPairwiseDist);
+ void allocatePlace();
+
+
+ MDOUBLE _treeLikelihood; // best log-likelihood found
+ tree& _et; // the tree being optimized (modified in place)
+ const sequenceContainer& _sc;
+ const stochasticProcess& _sp;
+ vector<countTableComponentGam> _computeCountsV; // for each node - a table of rate*alph*alph
+ computePijGam _pij;
+ suffStatGlobalGam _cup; // "up" sufficient statistics
+ suffStatGlobalGamPos _cdown; // "down" sufficient statistics (one position)
+ const Vdouble * _weights; // optional per-position weights (may be NULL)
+ VdoubleRep _posLike; // per-position likelihoods
+ unObservableData* _unObservableData_p; // correction for unobservable data (may be NULL)
+};
+
+#endif
diff --git a/libs/phylogeny/bblEM2USSRV.cpp b/libs/phylogeny/bblEM2USSRV.cpp
new file mode 100644
index 0000000..199f8e5
--- /dev/null
+++ b/libs/phylogeny/bblEM2USSRV.cpp
@@ -0,0 +1,181 @@
+// $Id: bblEM2USSRV.cpp 1944 2007-04-18 12:41:14Z osnatz $
+#include "bblEM2USSRV.h"
+
+// Construct and immediately run BBL-EM for the USSRV model (a mixture of
+// a base model and a site-specific-rate-variation model). The optimized
+// branch lengths are written back into et.
+bblEM2USSRV::bblEM2USSRV(tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ const ussrvModel& model,
+ const Vdouble * weights,
+ int maxIterations,
+ MDOUBLE epsilon,
+ MDOUBLE tollForPairwiseDist) :
+_et(et),_sc(sc),_baseSc(baseSc),_model(model),_weights (weights)
+{
+ LOG(5,<<"******BBL EM USSRV*********"<<endl<<endl);
+ _treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist);
+}
+
+// @@@@ Need to check if we can make it more efficient
+MDOUBLE bblEM2USSRV::compute_bblEM(
+ int maxIterations,
+ MDOUBLE epsilon,
+ MDOUBLE tollForPairwiseDist){
+
+ allocatePlace();
+ MDOUBLE oldL = VERYSMALL;
+ MDOUBLE currL = VERYSMALL;
+ tree oldT = _et;
+ for (int i=0; i < maxIterations; ++i) {
+ computeUp();
+ // Calculate the likelihood and fill the _posLike
+ currL = likelihoodComputation2USSRV::getTreeLikelihoodFromUp2(_et,
+ _sc,_baseSc,_model,_cupBase,_cupSSRV,_posLike,_weights);
+ //////////////
+ LOGDO(5,printTime(myLog::LogFile()));
+ LOG(5,<<"iteration no "<<i << " in BBL "<<endl);
+ LOG(5,<<"old best L= "<<oldL<<endl);
+ LOG(5,<<"current best L= "<<currL<<endl);
+
+
+ if (currL < oldL + epsilon) { // need to break
+ if (currL<oldL) {
+ cout<<"******** PROBLEMS IN BBL USSRV*********"<<endl;
+ LOG(5,<<"old best L= "<<oldL<<endl);
+ LOG(5,<<"current best L= "<<currL<<endl);
+ _et = oldT;
+ return oldL; // keep the old tree, and old likelihood
+ } else {
+ //update the tree and likelihood and return
+ LOG(5,<<"old best L= "<<oldL<<endl);
+ LOG(5,<<"current best L= "<<currL<<endl);
+ return currL;
+ }
+ }
+ oldT = _et;
+ bblEM_it(tollForPairwiseDist);
+ oldL = currL;
+ }
+ // in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
+ computeUp();
+ currL = likelihoodComputation2USSRV::getTreeLikelihoodFromUp2(_et,
+ _sc,_baseSc,_model,_cupBase,_cupSSRV,_posLike,_weights);
+ if (currL<oldL) {
+ _et = oldT;
+ return oldL; // keep the old tree, and old likelihood
+ }
+ else
+ return currL;
+}
+
+
+// Allocate one pair of count tables (base model + SSRV model) per tree
+// node, plus the up/down buffers for both models.
+void bblEM2USSRV::allocatePlace() {
+ const int nNodes = _et.getNodesNum();
+ _computeCountsBaseV.resize(nNodes); //initiateTablesOfCounts
+ _computeCountsSsrvV.resize(nNodes); //initiateTablesOfCounts
+ for (size_t node = 0; node < _computeCountsBaseV.size(); ++node) {
+ _computeCountsBaseV[node].countTableComponentAllocatePlace(_model.getBaseModel().alphabetSize(),_model.noOfCategor());
+ _computeCountsSsrvV[node].countTableComponentAllocatePlace(_model.getSSRVmodel().alphabetSize());
+ }
+ _cupBase.allocatePlace(_baseSc.seqLen(),_model.noOfCategor(), nNodes, _baseSc.alphabetSize());
+ _cupSSRV.allocatePlace(_sc.seqLen(), nNodes, _sc.alphabetSize());
+ _cdownBase.allocatePlace(_model.noOfCategor(), nNodes, _baseSc.alphabetSize());
+ _cdownSSRV.allocatePlace(nNodes, _sc.alphabetSize());
+}
+
+// One EM iteration: reset both families of count tables, accumulate
+// expected counts position by position, then update the branch lengths.
+void bblEM2USSRV::bblEM_it(MDOUBLE tollForPairwiseDist){
+ for (size_t node = 0; node < _computeCountsBaseV.size(); ++node) {
+ _computeCountsBaseV[node].zero();
+ _computeCountsSsrvV[node].zero();
+ }
+ for (int pos = 0; pos < _sc.seqLen(); ++pos) {
+ computeDown(pos);
+ addCounts(pos); // computes the counts and adds to the table.
+ }
+ optimizeBranches(tollForPairwiseDist);
+}
+
+// @@@@ need to print the tree
+void bblEM2USSRV::optimizeBranches(MDOUBLE tollForPairwiseDist){
+ treeIterDownTopConst tIt(_et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ if (!tIt->isRoot()) {
+ fromCountTableComponentToDistance2USSRV
+ from1(_computeCountsBaseV[mynode->id()],_computeCountsSsrvV[mynode->id()],_model,tollForPairwiseDist,mynode->dis2father());
+ from1.computeDistance();
+ mynode->setDisToFather(from1.getDistance());
+ }
+ }
+}
+
+// Fill the Pij tables for both sub-models and the "up" partial
+// likelihoods for every position.
+void bblEM2USSRV::computeUp(){
+ _pijBase.fillPij(_et,_model.getBaseModel(),0); // 0: Pij(t) itself, not a derivative
+ _pijSSRV.fillPij(_et,_model.getSSRVmodel(),0);
+
+ computeUpAlg upAlg;
+ for (int pos=0; pos < _sc.seqLen(); ++pos) {
+ // base model: one pass per rate category
+ for (int cat = 0; cat < _model.noOfCategor(); ++cat)
+ upAlg.fillComputeUp(_et,_baseSc,pos,_pijBase[cat],_cupBase[pos][cat]);
+ // SSRV model: a single homogeneous pass
+ upAlg.fillComputeUp(_et,_sc,pos,_pijSSRV,_cupSSRV[pos]);
+ }
+}
+
+// Fill the "down" partial likelihoods of one position for both sub-models.
+void bblEM2USSRV::computeDown(int pos){
+ computeDownAlg downAlg;
+ // base model: one pass per rate category
+ for (int cat = 0; cat < _model.noOfCategor(); ++cat)
+ downAlg.fillComputeDown(_et,_baseSc,pos,_pijBase[cat],_cdownBase[cat],_cupBase[pos][cat]);
+ // SSRV model: a single homogeneous pass
+ downAlg.fillComputeDown(_et,_sc,pos,_pijSSRV,_cdownSSRV,_cupSSRV[pos]);
+}
+
+// Accumulate the expected counts of one position into every non-root
+// node's tables, weighted by the position weight.
+void bblEM2USSRV::addCounts(int pos){
+ const MDOUBLE posWeight = (_weights ? (*_weights)[pos] : 1.0);
+ if (posWeight == 0) return; // weighted out - nothing to accumulate
+ treeIterDownTopConst treeIter(_et);
+ for (tree::nodeP node = treeIter.first(); node != treeIter.end(); node = treeIter.next()) {
+ if (!treeIter->isRoot())
+ addCounts(pos,node,_posLike[pos],posWeight);
+ }
+}
+
+// Accumulate the expected father->son transition counts for the branch
+// above mynode at position pos: once per base-model category (weighted by
+// the category probability times 1-F) and once for the SSRV model
+// (weighted by F, the mixture proportion).
+void bblEM2USSRV::addCounts(int pos, tree::nodeP mynode, doubleRep posProb, MDOUBLE weig){
+
+ computeCounts cc;
+ int categor;
+ // base Model
+ for (categor =0; categor< _model.noOfCategor(); ++categor) {
+ cc.computeCountsNodeFatherNodeSonHomPos(_baseSc,
+ _pijBase[categor],
+ _model.getBaseModel(),
+ _cupBase[pos][categor],
+ _cdownBase[categor],
+ weig,
+ posProb,
+ mynode,
+ _computeCountsBaseV[mynode->id()][categor],
+ _model.getCategorProb(categor)*(1-_model.getF()));
+
+ }
+ // SSRV model
+ cc.computeCountsNodeFatherNodeSonHomPos(_sc,
+ _pijSSRV,
+ _model.getSSRVmodel(),
+ _cupSSRV[pos],
+ _cdownSSRV,
+ weig,
+ posProb,
+ mynode,
+ _computeCountsSsrvV[mynode->id()],
+ _model.getF());
+}
+
+
+
diff --git a/libs/phylogeny/bblEM2USSRV.h b/libs/phylogeny/bblEM2USSRV.h
new file mode 100644
index 0000000..d58f084
--- /dev/null
+++ b/libs/phylogeny/bblEM2USSRV.h
@@ -0,0 +1,73 @@
+// $Id: bblEM2USSRV.h 1504 2007-01-15 14:04:44Z osnatz $
+//copy of bblEM of the codon model + changes
+#ifndef ___BBL_EM_2_USSRV
+#define ___BBL_EM_2_USSRV
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "countTableComponent.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include "ussrvModel.h"
+#include "computeUpAlg.h"
+#include "computeDownAlg.h"
+#include "computeCounts.h"
+#include "treeIt.h"
+#include "fromCountTableComponentToDistance2USSRV.h"
+#include "likelihoodComputation2USSRV.h"
+#include "someUtil.h"
+#include <vector>
+using namespace std;
+// @@@@ maybe should inherit from bblEM
+// @@@@ maybe should inherit from bblEM
+// BBL-EM for the USSRV mixture model: a gamma-category base model plus a
+// site-specific-rate-variation (SSRV) component, combined with proportion F.
+class bblEM2USSRV {
+public:
+ explicit bblEM2USSRV(tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ const ussrvModel &model,
+ const Vdouble * weights = NULL,
+ const int maxIterations=50,
+ const MDOUBLE epsilon=0.05,
+ const MDOUBLE tollForPairwiseDist=0.001);
+ MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
+
+private:
+ MDOUBLE compute_bblEM(int maxIterations,
+ MDOUBLE epsilon,
+ MDOUBLE tollForPairwiseDist);
+ void bblEM_it(MDOUBLE tollForPairwiseDist); // one EM iteration
+ void computeDown(int pos);
+ void computeUp();
+ void addCounts(int pos);
+ void addCounts(int pos, tree::nodeP mynode, doubleRep posProb, MDOUBLE weig);
+ void optimizeBranches(MDOUBLE tollForPairwiseDist);
+ void allocatePlace();
+
+ MDOUBLE _treeLikelihood; // best log-likelihood found
+ tree& _et; // optimized in place
+ const sequenceContainer& _sc;
+ const sequenceContainer& _baseSc;
+ const ussrvModel& _model;
+ vector<countTableComponentGam> _computeCountsBaseV; // for each node - a table of rate*alph*alph (see below)
+ vector<countTableComponentHom> _computeCountsSsrvV; // for each node - a table of rate*alph*alph (see below)
+ computePijGam _pijBase;
+ computePijHom _pijSSRV;
+ suffStatGlobalGam _cupBase;
+ suffStatGlobalHom _cupSSRV;
+ suffStatGlobalGamPos _cdownBase;
+ suffStatGlobalHomPos _cdownSSRV;
+ const Vdouble * _weights; // optional per-position weights (may be NULL)
+ VdoubleRep _posLike; // per-position likelihoods
+};
+
+// _computeCountsV is a vector containing for each node a countTableComponentGam.
+// countTableComponentGam is a vector containing for each rate category a table of size alphabet*alphabet
+// (VVdouble) which should be pre-filled with Pij(x,y,rk) from equation (17) in the EM-BBL theory summary.
+// Pij(x,y,rk) represents the probability of observing x and y along a branch ti at position j with rate from
+// category k.
+// For this reason, we need to initialize this class and calculate it again for every position.
+
+
+#endif // bblEM2USSRV
diff --git a/libs/phylogeny/bblEM2codon.cpp b/libs/phylogeny/bblEM2codon.cpp
new file mode 100644
index 0000000..35900aa
--- /dev/null
+++ b/libs/phylogeny/bblEM2codon.cpp
@@ -0,0 +1,165 @@
+// $Id: bblEM2codon.cpp 2350 2007-08-20 10:53:51Z adist $
+#include "bblEM2codon.h"
+#include "likelihoodComputation.h"
+#include "likelihoodComputation2Codon.h"
+#include "fromCountTableComponentToDistance2Codon.h"
+using namespace likelihoodComputation;
+using namespace likelihoodComputation2Codon;
+#include "computeUpAlg.h"
+#include "computeDownAlg.h"
+#include "computeCounts.h"
+#include "treeIt.h"
+#include "errorMsg.h"
+#include "logFile.h"
+#include <ctime>
+
+// Construct and immediately run BBL-EM for the codon model. Takes a clone
+// of in_distr (released in the destructor); optimized branch lengths are
+// written back into et.
+bblEM2codon::bblEM2codon(tree& et,
+ const sequenceContainer& sc,
+ const vector<stochasticProcess>& spVec,
+ const distribution *in_distr,
+ const Vdouble * weights,
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const MDOUBLE tollForPairwiseDist) :
+ _et(et),_sc(sc),_spVec(spVec),_distr(in_distr->clone()),_weights (weights) {
+
+ LOG(5,<<"******BEGIN OF BBL EM*********"<<endl<<endl);
+ _treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist);
+ LOG(5,<<"******END OF BBL EM*********"<<endl<<endl);
+}
+
+// Release the distribution clone taken in the constructor.
+bblEM2codon::~bblEM2codon(){
+ delete _distr;
+ }
+
+// Main EM loop for the codon model: iterate computeUp -> likelihood ->
+// bblEM_it until improvement < epsilon, likelihood decreases, or
+// maxIterations is reached. oldT always holds the tree from before the
+// most recent bblEM_it, so a decrease can be rolled back.
+MDOUBLE bblEM2codon::compute_bblEM(
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const MDOUBLE tollForPairwiseDist){
+ allocatePlace();
+ MDOUBLE oldL=VERYSMALL;
+ MDOUBLE currL = VERYSMALL;
+ tree oldT = _et;
+ for (int i=0; i < maxIterations; ++i) {
+
+ computeUp();
+ //currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights);
+ currL = likelihoodComputation2Codon::getTreeLikelihoodFromUp2(_et,_sc,_spVec[0],_cup,_posLike,_distr,_weights);
+ //////////////
+ if (i!=0)
+ LOG(5,<<"last best L= "<<oldL<<endl);
+ LOG(5,<<"current best L= "<<currL<<endl<<endl);
+
+ //MDOUBLE checkUpLL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et, _sc, _sp, _weights);
+ //cerr << "checkUpLL = "<<checkUpLL <<" curll = "<<currL<<endl;
+ ///////////////
+
+ if (currL < oldL + epsilon) { // need to break
+ if (currL<oldL) {
+ _et = oldT;
+ return oldL; // keep the old tree, and old likelihood
+ } else {
+ //update the tree and likelihood and return
+ return currL;
+ }
+ }
+ oldT = _et; // snapshot before bblEM_it modifies the tree
+ bblEM_it(tollForPairwiseDist);
+ oldL = currL;
+ }
+ // in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
+ computeUp();
+ currL = likelihoodComputation2Codon::getTreeLikelihoodFromUp2(_et,_sc,_spVec[0],_cup,_posLike,_distr,_weights);
+ //currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights);
+ if (currL<oldL) {
+ _et = oldT;
+ return oldL; // keep the old tree, and old likelihood
+ }
+ else
+ return currL;
+}
+
+// Allocate one count table per tree node (categories x alphabet) plus the
+// up/down sufficient-statistic buffers.
+void bblEM2codon::allocatePlace() {
+ const int nNodes = _et.getNodesNum();
+ _computeCountsV.resize(nNodes); //initiateTablesOfCounts
+ for (size_t node = 0; node < _computeCountsV.size(); ++node)
+ _computeCountsV[node].countTableComponentAllocatePlace(_spVec[0].alphabetSize(),_distr->categories());
+ _cup.allocatePlace(_sc.seqLen(),_distr->categories(), nNodes, _sc.alphabetSize());
+ _cdown.allocatePlace(_distr->categories(), nNodes, _sc.alphabetSize());
+}
+
+// One EM iteration: zero the tables, accumulate expected counts for every
+// position, then re-estimate all branch lengths.
+void bblEM2codon::bblEM_it(const MDOUBLE tollForPairwiseDist){
+ for (size_t node = 0; node < _computeCountsV.size(); ++node)
+ _computeCountsV[node].zero();
+ for (int pos = 0; pos < _sc.seqLen(); ++pos) {
+ computeDown(pos);
+ addCounts(pos); // computes the counts and adds to the table.
+ }
+ optimizeBranches(tollForPairwiseDist);
+}
+
+// Re-estimate the branch above every non-root node from its count table.
+void bblEM2codon::optimizeBranches(const MDOUBLE tollForPairwiseDist){
+ treeIterDownTopConst treeIter(_et);
+ for (tree::nodeP node = treeIter.first(); node != treeIter.end(); node = treeIter.next()) {
+ if (treeIter->isRoot()) continue; // no branch above the root
+ fromCountTableComponentToDistance2Codon distEstimator(_computeCountsV[node->id()],_spVec,tollForPairwiseDist,node->dis2father());
+ distEstimator.computeDistance();
+ node->setDisToFather(distEstimator.getDistance());
+ }
+}
+
+// Fill the per-process Pij tables and the "up" partial likelihoods for
+// every position. NOTE(review): fills _pij by writing its _V member
+// directly (one homogeneous table per stochastic process) instead of
+// using computePijGam::fillPij - confirm _V is meant to be public.
+void bblEM2codon::computeUp(){
+ //_pij.fillPij(_et,_sp,0); // 0 is becaues we compute Pij(t) and not its derivations...
+ _pij._V.resize(_spVec.size());
+ for (int i=0; i < _spVec.size(); ++i) {
+ _pij._V[i].fillPij(_et,_spVec[i]);
+ }
+ computeUpAlg cupAlg;
+ for (int pos=0; pos < _sc.seqLen(); ++pos) {
+ for (int categor = 0; categor < _spVec.size(); ++categor) {
+ cupAlg.fillComputeUp(_et,_sc,pos,_pij[categor],_cup[pos][categor]);
+ }
+ }
+ }
+
+// Fill the "down" partial likelihoods for one position, per category.
+void bblEM2codon::computeDown(const int pos){
+ computeDownAlg downAlg;
+ for (int cat = 0; cat < _distr->categories(); ++cat)
+ downAlg.fillComputeDown(_et,_sc,pos,_pij[cat],_cdown[cat],_cup[pos][cat]);
+}
+
+// Accumulate the expected counts of one position into every non-root
+// node's table, weighted by the position weight.
+void bblEM2codon::addCounts(const int pos){
+ const MDOUBLE posWeight = (_weights ? (*_weights)[pos] : 1.0);
+ if (posWeight == 0) return; // weighted out - nothing to accumulate
+ treeIterDownTopConst treeIter(_et);
+ for (tree::nodeP node = treeIter.first(); node != treeIter.end(); node = treeIter.next()) {
+ if (!treeIter->isRoot())
+ addCounts(pos,node,_posLike[pos],posWeight);
+ }
+}
+
+// Accumulate, per category, the expected father->son transition counts
+// along the branch above mynode at position pos, weighted by weig and the
+// category probability, normalized by the position likelihood posProb.
+void bblEM2codon::addCounts(const int pos, tree::nodeP mynode, const MDOUBLE posProb, const MDOUBLE weig){
+
+ computeCounts cc;
+ for (int categor =0; categor< _distr->categories(); ++ categor) {
+ cc.computeCountsNodeFatherNodeSonHomPos(_sc,
+ _pij[categor],
+ _spVec[categor],
+ _cup[pos][categor],
+ _cdown[categor],
+ weig,
+ posProb,
+ mynode,
+ _computeCountsV[mynode->id()][categor],
+ _distr->ratesProb(categor));
+ }
+}
diff --git a/libs/phylogeny/bblEM2codon.h b/libs/phylogeny/bblEM2codon.h
new file mode 100644
index 0000000..43c549a
--- /dev/null
+++ b/libs/phylogeny/bblEM2codon.h
@@ -0,0 +1,54 @@
+//copy of bblEM of the lib + changing to codon model
+#ifndef ___BBL_EM_2_CODON_H
+#define ___BBL_EM_2_CODON_H
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "countTableComponent.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include <vector>
+using namespace std;
+
+// BBL-EM for the codon model: one stochastic process per rate category
+// (spVec) sharing a rate distribution. Construction runs the whole
+// optimization; query the result with getTreeLikelihood().
+class bblEM2codon {
+public:
+ explicit bblEM2codon(tree& et,
+ const sequenceContainer& sc,
+ const vector<stochasticProcess> &spVec,
+ const distribution *in_distr,
+ const Vdouble * weights = NULL,
+ const int maxIterations=50,
+ const MDOUBLE epsilon=0.05,
+ const MDOUBLE tollForPairwiseDist=0.001);
+ MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}
+ virtual ~bblEM2codon();
+private:
+ MDOUBLE compute_bblEM(const int maxIterations,
+ const MDOUBLE epsilon,
+ const MDOUBLE tollForPairwiseDist);
+ void bblEM_it(const MDOUBLE tollForPairwiseDist); // one EM iteration
+ void computeDown(const int pos);
+ void computeUp();
+ void addCounts(const int pos);
+ void addCounts(const int pos, tree::nodeP mynode, const MDOUBLE posProb, const MDOUBLE weig);
+ void optimizeBranches(const MDOUBLE tollForPairwiseDist);
+ void allocatePlace();
+
+
+ MDOUBLE _treeLikelihood; // best log-likelihood found
+ tree& _et; // optimized in place
+ const sequenceContainer& _sc;
+ const vector<stochasticProcess>& _spVec; // one process per category
+ const distribution *_distr; // owned clone; freed by the destructor
+ vector<countTableComponentGam> _computeCountsV; // for each node - a table of rate*alph*alph
+ computePijGam _pij;
+ suffStatGlobalGam _cup;
+ suffStatGlobalGamPos _cdown;
+ const Vdouble * _weights; // optional per-position weights (may be NULL)
+ Vdouble _posLike; // per-position likelihoods
+
+};
+
+#endif
diff --git a/libs/phylogeny/bblEMProportional.h b/libs/phylogeny/bblEMProportional.h
new file mode 100644
index 0000000..005a306
--- /dev/null
+++ b/libs/phylogeny/bblEMProportional.h
@@ -0,0 +1,50 @@
// $Id: bblEMProportional.h 962 2006-11-07 15:13:34Z privmane $
#ifndef ___BBL_EM_PROPORTIONAL_H
#define ___BBL_EM_PROPORTIONAL_H

#include "definitions.h"
#include "tree.h"
#include "stochasticProcess.h"
#include "sequenceContainer.h"

#include <vector>
using namespace std;


// Branch-length optimization by EM for the "proportional" model: several
// genes share one tree, each gene with its own sequence container and
// stochastic process.  The reported likelihood is summed over all genes.
class bblEMProportional {
public:
	// Runs the EM optimization on construction; et's branch lengths are
	// modified in place.  sc and sp are parallel per-gene vectors; weights,
	// when given, holds one per-position weight vector per gene.
	explicit bblEMProportional(tree& et,
		const vector<sequenceContainer>& sc,
		const vector<stochasticProcess>& sp,
		const vector<Vdouble *> * weights = NULL,
		const int maxIterations=50,
		const MDOUBLE epsilon=0.05,
		const MDOUBLE tollForPairwiseDist=0.0001);
	// Total (summed over genes) log-likelihood after optimization.
	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}

private:
	// Main EM loop (see .cpp for the per-step breakdown).
	MDOUBLE compute_bblEMProp(const int maxIterations,const MDOUBLE epsilon,const MDOUBLE tollForPairwiseDist);
	void allocatePlaceProp();
	void computeUpProp();
	void bblEM_itProp(const MDOUBLE tollForPairwiseDist);
	void computeDownProp(const int gene, const int pos);
	void addCountsProp(const int gene, const int pos);
	void addCountsProp(const int gene,const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig);
	void optimizeBranchesProp(const MDOUBLE tollForPairwiseDist);

	MDOUBLE _treeLikelihood;
	tree& _et;
	const vector<sequenceContainer>& _sc;
	const vector<stochasticProcess>& _sp;
	const vector<Vdouble *> * _weights;
	int _numberOfGenes;
	vector< vector<countTableComponentGam> > _computeCountsV; // for each gene, for each node - a table of rate*alph*alph
	vector<suffStatGlobalGam> _cup;		// per gene: up sufficient statistics
	vector<suffStatGlobalGamPos> _cdown;	// per gene: down sufficient statistics
	vector<computePijGam> _pij;		// per gene: Pij(t) tables
	VVdoubleRep _posLike;			// per gene, per position likelihood


};

#endif
diff --git a/libs/phylogeny/bblEMProprtional.cpp b/libs/phylogeny/bblEMProprtional.cpp
new file mode 100644
index 0000000..f6c6a6c
--- /dev/null
+++ b/libs/phylogeny/bblEMProprtional.cpp
@@ -0,0 +1,142 @@
+// $Id: bblEMProprtional.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "bblEM.h"
+#include "bblEMProportional.h"
+#include "likelihoodComputation.h"
+using namespace likelihoodComputation;
+#include "computeUpAlg.h"
+#include "computeDownAlg.h"
+#include "computeCounts.h"
+#include "treeIt.h"
+#include "fromCountTableComponentToDistance.h"
+#include <ctime>//#define VERBOS
+#include "fromCountTableComponentToDistanceProp.h"
+
// Construct-and-run: optimizes the shared tree's branch lengths by EM for the
// proportional model (several genes, one tree, one stochastic process per
// gene).  The resulting log-likelihood is available via getTreeLikelihood().
bblEMProportional::bblEMProportional(tree& et,
					const vector<sequenceContainer>& sc,
					const vector<stochasticProcess>& sp,
					const vector<Vdouble *> * weights,
					const int maxIterations,
					const MDOUBLE epsilon,
					const MDOUBLE tollForPairwiseDist):

_et(et),_sc(sc),_sp(sp),_weights (weights) {
	_numberOfGenes = _sc.size();
	assert(_sp.size() == _sc.size());	// one stochastic process per gene
	_treeLikelihood = compute_bblEMProp(maxIterations,epsilon,tollForPairwiseDist);
}
+
// Main EM loop for the proportional model.  Each iteration recomputes the up
// sufficient statistics and the summed (over genes) log-likelihood; if the
// improvement over the previous iteration is below epsilon the loop stops,
// otherwise one branch-length EM step (bblEM_itProp) is performed.
MDOUBLE bblEMProportional::compute_bblEMProp(
	const int maxIterations,
	const MDOUBLE epsilon,
	const MDOUBLE tollForPairwiseDist){
	allocatePlaceProp();
	MDOUBLE oldL=VERYSMALL;
	MDOUBLE currL = VERYSMALL;
	for (int i=0; i < maxIterations; ++i) {
		computeUpProp();
		currL = 0;
		for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
			// total likelihood = product over genes, i.e. sum of log-likelihoods
			currL += likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc[geneN],_sp[geneN],_cup[geneN],_posLike[geneN],(_weights?(*_weights)[geneN]:NULL));
		}
		// NOTE(review): oldT is a copy of the CURRENT tree, so the restore
		// below ("_et = oldT") is effectively a no-op; the tree that produced
		// oldL was already overwritten by the previous bblEM_itProp - verify
		// whether the save should happen before bblEM_itProp instead.
		tree oldT = _et;
		if (currL < oldL + epsilon) { // need to break
			if (currL<oldL) {
				_et = oldT;
				return oldL; // keep the old tree, and old likelihood
			} else {
				//update the tree and likelihood and return
				return currL;
			}
		}
		bblEM_itProp(tollForPairwiseDist);
		oldL = currL;
	}
	// reached maxIterations: return the last likelihood computed
	return currL;
}
+
// Allocate, per gene: one count table per tree node (rate*alph*alph), the up
// statistics (pos x category x node x letter), the down statistics
// (category x node x letter), the Pij tables and per-position likelihoods.
void bblEMProportional::allocatePlaceProp() {
	_computeCountsV.resize(_numberOfGenes);
	_cup.resize(_numberOfGenes);
	_cdown.resize(_numberOfGenes);
	_pij.resize(_numberOfGenes);
	_posLike.resize(_numberOfGenes);
	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
		_computeCountsV[geneN].resize(_et.getNodesNum()); //initiateTablesOfCounts
		for (int i=0; i < _computeCountsV[geneN].size(); ++i) {
			_computeCountsV[geneN][i].countTableComponentAllocatePlace(_sp[geneN].alphabetSize(),_sp[geneN].categories());
		}
		_cup[geneN].allocatePlace(_sc[geneN].seqLen(),_sp[geneN].categories(), _et.getNodesNum(), _sc[geneN].alphabetSize());
		_cdown[geneN].allocatePlace(_sp[geneN].categories(), _et.getNodesNum(), _sc[geneN].alphabetSize());
	}
}
+
+void bblEMProportional::computeUpProp(){
+ for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
+ _pij[geneN].fillPij(_et,_sp[geneN],0); // 0 is becaues we compute Pij(t) and not its derivations...
+ computeUpAlg cupAlg;
+ for (int pos=0; pos < _sc[geneN].seqLen(); ++pos) {
+ for (int categor = 0; categor < _sp[geneN].categories(); ++categor) {
+ cupAlg.fillComputeUp(_et,_sc[geneN],pos,_pij[geneN][categor],_cup[geneN][pos][categor]);
+ }
+ }
+ }
+ }
+
+void bblEMProportional::bblEM_itProp(const MDOUBLE tollForPairwiseDist){
+ for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
+ for (int i=0; i < _computeCountsV.size(); ++i) {
+ _computeCountsV[geneN][i].zero();
+ }
+ for (int i=0; i < _sc[geneN].seqLen(); ++i) {
+ computeDownProp(geneN,i);
+ addCountsProp(geneN,i); // computes the counts and adds to the table.
+ }
+ }
+ optimizeBranchesProp(tollForPairwiseDist);
+}
+
+void bblEMProportional::computeDownProp(const int gene, const int pos){
+ computeDownAlg cdownAlg;
+ for (int categor = 0; categor < _sp[gene].categories(); ++categor) {
+ cdownAlg.fillComputeDown(_et,_sc[gene],pos,_pij[gene][categor],_cdown[gene][categor],_cup[gene][pos][categor]);
+ }
+}
+
+void bblEMProportional::addCountsProp(const int gene, const int pos){
+ vector<MDOUBLE> * weightsOfGene = (_weights?(*_weights)[gene]:NULL);
+ MDOUBLE weig = (weightsOfGene ? (*weightsOfGene)[pos] : 1.0);
+ if (weig == 0) return;
+ treeIterDownTopConst tIt(_et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ if (!tIt->isRoot()) {
+ addCountsProp(gene,pos,mynode,_posLike[gene][pos],weig);
+ }
+ }
+}
+
+void bblEMProportional::addCountsProp(const int gene,const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig){
+ computeCounts cc;
+ for (int categor =0; categor< _sp[gene].categories(); ++ categor) {
+ cc.computeCountsNodeFatherNodeSonHomPos(_sc[gene],
+ _pij[gene][categor],
+ _sp[gene],
+ _cup[gene][pos][categor],
+ _cdown[gene][categor],
+ weig,
+ posProb,
+ mynode,
+ _computeCountsV[gene][mynode->id()][categor],
+ _sp[gene].ratesProb(categor));
+ }
+}
+
// Re-estimates the length of every branch from the accumulated count tables,
// seeding the pairwise-distance optimization with the current branch length.
void bblEMProportional::optimizeBranchesProp(const MDOUBLE tollForPairwiseDist){
	treeIterDownTopConst tIt(_et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		if (!tIt->isRoot()) {
			// NOTE(review): _computeCountsV is indexed [gene][node] everywhere
			// else in this file, but here it is indexed by node id alone,
			// which selects the row of gene number mynode->id().  Verify this
			// matches what fromCountTableComponentToDistanceProp expects
			// (it presumably wants one table per gene for THIS node).
			fromCountTableComponentToDistanceProp from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father());
			from1.computeDistance();
			mynode->setDisToFather(from1.getDistance());
		}
	}
}
diff --git a/libs/phylogeny/bblEMSeperate.cpp b/libs/phylogeny/bblEMSeperate.cpp
new file mode 100644
index 0000000..d544d80
--- /dev/null
+++ b/libs/phylogeny/bblEMSeperate.cpp
@@ -0,0 +1,28 @@
+// $Id: bblEMSeperate.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "bblEM.h"
+#include "bblEMSeperate.h"
+#include "logFile.h"
+//#define VERBOS
+
+
+bblEMSeperate::bblEMSeperate(vector<tree>& et,
+ const vector<sequenceContainer>& sc,
+ const vector<stochasticProcess> &sp,
+ const vector<Vdouble *> * weights,
+ const int maxIterations,
+ const MDOUBLE epsilon,
+ const MDOUBLE tollForPairwiseDist) {
+ MDOUBLE newL =0;
+ for (int i=0; i < et.size(); ++i) {
+ #ifdef VERBOS
+ LOG(5,<<" OPTIMIZING GENE "<<i<<" ... "<<endl);
+ #endif
+ bblEM bblEM1(et[i],sc[i],sp[i],(weights?(*weights)[i]:NULL),maxIterations,epsilon);
+ MDOUBLE resTmp = bblEM1.getTreeLikelihood();
+ #ifdef VERBOS
+ LOG(5,<<" GENE "<<i<<" LOG-L = "<< resTmp<<endl);
+ #endif
+ newL += resTmp;
+ }
+ _treeLikelihood = newL;
+}
diff --git a/libs/phylogeny/bblEMSeperate.h b/libs/phylogeny/bblEMSeperate.h
new file mode 100644
index 0000000..22ba3fd
--- /dev/null
+++ b/libs/phylogeny/bblEMSeperate.h
@@ -0,0 +1,30 @@
// $Id: bblEMSeperate.h 962 2006-11-07 15:13:34Z privmane $
#ifndef ___BBL_EM_SEPERATE_H
#define ___BBL_EM_SEPERATE_H

#include "definitions.h"
#include "tree.h"
#include "stochasticProcess.h"
#include "sequenceContainer.h"

#include <vector>
using namespace std;


// Branch-length optimization for the "separate" model: each gene has its own
// tree, sequence container and stochastic process, and a full bblEM run is
// performed per gene independently.
class bblEMSeperate {
public:
	// et, sc and sp are parallel per-gene vectors; each tree's branch
	// lengths are optimized in place.  weights, when given, holds one
	// per-position weight vector per gene.
	explicit bblEMSeperate(vector<tree>& et,
		const vector<sequenceContainer>& sc,
		const vector<stochasticProcess> &sp,
		const vector<Vdouble *> * weights,
		const int maxIterations=50,
		const MDOUBLE epsilon=0.05,
		const MDOUBLE tollForPairwiseDist=0.0001);
	// Sum of the per-gene log-likelihoods after optimization.
	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}

private:
	MDOUBLE _treeLikelihood;

};

#endif
diff --git a/libs/phylogeny/bblEMfixRoot.cpp b/libs/phylogeny/bblEMfixRoot.cpp
new file mode 100644
index 0000000..7dc22a8
--- /dev/null
+++ b/libs/phylogeny/bblEMfixRoot.cpp
@@ -0,0 +1,175 @@
+// $Id: bblEM.cpp 4478 2008-07-17 17:09:55Z cohenofi $
+#include "bblEMfixRoot.h"
+#include "likelihoodComputation.h"
+using namespace likelihoodComputation;
+#include "computeUpAlg.h"
+#include "computeDownAlg.h"
+#include "computeCounts.h"
+#include "treeIt.h"
+#include "fromCountTableComponentToDistancefixRoot.h"
+#include <ctime>
+
// Construct-and-run: EM branch-length optimization for a model with a fixed
// (non-reversible) root.  unObservableData_p, when given, corrects the
// likelihood for unobservable site patterns and is refreshed whenever the
// tree changes.
bblEMfixRoot::bblEMfixRoot(tree& et,
				const sequenceContainer& sc,
				const stochasticProcess& sp,
				const Vdouble * weights,
				const int maxIterations,
				const MDOUBLE epsilon,
				const MDOUBLE tollForPairwiseDist,
				unObservableData* unObservableData_p) :
_et(et),_sc(sc),_sp(sp),_weights (weights),_unObservableData_p(unObservableData_p)
{
	//if(!plogLforMissingData){
	//	_plogLforMissingData = NULL;
	//}
	_treeLikelihood = compute_bblEM(maxIterations,epsilon,tollForPairwiseDist);
}
+
+
// The main EM loop.  Each iteration: (1) refresh the unobservable-data
// correction and the up statistics, (2) compute the current log-likelihood,
// (3) stop if the improvement is below epsilon (returning the old likelihood
// if it actually decreased), otherwise (4) run one EM step over the branch
// lengths.  If maxIterations is exhausted, the likelihood of the final tree
// is recomputed before returning.
MDOUBLE bblEMfixRoot::compute_bblEM(
			const int maxIterations,
			const MDOUBLE epsilon,
			const MDOUBLE tollForPairwiseDist){
	allocatePlace();
	MDOUBLE oldL=VERYSMALL;
	MDOUBLE currL = VERYSMALL;
	tree oldT = _et;
	for (int i=0; i < maxIterations; ++i) {
		if(_unObservableData_p)
			_unObservableData_p->setLforMissingData(_et,&_sp);	// correction term for unobservable patterns
		computeUp();
		currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights,_unObservableData_p);
		// NOTE(review): oldT is copied AFTER bblEM_it of the previous
		// iteration already changed the branch lengths, so "_et = oldT"
		// below restores the current tree, not the tree that produced
		// oldL - verify against the plain bblEM implementation.
		oldT = _et;
		if (currL < oldL + epsilon) { // need to break
			if (currL<=oldL) {
				_et = oldT;
				if(_unObservableData_p)
					_unObservableData_p->setLforMissingData(_et,&_sp);
				return oldL; // keep the old tree, and old likelihood
			} else {
				//update the tree and likelihood and return
				return currL;
			}
		}
		bblEM_it(tollForPairwiseDist);
		oldL = currL;
	}
	// in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
	computeUp();
	if(_unObservableData_p)
		_unObservableData_p->setLforMissingData(_et,&_sp);
	currL = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc,_sp,_cup,_posLike,_weights, _unObservableData_p);
	//////////////
	//MDOUBLE checkUpLL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et, _sc, _sp, _weights, _plogLforMissingData);
	//LOGnOUT(4, << "checkUpLL = "<<checkUpLL <<" curll = "<<currL<<endl);
	///////////////

	if (currL<=oldL)
	{
		_et = oldT;
		if(_unObservableData_p)
			_unObservableData_p->setLforMissingData(_et,&_sp);
		return oldL; // keep the old tree, and old likelihood
	}
	else
		return currL;
}
+
// Allocate all working tables for the fixed-root model:
//   _computeCountsV[node][letterAtRoot] - a rate*alph*alph count table
//   _cup[pos][category][node][letter]   - up sufficient statistics
//   _cdown[category][letterAtRoot][node][letter] - down sufficient statistics
void bblEMfixRoot::allocatePlace() {
	_computeCountsV.resize(_et.getNodesNum());//initiateTablesOfCounts
	for (int i=0; i < _computeCountsV.size(); ++i) {
		{ // (redundant scope block kept as-is)
			_computeCountsV[i].resize(_sp.alphabetSize());	// one table per possible root letter
			for (int letterAtRoot = 0; letterAtRoot < _computeCountsV[0].size(); ++letterAtRoot)
				_computeCountsV[i][letterAtRoot].countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories());
			//_computeCountsV[i][letterAtRoot].zero();
		}
	}
	_cup.allocatePlace(_sc.seqLen(),_sp.categories(), _et.getNodesNum(), _sc.alphabetSize());
	_cdown.resize(_sp.categories());
	for (int categor = 0; categor < _sp.categories(); ++categor)
	{
		_cdown[categor].allocatePlace(_sp.alphabetSize(), _et.getNodesNum(), _sc.alphabetSize());
	}
}
+
+void bblEMfixRoot::bblEM_it(const MDOUBLE tollForPairwiseDist){
+ for (int i=0; i < _computeCountsV.size(); ++i) {
+ for (int j=0; j < _computeCountsV[0].size(); ++j) {
+ _computeCountsV[i][j].zero();
+ }
+ }
+ for (int i=0; i < _sc.seqLen(); ++i) {
+ computeDown(i);
+ addCounts(i); // computes the counts and adds to the table.
+ }
+ optimizeBranches(tollForPairwiseDist);
+ if(_unObservableData_p)
+ _unObservableData_p->setLforMissingData(_et,&_sp);
+}
+
+void bblEMfixRoot::optimizeBranches(const MDOUBLE tollForPairwiseDist){
+ treeIterDownTopConst tIt(_et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ if (!tIt->isRoot()) {
+ fromCountTableComponentToDistancefixRoot from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father(),_unObservableData_p);
+ from1.computeDistance();
+ mynode->setDisToFather(from1.getDistance());
+ if(_unObservableData_p)
+ _unObservableData_p->setLforMissingData(_et,&_sp);
+ }
+ }
+}
+
+void bblEMfixRoot::computeUp(){
+ _pij.fillPij(_et,_sp,0); // 0 is becaues we compute Pij(t) and not its derivations...
+ computeUpAlg cupAlg;
+ for (int pos=0; pos < _sc.seqLen(); ++pos) {
+ for (int categor = 0; categor < _sp.categories(); ++categor) {
+ cupAlg.fillComputeUp(_et,_sc,pos,_pij[categor],_cup[pos][categor]);
+ }
+ }
+ }
+
+void bblEMfixRoot::computeDown(const int pos){
+ computeDownAlg cdownAlg;
+ for (int categor = 0; categor < _sp.categories(); ++categor) {
+ cdownAlg.fillComputeDownNonReversible(_et,_sc,pos,_pij[categor],_cdown[categor],_cup[pos][categor]);
+ }
+ }
+
+void bblEMfixRoot::addCounts(const int pos){
+ //MDOUBLE posProb =
+ // likelihoodComputation::getProbOfPosWhenUpIsFilledGam(pos,_et,_sc,_sp,_cup);
+
+ MDOUBLE weig = (_weights ? (*_weights)[pos] : 1.0);
+ if (weig == 0) return;
+ treeIterDownTopConst tIt(_et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ if (!tIt->isRoot()) {
+ addCountsFixedRoot(pos,mynode,_posLike[pos],weig);
+ }
+ }
+}
+
+void bblEMfixRoot::addCountsFixedRoot(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig){
+
+ computeCounts cc;
+ for(int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); letterAtRoot++)
+ {
+ for (int categor =0; categor< _sp.categories(); ++ categor)
+ {
+ cc.computeCountsNodeFatherNodeSonHomPos(_sc,
+ _pij[categor],
+ _sp,
+ _cup[pos][categor],
+ _cdown[categor][letterAtRoot],
+ weig,
+ posProb,
+ mynode,
+ _computeCountsV[mynode->id()][letterAtRoot][categor],
+ _sp.ratesProb(categor),
+ letterAtRoot); // letterInFather is used - FixedRoot version
+ }
+ }
+}
diff --git a/libs/phylogeny/bblEMfixRoot.h b/libs/phylogeny/bblEMfixRoot.h
new file mode 100644
index 0000000..49faf09
--- /dev/null
+++ b/libs/phylogeny/bblEMfixRoot.h
@@ -0,0 +1,60 @@
// $Id: bblEM.h 4478 2008-07-17 17:09:55Z cohenofi $
#ifndef ___BBL_EM_GL__FIXED_ROOT
#define ___BBL_EM_GL__FIXED_ROOT

#include "definitions.h"
#include "tree.h"
#include "stochasticProcess.h"
#include "sequenceContainer.h"
#include "countTableComponent.h"
#include "computePijComponent.h"
#include "suffStatComponent.h"
#include "gainLossAlphabet.h"
#include "unObservableData.h"
#include <vector>

using namespace std;

// Branch-length optimization by EM for a model with a fixed (non-reversible)
// root: the sufficient statistics are kept separately per letter at the root.
// Optionally corrects the likelihood for unobservable site patterns.
class bblEMfixRoot {
public:
	// Runs the EM optimization on construction; et's branch lengths are
	// modified in place.  _unObservableData_p, when non-NULL, provides the
	// unobservable-data likelihood correction (not owned).
	explicit bblEMfixRoot(tree& et,
		const sequenceContainer& sc,
		const stochasticProcess& sp,
		const Vdouble * weights = NULL,
		const int maxIterations=50,
		const MDOUBLE epsilon=0.05,
		const MDOUBLE tollForPairwiseDist=0.001,
		unObservableData* _unObservableData_p=NULL);
	// Log-likelihood of the tree after optimization.
	MDOUBLE getTreeLikelihood() const {return _treeLikelihood;}

private:
	// Main EM loop; see the .cpp for the per-step breakdown.
	MDOUBLE compute_bblEM(const int maxIterations,
		const MDOUBLE epsilon,
		const MDOUBLE tollForPairwiseDist);
	void bblEM_it(const MDOUBLE tollForPairwiseDist);	// one EM iteration
	void computeDown(const int pos);
	void computeUp();
	void addCounts(const int pos);
	void addCountsFixedRoot(const int pos, tree::nodeP mynode, const doubleRep posProb, const MDOUBLE weig);

	void optimizeBranches(const MDOUBLE tollForPairwiseDist);
	void allocatePlace();



	MDOUBLE _treeLikelihood;
	tree& _et;
	const sequenceContainer& _sc;
	const stochasticProcess& _sp;
	//vector<countTableComponentGam> _computeCountsV; // for each node - a table of rate*alph*alph
	vector< vector< countTableComponentGam > > _computeCountsV; // _computeCountsV[node][letterAtRoot][rate][alph][alph]
	computePijGam _pij;
	suffStatGlobalGam _cup; //_cup[pos][categ][nodeid][letter][prob]
	//suffStatGlobalGamPos _cdown; // foreach pos: computeDown(pos); addCounts(pos);
	vector<suffStatGlobalGamPos> _cdown; //_cdown[categ][letter at root][nodeid][letter][prob] - since fillComputeDownNonReversible uses this assumption
	const Vdouble * _weights;
	VdoubleRep _posLike;
	unObservableData* _unObservableData_p;	// optional correction; not owned
};

#endif
diff --git a/libs/phylogeny/bestAlpha.cpp b/libs/phylogeny/bestAlpha.cpp
new file mode 100644
index 0000000..9142cad
--- /dev/null
+++ b/libs/phylogeny/bestAlpha.cpp
@@ -0,0 +1,301 @@
+// $Id: bestAlpha.cpp 5786 2009-01-19 22:22:48Z rubi $
+
+#include <iostream>
+using namespace std;
+
+#include "bestAlpha.h"
+#include "bblEM.h"
+#include "numRec.h"
+#include "logFile.h"
+#include "errorMsg.h"
+
+#ifndef VERBOS
+#define VERBOS
+#endif
+//void bestAlpha::checkAllocation() {
+// if (_pi->stocProcessFromLabel(0)->getPijAccelerator() == NULL) {
+// errorMsg::reportError(" error in function findBestAlpha");
+// }
+//}
+//
+// @@@@ The method works with oldL,oldA,bestA and newL,newA.
+// Only when it's about to end, the members _bestAlpha and _bestL are filled.
+
// Alternating optimization: (1) a Brent line-search over the gamma shape
// parameter alpha with the tree fixed, then (2) a bblEM round over the
// branch lengths with alpha fixed.  Stops when a step fails to improve the
// log-likelihood by more than epsilonLoglikelihoodForBBL, or after
// maxTotalIterations.  On return sp holds the best alpha found.
bestAlphaAndBBL::bestAlphaAndBBL(tree& et, //find Best Alpha and best BBL
			   const sequenceContainer& sc,
			   stochasticProcess& sp,
			   const Vdouble * weights,
			   const MDOUBLE initAlpha,
			   const MDOUBLE upperBoundOnAlpha,
			   const MDOUBLE epsilonLoglikelihoodForAlphaOptimization,
			   const MDOUBLE epsilonLoglikelihoodForBBL,
			   const int maxBBLIterations,
			   const int maxTotalIterations){
//	LOG(5,<<"find Best Alpha and best BBL"<<endl);
//	LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
//	brLenOpt br1(*et,*pi,weights);

	MDOUBLE oldL = VERYSMALL;
	MDOUBLE newL = VERYSMALL;
	// Brent bracket for alpha: [ax, bx, cx] = [0, initAlpha, upper bound].
	const MDOUBLE bx=initAlpha;
	const MDOUBLE ax=0;
	const MDOUBLE cx=upperBoundOnAlpha;
//
	MDOUBLE bestA=0;
	MDOUBLE oldA=0;
	int i=0;
	for (i=0; i < maxTotalIterations; ++i) {
		// step 1: optimize alpha with the current branch lengths
		// (brent minimizes, hence the negation).
		newL = -brent(ax,bx,cx,
			  C_evalAlpha(et,sc,sp,weights),
			  epsilonLoglikelihoodForAlphaOptimization,
			  &bestA);

#ifdef VERBOS
		LOG(5,<<"# bestAlphaAndBBL::bestAlphaAndBBL iteration " << i <<endl
			   <<"# old L = " << oldL << "\t"
			   <<"# new L = " << newL << endl
			   <<"# new Alpha = " << bestA << endl);
#endif
		if (newL > oldL+epsilonLoglikelihoodForBBL) {
			oldL = newL;
			oldA = bestA;
		} else {
			// no significant improvement from alpha: record result and stop
			oldL = newL;
			oldA = bestA;


			_bestL = oldL;
			_bestAlpha= oldA;
			(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
			break;
		}

		// step 2: optimize branch lengths with the new alpha
		(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
		bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
		newL =bblEM1.getTreeLikelihood();
#ifdef VERBOS
		LOG(5,<<"# bestAlphaAndBBL::bestAlphaAndBBL iteration " << i <<endl
			   <<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
			   <<"# The tree:" );
		LOGDO(5,et.output(myLog::LogFile()));
#endif

		if (newL > oldL+epsilonLoglikelihoodForBBL) {
			oldL = newL;
		}
		else {
			// BBL did not significantly improve: record result and stop
			oldL=newL;
			_bestL = oldL;
			_bestAlpha= oldA;
			(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
			break;
		}
	}
	if (i==maxTotalIterations) {
		// ran out of iterations - keep the last values computed
		_bestL = newL;
		_bestAlpha= bestA;
		(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
	}
}
+
// Same alternating scheme as bestAlphaAndBBL, but over the gamma scale
// parameter beta: Brent over beta (tree fixed), then bblEM over the branch
// lengths (beta fixed), until no significant improvement or the iteration
// budget runs out.  On return sp holds the best beta found.
bestBetaAndBBL::bestBetaAndBBL(tree& et, //find Best Alpha and best BBL
			   const sequenceContainer& sc,
			   stochasticProcess& sp,
			   const Vdouble * weights,
			   const MDOUBLE initBeta,
			   const MDOUBLE upperBoundOnBeta,
			   const MDOUBLE epsilonLoglikelihoodForBetaOptimization,
			   const MDOUBLE epsilonLoglikelihoodForBBL,
			   const int maxBBLIterations,
			   const int maxTotalIterations){
//	LOG(5,<<"find Best Beta and best BBL"<<endl);
//	LOG(5,<<" 1. bestBetaa::findBestBeta"<<endl);
//	brLenOpt br1(*et,*pi,weights);

	MDOUBLE oldL = VERYSMALL;
	MDOUBLE newL = VERYSMALL;
	// Brent bracket for beta: [ax, bx, cx] = [0, initBeta, upper bound].
	const MDOUBLE bx=initBeta;
	const MDOUBLE ax=0;
	const MDOUBLE cx=upperBoundOnBeta;
//
	MDOUBLE bestB=0;
	MDOUBLE oldB=0;
	int i=0;
	for (i=0; i < maxTotalIterations; ++i) {
		// step 1: optimize beta with the current branch lengths
		newL = -brent(ax,bx,cx,
			  C_evalBeta(et,sc,sp,weights),
			  epsilonLoglikelihoodForBetaOptimization,
			  &bestB);

#ifdef VERBOS
		LOG(5,<<"# bestBetaAndBBL::bestBetaAndBBL iteration " << i <<endl
			   <<"# old L = " << oldL << "\t"
			   <<"# new L = " << newL << endl
			   <<"# new Beta = " << bestB << endl);
#endif
		if (newL > oldL+epsilonLoglikelihoodForBBL) {
			oldL = newL;
			oldB = bestB;
		} else {
			// no significant improvement from beta: record result and stop
			oldL = newL;
			oldB = bestB;


			_bestL = oldL;
			_bestBeta= oldB;
			(static_cast<gammaDistribution*>(sp.distr()))->setBeta(bestB);
			break;
		}

		// step 2: optimize branch lengths with the new beta
		(static_cast<gammaDistribution*>(sp.distr()))->setBeta(bestB);
		bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
		newL =bblEM1.getTreeLikelihood();
#ifdef VERBOS
		LOG(5,<<"# bestBetaAndBBL::bestBetaAndBBL iteration " << i <<endl
			   <<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
			   <<"# The tree:" );
		LOGDO(5,et.output(myLog::LogFile()));
#endif

		if (newL > oldL+epsilonLoglikelihoodForBBL) {
			oldL = newL;
		}
		else {
			// BBL did not significantly improve: record result and stop
			oldL=newL;
			_bestL = oldL;
			_bestBeta= oldB;
			(static_cast<gammaDistribution*>(sp.distr()))->setBeta(bestB);
			break;
		}
	}
	if (i==maxTotalIterations) {
		// ran out of iterations - keep the last values computed
		_bestL = newL;
		_bestBeta= bestB;
		(static_cast<gammaDistribution*>(sp.distr()))->setBeta(bestB);
	}
}
+
+bestAlphaFixedTree::bestAlphaFixedTree(const tree& et, //findBestAlphaFixedTree
+ const sequenceContainer& sc,
+ stochasticProcess& sp,
+ const Vdouble * weights,
+ const MDOUBLE upperBoundOnAlpha,
+ const MDOUBLE epsilonLoglikelihoodForAlphaOptimization){
+ //LOG(5,<<"findBestAlphaFixedTree"<<endl);
+ MDOUBLE bestA=0;
+ const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
+ const MDOUBLE bx=static_cast<gammaDistribution*>(sp.distr())->getAlpha();
+ const MDOUBLE ax=0.0;
+
+
+ _bestL = -brent(ax,bx,cx,
+ C_evalAlpha(et,sc,sp,weights),
+ epsilonLoglikelihoodForAlphaOptimization,
+ &bestA);
+ (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
+ _bestAlpha= bestA;
+}
+
+bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL(tree& et, //find Best Alpha and best BBL
+ const sequenceContainer& sc,
+ stochasticProcess& sp,
+ const Vdouble * weights,
+ const MDOUBLE initAlpha,
+ const MDOUBLE initBeta,
+ const MDOUBLE upperBoundOnAlpha,
+ const MDOUBLE upperBoundOnBeta,
+ const MDOUBLE epsilonLoglikelihoodForAlphaOptimization,
+ const MDOUBLE epsilonLoglikelihoodForBetaOptimization,
+ const MDOUBLE epsilonLoglikelihoodForBBL,
+ const int maxBBLIterations,
+ const int maxTotalIterations){
+// LOG(5,<<"find Best Alpha and Beta and best BBL"<<endl);
+// LOG(5,<<" 1. bestAlphaAndBetaAndBBL::findBestAlphaAndBeta"<<endl);
+// brLenOpt br1(*et,*pi,weights);
+
+ MDOUBLE oldL = VERYSMALL;
+ MDOUBLE newL = VERYSMALL;
+ MDOUBLE bx=initAlpha;
+ const MDOUBLE ax=0;
+ const MDOUBLE cx=upperBoundOnAlpha;
+ MDOUBLE ex=initBeta;
+ const MDOUBLE dx=0;
+ const MDOUBLE fx=upperBoundOnBeta;
+ bool optimize = false;
+
+//
+ MDOUBLE bestA=0;
+ MDOUBLE oldA=0;
+ MDOUBLE bestB=0;
+ MDOUBLE oldB=0;
+ int i=0;
+ for (i=0; i < maxTotalIterations; ++i) {
+//optimize alpha
+ newL = -brent(ax,bx,cx,
+ C_evalAlpha(et,sc,sp,weights),
+ epsilonLoglikelihoodForAlphaOptimization,
+ &bestA);
+ bx = bestA;
+
+#ifdef VERBOS
+ LOG(5,<<"# bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL iteration " << i <<endl
+ <<"# old L = " << oldL << "\t"
+ <<"# new L = " << newL << endl
+ <<"# new Alpha = " << bestA << endl);
+#endif
+ if(newL < oldL)
+ errorMsg::reportError("likelihood decreased in alhpa optimization step in bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL");
+ oldL = newL;
+ oldA = bestA;
+ _bestL = newL;
+ _bestAlpha= bestA;
+ if (newL > oldL+epsilonLoglikelihoodForBBL) {
+ optimize = true;
+ }
+ (static_cast<generalGammaDistribution*>(sp.distr()))->setAlpha(bestA);
+
+//optimize beta
+ newL = -brent(dx,ex,fx,
+ C_evalBeta(et,sc,sp,weights),
+ epsilonLoglikelihoodForBetaOptimization,
+ &bestB);
+ ex = bestB;
+
+#ifdef VERBOS
+ LOG(5,<<"# bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL iteration " << i <<endl
+ <<"# old L = " << oldL << "\t"
+ <<"# new L = " << newL << endl
+ <<"# new Beta = " << bestB << endl);
+#endif
+ if(newL < oldL)
+ errorMsg::reportError("likelihood decreased in beta optimization step in bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL");
+ oldL = newL;
+ oldB = bestB;
+ _bestL = oldL;
+ _bestBeta= oldB;
+ if (newL > oldL+epsilonLoglikelihoodForBBL) {
+ optimize = true;
+ }
+ (static_cast<generalGammaDistribution*>(sp.distr()))->setBeta(bestB);
+
+//bblEM
+ bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonLoglikelihoodForBBL);//maxIterations=1000
+ newL =bblEM1.getTreeLikelihood();
+#ifdef VERBOS
+ LOG(5,<<"# bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL iteration " << i <<endl
+ <<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
+ <<"# The tree:" );
+ LOGDO(5,et.output(myLog::LogFile()));
+#endif
+ if(newL < oldL)
+ errorMsg::reportError("likelihood decreased in bbl optimization step in bestAlphaAndBetaAndBBL::bestAlphaAndBetaAndBBL");
+ oldL = newL;
+ _bestL = newL;
+ if (newL > oldL+epsilonLoglikelihoodForBBL) {
+ optimize = true;
+ }
+ if (!optimize)
+ break;
+ }
+}
+
diff --git a/libs/phylogeny/bestAlpha.h b/libs/phylogeny/bestAlpha.h
new file mode 100644
index 0000000..b7aeb6d
--- /dev/null
+++ b/libs/phylogeny/bestAlpha.h
@@ -0,0 +1,155 @@
+// $Id: bestAlpha.h 5786 2009-01-19 22:22:48Z rubi $
+
+#ifndef ___BEST_ALPHA
+#define ___BEST_ALPHA
+
+#include "definitions.h"
+
+#include "likelihoodComputation.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "gammaDistribution.h"
+#include "tree.h"
+#include "logFile.h"
+
+#ifndef VERBOS
+#define VERBOS
+#endif
+
// Optimizes only the gamma shape parameter alpha on a fixed tree (topology
// and branch lengths are not changed); the work is done in the constructor.
class bestAlphaFixedTree {
public:
	explicit bestAlphaFixedTree(const tree& et,
		const sequenceContainer& sc,
		stochasticProcess& sp,
		const Vdouble * weights=NULL,
		const MDOUBLE upperBoundOnAlpha = 15,
		const MDOUBLE epsilonAlphaOptimization = 0.01);
	MDOUBLE getBestAlpha() {return _bestAlpha;}	// optimal alpha found
	MDOUBLE getBestL() {return _bestL;}		// log-likelihood at the optimum
private:
	MDOUBLE _bestAlpha;
	MDOUBLE _bestL;
};
+
// Alternates Brent optimization of alpha with bblEM branch-length rounds;
// the work is done in the constructor (et's branch lengths change in place).
class bestAlphaAndBBL {
public:
	explicit bestAlphaAndBBL(tree& et, //find Best Alpha and best BBL
		const sequenceContainer& sc,
		stochasticProcess& sp,
		const Vdouble * weights=NULL,
		const MDOUBLE initAlpha = 1.5,
		const MDOUBLE upperBoundOnAlpha = 5.0,
		const MDOUBLE epsilonLoglikelihoodForAlphaOptimization= 0.01,
		const MDOUBLE epsilonLoglikelihoodForBBL= 0.05,
		const int maxBBLIterations=10,
		const int maxTotalIterations=5);
	MDOUBLE getBestAlpha() {return _bestAlpha;}	// optimal alpha found
	MDOUBLE getBestL() {return _bestL;}		// log-likelihood at the optimum
private:
	MDOUBLE _bestAlpha;
	MDOUBLE _bestL;
};
+
// Alternates Brent optimization of beta with bblEM branch-length rounds;
// the work is done in the constructor (et's branch lengths change in place).
class bestBetaAndBBL {
public:
	explicit bestBetaAndBBL(tree& et, //find Best Beta and best BBL
		const sequenceContainer& sc,
		stochasticProcess& sp,
		const Vdouble * weights=NULL,
		const MDOUBLE initBeta = 1.5,
		const MDOUBLE upperBoundOnBeta = 5.0,
		const MDOUBLE epsilonLoglikelihoodForBetaOptimization= 0.01,
		const MDOUBLE epsilonLoglikelihoodForBBL= 0.05,
		const int maxBBLIterations=10,
		const int maxTotalIterations=5);
	MDOUBLE getBestBeta() {return _bestBeta;}	// optimal beta found
	MDOUBLE getBestL() {return _bestL;}		// log-likelihood at the optimum
private:
	MDOUBLE _bestBeta;
	MDOUBLE _bestL;
};
+
// Jointly optimizes alpha, beta and the branch lengths (Brent over alpha,
// Brent over beta, bblEM round, iterated); the work is done in the
// constructor (et's branch lengths change in place).
class bestAlphaAndBetaAndBBL {
public:
	explicit bestAlphaAndBetaAndBBL(tree& et, //find Best Alpha and best BBL
		const sequenceContainer& sc,
		stochasticProcess& sp,
		const Vdouble * weights=NULL,
		const MDOUBLE initAlpha = 1.5,
		const MDOUBLE initBeta = 1.5,
		const MDOUBLE upperBoundOnAlpha = 5.0,
		const MDOUBLE upperBoundOnBeta = 5.0,
		const MDOUBLE epsilonLoglikelihoodForAlphaOptimization= 0.01,
		const MDOUBLE epsilonLoglikelihoodForBetaOptimization = 0.01,
		const MDOUBLE epsilonLoglikelihoodForBBL= 0.05,
		const int maxBBLIterations=10,
		const int maxTotalIterations=5);
	MDOUBLE getBestAlpha() {return _bestAlpha;}	// optimal alpha found
	MDOUBLE getBestBeta() {return _bestBeta;}	// optimal beta found
	MDOUBLE getBestL() {return _bestL;}		// log-likelihood at the optimum
private:
	MDOUBLE _bestAlpha;
	MDOUBLE _bestBeta;
	MDOUBLE _bestL;
};
+
+
+class C_evalAlpha{
+public:
+ C_evalAlpha( const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcess& sp,
+ const Vdouble * weights = NULL)
+ : _et(et),_sc(sc),_weights(weights),_sp(sp){};
+private:
+ const tree& _et;
+ const sequenceContainer& _sc;
+ const Vdouble * _weights;
+ stochasticProcess& _sp;
+public:
+ MDOUBLE operator() (MDOUBLE alpha) {
+ if (_sp.categories() == 1) {
+ errorMsg::reportError(" one category when trying to optimize alpha");
+ }
+ (static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
+
+ MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
+ //LOG(5,<<" with alpha = "<<alpha<<" logL = "<<res<<endl);
+#ifdef VERBOS
+ LOG(7,<<" while in brent: with alpha = "<<alpha<<" logL = "<<res<<endl);
+#endif
+ return -res;
+ }
+};
+
+class C_evalBeta{
+public:
+ C_evalBeta( const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcess& sp,
+ const Vdouble * weights = NULL)
+ : _et(et),_sc(sc),_weights(weights),_sp(sp){};
+private:
+ const tree& _et;
+ const sequenceContainer& _sc;
+ const Vdouble * _weights;
+ stochasticProcess& _sp;
+public:
+ MDOUBLE operator() (MDOUBLE beta) {
+ if (_sp.categories() == 1) {
+ errorMsg::reportError(" one category when trying to optimize beta");
+ }
+ (static_cast<generalGammaDistribution*>(_sp.distr()))->setBeta(beta);
+
+ MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
+ //LOG(5,<<" with alpha = "<<alpha<<" logL = "<<res<<endl);
+#ifdef VERBOS
+ LOG(7,<<" while in brent: with beta = "<<beta<<" logL = "<<res<<endl);
+#endif
+ return -res;
+ }
+};
+
+#endif
+
+
diff --git a/libs/phylogeny/bestAlphaAndK.cpp b/libs/phylogeny/bestAlphaAndK.cpp
new file mode 100644
index 0000000..3facae3
--- /dev/null
+++ b/libs/phylogeny/bestAlphaAndK.cpp
@@ -0,0 +1,262 @@
+#include "bestAlphaAndK.h"
+#include "computePijComponent.h"
+#include "betaOmegaDistribution.h"
+#include "codonUtils.h"
+
+
+// Coordinate-ascent optimization of the selecton (codon) model parameters:
+// alpha, beta, kappa, an optional extra omega category, its probability
+// (betaProb), and optionally the branch lengths (BBL).
+// Each free parameter is optimized in turn with brent(); a new value is kept
+// only if it improves the likelihood by more than epsilonLikelihoodImprovment.
+// The outer loop stops as soon as a full round accepts no change.
+// Fixes vs. the original: removed a stray double semicolon after the initial
+// likelihood computation, and removed two inner declarations of omegaFound /
+// betaProbFound that shadowed the outer accumulators declared below.
+optimizeSelectonParameters::optimizeSelectonParameters(tree& et, //find Best params and best BBL
+ const sequenceContainer& sc,
+ vector<stochasticProcess>& spVec,
+ distribution * distr,
+ bool bblFlag,
+ bool isGamma, bool isBetaProbSet,bool isOmegaSet,
+ bool isKappaSet, bool isAlphaSet, bool isBetaSet,
+ const MDOUBLE upperBoundOnAlpha,
+ const MDOUBLE upperBoundOnBeta,
+ const MDOUBLE epsilonAlphaOptimization,
+ const MDOUBLE epsilonKOptimization,
+ const MDOUBLE epsilonLikelihoodImprovment,
+ const int maxBBLIterations,
+ const int maxTotalIterations){
+ //initialization: brent search bounds for each parameter
+ MDOUBLE lowerValueOfParamK = 0;
+ MDOUBLE lowerValueOfParamAlpha = 0.1;
+ MDOUBLE lowerValueOfParamBeta = 0.1;
+ MDOUBLE omegaLowerBoundary = 0.99; // this is to allow brent to reach the exact lower bound value
+ MDOUBLE omegaUpperBoundary = 5.0;
+ MDOUBLE upperValueOfParamK = 5; // changed from 50, Adi S. 2/1/07
+
+ // Seed the search from the model's current parameter values.
+ MDOUBLE initialGuessValueOfParamTr;
+ initialGuessValueOfParamTr = _bestK = static_cast<wYangModel*>(spVec[0].getPijAccelerator()->getReplacementModel())->getK();
+
+ MDOUBLE initialGuessValueOfParamAlpha;
+ if (isGamma) initialGuessValueOfParamAlpha = _bestAlpha = static_cast<generalGammaDistribution*>(distr)->getAlpha();
+ else initialGuessValueOfParamAlpha = _bestAlpha = static_cast<betaOmegaDistribution*>(distr)->getAlpha();
+
+ MDOUBLE initialGuessValueOfParamBeta;
+ if (isGamma) initialGuessValueOfParamBeta = _bestBeta = static_cast<generalGammaDistribution*>(distr)->getBeta();
+ else initialGuessValueOfParamBeta = _bestBeta = static_cast<betaOmegaDistribution*>(distr)->getBeta();
+
+ // Omega / betaProb only exist in the beta(+omega) parameterization.
+ MDOUBLE initialGuessValueOfParamOmega = -1;
+ MDOUBLE initialGuessValueOfParamBetaProb = -1;
+ if (!isGamma) {
+ initialGuessValueOfParamOmega = _bestOmega = static_cast<betaOmegaDistribution*>(distr)->getOmega();
+ initialGuessValueOfParamBetaProb = _bestBetaProb = static_cast<betaOmegaDistribution*>(distr)->getBetaProb();
+ }
+ _bestL = likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(et,sc,spVec,distr); // (fixed: was ';;')
+ MDOUBLE newL = _bestL;
+
+ // Per-round outputs of the individual brent searches.
+ MDOUBLE alphaFound = 0;
+ MDOUBLE kFound = 0;
+ MDOUBLE betaFound = 0;
+ MDOUBLE omegaFound = 0;
+ MDOUBLE betaProbFound = 0;
+ bool changed = false;
+ int i=0;
+ LOG(5,<<endl<<"Beginning optimization of parameters"<<endl<<endl);
+
+ for (i=0; i < maxTotalIterations; ++i) {
+ LOG(5,<<"Iteration Number= " << i <<endl);
+ LOG(5,<<"---------------------"<<endl);
+ cout<<"Iteration number = "<< i <<endl;
+ alphaFound = omegaFound = betaProbFound = kFound = betaFound=0;
+ changed = false;
+//ALPHA (beta or gamma distribution parameter)
+ if (!isAlphaSet){
+ if (isGamma) initialGuessValueOfParamAlpha = static_cast<generalGammaDistribution*>(distr)->getAlpha();
+ else initialGuessValueOfParamAlpha = static_cast<betaOmegaDistribution*>(distr)->getAlpha();
+ newL = -brent(lowerValueOfParamAlpha,
+ initialGuessValueOfParamAlpha,
+ upperBoundOnAlpha,
+ evalParam(et,sc,spVec,-1,distr,isGamma),epsilonAlphaOptimization,&alphaFound);
+
+ LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
+ LOG(5,<<"new L After alpha= " << newL<<endl);
+ LOG(5,<<"new alpha = " <<alphaFound<<endl<<endl);
+
+
+ if (newL > _bestL+epsilonLikelihoodImprovment ) {// update of likelihood ,v and model.
+ _bestL = newL;
+ _bestAlpha = alphaFound;
+ if (isGamma) static_cast<generalGammaDistribution*>(distr)->setAlpha(alphaFound);
+ else static_cast<betaOmegaDistribution*>(distr)->setAlpha(alphaFound);
+ for (int categor = 0; categor < spVec.size();categor++)
+ static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->setW(distr->rates(categor));
+ normalizeMatrices(spVec,distr);
+ changed = true;
+ }
+ }
+//BETA (beta distribution parameter)
+ if (!isBetaSet) {
+ if (isGamma) initialGuessValueOfParamBeta = static_cast<generalGammaDistribution*>(distr)->getBeta();
+ else initialGuessValueOfParamBeta = static_cast<betaOmegaDistribution*>(distr)->getBeta();
+ newL = -brent(lowerValueOfParamBeta,
+ initialGuessValueOfParamBeta,
+ upperBoundOnBeta,
+ evalParam(et,sc,spVec,-2,distr,isGamma),epsilonAlphaOptimization,&betaFound);
+
+ LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
+ LOG(5,<<"new L After beta= " << newL<<endl);
+ LOG(5,<<"new beta = " <<betaFound<<endl<<endl);
+
+ if (newL > _bestL+epsilonLikelihoodImprovment ) {// update of likelihood ,v and model.
+ _bestL = newL;
+ _bestBeta = betaFound;
+ if (isGamma) static_cast<generalGammaDistribution*>(distr)->setBeta(betaFound);
+ else static_cast<betaOmegaDistribution*>(distr)->setBeta(betaFound);
+ for (int categor = 0; categor < spVec.size();categor++)
+ static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->setW(distr->rates(categor));
+ normalizeMatrices(spVec,distr);
+ changed = true;
+ }
+ }
+//K parameter
+ if (!isKappaSet){
+ initialGuessValueOfParamTr = static_cast<wYangModel*>(spVec[0].getPijAccelerator()->getReplacementModel())->getK();
+ newL = -brent(lowerValueOfParamK, //optimaize Tr
+ initialGuessValueOfParamTr,
+ upperValueOfParamK,
+ evalParam(et,sc,spVec,0,distr,isGamma),epsilonKOptimization,&kFound);
+
+ LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
+ LOG(5,<<"new L After kappa= " << newL<<endl);
+ LOG(5,<<"new kappa = " <<kFound<<endl);
+
+ if (newL > _bestL+epsilonLikelihoodImprovment ) {// update of likelihood and model.
+ _bestL = newL;
+ _bestK = kFound;
+ for (int categor = 0; categor < spVec.size();categor++)
+ static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->setK(kFound);
+ normalizeMatrices(spVec,distr);
+ changed = true;
+ }
+ }
+//beta distribution part (betaProb and additional omega)
+ if (isGamma==false && !isBetaProbSet){ //optimize beta probs
+ if (!isOmegaSet){ // optimize omega (M8 or M8b)
+ // (fixed: a local 'MDOUBLE omegaFound;' here shadowed the outer variable)
+ newL = -brent(omegaLowerBoundary,
+ initialGuessValueOfParamOmega,
+ omegaUpperBoundary,
+ evalParam(et,sc,spVec,1,distr,isGamma),0.01,&omegaFound);
+
+ LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
+ LOG(5,<<"new L After additional omega caetgory = " << newL<<endl);
+ LOG(5,<<"new additional omega caetgory = " <<omegaFound<<endl<<endl);
+
+ if (newL > _bestL+epsilonLikelihoodImprovment ) {
+ _bestL = newL;
+ _bestOmega = omegaFound;
+ static_cast<betaOmegaDistribution*>(distr)->setOmega(omegaFound);
+ static_cast<wYangModel*>(spVec[spVec.size()-1].getPijAccelerator()->getReplacementModel())->setW(omegaFound);
+ normalizeMatrices(spVec,distr);
+ changed = true;
+ }
+ }
+ // (fixed: a local 'MDOUBLE betaProbFound;' here shadowed the outer variable)
+ newL = -brent(0.0,initialGuessValueOfParamBetaProb,1.0,
+ evalParam(et,sc,spVec,2,distr,isGamma),0.01,&betaProbFound);
+
+ LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
+ LOG(5,<<"new L After prob(additional omega caetgory)= " << newL<<endl);
+ LOG(5,<<"new prob(additional omega caetgory)= " <<1 - betaProbFound<<endl<<endl);
+ if (newL > _bestL+epsilonLikelihoodImprovment ) {// update of likelihood ,v and model.
+ _bestL = newL;
+ _bestBetaProb = betaProbFound;
+ static_cast<betaOmegaDistribution*>(distr)->setBetaProb(betaProbFound);
+ normalizeMatrices(spVec,distr);
+ changed = true;
+ }
+ }
+
+//BBL
+ if (bblFlag==true) {
+//using epsilonAlphaOptimization as the epsilon for pairwise disatnce here
+ bblEM2codon bbl(et,sc,spVec,distr,NULL,maxBBLIterations,epsilonLikelihoodImprovment,epsilonAlphaOptimization);
+ newL = bbl.getTreeLikelihood();
+
+ LOG(5,<<"current best L= "<<_bestL<<endl<<endl);
+ LOG(5,<<"new L After BL = " << newL<<endl);
+ LOG(5,<<"Tree after this BBL iteration: "<<endl);
+ LOGDO(5,et.output(myLog::LogFile()));
+
+ if (newL > _bestL+epsilonLikelihoodImprovment) {
+ _bestL = newL;
+ changed = true;
+ }
+ }
+
+ // Converged: a full round accepted no parameter change.
+ if (changed==false)
+ break;
+
+ }
+
+ LOG(5,<<endl<<"Finished optimization of parameters"<<endl<<endl);
+
+ if (i==maxTotalIterations) {
+ LOG(5,<<"Too many iterations in function optimizeCodonModelAndBBL. The last optimized parameters are used for the calculations."<<endl<<endl);
+
+ }
+
+}
+
+// evalParam owns the distribution clone made in its constructors; release it.
+// (The NULL guard is redundant -- delete of NULL is a no-op -- but harmless.)
+evalParam::~evalParam(){
+ if (_distr != NULL) delete _distr;
+}
+
+
+// Copy constructor: deep-copies the owned distribution so every evalParam
+// instance has its own clone (brent() receives the functor by value).
+evalParam::evalParam(const evalParam &other): _et(other._et),_sc(other._sc),
+_spVec(other._spVec), _alphaOrKs(other._alphaOrKs),_isGamma(other._isGamma)
+{
+ _distr=other._distr->clone();
+}
+
+
+// brent() target: install 'param' into the parameter selected by _alphaOrKs
+// (-1=alpha, -2=beta, 0=kappa, 1=omega, 2=betaProb), then return minus the
+// log-likelihood, since brent minimizes.
+MDOUBLE evalParam::operator()(MDOUBLE param){
+
+ if (_alphaOrKs==-1) updateAlpha(param);
+ else if (_alphaOrKs==-2) updateBeta(param);
+ else if (_alphaOrKs==0) updateK(param);
+ else if (_alphaOrKs==1) updateOmega(param);
+ else if (_alphaOrKs==2) updateBetaProb(param);
+ MDOUBLE res = likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_spVec,_distr);
+ return -res; //return -log(likelihood).
+}
+
+// Set beta on the owned distribution, push the resulting per-category rates
+// into each stochastic process' omega (w), then renormalize the Q matrices.
+void evalParam::updateBeta(MDOUBLE param){
+ if (_isGamma) static_cast<generalGammaDistribution*>(_distr)->setBeta(param);
+ else static_cast<betaOmegaDistribution*>(_distr)->setBeta(param);
+ for (int categor = 0; categor < _spVec.size();categor++){
+ static_cast<wYangModel*>(_spVec[categor].getPijAccelerator()->getReplacementModel())->setW(_distr->rates(categor));
+
+ }
+ normalizeMatrices(_spVec,_distr);
+}
+// Set alpha on the owned distribution, push the resulting per-category rates
+// into each stochastic process' omega (w), then renormalize the Q matrices.
+void evalParam::updateAlpha(MDOUBLE param){
+ if (_isGamma)static_cast<generalGammaDistribution*>(_distr)->setAlpha(param);
+ else static_cast<betaOmegaDistribution*>(_distr)->setAlpha(param);
+ for (int categor = 0; categor < _spVec.size();categor++){
+ static_cast<wYangModel*>(_spVec[categor].getPijAccelerator()->getReplacementModel())->setW(_distr->rates(categor));
+
+ }
+ normalizeMatrices(_spVec,_distr);
+}
+
+// Set the same kappa (K) on every category's replacement model, then
+// renormalize the Q matrices.
+void evalParam::updateK(MDOUBLE param){
+ for (int categor = 0; categor < _spVec.size();categor++){
+ static_cast<wYangModel*>(_spVec[categor].getPijAccelerator()->getReplacementModel())->setK(param);
+ }
+ normalizeMatrices(_spVec,_distr);
+}
+
+
+// Set omega (w) on the LAST stochastic process only -- the extra omega
+// category appended in the beta+omega (M8-style) parameterization.
+void evalParam::updateOmega(MDOUBLE param){
+ int size = _spVec.size();
+ static_cast<wYangModel*>(_spVec[size-1].getPijAccelerator()->getReplacementModel())->setW(param);
+ normalizeMatrices(_spVec,_distr);
+}
+
+// Set the probability mass of the beta part (vs. the extra omega category)
+// on the owned betaOmega distribution, then renormalize.
+void evalParam::updateBetaProb(MDOUBLE param){
+ static_cast<betaOmegaDistribution*>(_distr)->setBetaProb(param);
+ normalizeMatrices(_spVec,_distr);
+}
diff --git a/libs/phylogeny/bestAlphaAndK.h b/libs/phylogeny/bestAlphaAndK.h
new file mode 100644
index 0000000..86534d5
--- /dev/null
+++ b/libs/phylogeny/bestAlphaAndK.h
@@ -0,0 +1,84 @@
+#ifndef ___BEST_ALPHA_AND_K
+#define ___BEST_ALPHA_AND_K
+
+#include "definitions.h"
+#include "tree.h"
+#include "likelihoodComputation.h"
+#include "likelihoodComputation2Codon.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "generalGammaDistribution.h"
+#include "logFile.h"
+#include "wYangModel.h"
+#include "bblEM2codon.h"
+#include "computeUpAlg.h"
+#include "numRec.h"
+
+
+
+//evaluate best parameters
+// All the work happens in the constructor (coordinate-ascent over alpha,
+// beta, kappa, omega, betaProb and optionally branch lengths); the getters
+// below expose the optimum found.
+// Fix vs. original: dropped the top-level 'const' on the by-value return
+// types of the getters -- it is meaningless on a value return and only
+// triggers -Wignored-qualifiers. Callers are unaffected.
+class optimizeSelectonParameters {
+public:
+ explicit optimizeSelectonParameters(tree& et,
+ const sequenceContainer& sc,
+ vector<stochasticProcess>& spVec,
+ distribution * distr,
+ bool bblFlag = true,
+ bool isGamma = true, bool isBetaProbSet=false,bool isOmegaSet = false,
+ bool isKappaSet=false, bool isAlphaSet=false, bool isBetaSet=false,
+ const MDOUBLE upperBoundOnAlpha = 3.0, // changed from 20, Adi S. 2/7/07
+ const MDOUBLE upperBoundOnBeta = 3.0, // changed from 20, Adi S. 2/7/07
+ const MDOUBLE epsilonAlphaOptimization= 0.01,
+ const MDOUBLE epsilonKOptimization=0.01,
+ const MDOUBLE epsilonLikelihoodImprovment= 0.1,
+ const int maxBBLIterations=20,
+ const int maxTotalIterations=20);
+ MDOUBLE getBestAlpha() const{return _bestAlpha;}
+ MDOUBLE getBestBeta() const{return _bestBeta;}
+ MDOUBLE getBestL() const {return _bestL;}
+ MDOUBLE getBestK() const {return _bestK;}
+ MDOUBLE getBestOmega() const {return _bestOmega;}
+ MDOUBLE getBestBetaProb() const {return _bestBetaProb;}
+private:
+ MDOUBLE _bestAlpha;
+ MDOUBLE _bestL;
+ MDOUBLE _bestK;
+ MDOUBLE _bestBeta;
+ MDOUBLE _bestOmega;
+ MDOUBLE _bestBetaProb;
+};
+
+
+//The functor to eval likelihood given a change in a parameters
+// _distr is OWNED: cloned in both constructors and deleted in the destructor.
+// The implicitly-declared copy-assignment operator is unusable because of the
+// reference members (_et, _sc), so the owning raw pointer cannot be
+// double-deleted through assignment; copies go through the cloning copy ctor.
+class evalParam{
+public:
+ explicit evalParam(const tree& et,
+ const sequenceContainer& sc,
+ vector<stochasticProcess> spVec,
+ int alphaOrKs,
+ const distribution * in_distr,
+ bool isGamma)
+ : _et(et),_sc(sc),_spVec(spVec),_alphaOrKs(alphaOrKs),_isGamma(isGamma){_distr=in_distr->clone();};
+ MDOUBLE operator()(MDOUBLE param); // returns -log(likelihood); see .cpp
+
+ virtual ~evalParam();
+ evalParam(const evalParam &other); // deep-copies _distr
+ void updateAlpha(MDOUBLE param);
+ void updateK(MDOUBLE param);
+ void updateBeta(MDOUBLE param);
+ void updateOmega(MDOUBLE param);
+ void updateBetaProb(MDOUBLE param);
+private:
+ const tree& _et;
+ const sequenceContainer& _sc;
+
+ vector<stochasticProcess> _spVec; // by value: each copy mutates its own
+ int _alphaOrKs; //flag to eval different parameters: -1=alpha, -2=beta, 0=kappa, 1=omega, 2=betaProb
+ distribution *_distr; // owned clone (see class comment)
+ bool _isGamma; //gamma = true/ beta=false
+
+};
+
+#endif
+
+
diff --git a/libs/phylogeny/bestAlphaAndNu.cpp b/libs/phylogeny/bestAlphaAndNu.cpp
new file mode 100644
index 0000000..3292c67
--- /dev/null
+++ b/libs/phylogeny/bestAlphaAndNu.cpp
@@ -0,0 +1,177 @@
+// $Id: bestAlphaAndNu.cpp 1975 2007-04-22 13:47:28Z privmane $
+#include <iostream>
+using namespace std;
+
+#include "bestAlphaAndNu.h"
+
+// ******************
+// * USSRV *
+// ******************
+
+// Optimize the USSRV F parameter on a fixed tree with brent() over
+// [0, upperBoundOnF], starting from the model's current F.
+// Stores and returns the best log-likelihood (brent result negated back).
+MDOUBLE bestFFixedTreeUSSRV::operator()(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel& model,
+ const Vdouble * weights,
+ const MDOUBLE upperBoundOnF,
+ const MDOUBLE epsilonFOptimization){
+
+ MDOUBLE bestF=0;
+ const MDOUBLE cx=upperBoundOnF;// left, middle, right limit on alpha
+ const MDOUBLE bx=model.getF(); // initial guess = current model value
+ const MDOUBLE ax=0.0;
+ LOG(5,<<"**** Optimizing F **** " << endl<< "bestFFixedTreeSSRV::operator() bx is :" << bx << endl);
+ LOG(9,<<"ax is :" << ax << " cx is :" << cx << endl);
+ _bestL = -brent(ax,bx,cx,
+ C_evalFUSSRV(et,sc,baseSc,&model,weights),
+ epsilonFOptimization,
+ &bestF);
+ setF(bestF,model); // commit the optimum to the model
+ _bestF= bestF;
+ return _bestL;
+}
+
+// Optimize the USSRV alpha on a fixed tree with brent() over
+// [0, upperBoundOnAlpha], starting from the model's current alpha.
+// Stores and returns the best log-likelihood.
+MDOUBLE bestAlphaFixedTreeUSSRV::operator()(const tree& et, //findBestAlphaFixedTree
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel& model,
+ const Vdouble * weights,
+ const MDOUBLE upperBoundOnAlpha,
+ const MDOUBLE epsilonAlphaOptimization){
+
+ MDOUBLE bestA=0;
+ const MDOUBLE cx=upperBoundOnAlpha;// left, middle, right limit on alpha
+ const MDOUBLE bx=model.getAlpha(); // initial guess = current model value
+ const MDOUBLE ax=0.0;
+ LOG(5,<<"**** Optimizing Alpha **** " << endl<< "bestAlphaFixedTreeSSRV::operator() bx is :" << bx << endl);
+ _bestL = -brent(ax,bx,cx,
+ C_evalAlphaUSSRV(et,sc,baseSc,&model,weights),
+ epsilonAlphaOptimization,
+ &bestA);
+ setAlpha(bestA,model); // commit the optimum to the model
+ _bestAlpha= bestA;
+ return _bestL;
+}
+
+// Alpha is fixed
+// Alpha is fixed
+// Optimize the USSRV nu on a fixed tree with brent() over
+// [0, upperBoundOnNu], starting from the model's current nu.
+MDOUBLE bestNuFixedTreeUSSRV::operator()(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel& model,
+ const Vdouble * weights,
+ const MDOUBLE upperBoundOnNu,
+ const MDOUBLE epsilonNuOptimization){
+
+
+ MDOUBLE bestN=0;
+ // define the Nu bounds
+ const MDOUBLE cx=upperBoundOnNu;// left, midle, right limit on alpha
+ const MDOUBLE bx= model.getNu(); // initial guess = current model value
+ const MDOUBLE ax=0.0;
+ LOG(5,<<"**** Optimizing Nu **** " << endl << "bestNuFixedTreeSSRV::operator() bx is : " << bx << endl);
+ _bestL = -brent(ax,bx,cx, C_evalNuUSSRV(et,sc,baseSc,&model,weights), epsilonNuOptimization, &bestN);
+ setNu(bestN,model); // commit the optimum to the model
+ _bestNu= bestN;
+ return _bestL;
+}
+
+
+// ******************
+// * SSRV *
+// ******************
+
+// Optimize the SSRV gamma alpha on a fixed tree with brent() over
+// [lowerBoundOnAlpha, upperBoundOnAlpha], starting from the current alpha
+// read out of the SSRV replacement model's gamma distribution.
+MDOUBLE bestAlphaFixedTreeSSRV::operator()(const tree& et, //findBestAlphaFixedTree
+ const sequenceContainer& sc, stochasticProcessSSRV& ssrvSp, const Vdouble * weights,
+ const MDOUBLE lowerBoundOnAlpha, const MDOUBLE upperBoundOnAlpha, const MDOUBLE epsilonAlphaOptimization){
+
+ MDOUBLE bestA=0;
+ const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
+ gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
+ const MDOUBLE bx=gammaDist->getAlpha(); // initial guess = current value
+ const MDOUBLE ax=lowerBoundOnAlpha;
+ LOG(5,<<"**** Optimizing Alpha **** " << endl<< "bestAlphaFixedTreeSSRV::operator() bx is :" << bx << endl);
+ _bestL = -brent(ax,bx,cx,
+ C_evalAlphaSSRV(et,sc,ssrvSp,weights), epsilonAlphaOptimization, &bestA);
+
+ setAlpha(bestA,ssrvSp); // commit the optimum (also updates Q)
+ _bestAlpha= bestA;
+ return _bestL;
+}
+
+// Alpha is fixed
+// Alpha is fixed
+// Optimize the SSRV nu (rate-of-rate) on a fixed tree with brent() over
+// [lowerBoundOnNu, upperBoundOnNu], starting from the current value.
+MDOUBLE bestNuFixedTreeSSRV::operator()(const tree& et, const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp, const Vdouble * weights, const MDOUBLE lowerBoundOnNu, const MDOUBLE upperBoundOnNu,
+ const MDOUBLE epsilonNuOptimization) {
+
+ MDOUBLE bestN=0;
+ // define the Nu bounds
+ const MDOUBLE cx=upperBoundOnNu;// left, middle, right limit on alpha
+ const MDOUBLE bx= static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel())->getRateOfRate();
+ const MDOUBLE ax=lowerBoundOnNu;
+ LOG(5,<<"**** Optimizing Nu **** " << endl << "bestNuFixedTreeSSRV::operator() bx is : " << bx << endl);
+ _bestL = -brent(ax,bx,cx, C_evalNuSSRV(et,sc,ssrvSp,weights), epsilonNuOptimization, &bestN);
+
+ setNu(bestN,ssrvSp); // commit the optimum to the replacement model
+ _bestNu= bestN;
+ return _bestL;
+}
+
+
+// Alternate brent() optimization of the Tamura92 TrTv and theta parameters
+// on a fixed tree, up to maxTotalIterations rounds. A round is accepted if
+// the likelihood improved by more than epsilonLikelihoodImprovment;
+// otherwise the previous values are restored and the loop stops.
+// NOTE(review): the likelihood obtained after the TrTv brent is overwritten
+// by the theta brent, so acceptance is judged on the combined round only.
+MDOUBLE bestTamura92ParamFixedTreeSSRV::operator()(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble * weights/*= NULL */,
+ const int maxTotalIterations /* = 5 */,
+ const MDOUBLE epsilonLikelihoodImprovment /* = 0.05 */,
+ const MDOUBLE lowerBoundOnTrTv /* = 0.0 */,
+ const MDOUBLE upperBoundOnTrTv /* = 10.0 */,
+ const MDOUBLE lowerBoundOnTheta /* = 0.0 */,
+ const MDOUBLE upperBoundOnTheta /* = 1.0 */,
+ const MDOUBLE epsilonTrTvOptimization /* = 0.01 */,
+ const MDOUBLE epsilonThetaOptimization /* = 0.01 */){
+
+ LOG(5,<<"Starting bestTamura92ParamFixedTreeSSRV::operator() : find Best TrTv and theta"<<endl);
+ MDOUBLE oldL = VERYSMALL;
+ MDOUBLE newL = VERYSMALL;
+
+ // first guess for the parameters
+ MDOUBLE prevTrTv = static_cast<tamura92*>(static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel())->getBaseRM())->getTrTv();
+ MDOUBLE prevTheta = static_cast<tamura92*>(static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel())->getBaseRM())->getTheta();
+
+ for (int i=0; i < maxTotalIterations; ++i) {
+ // optimize TrTv
+ newL = -brent(lowerBoundOnTrTv, prevTrTv, upperBoundOnTrTv,
+ C_evalTrTvSSRV(et,sc,ssrvSp,weights),
+ epsilonTrTvOptimization,
+ &_bestTrTv);
+ setTrTv(_bestTrTv,ssrvSp);
+
+ // optimize Theta
+ newL = -brent(lowerBoundOnTheta, prevTheta, upperBoundOnTheta,
+ C_evalThetaSSRV(et,sc,ssrvSp,weights),
+ epsilonThetaOptimization,
+ &_bestTheta);
+ setTheta(_bestTheta,ssrvSp);
+
+ // check for improvement in the likelihood
+ if (newL > oldL+epsilonLikelihoodImprovment) {
+ prevTrTv = _bestTrTv;
+ prevTheta = _bestTheta;
+ oldL = newL;
+ _bestL = newL;
+ } else {
+ // below the improvement threshold: keep the better of the two and stop
+ if (newL>oldL) {
+ _bestL = newL;
+ } else {
+ LOG(5,<<"bestTamura92ParamFixedTreeSSRV::operator() likelihood went down!"<<endl<<"oldL = "<< oldL <<" newL= "<<newL<<endl);
+ _bestL = oldL;
+ _bestTrTv = prevTrTv;
+ _bestTheta = prevTheta;
+ setTrTvAndTheta(prevTrTv,prevTheta,ssrvSp); // roll the model back
+ }
+ break;
+ }
+ }
+ return _bestL;
+}
diff --git a/libs/phylogeny/bestAlphaAndNu.h b/libs/phylogeny/bestAlphaAndNu.h
new file mode 100644
index 0000000..6369ad1
--- /dev/null
+++ b/libs/phylogeny/bestAlphaAndNu.h
@@ -0,0 +1,215 @@
+// $Id: bestAlphaAndNu.h 1975 2007-04-22 13:47:28Z privmane $
+#ifndef ___BEST_ALPHA_AND_NU
+#define ___BEST_ALPHA_AND_NU
+
+#include "definitions.h"
+
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "gammaDistribution.h"
+#include "tree.h"
+#include "replacementModelSSRV.h"
+#include "tamura92.h"
+#include "stochasticProcessSSRV.h"
+#include "C_evalParamUSSRV.h"
+#include "bestAlpha.h"
+#include "numRec.h"
+#include "bblEM.h"
+#include "logFile.h"
+
+// ******************
+// * USSRV *
+// ******************
+
+// Nu is fixed. The tree is fixed
+// Nu is fixed. The tree is fixed
+// Optimizes the USSRV alpha with brent(); see the .cpp for operator().
+class bestAlphaFixedTreeUSSRV {
+public:
+ explicit bestAlphaFixedTreeUSSRV() {}
+ MDOUBLE operator()(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel& model,
+ const Vdouble * weights=NULL,
+ const MDOUBLE upperBoundOnAlpha = 15,
+ const MDOUBLE epsilonAlphaOptimization = 0.01);
+ MDOUBLE getBestAlpha() {return _bestAlpha;}
+ MDOUBLE getBestL() {return _bestL;}
+
+ // Write alpha back into the model (used by operator() to commit the optimum).
+ void setAlpha(MDOUBLE alpha, ussrvModel& model) const
+ {
+ model.updateAlpha(alpha);
+ }
+
+ void setBestL(MDOUBLE bestL) { _bestL = bestL;}
+
+private:
+ MDOUBLE _bestAlpha;
+ MDOUBLE _bestL;
+};
+
+// Alpha is fixed
+// Optimizes the USSRV nu with brent(); see the .cpp for operator().
+class bestNuFixedTreeUSSRV {
+public:
+ explicit bestNuFixedTreeUSSRV(){}
+ MDOUBLE operator()(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel& model,
+ const Vdouble * weights=NULL,
+ const MDOUBLE upperBoundOnNu = 15,
+ const MDOUBLE epsilonNuOptimization = 0.01);
+ MDOUBLE getBestNu() {return _bestNu;}
+ MDOUBLE getBestL() {return _bestL;}
+ // Write nu back into the model (used by operator() to commit the optimum).
+ void setNu(MDOUBLE nu, ussrvModel& model) const
+ {
+ model.updateNu(nu);
+ }
+ void setBestL(MDOUBLE bestL) { _bestL = bestL;}
+
+private:
+ MDOUBLE _bestNu;
+ MDOUBLE _bestL;
+};
+
+// Optimizes the USSRV F parameter (a probability, so F is kept in [0,1])
+// with brent(); see the .cpp for operator().
+class bestFFixedTreeUSSRV {
+public:
+ explicit bestFFixedTreeUSSRV() {}
+ MDOUBLE operator()(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ ussrvModel& model,
+ const Vdouble * weights=NULL,
+ const MDOUBLE upperBoundOnF = 1,
+ const MDOUBLE epsilonFOptimization = 0.01);
+ MDOUBLE getBestF() {return _bestF;}
+ MDOUBLE getBestL() {return _bestL;}
+ // Write F back into the model; out-of-range values are logged and ignored.
+ void setF(MDOUBLE f, ussrvModel& model) const
+ {
+ if ( (f>1) || (f < 0))
+ {
+ LOG(5,<<"bestFFixedTreeSSRV:setF, f must be between 0 to 1. f = " << f << endl);
+ return;
+ }
+ model.updateF(f);
+ }
+ void setBestL(MDOUBLE bestL) { _bestL = bestL;}
+
+private:
+ MDOUBLE _bestF;
+ MDOUBLE _bestL;
+};
+
+
+// ******************
+// * SSRV *
+// ******************
+
+// Nu is fixed. The tree is fixed
+// Optimizes the SSRV gamma alpha with brent(); see the .cpp for operator().
+class bestAlphaFixedTreeSSRV {
+public:
+ explicit bestAlphaFixedTreeSSRV() {}
+ MDOUBLE operator()(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble * weights=NULL,
+ const MDOUBLE lowerBoundOnAlpha = 0,
+ const MDOUBLE upperBoundOnAlpha = 10,
+ const MDOUBLE epsilonAlphaOptimization = 0.01);
+ MDOUBLE getBestAlpha() {return _bestAlpha;}
+ MDOUBLE getBestL() {return _bestL;}
+
+ // Write alpha into the SSRV process' gamma distribution and rebuild Q.
+ void setAlpha(MDOUBLE alpha, stochasticProcessSSRV& ssrvSp) const
+ {
+ if (alpha<0)
+ errorMsg::reportError("bestAlphaFixedTreeSSRV::setAlpha, alpha is < 0 ");
+
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
+ gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
+ gammaDist->setAlpha(alpha);
+ pMulRM->updateQ(); // alpha changes the rates, so Q must be rebuilt
+ }
+
+ void setBestL(MDOUBLE bestL) { _bestL = bestL;}
+
+private:
+ MDOUBLE _bestAlpha;
+ MDOUBLE _bestL;
+};
+
+// Alpha is fixed
+// Optimizes the SSRV nu (rate-of-rate) with brent(); see the .cpp for
+// operator().
+class bestNuFixedTreeSSRV {
+public:
+ explicit bestNuFixedTreeSSRV(){}
+ MDOUBLE operator()(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble * weights=NULL,
+ const MDOUBLE lowerBoundOnNu = 0,
+ const MDOUBLE upperBoundOnNu = 15,
+ const MDOUBLE epsilonNuOptimization = 0.01);
+ MDOUBLE getBestNu() {return _bestNu;}
+ MDOUBLE getBestL() {return _bestL;}
+ // Write nu into the SSRV replacement model (rejects negative values).
+ void setNu(MDOUBLE nu, stochasticProcessSSRV& ssrvSp) const
+ {
+ if (nu<0)
+ errorMsg::reportError("ussrvModel::updateNu , nu is < 0");
+
+ static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel())->setRateOfRate(nu);
+ }
+
+ void setBestL(MDOUBLE bestL) { _bestL = bestL;}
+
+private:
+ MDOUBLE _bestNu;
+ MDOUBLE _bestL;
+};
+
+
+// Alternating optimization of the Tamura92 TrTv and theta parameters inside
+// an SSRV process, on a fixed tree; see the .cpp for operator().
+class bestTamura92ParamFixedTreeSSRV {
+public:
+ explicit bestTamura92ParamFixedTreeSSRV(){}
+ MDOUBLE operator()(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble * weights=NULL,
+ const int maxTotalIterations = 5,
+ const MDOUBLE epsilonLikelihoodImprovment = 0.05,
+ const MDOUBLE lowerBoundOnTrTv = 0.0,
+ const MDOUBLE upperBoundOnTrTv = 10.0,
+ const MDOUBLE lowerBoundOnTheta = 0.0,
+ const MDOUBLE upperBoundOnTheta = 1.0,
+ const MDOUBLE epsilonTrTvOptimization = 0.01,
+ const MDOUBLE epsilonThetaOptimization = 0.01);
+ MDOUBLE getBestTrTv() {return _bestTrTv;}
+ MDOUBLE getBestTheta() {return _bestTheta;}
+ MDOUBLE getBestL() {return _bestL;}
+ // TrTv affects only the rate matrix, so just rebuild Q.
+ void setTrTv(MDOUBLE TrTv, stochasticProcessSSRV& ssrvSp) const {
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
+ static_cast<tamura92*>(pMulRM->getBaseRM())->changeTrTv(TrTv);
+ pMulRM->updateQ();
+ }
+
+ // Theta changes the base frequencies, so refresh those before Q.
+ void setTheta(MDOUBLE theta, stochasticProcessSSRV& ssrvSp) const {
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
+ static_cast<tamura92*>(pMulRM->getBaseRM())->changeTheta(theta);
+ pMulRM->updateFreq();
+ pMulRM->updateQ();
+ }
+
+ // Set both at once with a single frequency/Q rebuild (used for rollback).
+ void setTrTvAndTheta(MDOUBLE TrTv, MDOUBLE theta, stochasticProcessSSRV& ssrvSp) {
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
+ tamura92* tamuraRM = static_cast<tamura92*>(pMulRM->getBaseRM());
+ tamuraRM->changeTrTv(TrTv);
+ tamuraRM->changeTheta(theta);
+ pMulRM->updateFreq();
+ pMulRM->updateQ();
+ }
+
+private:
+ MDOUBLE _bestTrTv;
+ MDOUBLE _bestTheta;
+ MDOUBLE _bestL;
+};
+
+
+#endif // ___BEST_ALPHA_AND_NU
diff --git a/libs/phylogeny/bestAlphaManyTrees.cpp b/libs/phylogeny/bestAlphaManyTrees.cpp
new file mode 100644
index 0000000..1aa771d
--- /dev/null
+++ b/libs/phylogeny/bestAlphaManyTrees.cpp
@@ -0,0 +1,270 @@
+// $Id: bestAlphaManyTrees.cpp 962 2006-11-07 15:13:34Z privmane $
+
+// version 1.00
+// last modified 3 Nov 2002
+
+#include "bestAlphaManyTrees.h"
+#include "bestAlpha.h"
+#include "numRec.h"
+#include "bblEMProportional.h"
+#include "bblEMSeperate.h"
+#include "logFile.h"
+#include <iostream>
+using namespace std;
+
+#ifndef VERBOS
+#define VERBOS
+#endif
+
+
+// Proportional analysis with ONE shared tree and one shared alpha:
+// alternate bblEM branch-length optimization with a brent() search over
+// alpha (the optimized alpha is installed into every stochastic process),
+// until the likelihood gain drops below 0.01 or the iteration cap is hit.
+// Results are returned through the bestAlpha / likelihoodScore out-params.
+void bestAlpha::optimizeAlphaNG_EM_PROP(tree& et,
+ vector<sequenceContainer>& sc,
+ vector<stochasticProcess>& sp,
+ const vector<Vdouble *> * weights,
+ MDOUBLE & bestAlpha,
+ MDOUBLE & likelihoodScore,
+ const int maxIterations,
+ const MDOUBLE epsilon){
+
+ //LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
+ MDOUBLE oldL = VERYSMALL;
+ MDOUBLE ax,bx,cx; // left, midle, right limit on alpha
+ bx=1.5; // the limits are becoming more narrow with time.
+ ax=0;
+ cx=5.0;
+ MDOUBLE tol=0.01f;
+ MDOUBLE bestA=0;
+ int i;
+ const int maxIterationsThisF = 50;
+ for (i=0; i < maxIterationsThisF; ++i) {
+
+ // branch-length optimization with the current alpha
+ bblEMProportional bblEMprop1(et,sc,sp,weights,maxIterations,epsilon);
+ MDOUBLE newL = bblEMprop1.getTreeLikelihood();
+
+#ifdef VERBOS
+ LOG(5,<<"Before optimizing alpha, L = "<<newL<<endl);
+#endif
+
+ MDOUBLE likeAfterAlphaOpt = -brent(ax,bx,cx, // NEW MINUS. CHECK
+ C_evalAlphaManyTrees(et,sc,sp,weights),
+ tol,
+ &bestA); // THIS FUNCTION CHANGE SP, BUT YET ONE HAVE TO INSERT THE BEST ALPHAS.
+ // install the optimized alpha in every stochastic process
+ for (int z=0; z < sp.size();++z) {
+ (static_cast<gammaDistribution*>(sp[z].distr()))->setAlpha(bestA);
+ }
+
+#ifdef VERBOS
+ LOG(5,<<"After optimizing alpha, L = "<<likeAfterAlphaOpt<<endl);
+ LOG(5,<<" best A = " << bestA<<endl);
+#endif
+ newL = likeAfterAlphaOpt;
+
+
+
+ // keep iterating only while the gain exceeds 0.01
+ if (newL > oldL+0.01) {
+ oldL = newL;
+ }
+ else {
+ if (newL > oldL) {
+ likelihoodScore = newL;
+ bestAlpha= bestA;
+ return;
+ }
+ else {
+ likelihoodScore = oldL;
+ bestAlpha= bestA;
+ return;
+ }
+ }
+ }
+ if (i == maxIterationsThisF) errorMsg::reportError(" to many iteration in function optimizeBranchLength");
+}
+
+/*
+void findBestAlphaManyTrees::findBestAlphaFixedManyTrees(const vector<tree>& et,
+ vector<positionInfo>& pi,
+ const VVdouble * weights) {
+ //LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
+ MDOUBLE bestA=0;
+ checkAllocation();
+ MDOUBLE ax,bx,cx; // left, midle, right limit on alpha
+ MDOUBLE tol;
+ ax=0;bx=1.5;cx=2;
+ tol=0.01f;
+ _bestL = brent(ax,bx,cx,
+ C_evalAlphaManyTrees(et,_pi,weights),
+ tol,
+ &bestA);
+ _bestAlpha= bestA;
+}
+
+*/
+
+// Separate analysis (one tree per dataset) with a single shared gamma alpha:
+// alternate bblEMSeperate branch-length optimization with a brent() search
+// over alpha, until the likelihood gain drops below 0.01 or 50 iterations.
+// Results are returned through the bestAlpha / likelihoodScore out-params.
+// Bug fix vs. the original: the install loop wrote sp[0] on EVERY iteration
+// (and its loop variable shadowed the outer 'i'), so only the first
+// stochastic process ever received the optimized alpha. It now installs the
+// alpha into each sp[z], matching optimizeAlphaNG_EM_PROP above.
+void bestAlpha::optimizeAlphaNG_EM_SEP(
+ vector<tree>& et,
+ vector<sequenceContainer>& sc,
+ vector<stochasticProcess>& sp,
+ const vector<Vdouble *> * weights,
+ MDOUBLE & bestAlpha,
+ MDOUBLE & likelihoodScore,
+ const int maxIterations,
+ const MDOUBLE epsilon) {
+ // SEPERATE ANALYSIS, 1 GAMMA
+ //LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
+ MDOUBLE oldL = VERYSMALL;
+ MDOUBLE newL = VERYSMALL;
+ MDOUBLE ax,bx,cx; // left, midle, right limit on alpha
+ bx=1.5; // the limits are becoming more narrow with time.
+ ax=0;
+ cx=5.0;
+ MDOUBLE tol=0.01f;
+ MDOUBLE bestA=0;
+ const int maxIterationsThisF = 50;
+ for (int i=0; i < maxIterationsThisF; ++i) {
+ newL=0;
+ LOG(3,<<"starting iteration "<<i<<endl);
+ // branch-length optimization of every tree with the current alpha
+ bblEMSeperate bblEMsep1(et,
+ sc,
+ sp,
+ weights,
+ maxIterations,
+ epsilon);
+ newL =bblEMsep1.getTreeLikelihood();
+#ifdef VERBOS
+ LOG(5,<<"Before optimizing alpha, L = "<<newL<<endl);
+#endif
+ //MDOUBLE alphaB4optimizing = (static_cast<gammaDistribution*>(sp[0].distr()))->getAlpha();
+ MDOUBLE likeAfterAlphaOpt = -brent(ax,bx,cx, // NEW MINUS - CHECK!
+ C_evalAlphaManyTreesSep(et,sc,sp,weights),
+ tol,
+ &bestA);
+
+ if (likeAfterAlphaOpt>newL) {
+ // install the shared optimized alpha into EVERY stochastic process
+ // (fixed: the original wrote sp[0] on each pass of this loop)
+ for (int z=0; z < sp.size();++z) {
+ (static_cast<gammaDistribution*>(sp[z].distr()))->setAlpha(bestA);
+ }
+ newL = likeAfterAlphaOpt;
+ }
+#ifdef VERBOS
+ LOG(5,<<"After optimizing alpha, L = "<<newL<<endl);
+#endif
+ // keep iterating only while the gain exceeds 0.01
+ if (newL > oldL+0.01) {
+ oldL = newL;
+ }
+ else {
+ if (newL > oldL) {
+ likelihoodScore = newL;
+ bestAlpha= bestA;
+ return;
+ }
+ else {
+ likelihoodScore = oldL;
+ bestAlpha= bestA;
+ return;
+ }
+ }
+ }
+ errorMsg::reportError(" to many iteration in function optimizeBranchLength");
+}
+
+//==================== optimizing n alphas ==============================
+
+// Proportional analysis with one shared tree but a SEPARATE alpha per
+// dataset: alternate bblEMProportional branch-length optimization with a
+// per-dataset bestAlphaFixedTree search; the new alphas are kept only if the
+// summed likelihood improves. Stops when the gain drops below 0.01 or after
+// 50 rounds. Results go to the bestAlphas / likelihoodScore out-params.
+void bestAlpha::optimizeAlphaNG_EM_PROP_n_alpha(tree& et,
+ vector<sequenceContainer>& sc,
+ vector<stochasticProcess>& sp,
+ const vector<Vdouble *> * weights,
+ vector<MDOUBLE> & bestAlphas,
+ MDOUBLE & likelihoodScore,
+ const int maxIterations,
+ const MDOUBLE epsilon){
+
+ //LOG(5,<<" 1. bestAlpha::findBestAlpha"<<endl);
+ MDOUBLE oldL = VERYSMALL;
+ MDOUBLE newL = VERYSMALL;
+ MDOUBLE ax,bx,cx; // left, midle, right limit on alpha
+ bx=1.5; // the limits are becoming more narrow with time.
+ ax=0;
+ cx=5.0;
+ vector<MDOUBLE> bestAs= bestAlphas; // best accepted alphas so far
+ vector<MDOUBLE> newAlphas(sc.size(),0); // candidates from this round
+ int i;
+ const int maxIterationsThisF = 50;
+ for (i=0; i < maxIterationsThisF; ++i) {
+#ifdef VERBOS
+ LOG(5,<<" ============================ optimizing bbl (fixed alphas) ================= \n");
+#endif
+ newL=0;
+ bblEMProportional bblem1(et,sc,sp,weights,maxIterations,epsilon);
+ MDOUBLE tmpX =bblem1.getTreeLikelihood();
+
+#ifdef VERBOS
+ LOG(5,<<"likelihood of trees (sum)= "<<tmpX<<endl);
+#endif
+ newL =tmpX;
+#ifdef VERBOS
+ LOG(5,<<"Before optimizing alpha, L = "<<newL<<endl);
+ LOG(5,<<" ============================ optimizing alphas ================= \n");
+#endif
+ // optimize each dataset's alpha independently on the shared tree,
+ // accumulating the summed likelihood
+ const MDOUBLE upperBoundOnAlpha = 5;
+ MDOUBLE likeAfterAlphaOpt = 0;
+ for (int treeNumber =0; treeNumber<sc.size();++treeNumber) {
+ bestAlphaFixedTree bestAlphaFixedTree1(et,
+ sc[treeNumber],
+ sp[treeNumber],
+ weights?(*weights)[treeNumber]:NULL,
+ upperBoundOnAlpha,
+ epsilon);
+ MDOUBLE tmpX = bestAlphaFixedTree1.getBestL();
+#ifdef VERBOS
+ LOG(5,<<"likelihood of tree "<<treeNumber<<" = "<<tmpX<<endl);
+#endif
+ newAlphas[treeNumber] = bestAlphaFixedTree1.getBestAlpha();
+#ifdef VERBOS
+ LOG(5,<<" best alpha tree number: "<<treeNumber<<" = "<<newAlphas[treeNumber]<<endl);
+#endif
+ likeAfterAlphaOpt +=tmpX;
+ }
+
+
+ // accept the new alphas only if the summed likelihood improved
+ if (likeAfterAlphaOpt>newL) {
+ for (int z=0; z < sp.size();++z) {
+ (static_cast<gammaDistribution*>(sp[z].distr()))->setAlpha(newAlphas[z]);
+ }
+ newL = likeAfterAlphaOpt;
+ bestAs = newAlphas;
+ }
+
+ #ifdef VERBOS
+ LOG(5,<<"After optimizing alpha, L = "<<newL<<endl);
+ #endif
+
+ // keep iterating only while the gain exceeds 0.01
+ if (newL > oldL+0.01) {
+ oldL = newL;
+ }
+ else {
+ if (newL > oldL) {
+ likelihoodScore = newL;
+ bestAlphas= bestAs;
+ return;
+ }
+ else {
+ likelihoodScore = oldL;
+ bestAlphas= bestAs;
+ return;
+ }
+ }
+ }
+ if (i == maxIterationsThisF) {
+ errorMsg::reportError(" to many iteration in function optimizeBranchLength");
+ }
+}
+
+ //// CHECK:
+ //MDOUBLE check_sum=0;
+ //for (int k=0; k < sp.size(); ++k) {
+ // MDOUBLE check = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(et,sc[k],sp[k]);
+ // LOG(5,<<" CHECK = "<< check<<endl);
+ // check_sum+=check;
+ //}
+ //LOG(5,<<" check-sum = "<<check_sum<<endl);
+ //// END CHECK
diff --git a/libs/phylogeny/bestAlphaManyTrees.h b/libs/phylogeny/bestAlphaManyTrees.h
new file mode 100644
index 0000000..050645c
--- /dev/null
+++ b/libs/phylogeny/bestAlphaManyTrees.h
@@ -0,0 +1,127 @@
+// $Id: bestAlphaManyTrees.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___BEST_ALPHA_MANY_TREES
+#define ___BEST_ALPHA_MANY_TREES
+
+#include "definitions.h"
+#include "computePijComponent.h"
+#include "sequenceContainer.h"
+#include "bblEM.h"
+#include "gammaDistribution.h"
+#include "likelihoodComputation.h"
+#include "logFile.h"
+
+using namespace likelihoodComputation;
+
+//#define VERBOS
+// Optimizers for the gamma shape parameter (alpha) over SEVERAL data sets
+// at once. Each function performs an EM-style alternation (bblEM for branch
+// lengths, 1-D search for alpha) and reports the best alpha(s) and the
+// likelihood through the output reference parameters.
+namespace bestAlpha {
+/*	void optimizeAlpha1G_EM(	tree& et,
+					const sequenceContainer& sc,
+					const stochasticProcess& sp,
+					const Vdouble * weights,
+					MDOUBLE & bestAlpha,
+					MDOUBLE & likelihoodScore,
+					const int maxIterations=1000,
+					const MDOUBLE epsilon=0.05);
+*/
+	// Separate model: one tree per data set, a single alpha shared by all.
+	void optimizeAlphaNG_EM_SEP(vector<tree>& et,
+				vector<sequenceContainer>& sc,
+				vector<stochasticProcess> &sp,
+				const vector<Vdouble *> * weights,
+				MDOUBLE & bestAlpha,
+				MDOUBLE & likelihoodScore,
+				const int maxIterations=1000,
+				const MDOUBLE epsilon=0.05);
+	// Proportional model: one shared tree, a single alpha for all processes.
+	void optimizeAlphaNG_EM_PROP(tree& et,// 1 alpha for all trees!
+				vector<sequenceContainer>& sc,
+				vector<stochasticProcess>& sp,
+				const vector<Vdouble *> * weights,
+				MDOUBLE & bestAlpha,
+				MDOUBLE & likelihoodScore,
+				const int maxIterations=1000,
+				const MDOUBLE epsilon=0.05);
+	// As above, but a separate alpha is optimized per data set
+	// (bestAlpha receives one entry per data set).
+	void optimizeAlphaNG_EM_PROP_n_alpha(tree& et,// alpha for each trees!
+				vector<sequenceContainer>& sc,
+				vector<stochasticProcess>& sp,
+				const vector<Vdouble *> * weights,
+				vector<MDOUBLE> & bestAlpha,
+				MDOUBLE & likelihoodScore,
+				const int maxIterations=1000,
+				const MDOUBLE epsilon=0.05);
+};
+
+#include <iostream>// for debugging
+using namespace std; // for debugging
+
+// Objective functor for 1-D alpha optimization over several data sets that
+// share a single tree: operator() installs the candidate alpha into every
+// stochastic process and returns MINUS the summed log-likelihood, so it can
+// be handed directly to a minimizer (e.g. brent).
+class C_evalAlphaManyTrees{
+public:
+	C_evalAlphaManyTrees(tree& et,
+				vector<sequenceContainer>& sc,
+				vector<stochasticProcess>& sp,
+				const vector<Vdouble *> * weights)
+		: _et(et),_sc(sc),_sp(sp),_weights(weights) {};
+private:
+	const tree& _et;	// shared tree (read-only here)
+	const vector<sequenceContainer>& _sc;	// one alignment per data set
+	vector<stochasticProcess>& _sp;	// mutated: candidate alpha is set on each
+	const vector<Vdouble *> * _weights;	// optional per-data-set position weights; may be NULL
+public:
+	// Returns -sum(log L) at the candidate alpha; requires every process to
+	// carry a gamma distribution with more than one rate category.
+	MDOUBLE operator() (MDOUBLE alpha) {
+	#ifdef VERBOS
+		LOG(5,<<"trying alpha: "<<alpha<<endl);
+	#endif
+		MDOUBLE res=0;
+		for (int i=0; i < _sc.size();++i) {
+
+			if (_sp[i].categories() == 1) {
+				errorMsg::reportError(" one category when trying to optimize alpha");
+			}
+			(static_cast<gammaDistribution*>(_sp[i].distr()))->setAlpha(alpha);
+			res += likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc[i],_sp[i],_weights?(*_weights)[i]:NULL);
+		}
+	#ifdef VERBOS
+		LOG(5,<<"likelihood = "<<-res<<endl);
+	#endif
+		return -res;
+	}
+};
+
+// Same objective as C_evalAlphaManyTrees, but for the separate model:
+// each data set has its own tree (_et[i] is paired with _sc[i]/_sp[i]).
+// A single candidate alpha is still shared by all processes, and the
+// return value is minus the summed log-likelihood.
+class C_evalAlphaManyTreesSep{ // separate model, 1 gamma
+public:
+	C_evalAlphaManyTreesSep(vector<tree>& et,
+				vector<sequenceContainer>& sc,
+				vector<stochasticProcess>& sp,
+				const vector<Vdouble *> * weights)
+		: _et(et),_sc(sc),_sp(sp),_weights(weights) {};
+private:
+	const vector<tree>& _et;	// one tree per data set
+	const vector<sequenceContainer>& _sc;	// one alignment per data set
+	vector<stochasticProcess>& _sp;	// mutated: candidate alpha is set on each
+	const vector<Vdouble *> * _weights;	// optional per-data-set position weights; may be NULL
+public:
+	// Returns -sum(log L) at the candidate alpha.
+	MDOUBLE operator() (MDOUBLE alpha) {
+		//LOG(5,<<"trying alpha: "<<alpha<<endl);
+		MDOUBLE res=0;
+		for (int i=0; i < _sc.size();++i) {
+
+			if (_sp[i].categories() == 1) {
+				errorMsg::reportError(" one category when trying to optimize alpha");
+			}
+			(static_cast<gammaDistribution*>(_sp[i].distr()))->setAlpha(alpha);
+			res += likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et[i],_sc[i],_sp[i],_weights?(*_weights)[i]:NULL);
+		}
+//		LOG(5,<<" with alpha = "<<alpha<<" logL = "<<res<<endl);
+		return -res;
+	}
+};
+
+
+
+
+
+
+
+
+#endif
+
+
diff --git a/libs/phylogeny/bestGtrModelParams.cpp b/libs/phylogeny/bestGtrModelParams.cpp
new file mode 100644
index 0000000..18aa35d
--- /dev/null
+++ b/libs/phylogeny/bestGtrModelParams.cpp
@@ -0,0 +1,174 @@
+// $Id: bestGtrModelparams.cpp 2008-29-04 10:57:00Z nimrod $
+
+#include "bestGtrModelParams.h"
+#include <iostream>
+using namespace std;
+
+#include "bblEM.h"
+#include "numRec.h"
+#include "logFile.h"
+#include "bestAlpha.h"
+
+// Iteratively finds the best GTR exchangeability parameters (a2c..g2t) —
+// and optionally the gamma alpha and the branch lengths — for the given
+// tree and alignment. Each parameter in turn gets a 1-D Brent line-search;
+// a new value is kept only if the likelihood did not decrease. The outer
+// loop stops when the round's improvement falls below
+// epsilonLikelihoodImprovment or after maxTotalIterations rounds.
+// Results are exposed through the getBest*() accessors.
+bestGtrModel::bestGtrModel(tree& et, // find best Gtr Model Params
+										const sequenceContainer& sc,
+										stochasticProcess& sp,
+										const Vdouble * weights,
+										const int maxTotalIterations,
+										const MDOUBLE epsilonLikelihoodImprovment,
+										const MDOUBLE epsilonLoglikelihoodForGTRParam,
+										const MDOUBLE upperBoundGTRParam,
+										const bool optimizeTree,
+										const bool optimizeAlpha){
+	LOG(5,<<"Starting bestGtrModel: find Best replacement matrix parameters"<<endl);
+	MDOUBLE oldL = VERYSMALL;
+	MDOUBLE newL = VERYSMALL;
+	_bestL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(et,sc,sp,weights);
+
+	// Last ACCEPTED value of each parameter, used to restore the model when
+	// a line-search makes the likelihood worse.
+	MDOUBLE prev_a2c = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_a2c();
+	MDOUBLE prev_a2g = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_a2g();
+	MDOUBLE prev_a2t = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_a2t();
+	MDOUBLE prev_c2g = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_c2g();
+	MDOUBLE prev_c2t = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_c2t();
+	MDOUBLE prev_g2t = (static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->get_g2t();
+
+	// BUGFIX: the initial alpha guess was taken from epsilonLoglikeForBBL
+	// (0.01 — a convergence epsilon), not from the intended initial-alpha
+	// constant inAlpha (1.5); both are defined in bestGtrModelParams.h.
+	MDOUBLE prevAlpha = inAlpha;
+	_bestAlpha = prevAlpha; // keep getBestAlpha() defined even when optimizeAlpha==false
+
+	for (int i=0; i < maxTotalIterations; ++i) {
+		//optimize a2c
+		newL = -brent(0.0, prev_a2c, upperBoundGTRParam,
+					  C_evalGTRParam(a2c,et,sc,sp,weights),
+					  epsilonLoglikelihoodForGTRParam,
+					  &_best_a2c);
+		if (newL >= _bestL)
+		{
+			_bestL = newL;
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2c(_best_a2c);//safety
+		}
+		else
+		{//likelihood went down!
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2c(prev_a2c);
+			_best_a2c = prev_a2c; // keep the accessor consistent with the restored model
+			LOG(5,<<"likelihood went down in optimizing a2c"<<endl<<"oldL = "<<_bestL);
+		}
+
+		//optimize a2t
+		newL = -brent(0.0, prev_a2t, upperBoundGTRParam,
+					  C_evalGTRParam(a2t,et,sc,sp,weights),
+					  epsilonLoglikelihoodForGTRParam,
+					  &_best_a2t);
+		if (newL >= _bestL)
+		{
+			_bestL = newL;
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2t(_best_a2t);//safety
+		}
+		else
+		{//likelihood went down!
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2t(prev_a2t);
+			_best_a2t = prev_a2t; // keep the accessor consistent with the restored model
+			LOG(5,<<"likelihood went down in optimizing a2t"<<endl<<"oldL = "<<_bestL);
+		}
+
+		//optimize a2g
+		newL = -brent(0.0, prev_a2g, upperBoundGTRParam,
+					  C_evalGTRParam(a2g,et,sc,sp,weights),
+					  epsilonLoglikelihoodForGTRParam,
+					  &_best_a2g);
+		if (newL >= _bestL)
+		{
+			_bestL = newL;
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2g(_best_a2g);//safety
+		}
+		else
+		{//likelihood went down!
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_a2g(prev_a2g);
+			_best_a2g = prev_a2g; // keep the accessor consistent with the restored model
+			LOG(5,<<"likelihood went down in optimizing a2g"<<endl<<"oldL = "<<_bestL);
+		}
+
+		//optimize c2g
+		newL = -brent(0.0, prev_c2g, upperBoundGTRParam,
+					  C_evalGTRParam(c2g,et,sc,sp,weights),
+					  epsilonLoglikelihoodForGTRParam,
+					  &_best_c2g);
+		if (newL >= _bestL)
+		{
+			_bestL = newL;
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_c2g(_best_c2g);//safety
+		}
+		else
+		{//likelihood went down!
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_c2g(prev_c2g);
+			_best_c2g = prev_c2g; // keep the accessor consistent with the restored model
+			LOG(5,<<"likelihood went down in optimizing c2g"<<endl<<"oldL = "<<_bestL);
+		}
+
+		//optimize c2t
+		newL = -brent(0.0, prev_c2t, upperBoundGTRParam,
+					  C_evalGTRParam(c2t,et,sc,sp,weights),
+					  epsilonLoglikelihoodForGTRParam,
+					  &_best_c2t);
+		if (newL >= _bestL)
+		{
+			_bestL = newL;
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_c2t(_best_c2t);//safety
+		}
+		else
+		{//likelihood went down!
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_c2t(prev_c2t);
+			_best_c2t = prev_c2t; // keep the accessor consistent with the restored model
+			LOG(5,<<"likelihood went down in optimizing c2t"<<endl<<"oldL = "<<_bestL);
+		}
+
+		//optimize g2t
+		newL = -brent(0.0, prev_g2t, upperBoundGTRParam,
+					  C_evalGTRParam(g2t,et,sc,sp,weights),
+					  epsilonLoglikelihoodForGTRParam,
+					  &_best_g2t);
+		if (newL >= _bestL)
+		{
+			_bestL = newL;
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_g2t(_best_g2t);//safety
+		}
+		else
+		{//likelihood went down!
+			(static_cast<gtrModel*>(sp.getPijAccelerator()->getReplacementModel()))->set_g2t(prev_g2t);
+			_best_g2t = prev_g2t; // keep the accessor consistent with the restored model
+			LOG(5,<<"likelihood went down in optimizing g2t"<<endl<<"oldL = "<<_bestL);
+		}
+
+		if(optimizeAlpha)
+		{
+			newL = -brent(0.0, prevAlpha, upperBoundForAlpha,
+					  C_evalAlpha(et,sc,sp,weights),
+					  epsilonLoglikeForAlphaOptimization,
+					  &_bestAlpha);
+			// (the redundant unconditional setAlpha that preceded the check
+			// was removed; both branches below install the final value)
+			if (newL >= _bestL)
+			{
+				_bestL = newL;
+				(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(_bestAlpha); //safety
+			}
+			else
+			{//likelihood went down!
+				(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(prevAlpha);
+				_bestAlpha = prevAlpha; // keep the accessor consistent with the restored model
+				LOG(5,<<"likelihood went down in optimizing alpha"<<endl<<"oldL = "<<_bestL);
+			}
+		}
+
+		if(optimizeTree)
+		{
+			// branch-length optimization under the current parameter values
+			bblEM bblEM1(et,sc,sp,weights,maxBBLIt,epsilonLoglikeForBBL);
+			_bestL = bblEM1.getTreeLikelihood();
+		}
+
+
+		// check for improvement in the likelihood
+		if (_bestL > oldL+epsilonLikelihoodImprovment) {
+			oldL = _bestL;
+			prev_a2c = _best_a2c;
+			prev_a2g = _best_a2g;
+			prev_a2t = _best_a2t;
+			prev_c2g = _best_c2g;
+			prev_c2t = _best_c2t;
+			prev_g2t = _best_g2t;
+			prevAlpha = _bestAlpha;
+		} else {
+			break;
+		}
+	}
+}
diff --git a/libs/phylogeny/bestGtrModelParams.h b/libs/phylogeny/bestGtrModelParams.h
new file mode 100644
index 0000000..0eb4713
--- /dev/null
+++ b/libs/phylogeny/bestGtrModelParams.h
@@ -0,0 +1,111 @@
+// $Id: bestGtrModelparams.h 2008-28-04 15:13:34Z nimrod $
+
+#ifndef ___BEST_GTRMODEL_PARAMS
+#define ___BEST_GTRMODEL_PARAMS
+
+#include "definitions.h"
+
+#include "likelihoodComputation.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "gammaDistribution.h"
+#include "generalGammaDistribution.h"
+#include "tree.h"
+#include "gtrModel.h"
+
+// Identifier for each of the six GTR exchangeability parameters
+// (a2c = the A<->C rate, etc.). Invalid is a sentinel.
+typedef enum
+	{
+		Invalid = 0,
+		a2c,
+		a2g,
+		a2t,
+		c2g,
+		c2t,
+		g2t
+	}GTRParam;
+
+// Defaults used by the optimization in bestGtrModelParams.cpp:
+#define maxBBLIt 10			// max bblEM (branch-length) iterations
+#define epsilonLoglikeForBBL 0.01	// log-likelihood convergence epsilon for bblEM
+#define inAlpha 1.5			// initial guess for the gamma alpha
+#define epsilonLoglikeForAlphaOptimization 0.01	// epsilon for the alpha line-search
+#define upperBoundForAlpha 5.0		// upper bracket for the alpha line-search
+
+// Finds the best GTR replacement-model parameters (a2c..g2t) — and
+// optionally the gamma alpha and the branch lengths — for a given tree and
+// alignment. All work happens in the constructor; results are queried
+// through the getters below.
+class bestGtrModel {
+public:
+	explicit bestGtrModel(tree& et, // find best Gtr Model Params
+										const sequenceContainer& sc,
+										stochasticProcess& sp,
+										const Vdouble * weights,
+										const int maxTotalIterations = 5,
+										const MDOUBLE epsilonLikelihoodImprovment = 0.05,
+										const MDOUBLE epsilonLoglikelihoodForGTRParam = 0.01,
+										const MDOUBLE upperBoundGTRParam = 5.0,
+										const bool optimizeTree = true,
+										const bool optimizeAlpha = true);
+	MDOUBLE getBesta2c() {return _best_a2c;}
+	MDOUBLE getBesta2g() {return _best_a2g;}
+	MDOUBLE getBesta2t() {return _best_a2t;}
+	MDOUBLE getBestc2g() {return _best_c2g;}
+	MDOUBLE getBestc2t() {return _best_c2t;}
+	MDOUBLE getBestg2t() {return _best_g2t;}
+	MDOUBLE getBestAlpha() {return _bestAlpha;}	// meaningful when optimizeAlpha==true
+	MDOUBLE getBestL() {return _bestL;}	// best log-likelihood reached
+private:
+	MDOUBLE _best_a2c;
+	MDOUBLE _best_a2g;
+	MDOUBLE _best_a2t;
+	MDOUBLE _best_c2g;
+	MDOUBLE _best_c2t;
+	MDOUBLE _best_g2t;
+	MDOUBLE _bestAlpha;
+	MDOUBLE _bestL;
+};
+
+// Objective functor for a 1-D line-search on a SINGLE GTR parameter,
+// selected by the GTRParam tag passed at construction. operator() installs
+// the candidate value into the gtrModel held by _sp and returns MINUS the
+// log-likelihood, suitable for a minimizer such as brent.
+class C_evalGTRParam{
+public:
+	C_evalGTRParam(	const GTRParam param,
+					const tree& et,
+				const sequenceContainer& sc,
+				stochasticProcess& sp,
+				const Vdouble * weights = NULL)
+		:_param(param), _et(et),_sc(sc),_weights(weights),_sp(sp){};
+private:
+	const GTRParam _param;	// which exchangeability parameter to vary
+	const tree& _et;
+	const sequenceContainer& _sc;
+	const Vdouble * _weights;	// optional position weights; may be NULL
+	stochasticProcess& _sp;	// mutated: candidate value is written into its gtrModel
+public:
+	// Returns -log L at the candidate value; reports an error for an
+	// unrecognized _param tag.
+	MDOUBLE operator() (MDOUBLE paramVal) {
+		switch (_param){
+			case a2c:
+				(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2c(paramVal);
+				break;
+			case a2g:
+				(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2g(paramVal);
+				break;
+			case a2t:
+				(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_a2t(paramVal);
+				break;
+			case c2g:
+				(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_c2g(paramVal);
+				break;
+			case c2t:
+				(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_c2t(paramVal);
+				break;
+			case g2t:
+				(static_cast<gtrModel*>(_sp.getPijAccelerator()->getReplacementModel()))->set_g2t(paramVal);
+				break;
+			default:
+				errorMsg::reportError("Missing GTR parameter in C_evalGTRParam::operator ()");
+				break;
+		}
+		MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
+		LOG(5,<<" with " + int2string(_param) + " = "<<paramVal<<" logL = "<<res<<endl);
+		return -res;
+	}
+};
+
+#endif
+
+
diff --git a/libs/phylogeny/bestHKYparam.cpp b/libs/phylogeny/bestHKYparam.cpp
new file mode 100644
index 0000000..cc37ca5
--- /dev/null
+++ b/libs/phylogeny/bestHKYparam.cpp
@@ -0,0 +1,158 @@
+// $Id: bestHKYparam.cpp 4314 2008-06-25 13:09:12Z itaymay $
+
+#include "bestHKYparam.h"
+#include <iostream>
+using namespace std;
+
+#include "bblEM.h"
+#include "numRec.h"
+#include "logFile.h"
+#include "bestAlpha.h"
+
+// Alternates between a Brent line-search on the HKY transition/transversion
+// (TrTv) parameter and branch-length optimization (bblEM), until the
+// log-likelihood improvement falls below epsilonLikelihoodImprovment or
+// maxTotalIterations is reached. Results: getBestHkyParam(), getBestL().
+bestHkyParamAndBBL::bestHkyParamAndBBL(tree& et, //find Best HkyParam and best BBL
+					const sequenceContainer& sc,
+					stochasticProcess& sp,
+					const Vdouble * weights,
+					const MDOUBLE upperBoundOnHkyParam,
+					const MDOUBLE epsilonHkyParamOptimization,
+					const MDOUBLE epsilonLikelihoodImprovment,
+					const int maxBBLIterations,
+					const int maxTotalIterations){
+	LOG(5,<<"find Best HkyParam and best BBL"<<endl);
+	MDOUBLE oldL = VERYSMALL;
+	_bestL = VERYSMALL;
+	// Brent bracketing triplet: search TrTv in [ax,cx] starting from bx.
+	const MDOUBLE bx=upperBoundOnHkyParam*0.3;
+	const MDOUBLE ax=0.01;
+	const MDOUBLE cx=upperBoundOnHkyParam;
+	MDOUBLE bestA=0;
+	for (int i=0; i < maxTotalIterations; ++i) {
+		// 1-D optimization of the TrTv parameter
+		_bestL = -brent(ax,bx,cx,
+			   C_evalHkyParam(et,sc,sp,weights),
+			   epsilonHkyParamOptimization,
+			   &bestA);
+
+		if (_bestL > oldL+epsilonLikelihoodImprovment) {
+			oldL = _bestL;
+		}
+		else {//LL converged
+			if (_bestL > oldL) {
+				// BUGFIX: also record the value written into the model, so
+				// getBestHkyParam() agrees with it (was left stale here).
+				_bestHkyParam = bestA;
+				(static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(bestA);
+			}
+			else
+				_bestL = oldL;
+			break;
+		}
+		_bestHkyParam = bestA;
+		(static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(bestA);
+		LOG(5,<<"bestHkyParamAndBBL: trtv = "<<_bestHkyParam<<endl);
+		// BUGFIX: pass the caller's position weights to bblEM (was NULL,
+		// silently ignoring them — the GTR optimizer passes weights here).
+		bblEM bblEM1(et,sc,sp,weights,maxBBLIterations,epsilonLikelihoodImprovment);//maxIterations=1000
+		_bestL =bblEM1.getTreeLikelihood();
+		if (_bestL > oldL+epsilonLikelihoodImprovment) {
+			oldL = _bestL;
+		}
+		else {
+			_bestL = oldL;
+			break;
+		}
+	}
+}
+
+// Optimizes only the HKY transition/transversion parameter on a fixed tree
+// (no branch-length optimization). The optimized value is stored in
+// _bestHkyParam and also written back into the model held by sp.
+bestHkyParamFixedTree::bestHkyParamFixedTree(const tree& et, //findBestHkyParamFixedTree
+					const sequenceContainer& sc,
+					stochasticProcess& sp,
+					const Vdouble * weights,
+					const MDOUBLE upperBoundOnHkyParam,
+					const MDOUBLE epsilonHkyParamOptimization){
+	LOG(5,<<"findBestHkyParamFixedTree"<<endl);
+	// Brent bracketing triplet for the TrTv search: [lower, middle, upper].
+	const MDOUBLE lowerLimit = 0;
+	const MDOUBLE upperLimit = upperBoundOnHkyParam;
+	const MDOUBLE middlePoint = upperLimit*0.3;
+	MDOUBLE optimizedTrTv = 0;
+	_bestL = -brent(lowerLimit,middlePoint,upperLimit,
+		   C_evalHkyParam(et,sc,sp,weights),
+		   epsilonHkyParamOptimization,
+		   &optimizedTrTv);
+	_bestHkyParam = optimizedTrTv;
+	(static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(optimizedTrTv);
+}
+
+
+
+// Jointly optimizes the HKY TrTv parameter, the gamma alpha, and the branch
+// lengths. Each round runs a Brent line-search on TrTv, then on alpha, then
+// a bblEM pass; the round is accepted only if the likelihood improved by
+// more than epsilonLikelihoodImprovment, otherwise the previous tree and
+// TrTv are restored and the loop stops.
+bestHkyParamAlphaAndBBL::bestHkyParamAlphaAndBBL( //find best TrTv (=HkyParam), Alpha and best branch lengths
+					tree& et,
+					const sequenceContainer& sc,
+					stochasticProcess& sp,
+					const Vdouble * weights,
+					const int maxTotalIterations,
+					const MDOUBLE epsilonLikelihoodImprovment,
+					const MDOUBLE epsilonHkyParamOptimization,
+					const MDOUBLE epsilonAlphaOptimization,
+					const MDOUBLE epsilonBBL,
+					const MDOUBLE upperBoundOnHkyParam,
+					const int maxBBLIterations,
+					const MDOUBLE initAlpha,
+					const MDOUBLE upperBoundOnAlpha)
+
+{
+	MDOUBLE oldL = VERYSMALL;
+	MDOUBLE newL = VERYSMALL;
+
+	// first guess for the parameters
+	MDOUBLE prevHkyParam = static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel())->getTrTv();
+	MDOUBLE prevAlpha = initAlpha;
+	tree prevTree;
+
+	for (int i=0; i < maxTotalIterations; ++i) {
+
+		// optimize HkyParam
+		newL = -brent(0.0, prevHkyParam, upperBoundOnHkyParam,
+					  C_evalHkyParam(et,sc,sp,weights),
+					  epsilonHkyParamOptimization,
+					  &_bestHkyParam);
+		(static_cast<hky*>(sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(_bestHkyParam);
+		LOG(5,<<"bestHkyParamAlphaAndBBL: trtv = "<<_bestHkyParam<<endl);
+		// optimize Alpha
+		newL = -brent(0.0, prevAlpha, upperBoundOnAlpha,
+					  C_evalAlpha(et,sc,sp,weights),
+					  epsilonAlphaOptimization,
+					  &_bestAlpha);
+		(static_cast<gammaDistribution*>(sp.distr()))->setAlpha(_bestAlpha);
+
+		LOG(5,<<"# bestHkyParamAlphaAndBBL::bestHkyParamAlphaAndBBL iteration " << i << ": after param optimization:" <<endl
+		      <<"# old L = " << oldL << "\t"
+		      <<"# new L = " << newL << endl
+		      <<"# new hkyParam = " << _bestHkyParam << endl
+		      <<"# new Alpha = " << _bestAlpha << endl);
+
+		// optimize branch lengths
+		// NOTE(review): the caller's weights are used in the line-searches
+		// above but NOT passed to bblEM here — confirm this is intended.
+		bblEM bblEM1(et,sc,sp,NULL,maxBBLIterations,epsilonBBL);
+		newL =bblEM1.getTreeLikelihood();
+
+		LOG(5,<<"# bestHkyParamAlphaAndBBL::bestHkyParamAlphaAndBBL iteration " << i << ": after branch lengths optimization:" <<endl
+		      <<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
+		      <<"# The tree:" );
+		LOGDO(5,et.output(myLog::LogFile()));
+
+		// check for improvement in the likelihood
+		if (newL > oldL+epsilonLikelihoodImprovment) {
+			oldL = newL;
+			_bestL = newL;
+			prevHkyParam = _bestHkyParam;
+			prevAlpha = _bestAlpha;
+			prevTree = et;
+		} else {
+			if (newL>oldL) {
+				_bestL = newL;
+			} else {
+				// revert to the previous round's tree and TrTv.
+				// NOTE(review): _bestAlpha and the alpha stored in sp are
+				// not reverted here — confirm this is intended.
+				_bestL = oldL;
+				_bestHkyParam = prevHkyParam;
+				et = prevTree;
+			}
+			break;
+		}
+	}
+}
+
diff --git a/libs/phylogeny/bestHKYparam.h b/libs/phylogeny/bestHKYparam.h
new file mode 100644
index 0000000..a095c91
--- /dev/null
+++ b/libs/phylogeny/bestHKYparam.h
@@ -0,0 +1,109 @@
+// $Id: bestHKYparam.h 4292 2008-06-23 10:24:19Z itaymay $
+
+#ifndef ___BEST_HKY_PARAM
+#define ___BEST_HKY_PARAM
+
+#include "definitions.h"
+
+#include "likelihoodComputation.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "gammaDistribution.h"
+#include "tree.h"
+#include "hky.h"
+
+
+// Optimizes only the HKY TrTv parameter on a fixed tree (no branch-length
+// optimization). All work is done in the constructor; the optimized value
+// is also written back into sp.
+class bestHkyParamFixedTree {
+public:
+	explicit bestHkyParamFixedTree(const tree& et,
+		const sequenceContainer& sc,
+		stochasticProcess& sp,
+		const Vdouble * weights=NULL,
+		const MDOUBLE upperBoundOnHkyParam = 0.5,
+		const MDOUBLE epsilonHkyParamOptimization = 0.01);
+	MDOUBLE getBestHkyParam() {return _bestHkyParam;}	// optimized TrTv
+	MDOUBLE getBestL() {return _bestL;}	// log-likelihood at the optimum
+private:
+	MDOUBLE _bestHkyParam;
+	MDOUBLE _bestL;
+};
+
+// Alternates TrTv line-search and branch-length optimization (bblEM) until
+// the likelihood converges. All work is done in the constructor.
+class bestHkyParamAndBBL {
+public:
+	explicit bestHkyParamAndBBL(tree& et, //find Best HkyParam and best BBL
+		const sequenceContainer& sc,
+		stochasticProcess& sp,
+		const Vdouble * weights=NULL,
+		const MDOUBLE upperBoundOnHkyParam = 5.0,
+		const MDOUBLE epsilonHkyParamOptimization= 0.01,
+		const MDOUBLE epsilonLikelihoodImprovment= 0.05,
+		const int maxBBLIterations=10,
+		const int maxTotalIterations=5);
+	MDOUBLE getBestHkyParam() {return _bestHkyParam;}	// optimized TrTv
+	MDOUBLE getBestL() {return _bestL;}	// best log-likelihood reached
+private:
+	MDOUBLE _bestHkyParam;
+	MDOUBLE _bestL;
+};
+
+
+
+
+// Objective functor for a 1-D line-search on the HKY TrTv parameter:
+// operator() installs the candidate value into the hky model held by _sp
+// and returns MINUS the log-likelihood, suitable for a minimizer (brent).
+class C_evalHkyParam{
+public:
+  C_evalHkyParam(	const tree& et,
+				const sequenceContainer& sc,
+				stochasticProcess& sp,
+				const Vdouble * weights = NULL)
+    : _et(et),_sc(sc),_weights(weights),_sp(sp){};
+private:
+	const tree& _et;
+	const sequenceContainer& _sc;
+	const Vdouble * _weights;	// optional position weights; may be NULL
+	stochasticProcess& _sp;	// mutated: candidate TrTv is written into its hky model
+public:
+	// Returns -log L at the candidate TrTv value.
+	MDOUBLE operator() (MDOUBLE HkyParam) {
+		(static_cast<hky*>(_sp.getPijAccelerator()->getReplacementModel()))->changeTrTv(HkyParam);
+
+		MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
+		//LOG(5,<<" with HkyParam = "<<HkyParam<<" logL = "<<res<<endl);
+		return -res;
+	}
+};
+
+
+
+// Jointly optimizes the HKY TrTv parameter, the gamma alpha, and the
+// branch lengths. All work is done in the constructor; query results with
+// the getters below.
+class bestHkyParamAlphaAndBBL {
+public:
+	explicit bestHkyParamAlphaAndBBL( //find best TrTv (=HkyParam), Alpha and best branch lengths
+		tree& et,
+		const sequenceContainer& sc,
+		stochasticProcess& sp,
+		const Vdouble * weights=NULL,
+		const int maxTotalIterations=5,
+		const MDOUBLE epsilonLikelihoodImprovment= 0.05,
+		const MDOUBLE epsilonHkyParamOptimization= 0.01,
+		const MDOUBLE epsilonAlphaOptimization= 0.01,
+		const MDOUBLE epsilonBBL= 0.01,
+		const MDOUBLE upperBoundOnHkyParam = 5.0,
+		const int maxBBLIterations=10,
+		const MDOUBLE initAlpha = 1.5,
+		const MDOUBLE upperBoundOnAlpha = 5.0);
+
+	MDOUBLE getBestHkyParam() {return _bestHkyParam;}	// optimized TrTv
+	MDOUBLE getBestAlpha() {return _bestAlpha;}	// optimized gamma alpha
+	MDOUBLE getBestL() {return _bestL;}	// best log-likelihood reached
+private:
+	MDOUBLE _bestHkyParam;
+	MDOUBLE _bestAlpha;
+	MDOUBLE _bestL;
+};
+
+
+
+
+
+
+#endif
+
+
diff --git a/libs/phylogeny/bestParamUSSRV.cpp b/libs/phylogeny/bestParamUSSRV.cpp
new file mode 100644
index 0000000..3cd953c
--- /dev/null
+++ b/libs/phylogeny/bestParamUSSRV.cpp
@@ -0,0 +1,474 @@
+// $Id: bestParamUSSRV.cpp 4951 2008-09-24 11:16:58Z osnatz $
+#include "bestParamUSSRV.h"
+
+/* structure of this method:
+(1) check how many parameters are to be optimized, and decide how many
+parameter-optimization iterations and how many parameters+bbl iterations will be done.
+(2) A loop over the parameters+bbl iterations
+	(2.1) A loop over the parameter-optimization iterations
+		(2.1.1) Optimize alpha
+		(2.1.2) Optimize nu
+		(2.1.3) Optimize f
+	if the likelihood wasn't changed during this loop --> the parameters converged --> break
+	(2.2) BBL
+	if the likelihood wasn't changed during this loop --> parameters+bbl converged --> break
+(3) return the likelihood
+*/
+
+// ***************
+// * USSRV *
+// ***************
+
+// Optimizes the USSRV model parameters (alpha, nu, f) and optionally the
+// branch lengths, following the scheme described in the comment above:
+// an inner parameter-optimization loop nested inside an outer
+// parameters+bbl loop. Which quantities are optimized is controlled by the
+// _*OptimizationFlag members. Returns the best log-likelihood found; the
+// best parameter values are kept in _bestAlpha/_bestNu/_bestF.
+MDOUBLE bestParamUSSRV::operator() (tree& et,
+								   const sequenceContainer& sc,
+								   const sequenceContainer& baseSc,
+								   ussrvModel& model,
+								   const Vdouble * weights /* =NULL */,
+								   const MDOUBLE AlphaUpperBound /* = 15 */,
+								   const MDOUBLE NuUpperBound /* = 15 */,
+								   const MDOUBLE FUpperBound /* = 1 */,
+								   const MDOUBLE epsilonParamOptimization /* = 0.01 */,
+								   const MDOUBLE epsilonLikelihoodImprovment /* = 0.01 */,
+								   const int maxIterations /* = 50 */,
+								   const int maxOfParametersAndBblIterations /* = 40 */)
+{
+	_bestL = VERYSMALL;
+	MDOUBLE newL = VERYSMALL;
+
+	bestAlphaFixedTreeUSSRV alphaOptimization;
+	bestNuFixedTreeUSSRV nuOptimization;
+	bestFFixedTreeUSSRV fOptimization;
+
+	int it, bblIt;
+	int numberOfIterations(maxIterations);
+	int numberOfParametersAndBblIterations(maxOfParametersAndBblIterations);
+
+	// if only one parameter is optimized (only Alpha or only Nu or only F) then we need only one iteration.
+	// if we only do bbl, without any optimization of the parameters, then we don't need iterations at all.
+	int countParameters2Optimize(0);
+	if (_AlphaOptimizationFlag) countParameters2Optimize++;
+	if (_NuOptimizationFlag) countParameters2Optimize++;
+	if (_FOptimizationFlag) countParameters2Optimize++;
+
+	if (countParameters2Optimize==0)
+	{
+		numberOfIterations=0;
+		numberOfParametersAndBblIterations=1;
+	}
+	else if (countParameters2Optimize==1)
+		numberOfIterations=1;
+
+	if (_bblOptimizationFlag == false)
+		numberOfParametersAndBblIterations = 1;
+
+	// starting point: the model's current parameter values
+	_bestAlpha = model.getAlpha();
+	_bestNu = model.getNu();
+	_bestF = model.getF();
+
+	bool changes(false);	// any accepted improvement in the current inner (params) loop
+	bool bblChanges(false);	// any accepted improvement in the current outer (params+bbl) round
+	for (bblIt=0; bblIt < numberOfParametersAndBblIterations; ++bblIt)
+	{
+		LOG(8,<<"bestParamUSSRV, params+bbl, iteration: " << bblIt << endl);
+		bblChanges = false;
+		// parameters optimizations (without bbl)
+		// in each iteration : optimization of Alpha and then optimization of Nu, and then of F.
+		for (it=0; it < numberOfIterations; ++it)
+		{
+			changes = false;
+			// Alpha optimization
+			if (_AlphaOptimizationFlag)
+			{
+				LOGDO(5,printTime(myLog::LogFile()));
+				newL = alphaOptimization(et,sc,baseSc,model,weights,AlphaUpperBound,epsilonParamOptimization);
+
+				//the improvement in Likelihood is smaller than epsilon
+				if (newL < _bestL)
+				{
+					LOG(5,<<"likelihood went down in LS! (Alpha optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
+					//go back to previous alpha
+					alphaOptimization.setAlpha(_bestAlpha,model);
+					alphaOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
+					//break;
+				}
+				else
+				{// update of likelihood and model.
+					if (newL > _bestL+epsilonLikelihoodImprovment)
+					{
+						changes = true;
+						bblChanges = true;
+					}
+					LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
+					_bestL = newL;
+					_bestAlpha = alphaOptimization.getBestAlpha();
+					LOG(5,<<"new L = " << _bestL<<" new Alpha = " << _bestAlpha<<endl);
+				}
+			}
+
+			// Nu optimization
+			if (_NuOptimizationFlag)
+			{
+				LOGDO(5,printTime(myLog::LogFile()));
+				newL = nuOptimization(et,sc,baseSc,model,weights,NuUpperBound,epsilonParamOptimization);
+
+				//the improvement in Likelihood is smaller than epsilon
+				if (newL < _bestL)
+				{
+					LOG(5,<<"likelihood went down in LS! (Nu optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
+					//go back to previous Nu
+					nuOptimization.setNu(_bestNu,model);
+					nuOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
+					//break;
+				}
+				else
+				{// update of likelihood and model.
+					if (newL > _bestL+epsilonLikelihoodImprovment)
+					{
+						changes = true;
+						bblChanges = true;
+					}
+					LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
+					_bestL = newL;
+					_bestNu = nuOptimization.getBestNu();
+					LOG(5,<<"new L = " << _bestL<<" new Nu = " << _bestNu<<endl);
+				}
+			}
+
+			// F optimization
+			if (_FOptimizationFlag)
+			{
+				LOGDO(5,printTime(myLog::LogFile()));
+				newL = fOptimization(et,sc,baseSc,model,weights,FUpperBound,epsilonParamOptimization);
+
+				//the improvement in Likelihood is smaller than epsilon
+				if (newL < _bestL)
+				{
+					LOG(5,<<"likelihood went down in LS! (F optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
+					//go back to previous F
+					fOptimization.setF(_bestF,model);
+					fOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
+					//break;
+				}
+				else
+				{// update of likelihood and model.
+					if (newL > _bestL+epsilonLikelihoodImprovment )
+					{
+						changes = true;
+						bblChanges = true;
+					}
+					LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
+					_bestL = newL;
+					_bestF = fOptimization.getBestF();
+					LOG(5,<<"new L = " << _bestL<<" new F = " << _bestF<<endl);
+				}
+			}
+			if (changes == false)
+			{
+				LOG(5,<<"bestParamUSSRV parameters alpha,nu,f converged!"<<endl);
+				break;
+			}
+		}
+
+		if (changes == true)
+			LOG(5,<<"bestParamUSSRV parameters alpha, nu, f, did not converge after " << numberOfIterations << " iterations"<<endl);
+
+
+		// BBL
+		if (_bblOptimizationFlag == true)
+		{
+			LOGDO(5,printTime(myLog::LogFile()));
+			// NOTE(review): maxIterations (the parameter-iteration cap) is
+			// reused here as the bblEM2USSRV iteration cap — confirm intended.
+			bblEM2USSRV bbl(et,sc,baseSc,model,weights,maxIterations);
+			newL = bbl.getTreeLikelihood();
+			LOG(5,<<"current best L= "<<_bestL<<endl);
+			LOG(5,<<"new L After BBL = " << newL<< " = "<< bbl.getTreeLikelihood() <<endl);
+			LOG(5,<<"The new tree is: " << endl);
+			if (5 <= myLog::LogLevel())
+				et.output(myLog::LogFile());
+			LOG(5,<<endl);
+			if (newL > _bestL+epsilonLikelihoodImprovment)
+				bblChanges = true;
+			if (newL < _bestL){
+				LOG(5,<<"likelihood went down in LS! (BBL)"<<endl<<"oldL = "<<_bestL);
+				LOG(5,<<" newL= "<<newL<<endl) ;
+			}
+			else
+				_bestL = newL;
+		}
+
+		if (bblChanges == false)
+		{
+			LOG(5,<<"bestParamUSSRV bbl and parameters converged!"<<endl);
+			break;
+		}
+	}
+
+	// reached only when the outer loop ran to completion without converging
+	if (bblIt == numberOfParametersAndBblIterations)
+		LOG(5,<<"bestParamUSSRV bbl and parameters alpha did not converge after " << numberOfParametersAndBblIterations << "iterations"<<endl);
+
+	LOGDO(5,printTime(myLog::LogFile()));
+	return _bestL;
+}
+
+
+
+// ***************
+// * SSRV *
+// ***************
+
+MDOUBLE bestParamSSRV::operator() (tree& et,
+ const sequenceContainer& sc,
+ stochasticProcessSSRV& ssrvSp,
+ const Vdouble * weights /* =NULL */,
+ const MDOUBLE AlphaUpperBound /* = 15 */,
+ const MDOUBLE NuUpperBound /* = 15 */,
+ const MDOUBLE TrTvUpperBound /* = 10 */,
+ const MDOUBLE epsilonParamOptimization /* = 0.01 */,
+ const MDOUBLE epsilonLikelihoodImprovment /* = 0.01 */,
+ const MDOUBLE epsilonBbl /*= 0.05 */,
+ const int maxIterations /* = 50 */,
+ const int maxOfParametersAndBblIterations /* = 40 */)
+{
+ _bestL = VERYSMALL;
+ MDOUBLE newL = VERYSMALL;
+
+ bestAlphaFixedTreeSSRV alphaOptimization;
+ bestNuFixedTreeSSRV nuOptimization;
+ bestTamura92ParamFixedTreeSSRV tamura92Optimization;
+
+ int it, bblIt;
+ int numberOfIterations(maxIterations);
+ int numberOfParametersAndBblIterations(maxOfParametersAndBblIterations);
+
+ // if only one parameter is optimize (only Alpha or only Nu or only tamura92) then we need only one iteration.
+ // if we only do bbl, without any optimization of the parameters, then we don't need iterations at all.
+ int countParameters2Optimize(0);
+ if (_AlphaOptimizationFlag) countParameters2Optimize++;
+ if (_NuOptimizationFlag) countParameters2Optimize++;
+ if (_tamura92OptimizationFlag) countParameters2Optimize++;
+
+
+ if (countParameters2Optimize==0)
+ {
+ numberOfIterations=0;
+ numberOfParametersAndBblIterations=1;
+ }
+ else if (countParameters2Optimize==1)
+ numberOfIterations=1;
+
+ if (_bblOptimizationFlag == false)
+ numberOfParametersAndBblIterations = 1;
+
+ replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(ssrvSp.getPijAccelerator()->getReplacementModel());
+ gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
+ _bestAlpha = gammaDist->getAlpha();
+ _bestNu = pMulRM->getRateOfRate();
+
+
+ bool changes(false);
+ bool bblChanges(false);
+
+ for (bblIt=0; bblIt < numberOfParametersAndBblIterations; ++bblIt)
+ {
+ bblChanges = false;
+
+ // Set initial values of lower/upper bounds for params
+ MDOUBLE AlphaLowerBoundCur = 0.0;
+ MDOUBLE AlphaUpperBoundCur = AlphaUpperBound;
+ MDOUBLE NuLowerBoundCur = 0.0;
+ MDOUBLE NuUpperBoundCur = NuUpperBound;
+ MDOUBLE TrTvLowerBoundCur = 0.0;
+ MDOUBLE TrTvUpperBoundCur = TrTvUpperBound;
+ MDOUBLE ThetaLowerBoundCur = 0.0;
+ MDOUBLE ThetaUpperBoundCur = 1.0;
+ // And for epsilon
+ MDOUBLE epsilonParamOptimizationCur = epsilonParamOptimization;
+
+ // parameters optimizations (without bbl)
+ // in each iteration : optimization of Alpha and then optimization of Nu, and then of F.
+ for (it=0; it < numberOfIterations; ++it)
+ {
+ LOG(8,<<"bestParamUSSRV, params+bbl, iteration: " << bblIt << endl);
+ changes = false;
+ // Alpha optimization
+ if (_AlphaOptimizationFlag)
+ {
+ LOGDO(5,printTime(myLog::LogFile()));
+ newL = alphaOptimization(et,sc,ssrvSp,weights,AlphaLowerBoundCur,AlphaUpperBoundCur,epsilonParamOptimizationCur);
+
+ //the improvement in Likelihood is smaller than epsilon
+ if (newL < _bestL)
+ {
+ LOG(5,<<"likelihood went down in LS! (Alpha optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
+ //go back to previous alpha
+ alphaOptimization.setAlpha(_bestAlpha,ssrvSp);
+ alphaOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
+ //break;
+ }
+ else
+ {// update of likelihood and model.
+ if (newL > _bestL+epsilonLikelihoodImprovment)
+ {
+ changes = true;
+ bblChanges = true;
+ }
+ LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
+ _bestL = newL;
+ _bestAlpha = alphaOptimization.getBestAlpha();
+ LOG(5,<<"new L = " << _bestL<<" new Alpha = " << _bestAlpha<<endl);
+ }
+
+ // Narrow search range between lower/upper bounds
+ AlphaLowerBoundCur = (AlphaLowerBoundCur + 2*_bestAlpha) / 3;
+ AlphaUpperBoundCur = (AlphaUpperBoundCur + 2*_bestAlpha) / 3;
+ }
+
+ // Nu optimization
+ if (_NuOptimizationFlag)
+ {
+ LOGDO(5,printTime(myLog::LogFile()));
+ newL = nuOptimization(et,sc,ssrvSp,weights,NuLowerBoundCur,NuUpperBoundCur,epsilonParamOptimizationCur);
+
+ //the improvement in Likelihood is smaller than epsilon
+ if (newL < _bestL)
+ {
+ LOG(5,<<"likelihood went down in LS! (Nu optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
+ //go back to previous Nu
+ nuOptimization.setNu(_bestNu,ssrvSp);
+ nuOptimization.setBestL(_bestL); // @@@@ maybe this is unnecessary
+ //break;
+ }
+ else
+ {// update of likelihood and model.
+ if (newL > _bestL+epsilonLikelihoodImprovment)
+ {
+ changes = true;
+ bblChanges = true;
+ }
+ LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
+ _bestL = newL;
+ _bestNu = nuOptimization.getBestNu();
+ LOG(5,<<"new L = " << _bestL<<" new Nu = " << _bestNu<<endl);
+ }
+
+ // Narrow search range between lower/upper bounds
+ NuLowerBoundCur = (NuLowerBoundCur + 2*_bestNu) / 3;
+ NuUpperBoundCur = (NuUpperBoundCur + 2*_bestNu) / 3;
+ }
+
+ // tamura92 optimization
+ if (_tamura92OptimizationFlag)
+ {
+ LOGDO(5,printTime(myLog::LogFile()));
+ newL = tamura92Optimization(
+ et,sc,ssrvSp,weights,5,epsilonLikelihoodImprovment,
+ TrTvLowerBoundCur,TrTvUpperBoundCur,ThetaLowerBoundCur,ThetaUpperBoundCur,
+ epsilonParamOptimizationCur,epsilonParamOptimizationCur);
+ MDOUBLE bestTrTv = tamura92Optimization.getBestTrTv();
+ MDOUBLE bestTheta = tamura92Optimization.getBestTheta();
+
+ //the improvement in Likelihood is smaller than epsilon
+ if (newL < _bestL)
+ {
+ LOG(5,<<"likelihood went down in LS! (tamura92 optimization)"<<endl<<"oldL = "<<_bestL<<" newL= "<<newL<<endl);
+ }
+ else
+ {// update of likelihood and model.
+ if (newL > _bestL+epsilonLikelihoodImprovment)
+ {
+ changes = true;
+ bblChanges = true;
+ }
+ LOG(9,<<"newL = " << newL << " _bestL = " << _bestL << " epsilonLikelihoodImprovment = " << epsilonLikelihoodImprovment << endl);
+ _bestL = newL;
+ LOG(5,<<"new L = " << _bestL
+ <<" new TrTv = " << bestTrTv
+ <<" new Theta = " << bestTheta <<endl);
+ }
+
+ // Narrow search range between lower/upper bounds
+ TrTvLowerBoundCur = (TrTvLowerBoundCur + 2*bestTrTv) / 3;
+ TrTvUpperBoundCur = (TrTvUpperBoundCur + 2*bestTrTv) / 3;
+
+ ThetaLowerBoundCur = (ThetaLowerBoundCur + 2*bestTheta) / 3;
+ ThetaUpperBoundCur = (ThetaUpperBoundCur + 2*bestTheta) / 3;
+ }
+
+ if (changes == false)
+ {
+ LOG(5,<<"bestParamSSRV parameters alpha,nu, and tamura92 params converged!"<<endl);
+ break;
+ }
+
+ // Reduce epsilonParamOptimizationCur
+ epsilonParamOptimizationCur /= 2;
+ }
+
+ if (changes == true)
+ LOG(5,<<"bestParamSSRV parameters alpha, nu, and tamura92 params did not converge after " << numberOfIterations << " iterations"<<endl);
+
+
+ // BBL
+ if (_bblOptimizationFlag == true)
+ {
+ LOGDO(5,printTime(myLog::LogFile()));
+ bblEM bbl(et,sc,ssrvSp,weights,maxIterations,epsilonBbl);
+ newL = bbl.getTreeLikelihood();
+ LOG(5,<<" current best L= "<<_bestL<<endl);
+ LOG(5,<<"new L After BBL = " << newL<< " = "<< bbl.getTreeLikelihood() <<endl);
+ LOG(5,<<"The new tree is: " << endl);
+ if (5 <= myLog::LogLevel())
+ et.output(myLog::LogFile());
+ LOG(5,<<endl);
+ if (newL > _bestL+epsilonLikelihoodImprovment)
+ bblChanges = true;
+ if (newL < _bestL){
+ LOG(5,<<"likelihood went down in LS! (BBL)"<<endl<<"oldL = "<<_bestL);
+ LOG(5,<<" newL= "<<newL<<endl) ;
+ }
+ else
+ _bestL = newL;
+ }
+
+ if (bblChanges == false)
+ {
+ LOG(5,<<"bestParamSSRV bbl and parameters converged!"<<endl);
+ break;
+ }
+ }
+
+ if (bblIt == numberOfParametersAndBblIterations)
+ LOG(5,<<"bestParamSSRV bbl and parameters alpha did not converge after " << numberOfParametersAndBblIterations << "iterations"<<endl);
+
+ LOGDO(5,printTime(myLog::LogFile()));
+ return _bestL;
+}
+
+
+
+// Variant that can work on a const tree - only if we're not doing BBL
+// WARNING: Running this with bblOptimization==true will give a fatal error
+// Since BBL is disabled, the tree is never modified; we simply copy it and
+// forward to the non-const overload, passing EVERY argument through.
+// (Bug fix: the original forwarding call omitted TrTvUpperBound, which
+// silently shifted all following arguments by one position - e.g.
+// epsilonParamOptimization was used as the TrTv upper bound.)
+MDOUBLE bestParamSSRV::operator() (const tree& et,
+                                   const sequenceContainer& sc,
+                                   stochasticProcessSSRV& ssrvSp,
+                                   const Vdouble * weights /* =NULL */,
+                                   const MDOUBLE AlphaUpperBound /* = 15 */,
+                                   const MDOUBLE NuUpperBound /* = 15 */,
+                                   const MDOUBLE TrTvUpperBound /* = 10 */,
+                                   const MDOUBLE epsilonParamOptimization /* = 0.01 */,
+                                   const MDOUBLE epsilonLikelihoodImprovment /* = 0.01 */,
+                                   const MDOUBLE epsilonBbl /*= 0.05 */,
+                                   const int maxIterations /* = 50 */,
+                                   const int maxOfParametersAndBblIterations /* = 40 */)
+{
+    if (_bblOptimizationFlag == true)
+        errorMsg::reportError("bestParamSSRV::operator(): Can't work on const tree if bblOptimization was requested");
+
+    tree etNotConst(et);
+    return operator()(etNotConst, sc, ssrvSp, weights,
+                      AlphaUpperBound, NuUpperBound, TrTvUpperBound,
+                      epsilonParamOptimization, epsilonLikelihoodImprovment,
+                      epsilonBbl, maxIterations,
+                      maxOfParametersAndBblIterations);
+}
+
+
diff --git a/libs/phylogeny/bestParamUSSRV.h b/libs/phylogeny/bestParamUSSRV.h
new file mode 100644
index 0000000..ef02214
--- /dev/null
+++ b/libs/phylogeny/bestParamUSSRV.h
@@ -0,0 +1,130 @@
+// $Id: bestParamUSSRV.h 1975 2007-04-22 13:47:28Z privmane $
+#ifndef BEST_PARAM_USSRV
+#define BEST_PARAM_USSRV
+
+#include "definitions.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "gammaDistribution.h"
+#include "tree.h"
+#include "replacementModelSSRV.h"
+#include "stochasticProcessSSRV.h"
+#include "C_evalParamUSSRV.h"
+#include "bestAlpha.h"
+#include "numRec.h"
+#include "bblEM.h"
+#include "logFile.h"
+#include "bestAlphaAndNu.h"
+#include "bblEM2USSRV.h"
+#include "someUtil.h"
+#include <ctime>
+
+// ***************
+// * USSRV *
+// ***************
+
+// Driver that optimizes the free parameters of a USSRV model (alpha, nu, f)
+// and optionally the branch lengths (BBL). Each constructor flag enables one
+// of the optimizations; operator() runs the search and returns the best
+// log-likelihood found, after which the getters expose the optimal values.
+class bestParamUSSRV
+{
+public:
+    // Flags select which quantities operator() will optimize.
+    explicit bestParamUSSRV(bool AlphaOptimization, bool NuOptimization,
+                            bool FOptimization, bool bblOptimization):
+        _AlphaOptimizationFlag(AlphaOptimization),
+        _NuOptimizationFlag(NuOptimization),
+        _FOptimizationFlag(FOptimization),
+        _bblOptimizationFlag(bblOptimization) {}
+
+    // Runs the optimization. et may be modified when BBL is enabled.
+    // Returns the best log-likelihood; best parameter values are then
+    // available through the getters below.
+    MDOUBLE operator() (tree& et,
+                        const sequenceContainer& sc,
+                        const sequenceContainer& baseSc,
+                        ussrvModel& model,
+                        const Vdouble * weights=NULL,
+                        const MDOUBLE AlphaUpperBound = 15,
+                        const MDOUBLE NuUpperBound = 15,
+                        const MDOUBLE FUpperBound = 1,
+                        const MDOUBLE epsilonParamOptimization = 0.01,
+                        const MDOUBLE epsilonLikelihoodImprovment = 0.01,
+                        const int maxIterations = 50,
+                        const int maxOfParametersAndBblIterations = 40);
+
+    // Results of the last call to operator().
+    MDOUBLE getBestAlpha() {return _bestAlpha;}
+    MDOUBLE getBestNu() {return _bestNu;}
+    MDOUBLE getBestF() {return _bestF;}
+    MDOUBLE getBestL() {return _bestL;}
+
+private:
+    MDOUBLE _bestAlpha;
+    MDOUBLE _bestNu;
+    MDOUBLE _bestF;
+    MDOUBLE _bestL;
+
+    // flags
+    bool _AlphaOptimizationFlag;
+    bool _NuOptimizationFlag;
+    bool _FOptimizationFlag;
+    bool _bblOptimizationFlag;
+};
+
+// ***************
+// * SSRV *
+// ***************
+
+// Driver that optimizes the free parameters of an SSRV model (alpha, nu and
+// the tamura92 parameters TrTv/theta) and optionally the branch lengths.
+// operator() returns the best log-likelihood; the getters expose the values.
+class bestParamSSRV
+{
+public:
+    // Flags select which quantities operator() will optimize.
+    explicit bestParamSSRV(bool AlphaOptimization, bool NuOptimization, bool tamura92Optimization,
+                           bool bblOptimization):
+        _AlphaOptimizationFlag(AlphaOptimization),
+        _NuOptimizationFlag(NuOptimization),
+        _tamura92OptimizationFlag(tamura92Optimization),
+        _bblOptimizationFlag(bblOptimization) {}
+
+    // Runs the optimization. et may be modified when BBL is enabled.
+    MDOUBLE operator() (tree& et,
+                        const sequenceContainer& sc,
+                        stochasticProcessSSRV& ssrvSp,
+                        const Vdouble * weights=NULL,
+                        const MDOUBLE AlphaUpperBound = 15,
+                        const MDOUBLE NuUpperBound = 15,
+                        const MDOUBLE TrTvUpperBound = 10,
+                        const MDOUBLE epsilonParamOptimization = 0.01,
+                        const MDOUBLE epsilonLikelihoodImprovment = 0.01,
+                        const MDOUBLE epsilonBbl = 0.05,
+                        const int maxIterations = 50,
+                        const int maxOfParametersAndBblIterations = 40);
+
+    // Variant that can work on a const tree - only if we're not doing BBL
+    // WARNING: Running this with bblOptimization==true will give a fatal error
+    MDOUBLE operator() (const tree& et,
+                        const sequenceContainer& sc,
+                        stochasticProcessSSRV& ssrvSp,
+                        const Vdouble * weights=NULL,
+                        const MDOUBLE AlphaUpperBound = 15,
+                        const MDOUBLE NuUpperBound = 15,
+                        const MDOUBLE TrTvUpperBound = 10,
+                        const MDOUBLE epsilonParamOptimization = 0.01,
+                        const MDOUBLE epsilonLikelihoodImprovment = 0.01,
+                        const MDOUBLE epsilonBbl = 0.05,
+                        const int maxIterations = 50,
+                        const int maxOfParametersAndBblIterations = 40);
+
+    // Results of the last call to operator().
+    MDOUBLE getBestAlpha() {return _bestAlpha;}
+    MDOUBLE getBestNu() {return _bestNu;}
+    MDOUBLE getBestTrTv() {return _bestTrTv;}
+    MDOUBLE getBestTheta() {return _bestTheta;}
+    MDOUBLE getBestL() {return _bestL;}
+
+private:
+    MDOUBLE _bestAlpha;
+    MDOUBLE _bestNu;
+    MDOUBLE _bestTrTv;
+    MDOUBLE _bestTheta;
+    MDOUBLE _bestL;
+
+    // flags
+    bool _AlphaOptimizationFlag;
+    bool _NuOptimizationFlag;
+    bool _tamura92OptimizationFlag;
+    bool _bblOptimizationFlag;
+};
+
+#endif // BEST_PARAM_USSRV
+
diff --git a/libs/phylogeny/bestTamura92param.cpp b/libs/phylogeny/bestTamura92param.cpp
new file mode 100644
index 0000000..3978b78
--- /dev/null
+++ b/libs/phylogeny/bestTamura92param.cpp
@@ -0,0 +1,205 @@
+// $Id: bestTamura92param.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "bestTamura92param.h"
+#include <iostream>
+using namespace std;
+
+#include "bblEM.h"
+#include "numRec.h"
+#include "logFile.h"
+#include "bestAlpha.h"
+
+// Finds the ML TrTv and theta of a tamura92 model on a FIXED tree.
+// Alternates one-dimensional (brent) optimization of each parameter until a
+// round improves the likelihood by less than epsilonLikelihoodImprovment.
+// Results: getBestTrTv()/getBestTheta()/getBestL(); the best values are also
+// committed to the model inside sp (bug fix: the original left sp at the
+// last value brent probed, not the optimum, unlike the sibling classes
+// bestTamura92ParamAndBBL / bestTamura92ParamAlphaAndBBL).
+bestTamura92ParamFixedTree::bestTamura92ParamFixedTree(const tree& et, // find best TrTv and theta
+                                                       const sequenceContainer& sc,
+                                                       stochasticProcess& sp,
+                                                       const Vdouble * weights,
+                                                       const int maxTotalIterations,
+                                                       const MDOUBLE epsilonLikelihoodImprovment,
+                                                       const MDOUBLE epsilonLoglikelihoodForTrTvOptimization,
+                                                       const MDOUBLE epsilonLoglikelihoodForThetaOptimization,
+                                                       const MDOUBLE upperBoundOnTrTv) {
+    LOG(5,<<"Starting bestTamura92ParamFixedTree: find Best TrTv and theta"<<endl);
+    MDOUBLE oldL = VERYSMALL;
+    MDOUBLE newL = VERYSMALL;
+    tamura92* pModel = static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel());
+
+    // first guess for the parameters
+    MDOUBLE prevTrTv = upperBoundOnTrTv*0.3;
+    MDOUBLE prevTheta = 0.5;
+
+    for (int i=0; i < maxTotalIterations; ++i) {
+        // optimize TrTv, then commit the optimum to the model (brent's
+        // evaluation functor leaves the model at the last probed value)
+        newL = -brent(0.0, prevTrTv, upperBoundOnTrTv,
+                      C_evalTrTvParam(et,sc,sp,weights),
+                      epsilonLoglikelihoodForTrTvOptimization,
+                      &_bestTrTv);
+        pModel->changeTrTv(_bestTrTv);
+
+        // optimize Theta, then commit it as well
+        newL = -brent(0.0, prevTheta, 1.0,
+                      C_evalTheta(et,sc,sp,weights),
+                      epsilonLoglikelihoodForThetaOptimization,
+                      &_bestTheta);
+        pModel->changeTheta(_bestTheta);
+
+        // check for improvement in the likelihood
+        if (newL > oldL+epsilonLikelihoodImprovment) {
+            prevTrTv = _bestTrTv;
+            prevTheta = _bestTheta;
+            oldL = newL;
+            _bestL = newL;
+        } else {
+            if (newL>oldL) {
+                _bestL = newL;
+            } else {
+                // converged without improvement: restore the previous
+                // optimum, both in the reported values and in the model
+                _bestL = oldL;
+                _bestTrTv = prevTrTv;
+                _bestTheta = prevTheta;
+                pModel->changeTrTv(_bestTrTv);
+                pModel->changeTheta(_bestTheta);
+            }
+            break;
+        }
+    }
+}
+
+// Finds ML TrTv, theta AND branch lengths. Each round: brent on TrTv, brent
+// on theta (each committed to the model), then bblEM on the branch lengths.
+// Stops when a round no longer improves the likelihood by
+// epsilonLikelihoodImprovment, restoring the previous optimum (values, model
+// and tree) if the last round made things worse.
+bestTamura92ParamAndBBL::bestTamura92ParamAndBBL(tree& et, //find best TrTv, theta and best BBL
+                                                 const sequenceContainer& sc,
+                                                 stochasticProcess& sp,
+                                                 const Vdouble * weights,
+                                                 const int maxTotalIterations,
+                                                 const MDOUBLE epsilonLikelihoodImprovment,
+                                                 const MDOUBLE epsilonLoglikelihoodForTrTvOptimization,
+                                                 const MDOUBLE epsilonLoglikelihoodForThetaOptimization,
+                                                 const MDOUBLE epsilonLoglikelihoodForBBL,
+                                                 const MDOUBLE upperBoundOnTrTv,
+                                                 const int maxBBLIterations){
+    LOG(5,<<"Starting bestTamura92ParamAndBBL: find best TrTv, theta and BBL"<<endl);
+    MDOUBLE oldL = VERYSMALL;
+    MDOUBLE newL = VERYSMALL;
+    tamura92* pModel = static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel());
+
+    // first guess for the parameters
+    MDOUBLE prevTrTv = upperBoundOnTrTv*0.3;
+    MDOUBLE prevTheta = 0.5;
+    tree prevTree;
+
+    for (int i=0; i < maxTotalIterations; ++i) {
+        // optimize TrTv
+        newL = -brent(0.0, prevTrTv, upperBoundOnTrTv,
+                      C_evalTrTvParam(et,sc,sp,weights),
+                      epsilonLoglikelihoodForTrTvOptimization,
+                      &_bestTrTv);
+        pModel->changeTrTv(_bestTrTv);
+
+        // optimize Theta
+        newL = -brent(0.0, prevTheta, 1.0,
+                      C_evalTheta(et,sc,sp,weights),
+                      epsilonLoglikelihoodForThetaOptimization,
+                      &_bestTheta);
+        pModel->changeTheta(_bestTheta);
+
+        // optimize branch lengths
+        // (bug fix: pass the caller's weights, as the parameter
+        // optimizations above do; the original passed NULL here,
+        // silently ignoring position weights during BBL)
+        bblEM bblEM1(et,sc,sp,weights,maxBBLIterations,epsilonLoglikelihoodForBBL);
+        newL = bblEM1.getTreeLikelihood();
+
+        // check for improvement in the likelihood
+        if (newL > oldL+epsilonLikelihoodImprovment) {
+            prevTrTv = _bestTrTv;
+            prevTheta = _bestTheta;
+            prevTree = et;
+            oldL = newL;
+            _bestL = newL;
+        } else {
+            if (newL>oldL) {
+                _bestL = newL;
+            } else {
+                // restore the previous optimum - values, model and tree
+                _bestL = oldL;
+                _bestTrTv = prevTrTv;
+                _bestTheta = prevTheta;
+                pModel->changeTrTv(_bestTrTv);
+                pModel->changeTheta(_bestTheta);
+                et = prevTree;
+            }
+            break;
+        }
+    }
+}
+
+// Finds ML TrTv, theta, gamma-Alpha AND branch lengths. Each round runs
+// brent on TrTv, theta and alpha (each committed to the model/distribution),
+// then bblEM on the branch lengths; stops when a round no longer improves
+// the likelihood by epsilonLikelihoodImprovment.
+// Bug fixes vs. the original:
+//  - the revert branch forgot to restore _bestAlpha (it reported oldL
+//    together with the rejected alpha value);
+//  - bblEM received NULL instead of the caller's weights;
+//  - on revert, the model/distribution are reset to the previous optimum.
+bestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBL( //find best TrTv, theta, Alpha and best branch lengths
+                       tree& et,
+                       const sequenceContainer& sc,
+                       stochasticProcess& sp,
+                       const Vdouble * weights,
+                       const int maxTotalIterations,
+                       const MDOUBLE epsilonLikelihoodImprovment,
+                       const MDOUBLE epsilonLoglikelihoodForTrTvOptimization,
+                       const MDOUBLE epsilonLoglikelihoodForThetaOptimization,
+                       const MDOUBLE epsilonLoglikelihoodForAlphaOptimization,
+                       const MDOUBLE epsilonLoglikelihoodForBBL,
+                       const MDOUBLE upperBoundOnTrTv,
+                       const int maxBBLIterations,
+                       const MDOUBLE initAlpha,
+                       const MDOUBLE upperBoundOnAlpha)
+
+{
+    MDOUBLE oldL = VERYSMALL;
+    MDOUBLE newL = VERYSMALL;
+    tamura92* pModel = static_cast<tamura92*>(sp.getPijAccelerator()->getReplacementModel());
+
+    // first guess for the parameters: start from the model's current state
+    MDOUBLE prevTrTv = pModel->getTrTv();
+    MDOUBLE prevTheta = pModel->getTheta();
+    MDOUBLE prevAlpha = initAlpha;
+    tree prevTree;
+
+    for (int i=0; i < maxTotalIterations; ++i) {
+
+        // optimize TrTv
+        newL = -brent(0.0, prevTrTv, upperBoundOnTrTv,
+                      C_evalTrTvParam(et,sc,sp,weights),
+                      epsilonLoglikelihoodForTrTvOptimization,
+                      &_bestTrTv);
+        pModel->changeTrTv(_bestTrTv);
+
+        // optimize Theta
+        newL = -brent(0.0, prevTheta, 1.0,
+                      C_evalTheta(et,sc,sp,weights),
+                      epsilonLoglikelihoodForThetaOptimization,
+                      &_bestTheta);
+        pModel->changeTheta(_bestTheta);
+
+        // optimize Alpha (gamma rate-variation shape parameter)
+        newL = -brent(0.0, prevAlpha, upperBoundOnAlpha,
+                      C_evalAlpha(et,sc,sp,weights),
+                      epsilonLoglikelihoodForAlphaOptimization,
+                      &_bestAlpha);
+        (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(_bestAlpha);
+
+        LOG(5,<<"# bestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBL iteration " << i << ": after param optimization:" <<endl
+              <<"# old L = " << oldL << "\t"
+              <<"# new L = " << newL << endl
+              <<"# new Alpha = " << _bestAlpha << endl);
+
+        // optimize branch lengths (with the caller's weights)
+        bblEM bblEM1(et,sc,sp,weights,maxBBLIterations,epsilonLoglikelihoodForBBL);
+        newL = bblEM1.getTreeLikelihood();
+
+        LOG(5,<<"# bestTamura92ParamAlphaAndBBL::bestTamura92ParamAlphaAndBBL iteration " << i << ": after branch lengths optimization:" <<endl
+              <<"# After BBL new L = "<<newL<<" old L = "<<oldL<<endl
+              <<"# The tree:" );
+        LOGDO(5,et.output(myLog::LogFile()));
+
+        // check for improvement in the likelihood
+        if (newL > oldL+epsilonLikelihoodImprovment) {
+            oldL = newL;
+            _bestL = newL;
+            prevTrTv = _bestTrTv;
+            prevTheta = _bestTheta;
+            prevAlpha = _bestAlpha;
+            prevTree = et;
+        } else {
+            if (newL>oldL) {
+                _bestL = newL;
+            } else {
+                // restore the previous optimum - values, model and tree
+                _bestL = oldL;
+                _bestTrTv = prevTrTv;
+                _bestTheta = prevTheta;
+                _bestAlpha = prevAlpha;
+                pModel->changeTrTv(_bestTrTv);
+                pModel->changeTheta(_bestTheta);
+                (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(_bestAlpha);
+                et = prevTree;
+            }
+            break;
+        }
+    }
+}
+
diff --git a/libs/phylogeny/bestTamura92param.h b/libs/phylogeny/bestTamura92param.h
new file mode 100644
index 0000000..7e685a4
--- /dev/null
+++ b/libs/phylogeny/bestTamura92param.h
@@ -0,0 +1,137 @@
+// $Id: bestTamura92param.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___BEST_TAMURA92_PARAM
+#define ___BEST_TAMURA92_PARAM
+
+#include "definitions.h"
+
+#include "likelihoodComputation.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "gammaDistribution.h"
+#include "tree.h"
+#include "tamura92.h"
+
+
+// Finds the ML TrTv and theta of a tamura92 model on a fixed tree (no
+// branch-length optimization). All work is done in the constructor; the
+// getters return the optimum found.
+class bestTamura92ParamFixedTree {
+public:
+    explicit bestTamura92ParamFixedTree(const tree& et, // find best TrTv and theta
+                                        const sequenceContainer& sc,
+                                        stochasticProcess& sp,
+                                        const Vdouble * weights,
+                                        const int maxTotalIterations = 5,
+                                        const MDOUBLE epsilonLikelihoodImprovment = 0.05,
+                                        const MDOUBLE epsilonLoglikelihoodForTrTvOptimization = 0.01,
+                                        const MDOUBLE epsilonLoglikelihoodForThetaOptimization = 0.01,
+                                        const MDOUBLE upperBoundOnTrTv = 5.0);
+    MDOUBLE getBestTrTv() {return _bestTrTv;}   // best tamura92 TrTv parameter
+    MDOUBLE getBestTheta() {return _bestTheta;} // best tamura92 theta parameter
+    MDOUBLE getBestL() {return _bestL;}         // log-likelihood at the optimum
+private:
+    MDOUBLE _bestTrTv;
+    MDOUBLE _bestTheta;
+    MDOUBLE _bestL;
+};
+
+// Finds the ML TrTv, theta AND branch lengths (et is modified in place).
+// All work is done in the constructor; the getters return the optimum.
+class bestTamura92ParamAndBBL {
+public:
+    explicit bestTamura92ParamAndBBL(tree& et, //find best TrTv, theta and best BBL
+                                     const sequenceContainer& sc,
+                                     stochasticProcess& sp,
+                                     const Vdouble * weights=NULL,
+                                     const int maxTotalIterations=5,
+                                     const MDOUBLE epsilonLikelihoodImprovment = 0.05,
+                                     const MDOUBLE epsilonLoglikelihoodForTrTvOptimization = 0.01,
+                                     const MDOUBLE epsilonLoglikelihoodForThetaOptimization = 0.01,
+                                     const MDOUBLE epsilonLoglikelihoodForBBL = 0.01,
+                                     const MDOUBLE upperBoundOnTrTv = 5.0,
+                                     const int maxBBLIterations=10);
+    MDOUBLE getBestTrTv() {return _bestTrTv;}   // best tamura92 TrTv parameter
+    MDOUBLE getBestTheta() {return _bestTheta;} // best tamura92 theta parameter
+    MDOUBLE getBestL() {return _bestL;}         // log-likelihood at the optimum
+private:
+    MDOUBLE _bestTrTv;
+    MDOUBLE _bestTheta;
+    MDOUBLE _bestL;
+};
+
+// Finds the ML TrTv, theta, gamma Alpha AND branch lengths (et is modified
+// in place). All work is done in the constructor; getters return the optimum.
+class bestTamura92ParamAlphaAndBBL {
+public:
+    explicit bestTamura92ParamAlphaAndBBL( //find best TrTv, theta, Alpha and best branch lengths
+                   tree& et,
+                   const sequenceContainer& sc,
+                   stochasticProcess& sp,
+                   const Vdouble * weights=NULL,
+                   const int maxTotalIterations=5,
+                   const MDOUBLE epsilonLikelihoodImprovment= 0.05,
+                   const MDOUBLE epsilonLoglikelihoodForTrTvOptimization= 0.01,
+                   const MDOUBLE epsilonLoglikelihoodForThetaOptimization= 0.01,
+                   const MDOUBLE epsilonLoglikelihoodForAlphaOptimization= 0.01,
+                   const MDOUBLE epsilonLoglikelihoodForBBL= 0.01,
+                   const MDOUBLE upperBoundOnTrTv = 5.0,
+                   const int maxBBLIterations=10,
+                   const MDOUBLE initAlpha = 1.5,
+                   const MDOUBLE upperBoundOnAlpha = 5.0);
+    MDOUBLE getBestTrTv() {return _bestTrTv;}   // best tamura92 TrTv parameter
+    MDOUBLE getBestTheta() {return _bestTheta;} // best tamura92 theta parameter
+    MDOUBLE getBestAlpha() {return _bestAlpha;} // best gamma shape parameter
+    MDOUBLE getBestL() {return _bestL;}         // log-likelihood at the optimum
+private:
+    MDOUBLE _bestTrTv;
+    MDOUBLE _bestTheta;
+    MDOUBLE _bestAlpha;
+    MDOUBLE _bestL;
+};
+
+
+
+// Functor for brent: evaluates the negative log-likelihood of the tree for
+// a candidate TrTv value. NOTE: each evaluation updates the model inside
+// _sp in place, so after brent returns the model holds the LAST probed
+// value, not necessarily the best one.
+class C_evalTrTvParam{
+public:
+    C_evalTrTvParam( const tree& et,
+                const sequenceContainer& sc,
+                stochasticProcess& sp,
+                const Vdouble * weights = NULL)
+        : _et(et),_sc(sc),_weights(weights),_sp(sp){};
+private:
+    const tree& _et;
+    const sequenceContainer& _sc;
+    const Vdouble * _weights;
+    stochasticProcess& _sp;
+public:
+    MDOUBLE operator() (MDOUBLE TrTv) {
+        tamura92* pModel = static_cast<tamura92*>(_sp.getPijAccelerator()->getReplacementModel());
+        pModel->changeTrTv(TrTv);
+        const MDOUBLE logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
+        LOG(5,<<" with TrTv = "<<TrTv<<" logL = "<<logL<<endl);
+        return -logL; // brent minimizes, so negate the log-likelihood
+    }
+};
+
+// Functor for brent: evaluates the negative log-likelihood of the tree for
+// a candidate theta value. As with C_evalTrTvParam, every evaluation
+// mutates the model inside _sp in place.
+class C_evalTheta{
+public:
+    C_evalTheta( const tree& et,
+                const sequenceContainer& sc,
+                stochasticProcess& sp,
+                const Vdouble * weights = NULL)
+        : _et(et),_sc(sc),_weights(weights),_sp(sp){};
+private:
+    const tree& _et;
+    const sequenceContainer& _sc;
+    const Vdouble * _weights;
+    stochasticProcess& _sp;
+public:
+    MDOUBLE operator() (MDOUBLE theta) {
+        tamura92* pModel = static_cast<tamura92*>(_sp.getPijAccelerator()->getReplacementModel());
+        pModel->changeTheta(theta);
+        const MDOUBLE logL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,_sp,_weights);
+        LOG(5,<<" with theta = "<<theta<<" logL = "<<logL<<endl);
+        return -logL; // brent minimizes, so negate the log-likelihood
+    }
+};
+
+
+
+
+#endif
+
+
diff --git a/libs/phylogeny/betaDistribution.cpp b/libs/phylogeny/betaDistribution.cpp
new file mode 100644
index 0000000..41ab0b8
--- /dev/null
+++ b/libs/phylogeny/betaDistribution.cpp
@@ -0,0 +1,139 @@
+// $Id: betaDistribution.cpp 3985 2008-05-11 11:00:44Z adido $
+
+#include "betaDistribution.h"
+#include "gammaUtilities.h"
+#include "betaUtilities.h"
+#include "errorMsg.h"
+#include "logFile.h"
+#include <cmath>
+
+
+// Default constructor: empty (unusable) discretization - setBetaParameters
+// must be called before the distribution is used.
+betaDistribution::betaDistribution()
+{
+    _alpha = 0.0;
+    _beta = 0.0;
+    _boundary.clear();
+    _rates.clear();
+    _ratesProb.clear();
+    _globalRate = 1; // global rate scaler defaults to 1
+    _discretizationType = MEDIAN;
+}
+
+// Copy constructor. Note: members are initialized in their declaration
+// order in the class, not in the order written in this initializer list.
+betaDistribution::betaDistribution(const betaDistribution& other) :
+ _boundary(other._boundary),
+ _alpha(other._alpha),
+ _beta(other._beta),
+ _rates(other._rates),
+ _ratesProb(other._ratesProb),
+ _globalRate(other._globalRate),
+ _discretizationType(other._discretizationType){
+}
+
+// Constructs a discretized Beta(alpha,beta) with the given number of
+// categories; the discretization itself is built by setBetaParameters.
+betaDistribution::betaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories,discretizationType in_discretizationType) :distribution(){
+    _discretizationType = in_discretizationType;
+    _globalRate = 1.0;
+    setBetaParameters(in_number_of_categories, alpha, beta);
+}
+
+// Destructor. The vectors release their storage automatically; the explicit
+// clears are kept from the original code and are harmless.
+betaDistribution::~betaDistribution() {
+    _rates.clear();
+    _ratesProb.clear();
+    _boundary.clear();
+}
+
+// Sets a new alpha; no-op when unchanged, otherwise the whole
+// discretization is rebuilt.
+void betaDistribution::setAlpha(MDOUBLE in_alpha) {
+    if (in_alpha != _alpha)
+        setBetaParameters(categories(), in_alpha, _beta);
+}
+
+// Sets a new beta; no-op when unchanged, otherwise the whole
+// discretization is rebuilt.
+void betaDistribution::setBeta(MDOUBLE in_beta) {
+    if (in_beta != _beta)
+        setBetaParameters(categories(), _alpha, in_beta);
+}
+
+// Switches between MEAN and MEDIAN discretization and recomputes the rates.
+void betaDistribution::setDiscretizationType(discretizationType in_discretizationType) {
+    if (_discretizationType == in_discretizationType)
+        return; // nothing to do
+    _discretizationType = in_discretizationType;
+    // a single category is always rate 1.0, so recompute only when K > 1
+    if (categories() > 1)
+        fill_rates();
+}
+// Rebuilds the discretization with a different category count (no-op when
+// the count is unchanged).
+void betaDistribution::change_number_of_categories(int in_number_of_categories) {
+    if (in_number_of_categories != categories())
+        setBetaParameters(in_number_of_categories, _alpha, _beta);
+}
+
+// (Re)builds the discretization for Beta(in_alpha,in_beta) with the given
+// number of categories: resizes the rate/probability/boundary arrays and
+// recomputes the per-category rates.
+void betaDistribution::setBetaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
+    // skip the rebuild when nothing changed
+    if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories()))
+        return;
+
+    // clamp tiny parameters: values below MINIMUM_ALPHA_PARAM cause
+    // numerical underflow in the beta computations
+    if (in_alpha < MINIMUM_ALPHA_PARAM)
+        in_alpha = MINIMUM_ALPHA_PARAM;
+    if (in_beta < MINIMUM_ALPHA_PARAM)
+        in_beta = MINIMUM_ALPHA_PARAM;
+
+    _alpha = in_alpha;
+    _beta = in_beta;
+
+    // re-dimension the arrays; category probabilities start uniform
+    _rates.assign(in_number_of_categories, 0.0);
+    _ratesProb.assign(in_number_of_categories, 1.0/in_number_of_categories);
+    _boundary.assign(in_number_of_categories+1, 0.0);
+
+    if (in_number_of_categories == 1) {
+        _rates[0] = 1.0; // a single category always has rate 1
+        return;
+    }
+    if (in_number_of_categories > 1)
+        fill_rates();
+}
+// Computes the rate of each of the K categories from the current alpha/beta
+// and the category boundaries:
+//  - MEAN:   the average of r over [_boundary[i], _boundary[i+1]]
+//  - MEDIAN: the (2i+1)/(2K) quantile of Beta(alpha,beta)
+// Always returns 0 (no error is reported from here).
+int betaDistribution::fill_rates() {
+	fill_boundaries();
+	int i;
+	for (i=0; i<categories(); ++i) {
+		if (_discretizationType == MEAN)
+			_rates[i]=computeAverage_r(_boundary[i], _boundary[i+1], _alpha, _beta, categories());
+		else //_discretizationType == MEDIAN
+			_rates[i] =inverseCDFBeta(_alpha, _beta,static_cast<MDOUBLE>(i*2 +1)/(2*categories()));
+	}
+	return 0;
+}
+
+// Fills _boundary with the i/K quantiles of Beta(alpha,beta) for
+// i = 1..K-1, then pins the two ends: _boundary[0] = 0, _boundary[K] = 1
+// (the support of the beta distribution). Always returns 0.
+int betaDistribution::fill_boundaries() {
+	int i;
+	for (i=1; i<categories(); ++i)
+	{
+		_boundary[i]=inverseCDFBeta(_alpha, _beta,static_cast<MDOUBLE>(i)/categories());
+	}
+	_boundary[0]=0;
+	// note: i == categories() here (its value after the loop), so this
+	// writes the last cell of _boundary
+	_boundary[i]=1;
+
+	return 0;
+}
+
+
+const MDOUBLE betaDistribution::getCumulativeProb(const MDOUBLE x) const
+{
+	// P(r <= x) for r ~ Beta(_alpha, _beta), via the regularized
+	// incomplete beta function. (The previous comment here was copied
+	// from the gamma distribution and did not describe this code.)
+	return incompleteBeta(_alpha,_beta,x);
+}
+
+
+
diff --git a/libs/phylogeny/betaDistribution.h b/libs/phylogeny/betaDistribution.h
new file mode 100644
index 0000000..7e9f5fb
--- /dev/null
+++ b/libs/phylogeny/betaDistribution.h
@@ -0,0 +1,61 @@
+// $Id: betaDistribution.h 5803 2009-01-20 09:17:05Z adido $
+
+#ifndef ___BETA_DIST
+#define ___BETA_DIST
+/************************************************************
+This distribution can take several forms depending on its free parameters alpha,beta
+For an extensive exlpanation of this distribution
+see http://mathworld.wolfram.com/BetaDistribution.html
+************************************************************/
+#include "definitions.h"
+#include "distribution.h"
+
+// Discretized Beta(alpha,beta) rate distribution: the [0,1] support is cut
+// into K categories whose representative rates are either the interval
+// means or the interval-midpoint quantiles (MEAN/MEDIAN).
+class betaDistribution : public distribution {
+
+public:
+	enum discretizationType{MEAN, MEDIAN};
+	explicit betaDistribution(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories,discretizationType in_discretizationType = MEDIAN);
+	explicit betaDistribution(const betaDistribution& other);
+	explicit betaDistribution();
+	virtual ~betaDistribution();
+	// Rebuilds the whole discretization for new parameters/category count.
+	virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
+
+	virtual const int categories() const {return _rates.size();}
+	// Rate of category i, scaled by the global rate.
+	virtual const MDOUBLE rates(const int i) const {return _rates[i]*_globalRate;}
+	virtual const MDOUBLE ratesProb(const int i) const {return _ratesProb[i];}
+	virtual distribution* clone() const { return new betaDistribution(*this); }
+	virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;}
+	virtual MDOUBLE getGlobalRate()const {return _globalRate;}
+	// Beta CDF at x (regularized incomplete beta function).
+	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
+	virtual void setAlpha(MDOUBLE newAlpha);
+	virtual MDOUBLE getAlpha() const {return _alpha;};
+	virtual void setBeta(MDOUBLE newBeta);
+	virtual MDOUBLE getBeta() const {return _beta;};
+	virtual void setDiscretizationType(discretizationType in_discretizationType);
+	virtual discretizationType getDiscretizationType() const {return _discretizationType;};
+
+	virtual void change_number_of_categories(int in_number_of_categories);
+	// Returns the ith boundary. Note: _boundary[0] = 0 and
+	// _boundary[categories()] = 1 (set by fill_boundaries; the beta
+	// distribution's support is [0,1]).
+	virtual MDOUBLE getBorder(const int i) const {return _boundary[i];}
+
+
+private:
+	int fill_rates();      // recompute per-category rates
+	int fill_boundaries(); // recompute category boundaries (quantiles)
+
+
+protected:
+	MDOUBLE _alpha;
+	MDOUBLE _beta;
+
+	vector<MDOUBLE> _rates;     // representative rate per category
+	vector<MDOUBLE> _ratesProb; // probability per category
+	MDOUBLE _globalRate;        // scales all rates uniformly
+	discretizationType _discretizationType;
+	vector<MDOUBLE> _boundary;  // K+1 category boundaries in [0,1]
+
+};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/betaDistributionFixedCategories.cpp b/libs/phylogeny/betaDistributionFixedCategories.cpp
new file mode 100644
index 0000000..e0e347a
--- /dev/null
+++ b/libs/phylogeny/betaDistributionFixedCategories.cpp
@@ -0,0 +1,158 @@
+#include "betaDistributionFixedCategories.h"
+#include "errorMsg.h"
+#include "gammaUtilities.h"
+
+
+// Builds a fixed-category beta distribution from user-supplied boundaries;
+// rates and probabilities are derived from them.
+betaDistributionFixedCategories::betaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta) :
+betaDistribution()
+{
+    _beta = beta;
+    _alpha = alpha;
+    setFixedCategories(fixedBoundaries);
+}
+
+
+// Builds a fixed-category beta distribution from explicit rates AND
+// boundaries; only the category probabilities are computed here.
+betaDistributionFixedCategories::betaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta) :
+betaDistribution()
+{
+    // one rate per interval requires exactly one more boundary than rates
+    if (boundaries.size() != fixedRates.size() + 1)
+        errorMsg::reportError("error in betaDistributionFixedCategories constructor");
+    _beta = beta;
+    _alpha = alpha;
+    _boundary = boundaries;
+    _rates = fixedRates;
+    computeRatesProbs();
+}
+
+
+
+// Builds a fixed-category beta distribution with default (uniformly
+// spaced) boundaries for catNum categories.
+betaDistributionFixedCategories::betaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum)
+: betaDistribution()
+{
+    _beta = beta;
+    _alpha = alpha;
+    setDefaultBoundaries(catNum);
+}
+
+// Default: Beta(0.5, 0.5) over 10 uniformly spaced categories.
+betaDistributionFixedCategories::betaDistributionFixedCategories()
+: betaDistribution()
+{
+    _beta = 0.5;
+    _alpha = 0.5;
+    setDefaultBoundaries(10);
+}
+
+// Copy construction delegates entirely to the base class (all state is
+// stored in betaDistribution's members).
+betaDistributionFixedCategories::betaDistributionFixedCategories(const betaDistributionFixedCategories& other)
+: betaDistribution(other)
+{}
+// Unlike the base class, changing the category count here resets the
+// boundaries to the default uniformly spaced layout.
+void betaDistributionFixedCategories::change_number_of_categories(int in_number_of_categories)
+{
+	setDefaultBoundaries(in_number_of_categories);
+}
+
+
+// Installs user-supplied category boundaries (first must be 0, at least two
+// required), then derives the rates and probabilities from them.
+// Bug fixes: the original indexed _boundary[_boundary.size()] - one past
+// the end of the vector (undefined behavior) - instead of the last element,
+// and its error messages named the wrong class (generalGamma...).
+void betaDistributionFixedCategories::setFixedCategories(const Vdouble& fixedBoundaries){
+
+    if (fixedBoundaries.size()<2)
+        errorMsg::reportError("Error in betaDistributionFixedCategories::setFixedCategories : at least two boundaries are required");
+    if (fixedBoundaries[0] > 0.0)
+        errorMsg::reportError("Error in betaDistributionFixedCategories::setFixedCategories : first boundary should be zero");
+
+    _boundary = fixedBoundaries;
+    // clamp the LAST boundary (index size()-1) to avoid overflow
+    if (_boundary[_boundary.size()-1] > VERYBIG/10000.0)
+        _boundary[_boundary.size()-1] = VERYBIG/10000.0;
+
+    setFixedCategories();
+}
+
+// Derives the category rates from the current boundaries (interval
+// midpoints), then the category probabilities.
+void betaDistributionFixedCategories::setFixedCategories() {
+	fill_mean();
+	computeRatesProbs();
+}
+
+// Sets each category's rate to the midpoint of its boundary interval.
+// Bug fixes: _boundary.size() is unsigned, so the original's
+// "size()-1 == 0" check missed an EMPTY boundary vector (the subtraction
+// underflows); the error message also named the gamma class.
+void betaDistributionFixedCategories::fill_mean()
+{
+    if (_boundary.size() < 2)
+        errorMsg::reportError("Error in betaDistributionFixedCategories::fill_mean, fixed boundaries must be first initialized");
+    int numOfCategories = _boundary.size()-1;
+    _rates.clear();
+    _rates.resize(numOfCategories,0.0);
+    for (int cat=0; cat<numOfCategories; ++cat) {
+        _rates[cat] = (_boundary[cat]+_boundary[cat+1])/2.0;
+    }
+}
+
+
+// Overrides betaDistribution::setBetaParameters for fixed categories: the
+// rates and boundaries stay untouched; only alpha/beta (and hence the
+// category probabilities) change. The number of categories cannot be
+// changed here - call setFixedCategories first.
+// (Bug fix: the error message named "setGammaParameters", a copy-paste
+// from the gamma counterpart of this class.)
+void betaDistributionFixedCategories::setBetaParameters (int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
+    if (in_number_of_categories==1) {
+        _rates[0] = 1.0; // single category: rate is always 1
+        return;
+    }
+    if (in_number_of_categories != categories())
+        errorMsg::reportError("betaDistributionFixedCategories::setBetaParameters: the number of categories cannot be changed, first call setFixedCategories");
+    if ((in_alpha == _alpha) && (in_beta == _beta))
+        return; // nothing changed
+
+    // values below MINIMUM_ALPHA_PARAM cause numerical underflow
+    if (in_alpha < MINIMUM_ALPHA_PARAM)
+        in_alpha = MINIMUM_ALPHA_PARAM;
+    if (in_beta < MINIMUM_ALPHA_PARAM)
+        in_beta = MINIMUM_ALPHA_PARAM;
+
+    _alpha = in_alpha;
+    _beta = in_beta;
+    computeRatesProbs();
+}
+
+// Computes each category's probability as the Beta measure of its boundary
+// interval; the last category receives the residual 1-total so the
+// probabilities sum to exactly 1.
+// Assumes categories() >= 1: after the loop, cat == categories()-1 and
+// indexes the last category.
+void betaDistributionFixedCategories::computeRatesProbs(){
+	MDOUBLE totalProb = 0.0;
+	MDOUBLE catProb = 0.0;
+	MDOUBLE lowerBoundaryProb = 0.0;
+	MDOUBLE upperBoundaryProb = 0.0;
+	int cat;
+	_ratesProb.clear();
+	_ratesProb.resize(categories());
+	for (cat = 0; cat < categories()-1; ++cat) {
+		upperBoundaryProb = getCumulativeProb(_boundary[cat+1]);
+		catProb = upperBoundaryProb - lowerBoundaryProb;
+		_ratesProb[cat] = catProb;
+		totalProb += catProb;
+		lowerBoundaryProb = upperBoundaryProb;
+	}
+	_ratesProb[cat] = 1.0 - totalProb;
+}
+
+// Installs uniformly spaced default boundaries on [0,1]:
+// _boundary[i] = i/catNum.
+// Generalized from the original switch statement, which supported only
+// 1, 2 and 10 categories (and errored otherwise); the uniform formula
+// reproduces those cases exactly and works for any positive count.
+void betaDistributionFixedCategories::setDefaultBoundaries(int catNum)
+{
+    if (catNum < 1)
+        errorMsg::reportError("error in betaDistributionFixedCategories::setDefaultBoundaries");
+    _boundary.clear();
+    _boundary.resize(catNum+1,0.0);
+    _boundary[0] = 0;
+    _boundary[catNum] = 1.0;
+    for (int i = 1; i < catNum; ++i)
+        _boundary[i] = static_cast<MDOUBLE>(i)/catNum;
+
+    setFixedCategories();
+}
diff --git a/libs/phylogeny/betaDistributionFixedCategories.h b/libs/phylogeny/betaDistributionFixedCategories.h
new file mode 100644
index 0000000..845ec53
--- /dev/null
+++ b/libs/phylogeny/betaDistributionFixedCategories.h
@@ -0,0 +1,37 @@
+#ifndef ___BETA_FIXED_CATEGORIES_CATEGORIES
+#define ___BETA_FIXED_CATEGORIES_CATEGORIES
+/************************************************************
+This class differs from the regular betaDistribution in that
+the rateCategories are fixed according to the user's decision.
+Thus, only the probability of each category changes for each specific alpha and beta value, but
+the rate categories themselves are constant.
+************************************************************/
+#include "definitions.h"
+#include "betaDistribution.h"
+#include "errorMsg.h"
+// Beta distribution whose rate categories are fixed by the user: changing
+// alpha/beta only re-weights the probability of each category (see
+// computeRatesProbs), never the category rates themselves.
+class betaDistributionFixedCategories : public betaDistribution {
+
+public:
+ // Construct from explicit category boundaries plus alpha/beta.
+ explicit betaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta);
+ // Construct from explicit category rates AND boundaries plus alpha/beta.
+ explicit betaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta);
+ // Construct with default boundaries for catNum categories (1, 2 or 10).
+ explicit betaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum);
+ explicit betaDistributionFixedCategories(const betaDistributionFixedCategories& other);
+ explicit betaDistributionFixedCategories();
+ virtual ~betaDistributionFixedCategories() {}
+ virtual distribution* clone() const { return new betaDistributionFixedCategories(*this); }
+ virtual void change_number_of_categories(int in_number_of_categories);
+ virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
+ virtual void setFixedCategories(const Vdouble& fixedBoundaries);
+
+protected:
+ virtual void setDefaultBoundaries(int catNum);
+ virtual void setFixedCategories();
+ virtual void fill_mean();
+ // Recompute per-category probabilities for the current alpha/beta.
+ virtual void computeRatesProbs();
+
+};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.cpp b/libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.cpp
new file mode 100644
index 0000000..00c04e0
--- /dev/null
+++ b/libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.cpp
@@ -0,0 +1,52 @@
+#include "betaDistributionFixedCategoriesWithOmegaUniform.h"
+#include "errorMsg.h"
+#include "gammaUtilities.h"
+#include "matrixUtils.h"
+
+
+// Copy constructor: member-wise copy of the beta and uniform-omega parts.
+betaDistributionFixedCategoriesOmegaUniform::betaDistributionFixedCategoriesOmegaUniform(const betaDistributionFixedCategoriesOmegaUniform& other)
+: _betaDistr(other._betaDistr),_omegaDistr(other._omegaDistr){
+
+}
+
+// Builds the composite distribution: a fixed-category beta part over
+// [0,1] and a uniform omega part on [omegaLowerBound, omegaUpperBound]
+// with a global rate of 1.
+betaDistributionFixedCategoriesOmegaUniform::betaDistributionFixedCategoriesOmegaUniform(int betaDistrCatNum,MDOUBLE alpha,MDOUBLE beta,
+ int omegaCatNum,MDOUBLE omegaLowerBound,MDOUBLE omegaUpperBound)
+{
+ _betaDistr.setBetaParameters(betaDistrCatNum,alpha,beta);
+ _omegaDistr.setGlobalRate(1.0);
+ _omegaDistr.setUniformParameters(omegaCatNum,omegaLowerBound,omegaUpperBound);
+
+}
+
+// Forwards the beta-part parameters to the embedded fixed-category beta
+// distribution; the omega part is unaffected.
+void betaDistributionFixedCategoriesOmegaUniform::setBetaParameters(int in_number_of_categories, MDOUBLE alpha, MDOUBLE beta)
+{
+ _betaDistr.setBetaParameters(in_number_of_categories,alpha,beta);
+}
+
+
+
+// Changes the number of categories of the beta part only.
+void betaDistributionFixedCategoriesOmegaUniform::change_number_of_categories(int in_number_of_categories)
+{
+ _betaDistr.change_number_of_categories(in_number_of_categories);
+}
+
+
+// Probability of category i_rate: indices below the beta category count
+// address the beta part; the remainder address the uniform omega part.
+const MDOUBLE betaDistributionFixedCategoriesOmegaUniform::ratesProb(const int i_rate) const {
+ int noBetaDistCat = _betaDistr.categories();
+ if (i_rate < _betaDistr.categories())
+ return _betaDistr.ratesProb(i_rate);
+ else return _omegaDistr.ratesProb(i_rate - noBetaDistCat); //omega prob
+}
+
+
+// Rate of category i: beta-part categories come first, followed by the
+// uniform omega categories (index shifted by the beta category count).
+const MDOUBLE betaDistributionFixedCategoriesOmegaUniform::rates(const int i) const {
+ int noBetaDistCat = _betaDistr.categories();
+ if (i < noBetaDistCat)
+ return _betaDistr.rates(i);
+ else return _omegaDistr.rates(i - noBetaDistCat); //omega
+
+}
+
+// Cumulative probability: delegates to the beta part only (the omega part
+// is not included in this CDF).
+const MDOUBLE betaDistributionFixedCategoriesOmegaUniform::getCumulativeProb(const MDOUBLE x) const {
+ return _betaDistr.getCumulativeProb(x);
+}
\ No newline at end of file
diff --git a/libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.h b/libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.h
new file mode 100644
index 0000000..be0208b
--- /dev/null
+++ b/libs/phylogeny/betaDistributionFixedCategoriesWithOmegaUniform.h
@@ -0,0 +1,53 @@
+#ifndef ___BETA_DISTR_FIXED_CATEGORIES_OMEGA_UNIFORM
+#define ___BETA_DISTR_FIXED_CATEGORIES_OMEGA_UNIFORM
+/************************************************************
+This class differs from the regular betaOmegaDistribution in that
+the rateCategories are fixed according to the user's decision.
+Thus, only the probability of each category changes for each specific alpha value but
+the rate categories themselves are constant.
+************************************************************/
+#include "definitions.h"
+#include "betaDistributionFixedCategories.h"
+#include "uniformDistribution.h"
+#include "errorMsg.h"
+
+
+// Composite distribution: a fixed-category beta distribution on [0,1] plus a
+// uniform omega distribution (by default 10 categories on [1,11]). Category
+// indices below betaCategories() belong to the beta part; the rest to omega.
+class betaDistributionFixedCategoriesOmegaUniform : public distribution {
+public:
+
+ explicit betaDistributionFixedCategoriesOmegaUniform(const betaDistributionFixedCategoriesOmegaUniform& other);
+ explicit betaDistributionFixedCategoriesOmegaUniform(int betaDistrCatNum,MDOUBLE alpha,MDOUBLE beta,
+ int omegaCatNum =10,MDOUBLE omegaLowerBound = 1,MDOUBLE omegaUpperBound = 11);
+ explicit betaDistributionFixedCategoriesOmegaUniform() {};
+ virtual ~betaDistributionFixedCategoriesOmegaUniform() {};
+ virtual distribution* clone() const { return new betaDistributionFixedCategoriesOmegaUniform(*this); }
+ virtual void change_number_of_categories(int in_number_of_categories);
+ virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
+
+ // Total category count = beta categories + omega categories.
+ virtual const int categories() const {return _betaDistr.categories()+ _omegaDistr.categories();}
+ virtual const int betaCategories()const {return _betaDistr.categories();};
+ virtual const MDOUBLE rates(const int i) const;
+ virtual const MDOUBLE ratesProb(const int i_rate) const;
+ virtual void setGlobalRate(const MDOUBLE x) {_betaDistr.setGlobalRate(x);}
+ virtual MDOUBLE getGlobalRate()const {return _betaDistr.getGlobalRate();}
+ virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
+ virtual void setAlpha(MDOUBLE newAlpha){ _betaDistr.setAlpha(newAlpha);}
+ virtual MDOUBLE getAlpha() const {return _betaDistr.getAlpha();};
+ virtual void setBeta(MDOUBLE newBeta){_betaDistr.setBeta(newBeta);}
+ virtual MDOUBLE getBeta() const {return _betaDistr.getBeta();};
+ virtual MDOUBLE getBorder(const int i) const {return _betaDistr.getBorder(i);} //returns the ith border. Note: border[0] = 0, border[categories()] = infinite
+ //virtual MDOUBLE getOmegai() const ;
+ //virtual MDOUBLE getBetaProbi() const ;
+ //virtual void setOmegai(MDOUBLE omega);
+ //virtual void setBetaProbi(MDOUBLE betaProb);
+
+
+private:
+ betaDistributionFixedCategories _betaDistr; //10 fixed cat 0.05, 0.15, 0.25 ...,0.95
+ uniformDistribution _omegaDistr; // w ~ U(1,11) with 10 cat
+};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/betaOmegaDistribution.cpp b/libs/phylogeny/betaOmegaDistribution.cpp
new file mode 100644
index 0000000..6f08abb
--- /dev/null
+++ b/libs/phylogeny/betaOmegaDistribution.cpp
@@ -0,0 +1,61 @@
+// $Id: betaOmegaDistribution.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "betaOmegaDistribution.h"
+#include "gammaUtilities.h"
+#include "betaUtilities.h"
+#include "errorMsg.h"
+#include "logFile.h"
+#include <cmath>
+
+
+// Default constructor: omega = 1 and equal weight (0.5) on the beta part.
+// The embedded beta distribution keeps its own defaults.
+betaOmegaDistribution::betaOmegaDistribution()
+{
+ _omega=1;
+ _betaProb = 0.5;
+}
+
+// Copy constructor. Note that the order of initialization makes a difference:
+// it must match the member declaration order (_betaDistr, _omega, _betaProb).
+betaOmegaDistribution::betaOmegaDistribution(const betaOmegaDistribution& other) :
+ _betaDistr(other._betaDistr),
+ _omega(other._omega),
+ _betaProb(other._betaProb){
+}
+
+// Full constructor: stores omega and the beta-part weight, then configures
+// the embedded beta distribution with global rate 1 and the given
+// alpha/beta/category count.
+betaOmegaDistribution::betaOmegaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories,MDOUBLE betaProb,MDOUBLE omega) :distribution(){
+ _omega = omega;
+ _betaProb = betaProb;
+ _betaDistr.setGlobalRate(1.0);
+ _betaDistr.setBetaParameters(in_number_of_categories,alpha,beta);
+}
+
+// Destructor: nothing to release; members clean up themselves.
+betaOmegaDistribution::~betaOmegaDistribution() {}
+
+
+// Resets all free parameters: omega, the beta-part weight, and the embedded
+// beta distribution's alpha/beta/category count.
+void betaOmegaDistribution::setBetaOmegaParameters(int in_number_of_categories,MDOUBLE alpha, MDOUBLE beta,MDOUBLE betaProb,MDOUBLE omega){
+ _omega = omega;
+ _betaProb = betaProb;
+ _betaDistr.setBetaParameters(in_number_of_categories, alpha, beta);
+
+}
+// Probability of category i: beta categories are scaled by _betaProb; the
+// single trailing omega category carries the remaining mass (1 - _betaProb).
+const MDOUBLE betaOmegaDistribution::ratesProb(const int i) const {
+ if (i < _betaDistr.categories())
+ return _betaDistr.ratesProb(i)*_betaProb;
+ else return (1-_betaProb); //omega prob
+}
+
+
+// Rate of category i: beta-part rates for the first categories(), then the
+// single omega value for the last category.
+const MDOUBLE betaOmegaDistribution::rates(const int i) const {
+ if (i < _betaDistr.categories())
+ return _betaDistr.rates(i);
+ else return _omega; //omega
+}
+
+
+
+// Cumulative probability: delegates to the beta part (the omega point mass
+// is not included in this CDF).
+const MDOUBLE betaOmegaDistribution::getCumulativeProb(const MDOUBLE x) const
+{ return _betaDistr.getCumulativeProb(x);
+}
+
+
+
+
diff --git a/libs/phylogeny/betaOmegaDistribution.h b/libs/phylogeny/betaOmegaDistribution.h
new file mode 100644
index 0000000..70e70f5
--- /dev/null
+++ b/libs/phylogeny/betaOmegaDistribution.h
@@ -0,0 +1,56 @@
+// $Id: betaOmegaDistribution.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___BETA_OMEGA_DIST
+#define ___BETA_OMEGA_DIST
+/************************************************************
+This distribution can take several forms depending on its free parameters alpha,beta
+For an extensive explanation of this distribution
+see http://mathworld.wolfram.com/BetaDistribution.html
+************************************************************/
+#include "definitions.h"
+#include "distribution.h"
+#include "betaDistribution.h"
+
+#include "logFile.h"
+
+using namespace std;
+
+
+// Mixture of a discretized beta distribution (weight _betaProb) and a single
+// extra omega category (weight 1 - _betaProb) appended after the beta
+// categories; hence categories() = beta categories + 1.
+class betaOmegaDistribution : public distribution {
+
+public:
+ explicit betaOmegaDistribution(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories,MDOUBLE betaProb,MDOUBLE omega);
+ explicit betaOmegaDistribution(const betaOmegaDistribution& other);
+ explicit betaOmegaDistribution();
+ virtual ~betaOmegaDistribution();
+ virtual void setBetaOmegaParameters(int in_number_of_categories,MDOUBLE alpha, MDOUBLE beta,MDOUBLE betaProb,MDOUBLE omega);
+ virtual void setBetaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta){_betaDistr.setBetaParameters(numOfCategories,alpha,beta);}
+
+ // One extra category for omega, appended after the beta categories.
+ virtual const int categories() const {return _betaDistr.categories()+1;}
+ virtual const MDOUBLE rates(const int i) const;
+ virtual const MDOUBLE ratesProb(const int i) const;
+ virtual distribution* clone() const { return new betaOmegaDistribution(*this); }
+ virtual void setGlobalRate(const MDOUBLE x) {_betaDistr.setGlobalRate(x);}
+ virtual MDOUBLE getGlobalRate()const {return _betaDistr.getGlobalRate();}
+ virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
+ virtual void setAlpha(MDOUBLE newAlpha){ _betaDistr.setAlpha(newAlpha);}
+ virtual MDOUBLE getAlpha() const {return _betaDistr.getAlpha();};
+ virtual void setBeta(MDOUBLE newBeta){_betaDistr.setBeta(newBeta);}
+ virtual MDOUBLE getBeta() const {return _betaDistr.getBeta();};
+ virtual void change_number_of_categories(int in_number_of_categories){_betaDistr.change_number_of_categories(in_number_of_categories);}
+ virtual MDOUBLE getBorder(const int i) const {return _betaDistr.getBorder(i);} //returns the ith border. Note: border[0] = 0, border[categories()] = infinite
+ virtual MDOUBLE getOmega() const {return _omega;}
+ virtual MDOUBLE getBetaProb() const {return _betaProb;};
+ virtual void setOmega(MDOUBLE omega) { _omega = omega;};
+ virtual void setBetaProb(MDOUBLE betaProb) { _betaProb = betaProb;};
+
+private:
+ betaDistribution _betaDistr; // the beta component on [0,1]
+ MDOUBLE _omega; // rate of the extra (last) category
+ MDOUBLE _betaProb; // mixture weight of the beta component
+};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/betaUtilities.cpp b/libs/phylogeny/betaUtilities.cpp
new file mode 100644
index 0000000..b9ad590
--- /dev/null
+++ b/libs/phylogeny/betaUtilities.cpp
@@ -0,0 +1,174 @@
+// $Id: betaUtilities.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "definitions.h"
+#include "betaUtilities.h"
+#include "gammaUtilities.h"
+#include "logFile.h"
+#include "errorMsg.h"
+#include <cmath>
+
+/******************************
+ Computes the inverse of the beta CDF: given a prob. value, calculates the x for which
+ the integral over 0 to x of beta CDF = prob.
+ Adapted from:
+ 1. Majumder and Bhattacharjee (1973) App. Stat. 22(3) 411-414
+ and the corrections:
+ 2. Cran et al. (1977) App. Stat. 26(1) 111-114
+ 3. Berry et al. (1990) App. Stat. 39(2) 309-310
+ and another adaptation made in the code of Yang (tools.c)
+****************************/
+// Inverse of the incomplete beta function ratio: returns x such that
+// I_x(a,b) = prob (cf. Majumder & Bhattacharjee AS 63/AS 109 and Yang's
+// tools.c adaptation). Strategy: symmetry-flip to the lower tail, compute a
+// chi-square-based starting point, then refine with a damped Newton-Raphson.
+MDOUBLE inverseCDFBeta(MDOUBLE a, MDOUBLE b, MDOUBLE prob){
+ if(a<0 || b<0 || prob<0 || prob>1) {
+ errorMsg::reportError("error in inverseCDFBeta,illegal parameter");
+ }
+ if (prob == 0 || prob == 1)
+ return prob;
+
+ int maxIter=100;
+ MDOUBLE epsilonLow=1e-300;
+ MDOUBLE fpu=3e-308;
+
+ /****** changing the tail direction (prob=1-prob)*/
+ // Work on the lower tail; use I_x(a,b) = 1 - I_{1-x}(b,a) and undo at return.
+ bool tail=false;
+ MDOUBLE probA=prob;
+ if (prob > 0.5) {
+ prob = 1.0 - prob;
+ tail = true;
+ MDOUBLE tmp=a;
+ a=b;
+ b=tmp;
+ }
+ MDOUBLE lnBetaVal=betaln(a,b);
+ MDOUBLE x;
+
+ /****** calculating chi square evaluator */
+ // Wilson-Hilferty style normal/chi-square approximation for the start point.
+ MDOUBLE r = sqrt(-log(prob * prob));
+ MDOUBLE y = r - (2.30753+0.27061*r)/(1.+ (0.99229+0.04481*r) * r);
+
+ MDOUBLE chiSquare = 1.0/(9.0 * b);
+ chiSquare = b*2 * pow(1.0 - chiSquare + y * sqrt(chiSquare), 3.0);
+// MDOUBLE chiSquare2=gammq(b,prob/2.0); //chi square valued of prob with 2q df
+ MDOUBLE T=(4.0*a+2.0*b-2)/chiSquare;
+
+
+ /****** initializing x0 */
+ if (a > 1.0 && b > 1.0) {
+ r = (y * y - 3.) / 6.;
+ MDOUBLE s = 1. / (a*2. - 1.);
+ MDOUBLE t = 1. / (b*2. - 1.);
+ MDOUBLE h = 2. / (s + t);
+ MDOUBLE w = y * sqrt(h + r) / h - (t - s) * (r + 5./6. - 2./(3.*h));
+ x = a / (a + b * exp(w + w));
+ }
+ else {
+ if (chiSquare<0){
+ x=exp((log(b*(1-prob))+lnBetaVal)/b);
+ }
+ else if (T<1){
+ x=exp((log(prob*a)+lnBetaVal)/a);
+ }
+ else {
+ x=(T-1.0)/(T+1.0);
+ }
+ }
+
+ if(x<=fpu || x>=1-2.22e-16) x=(prob+0.5)/2; // 0<x<1 but to avoid underflow a little smaller
+
+ /****** iterating with a modified version of newton-raphson */
+ MDOUBLE adj, newX=x, prev=0;
+ MDOUBLE yprev = 0.;
+ adj = 1.;
+
+ // Target tolerance depends on the requested probability (tighter for
+ // central values), clamped below by epsilonLow.
+ MDOUBLE eps = pow(10., -13. - 2.5/(probA * probA) - 0.5/(probA *probA));
+ eps = (eps>epsilonLow?eps:epsilonLow);
+
+ for (int i=0; i<maxIter; i++) {
+ y = incompleteBeta(a,b,x);
+ y = (y - prob) *
+ exp(lnBetaVal + (1.0-a) * log(x) + (1.0-b) * log(1.0 - x)); //the classical newton-raphson formula
+ // Sign change of the residual: reset the maximum allowed step size.
+ if (y * yprev <= 0)
+ prev = (fabs(adj)>fpu?fabs(adj):fpu);
+ MDOUBLE g = 1;
+ // Inner damping loop: shrink the step by 1/3 until it keeps x in (0,1).
+ for (int j=0; j<maxIter; j++) {
+ adj = g * y;
+ if (fabs(adj) < prev) {
+ newX = x - adj; // new x
+ if (newX >= 0. && newX <= 1.) {
+ if (prev <= eps || fabs(y) <= eps) return(tail?1.0-x:x);;
+ if (newX != 0. && newX != 1.0) break;
+ }
+ }
+ g /= 3.;
+ }
+ if (fabs(newX-x)<fpu)
+ return (tail?1.0-x:x);;
+ x = newX;
+ yprev = y;
+ }
+ // Max iterations reached: return the best estimate (flipped back if needed).
+ return (tail?1.0-x:x);
+}
+
+
+/******************************
+ Computes the average r value in percentile k whose boundaries are leftBound and rightBound
+****************************/
+// Mean rate within percentile k of a Beta(alpha,beta), whose boundaries are
+// [leftBound, rightBound]: uses the identity that the partial mean equals
+// (alpha/(alpha+beta)) * (I_r(alpha+1,beta) - I_l(alpha+1,beta)), scaled by
+// k since each of the k categories has probability 1/k.
+MDOUBLE computeAverage_r(MDOUBLE leftBound, MDOUBLE rightBound, MDOUBLE alpha, MDOUBLE beta, int k){
+ MDOUBLE tmp;
+ tmp= incompleteBeta(alpha+1,beta,rightBound) - incompleteBeta(alpha+1,beta,leftBound);
+ tmp= (tmp*alpha/(alpha+beta))*k;
+ return tmp;
+}
+/******************************
+ Computes the integral from 0 to x over the beta CDF:
+ (1/Beta(alpha,beta))x^(alpha-1)*(1-x)^(beta-1) where
+ Beta(a,b)=Gamma(a)*Gamma(b)/Gamma(a+b)
+****************************/
+// Regularized incomplete beta function I_x(alpha,beta), computed via the
+// continued fraction betacf; uses the symmetry I_x(a,b) = 1 - I_{1-x}(b,a)
+// to pick the faster-converging side. Reports an error for x outside [0,1].
+MDOUBLE incompleteBeta(MDOUBLE alpha, MDOUBLE beta, MDOUBLE x){
+ MDOUBLE tmp;
+ if (x<0 || x>1) {
+ LOG(5,<<"Error in function incompleteBeta : invalid x = "<<x<<" alpha = "<<alpha<<" beta= "<<beta<<endl);
+ errorMsg::reportError("Error in function incompleteBeta : invalid x");
+ }
+ // Prefactor x^a (1-x)^b / Beta(a,b), computed in log space to avoid overflow.
+ if (x==0 || x==1) tmp=0.0;
+ else tmp=exp(alpha*log(x)+beta*log(1-x)-betaln(alpha,beta));
+
+ if (x<((alpha+1)/(alpha+beta+2))) return tmp*betacf(alpha,beta,x)/alpha;
+ return 1-tmp*betacf(beta,alpha,1-x)/beta;
+}
+// Continued-fraction evaluation of the incomplete beta function (modified
+// Lentz's method, as in Numerical Recipes): two fraction terms per loop
+// iteration, with FPMIN guards against division by tiny denominators.
+MDOUBLE betacf(MDOUBLE a, MDOUBLE b, MDOUBLE x){
+ int m, m2;
+ MDOUBLE aa,c,d,del,h,qab,qam,qap;
+ qab = a+b;
+ qap = a+1;
+ qam = a-1;
+ c=1;
+ d=1-qab*x/qap;
+ if (fabs(d)<FPMIN) d=FPMIN;
+ d=1.0/d;
+ h=d;
+ for(m=1;m<=ITMAX;m++){
+ m2=2*m;
+ // Even step of the continued fraction.
+ aa=m*(b-m)*x/((qam+m2)*(a+m2));
+ d = 1.0+aa*d;
+ if (fabs(d)<FPMIN) d = FPMIN;
+ c=1.0 + aa/c;
+ if (fabs(c)<FPMIN) c = FPMIN;
+ d = 1.0/d;
+ h *= d*c;
+ // Odd step of the continued fraction.
+ aa = -(a+m)*(qab+m)*x/((a+m2)*(qap+m2));
+ d = 1.0+aa*d;
+ if (fabs(d)<FPMIN) d = FPMIN;
+ c = 1.0 + aa/c;
+ if (fabs(c)<FPMIN) c = FPMIN;
+ d = 1.0/d;
+ del = d*c;
+ h*=del;
+ // Converged when the multiplicative update is within EPS of 1.
+ if (fabs(del-1.0) <= EPS) break;
+ }
+ if (m > ITMAX) LOG(5,<<"Error in function betacf : alpha || beta big ||MAXIT small"<<endl);
+ return h;
+}
+
+// ln Beta(alpha,beta) = ln G(alpha) + ln G(beta) - ln G(alpha+beta).
+MDOUBLE betaln(MDOUBLE alpha, MDOUBLE beta){
+ return gammln(alpha)+gammln(beta)-gammln(alpha+beta);
+}
+
diff --git a/libs/phylogeny/betaUtilities.h b/libs/phylogeny/betaUtilities.h
new file mode 100644
index 0000000..95f1658
--- /dev/null
+++ b/libs/phylogeny/betaUtilities.h
@@ -0,0 +1,21 @@
+// $Id: betaUtilities.h 962 2006-11-07 15:13:34Z privmane $
+#ifndef ___BETA_UTILITIES
+#define ___BETA_UTILITIES
+
+#include "definitions.h"
+#include "numRec.h"
+
+/******************************************************************************
+beta utilities include calculating inverse of the beta cdf and calculation of mean values
+used mainly in building the gamma function and creating categories within it
+******************************************************************************/
+
+MDOUBLE inverseCDFBeta(MDOUBLE a, MDOUBLE b, MDOUBLE prob);
+MDOUBLE computeAverage_r(MDOUBLE leftBound, MDOUBLE rightBound, MDOUBLE alpha, MDOUBLE beta, int k);
+MDOUBLE incompleteBeta(MDOUBLE alpha, MDOUBLE beta, MDOUBLE x);
+MDOUBLE betacf(MDOUBLE a, MDOUBLE b, MDOUBLE x);
+MDOUBLE betaln(MDOUBLE alpha, MDOUBLE beta);
+
+
+
+#endif
diff --git a/libs/phylogeny/bootstrap.cpp b/libs/phylogeny/bootstrap.cpp
new file mode 100644
index 0000000..ff764e4
--- /dev/null
+++ b/libs/phylogeny/bootstrap.cpp
@@ -0,0 +1,227 @@
+// $Id: bootstrap.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "someUtil.h"
+#include "bootstrap.h"
+#include "splitTreeUtil.h"
+#include <algorithm>
+#include <set>
+using namespace std;
+
+// -----------------------------------------------------------------------------------------
+// ----------------------------- The constructor and its related functions -----------------
+// -----------------------------------------------------------------------------------------
+
+// Builds the split-frequency table from an in-memory vector of trees.
+bootstrap::bootstrap(const treeVec& treevect):_numTrees(0), _nTaxa(0){
+ fillFromTreeVec(treevect);
+}
+// Same as the treeVec constructor, but reads the trees from a file first.
+bootstrap::bootstrap (const string& filename):_numTrees(0), _nTaxa(0){
+ fillFromTreeVec(getStartingTreeVecFromFile(filename));
+}
+
+// Populates the split map from every tree in the vector.
+void bootstrap::fillFromTreeVec(const treeVec& treevect) {
+// for each tree, we compute the set of all splits.
+// we update for each split in each tree the split-map.
+// so we have the frequency of each split.
+ for (treeVec::const_iterator i=treevect.begin();i!=treevect.end();++i)
+ splitTree(*i);
+}
+
+// takes a tree, computes all splits and
+// enter them into the Splits map
+void bootstrap::splitTree(const tree& T){
+ _numTrees++;
+ // First tree initializes the name->id map; later trees are checked against it.
+ updateNtaxaAndNameMapAndValidateConsistency(T);
+ splitSubTreeRecursivly(T.getRoot(), true); // the true because we call the recursion with the root. Otherwise it is false;
+}
+
+// On the first tree: records the taxa names and assigns each a stable id.
+// On later trees: verifies the same taxa set (same count, all names known),
+// reporting a detailed error otherwise.
+void bootstrap::updateNtaxaAndNameMapAndValidateConsistency(const tree& T) {
+ if (!_nTaxa) { // only for the first tree, this part initializes the _nameMap and the _nTaxa
+ _sequenceNames = getSequencesNames(T);
+ for (_nTaxa=0;_nTaxa<_sequenceNames.size();++_nTaxa) {
+ _nameMap[_sequenceNames[_nTaxa]] =_nTaxa;
+ }
+ }
+ else {
+ vector<string> namesInT1 = getSequencesNames(T);
+ if (namesInT1.size() < _nameMap.size()) {
+ string errMs1 = "Not all trees have the same number of sequences. ";
+ errMs1 += "tree number 1 has: ";
+ errMs1 += int2string(_nameMap.size());
+ errMs1 += " while tree number: ";
+ errMs1 += int2string(_numTrees);
+ errMs1 += " has ";
+ errMs1 += int2string(namesInT1.size());
+ errMs1 += "\nError in function bootstrap::splitTree";
+ errorMsg::reportError(errMs1);
+ }
+ for (int i=0; i < namesInT1.size(); ++i) {
+ if (_nameMap.count(namesInT1[i])==0) {
+ string errMs = "The taxa ";
+ errMs += namesInT1[i];
+ errMs += " found in tree number ";
+ errMs += int2string(_numTrees);
+ errMs += " is not present in the first tree. Error in function bootstrap::splitTree";
+ errorMsg::reportError(errMs);
+ }
+ }
+ }
+}
+
+// Post-order traversal that returns the set of leaf ids under node n and,
+// for every internal non-root node, records the induced split in _Splits.
+// Leaf splits (and the root's) are trivial and therefore not recorded.
+set<int> bootstrap::splitSubTreeRecursivly(const tree::nodeP &n,
+ const bool isRoot) {//false
+// this function assumes that the root of the tree is not a leaf
+ set<int> s; // the id of all leaves of the subtree of the nodeP n.
+ for(int i=0; i<n->getNumberOfSons() ;++i) {
+ set<int> sonSet(splitSubTreeRecursivly(n->getSon(i)));
+ set<int>::iterator it = sonSet.begin();
+ for (; it != sonSet.end(); ++it) s.insert(*it);
+ }
+ if(isRoot) return s;
+ if (n->isLeaf()) {
+ s.insert(idFromName(n->name()));
+ } else { // this avoids keeping track of trivial splits.
+ set<int>::const_iterator sBeg(s.begin());
+ set<int>::const_iterator sEnd(s.end());
+ split sp(sBeg,sEnd,_nTaxa);
+ _Splits.add(sp);
+ }
+ return(s);
+}
+
+// -----------------------------------------------------------------------------------------
+// ----------------------------- getWeightsForTree -----------------------------------------
+// -----------------------------------------------------------------------------------------
+
+// Returns a map from node id to the bootstrap support (relative split
+// frequency) of the edge between that node and its father.
+map<int, MDOUBLE> bootstrap::getWeightsForTree(const tree& inTree) const {
+ map<int, MDOUBLE> v;
+ recursivelyBuiltBPMap(inTree.getRoot(), v);
+ return (v);
+}
+
+// the function returns the ids of the leaves in the subtree defined by rootOfSubtree.
+// As a side effect it fills v[node id] = (split count) / (number of trees),
+// i.e. the bootstrap proportion of the edge above each node.
+set<int> bootstrap::recursivelyBuiltBPMap(const tree::nodeP &rootOfSubtree, map<int, MDOUBLE> &v) const {
+ set<int> s;
+ for(int i=0;i<rootOfSubtree->getNumberOfSons();++i) {
+ set<int> sonSet(recursivelyBuiltBPMap(rootOfSubtree->getSon(i),v));
+ set<int>::iterator it = sonSet.begin();
+ for (; it != sonSet.end(); ++it) s.insert(*it);
+ }
+ if (rootOfSubtree->isLeaf()) {
+ s.insert(idFromName(rootOfSubtree->name()));
+ }
+ set<int>::const_iterator sBeg(s.begin());
+ set<int>::const_iterator sEnd(s.end());
+ split sp(sBeg,sEnd,_nTaxa);
+ v[rootOfSubtree->id()]=(static_cast<MDOUBLE>(_Splits.counts(sp)))/_numTrees;
+ return(s);
+}
+
+// We get different trees, and the id's are not consistent among different trees.
+// here, we map a name to a single id.
+// Reports an error if the name was not seen in the first tree.
+int bootstrap::idFromName(const string & name) const {
+ NameMap_t::const_iterator i(_nameMap.find(name));
+ if (i==_nameMap.end()) {
+ string s="Can not find an Id for the taxa name:";
+ s+=name;
+ s+="\n error in function bootstrap::idFromName\n";
+ errorMsg::reportError(s);
+ }
+ return(i->second);
+}
+
+// -----------------------------------------------------------------------------------------
+// ----------------------------- Printing the bp ------------------------------------------
+// -----------------------------------------------------------------------------------------
+
+// Prints the raw split-frequency table.
+void bootstrap::print(ostream& sout){// = cout
+ _Splits.print(sout);
+}
+
+// Prints the tree in Newick-like form with bootstrap values (from v, keyed
+// by node id) shown in square brackets after each internal edge.
+void bootstrap::printTreeWithBPvalues(ostream &out, const tree &t, const map<int, MDOUBLE> & v, const bool printBranchLenght) const{
+ recursivlyPrintTreeWithBPvalues(out,t.getRoot(),v, printBranchLenght);
+ out<<";";
+}
+
+// Recursive helper for printTreeWithBPvalues: leaves print their name (and
+// optional branch length); internal nodes print "(sons)" followed by the
+// branch length and, when positive, the bootstrap value in brackets.
+void bootstrap::recursivlyPrintTreeWithBPvalues(ostream &out,
+ const tree::nodeP &myNode,
+ const map<int, MDOUBLE> &v,
+ const bool printBranchLenght) const {
+ if (myNode->isLeaf()) {
+ out << myNode->name();
+ if (printBranchLenght) out << ":"<<myNode->dis2father();
+ return;
+ } else {
+ out <<"(";
+ for (int i=0;i<myNode->getNumberOfSons();++i) {
+ if (i>0) out <<",";
+ recursivlyPrintTreeWithBPvalues(out, myNode->getSon(i),v, printBranchLenght);
+ }
+ out <<")";
+ if (myNode->isRoot()==false) {
+ if (printBranchLenght) out<<":"<<myNode->dis2father();
+ map<int,MDOUBLE>::const_iterator val=v.find(myNode->id());
+ if ((val!=v.end()) && val->second>0.0) {
+ out << "["<<val->second<<"]";
+ }
+ }
+ }
+}
+
+// for DEBUGGING ONLY:
+// Dumps the name -> id map, one "{name = id}" entry per line.
+void bootstrap::print_names(ostream &out) const {
+ NameMap_t::const_iterator i(_nameMap.begin());
+ for (;i!=_nameMap.end();++i)
+ out << "{"<<i->first<<" = "<<i->second<<"}"<<endl;
+}
+
+// -----------------------------------------------------------------------------------------
+// ----------------------------- Building consensus tree ----------------------------------
+// -----------------------------------------------------------------------------------------
+// returns the bp values of the consensus tree.
+// the idea is to start from the split map, extract a split at a time.
+// first, the splits with the highest bp (i.e., in a sorted way).
+// Each splits is checked for compatibility with the consensus tree constructed so far.
+// if it is compatible, it is added to the consensus.
+// Otherwise - it is discarded.
+// returns the consensus tree
+// Greedy majority-rule consensus: starting from a star tree, splits are
+// considered in decreasing frequency order; each split above the threshold
+// is added if compatible with all splits accepted so far.
+tree bootstrap::consensusTree(const MDOUBLE threshold) const {// =0.5
+// 1. get the names of the sequences
+ vector<string> names;
+ for (NameMap_t::const_iterator i(_nameMap.begin());i!=_nameMap.end();++i)
+ names.push_back(i->first);
+
+// 2. create a star tree
+ tree res = starTree(names);
+
+// 3. get the sorted vector of the splits from which the consensus is to be built.
+ vector<pair<split,int> > sortedSplits = _Splits.sortSplits();
+// 4. get a list of compatible splits
+ // Compare raw counts against threshold * numTrees to avoid division.
+ MDOUBLE thresholdForNumTrees = threshold * _numTrees;
+
+ vector<split> consensus;
+ for (int k=0; k < sortedSplits.size(); ++k) {
+ bool compatible = true;
+ // Splits are sorted by count, so once below threshold we can stop.
+ if (sortedSplits[k].second < thresholdForNumTrees) break;
+
+ for (vector<split>::const_iterator j=consensus.begin(); j != consensus.end(); ++j) {
+ if (!(sortedSplits[k].first.compatible(*j))) {
+ compatible=false;
+ break;
+ }
+ }
+ if (compatible) {
+ consensus.push_back(sortedSplits[k].first);
+ }
+ }
+
+// 5. Now we build a tree from all the compatible splits
+
+ for (vector<split>::iterator i1 = consensus.begin();i1!=consensus.end();++i1) {
+ applySplit(res,*i1,_nameMap);
+ }
+ res.create_names_to_internal_nodes();
+ res.makeSureAllBranchesArePositive();
+
+ return (res);
+}
diff --git a/libs/phylogeny/bootstrap.h b/libs/phylogeny/bootstrap.h
new file mode 100644
index 0000000..cfa6440
--- /dev/null
+++ b/libs/phylogeny/bootstrap.h
@@ -0,0 +1,82 @@
+// $Id: bootstrap.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___BOOTSTRAP
+#define ___BOOTSTRAP
+
+#include "definitions.h"
+#include "split.h"
+#include "splitMap.h"
+#include "tree.h"
+#include "treeUtil.h"
+#include <sstream>
+using namespace std;
+
+// this class gets as input many trees and can answer questions such as
+// 1. the bootstrap value (bp) of a tree
+// 2. the bp of a split
+// 3. can reconstruct a multifurcating consensus trees.
+// We note that 3 can always be done if done only on those splits with bp > 50%
+// In this case there is only one tree.
+If the threshold value is <= 50% there might be more than one tree for which
+all splits on this tree have bp >= threshold.
+In this case we want to give the tree with the highest sum of bp.
+This is probably NP hard, and we use a greedy search to choose
+this tree.
+
+// Collects splits from a set of bootstrap trees and answers support queries
+// (per-edge bootstrap proportions, majority-rule consensus tree).
+class bootstrap {
+public:
+ typedef vector<tree> treeVec;
+ explicit bootstrap(const treeVec& treevect); // constructor
+
+ // this construction is the same as above, but it reads the trees from
+ // an input file.
+ explicit bootstrap (const string& filename);
+
+ // give a tree and return a map from each edge to a bp value.
+ // edge 5 is the edge between node id 5 and its father.
+ map<int, MDOUBLE> getWeightsForTree(const tree& inTree) const;
+
+
+ // give a threshold >= 0.5 and get a consensus tree with all splits
+ // that are more confident than the threshold.
+ tree consensusTree(const MDOUBLE threshold = 0.5) const;
+
+ void print(ostream& sout = cout);
+ void printTreeWithBPvalues(ostream &os, const tree &t, const map<int, MDOUBLE> & v, const bool printBranchLenght=true) const;
+
+ void print_names(ostream &os) const;
+
+
+private:
+
+
+
+
+ void fillFromTreeVec(const treeVec& treevect);
+ int idFromName (const string & name) const;
+
+
+ set<int> recursivelyBuiltBPMap(const tree::nodeP &rootOfSubtree, map<int, MDOUBLE> &v) const;
+ set<int> splitSubTreeRecursivly(const tree::nodeP &n, const bool isRoot=false); // this function assumes that the tree is rooted not in a leaf
+ // take tree, compute all splits and enter them into the Splits map
+ void splitTree(const tree& T);
+ void recursivlyPrintTreeWithBPvalues(ostream &os,
+ const tree::nodeP &nP,
+ const map<int, MDOUBLE> &v,
+ const bool printBranchLenght) const;
+ void getTreeNodes(const tree& t) const ; // note that _allTree_nodes is mutable
+ void updateNtaxaAndNameMapAndValidateConsistency(const tree& T);
+
+ int _numTrees; // total number of trees
+ splitMap _Splits; // split -> frequency over all input trees
+ typedef map<string,int> NameMap_t;
+ NameMap_t _nameMap; // this is a map from the names of the sequences to integers.
+ int _nTaxa; // number of taxa (fixed by the first tree)
+ mutable vector<int> _id2TreeId, _treeId2Id;
+ vector<string> _sequenceNames; // the names of the sequences.
+};
+
+
+
+#endif // ___BOOTSTRAP
+
diff --git a/libs/phylogeny/chebyshevAccelerator.cpp b/libs/phylogeny/chebyshevAccelerator.cpp
new file mode 100644
index 0000000..30983eb
--- /dev/null
+++ b/libs/phylogeny/chebyshevAccelerator.cpp
@@ -0,0 +1,212 @@
+// $Id: chebyshevAccelerator.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "chebyshevAccelerator.h"
+#include <cmath>
+#include <cassert>
+
+chebyshevAccelerator::chebyshevAccelerator(const chebyshevAccelerator& other):
+ _alphabetSize(other._alphabetSize),
+ _totalNumOfCoef(other._totalNumOfCoef),
+ _usingNumberOfCoef(other._usingNumberOfCoef),
+ _pb(NULL),
+ _rightRange(other._rightRange),
+ _leftRange(other._leftRange){
+ if (other._pb != NULL) _pb = other._pb->clone();
+ chebi_coff=other.chebi_coff;
+ chebi_dervation_coff=other.chebi_dervation_coff;
+ chebi_sec_dervation_coff=other.chebi_sec_dervation_coff;
+}
+
+chebyshevAccelerator::chebyshevAccelerator(
+ replacementModel* pb,
+ const int alphanetSize,
+ const int totalNumOfCoef,
+ const int usingNumberOfCoef,
+ const MDOUBLE rightRange,
+ const MDOUBLE leftRange
+ ): _alphabetSize(alphanetSize),
+ _totalNumOfCoef(totalNumOfCoef), _usingNumberOfCoef(usingNumberOfCoef),_pb(pb->clone()), _rightRange(rightRange), _leftRange(leftRange)
+//----------------------------------------------------------------------------------
+//input: non
+//output: non
+//doing: filling the member chebi_coff[][][]; chebi_coff[1][2][4] is the forth
+// chebichev coefficient in the chebichev polynom of the function
+// slow_pij(1,2,t);
+//----------------------------------------------------------------------------------
+{
+ int tmp, tmp1;
+ for (tmp = 0; tmp < _alphabetSize ; tmp ++) {
+
+ chebi_coff.resize(_alphabetSize);
+ chebi_dervation_coff.resize(_alphabetSize);
+ chebi_sec_dervation_coff.resize(_alphabetSize);
+
+ for (tmp1 = 0; tmp1 < _alphabetSize ; tmp1 ++) {
+ chebi_coff[tmp].resize(_alphabetSize);
+ chebi_dervation_coff[tmp].resize(_alphabetSize);
+ chebi_sec_dervation_coff[tmp].resize(_alphabetSize);
+ for (tmp1 = 0; tmp1 < _alphabetSize ; tmp1 ++) {
+ chebi_coff[tmp][tmp1].resize(_totalNumOfCoef);
+ chebi_dervation_coff[tmp][tmp1].resize(_totalNumOfCoef);
+ chebi_sec_dervation_coff[tmp][tmp1].resize(_totalNumOfCoef);
+ }
+ }
+ }
+
+
+ Vdouble coffij(_totalNumOfCoef);
+ Vdouble coffij_of_derviation(_totalNumOfCoef);
+ Vdouble coffij_of_second_derivation(_totalNumOfCoef);
+
+
+ for (int from_aa =0; from_aa<_alphabetSize ; ++ from_aa)
+ {
+ for (int to_aa =0; to_aa<_alphabetSize ; ++ to_aa)
+ {
+ chebft(coffij,_totalNumOfCoef,from_aa,to_aa);
+ chder(coffij,coffij_of_derviation,_totalNumOfCoef);
+ chder(coffij_of_derviation,coffij_of_second_derivation,_totalNumOfCoef);
+
+ for (int tmp=0; tmp<_totalNumOfCoef;++tmp)
+ {
+ chebi_coff[from_aa][to_aa][tmp] = coffij[tmp];
+ chebi_dervation_coff[from_aa][to_aa][tmp] = coffij_of_derviation[tmp];
+ chebi_sec_dervation_coff[from_aa][to_aa][tmp] = coffij_of_second_derivation[tmp];
+ }
+
+ }
+ }
+}
+
+
+void chebyshevAccelerator::chebft(Vdouble& c, int n, int from_aa, int to_aa) {
+//----------------------------------------------------------------------------------
+//input: c[] is the vector where the cofficient will be
+// from aa and to_aa are for chosing the right function to be developed
+//output: non
+//doing: calculating the chebichev coefficient in the chebichev polynom of the function
+// slow_pij(from_aa,to_aa,t), and put them in the c[] vector
+//----------------------------------------------------------------------------------
+ int k,j;
+ MDOUBLE fac,bpa,bma;
+
+ Vdouble f;
+ f.resize(n);
+ bma=0.5*(_rightRange-_leftRange);
+ bpa=0.5*(_rightRange+_leftRange);
+ for (k=0;k<n;k++) {
+ MDOUBLE y=cos(3.141592653589793*(k+0.5)/n);
+ f[k]= _pb->Pij_t(from_aa,to_aa,y*bma+bpa); //(*func)(y*bma+bpa);
+ }
+ fac=2.0/n;
+ for (j=0;j<n;j++) {
+ MDOUBLE sum=0.0;
+ for (k=0;k<n;k++)
+ sum += f[k]*cos(3.141592653589793*j*(k+0.5)/n);
+ c[j]=fac*sum;
+ }
+
+}
+
+
const MDOUBLE chebyshevAccelerator::Pij_t(const int from_aa, const int to_aa, const MDOUBLE x) const
//----------------------------------------------------------------------------------
// Fast approximation of Pij(t): evaluates the pre-computed Chebyshev
// expansion of slow Pij_t(from_aa,to_aa,t) at t = x using Clenshaw's
// recurrence (Numerical Recipes' "chebev").
// input:  same meaning as replacementModel::Pij_t (two states, branch length x)
// output: the transition probability; falls back to the exact computation
//         when x is out of range or the approximation leaves (0,1].
//----------------------------------------------------------------------------------
{

	MDOUBLE d=0.0,dd=0.0,sv,y,y2,check;
	int j;

	// Outside the fitted interval [_leftRange,_rightRange] the expansion
	// is meaningless, so delegate to the exact (slow) model.
	if ((x-_leftRange)*(x-_rightRange) > 0.0) {
		return _pb->Pij_t(from_aa,to_aa,x);
//		errorMsg::reportError("x not in range in routine fast_Pij_t");// also quit the program
	}

	// Map x into [-1,1] and run Clenshaw's recurrence over the first
	// _usingNumberOfCoef coefficients (order of updates matters here).
	y2=2.0*(y=(2.0*x-_leftRange-_rightRange)/(_rightRange-_leftRange));
	for (j=_usingNumberOfCoef;j>0;j--) {
		sv=d;
		d=y2*d-dd+chebi_coff[from_aa][to_aa][j];
		dd=sv;
	}
	check = y*d-dd+0.5*chebi_coff[from_aa][to_aa][0];
	// A probability must lie in (0,1]; if the truncated series strays
	// outside, recompute exactly rather than return a nonsensical value.
	if ((check>1) || (check<=0)) check = _pb->Pij_t(from_aa,to_aa,x);
	assert(check<=1);
	assert(check>=0);
	return check;
}
+
+
const MDOUBLE chebyshevAccelerator::dPij_dt(const int from_aa, const int to_aa, const MDOUBLE x) const
//----------------------------------------------------------------------------------
// First derivative of Pij with respect to t, evaluated at t = x via
// Clenshaw's recurrence over the pre-computed derivative coefficients
// (chebi_dervation_coff, produced by chder in the constructor).
// Falls back to the exact model outside [_leftRange,_rightRange].
//----------------------------------------------------------------------------------
{

	MDOUBLE d=0.0,dd=0.0,sv,y,y2;
	int j;

	// out of the fitted range: use the exact (slow) derivative
	if ((x-_leftRange)*(x-_rightRange) > 0.0) {
		return _pb->dPij_dt(from_aa,to_aa,x);
	}
	// map x into [-1,1], then Clenshaw's recurrence (order-sensitive)
	y2=2.0*(y=(2.0*x-_leftRange-_rightRange)/(_rightRange-_leftRange));
	for (j=_usingNumberOfCoef;j>0;j--) {
		sv=d;
		d=y2*d-dd+chebi_dervation_coff[from_aa][to_aa][j];
		dd=sv;
	}
	return y*d-dd+0.5*chebi_dervation_coff[from_aa][to_aa][0];
}
+
+
const MDOUBLE chebyshevAccelerator::d2Pij_dt2(const int from_aa, const int to_aa, const MDOUBLE x) const {
//----------------------------------------------------------------------------------
// Second derivative of Pij with respect to t, evaluated at t = x via
// Clenshaw's recurrence over chebi_sec_dervation_coff (coefficients of
// the twice-differentiated series, produced by two chder passes).
// Falls back to the exact model outside [_leftRange,_rightRange].
//----------------------------------------------------------------------------------
	MDOUBLE d=0.0,dd=0.0,sv,y,y2;
	int j;

	// out of the fitted range: use the exact (slow) second derivative
	if ((x-_leftRange)*(x-_rightRange) > 0.0) {
		return _pb->d2Pij_dt2(from_aa,to_aa,x);
	}
	// map x into [-1,1], then Clenshaw's recurrence (order-sensitive)
	y2=2.0*(y=(2.0*x-_leftRange-_rightRange)/(_rightRange-_leftRange));
	for (j=_usingNumberOfCoef;j>0;j--) {
		sv=d;
		d=y2*d-dd+chebi_sec_dervation_coff[from_aa][to_aa][j];
		dd=sv;
	}
	return y*d-dd+0.5*chebi_sec_dervation_coff[from_aa][to_aa][0];
}
+
+
+
+
+void chebyshevAccelerator::chder(Vdouble &c, Vdouble &cder, int n) {
+//----------------------------------------------------------------------------------
+//input: chebicev coff of f(x) i.e. in c[]. n is the vector size
+//output: chebicev coff of df(x)/dx i.e. in cder[]
+//doing: calculating the coff of the dervation from the coff of f.
+//reference:numercal recepies in c, pg 195.
+//----------------------------------------------------------------------------------
+ int j;
+ MDOUBLE con;
+
+ cder[n-1]=0.0;
+ cder[n-2]=2*(n-1)*c[n-1];
+ for (j=n-3;j>=0;j--)
+ cder[j]=cder[j+2]+2*(j+1)*c[j+1];
+ con=2.0f/(_rightRange-_leftRange);
+ for (j=0;j<n;j++)
+ cder[j] *= con;
+}
+
+
+
+
+
diff --git a/libs/phylogeny/chebyshevAccelerator.h b/libs/phylogeny/chebyshevAccelerator.h
new file mode 100644
index 0000000..05ba54e
--- /dev/null
+++ b/libs/phylogeny/chebyshevAccelerator.h
@@ -0,0 +1,48 @@
+// $Id: chebyshevAccelerator.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___CHEBYSHEV_ACCELERATOR
+#define ___CHEBYSHEV_ACCELERATOR
+
+#include "pijAccelerator.h"
+#include "replacementModel.h"
+
// Accelerates Pij(t) (and its first two time-derivatives) by replacing the
// exact replacement-model computation with a pre-fitted Chebyshev
// polynomial approximation over [_leftRange,_rightRange]; out-of-range
// arguments fall back to the wrapped (slow) model.
class chebyshevAccelerator : public pijAccelerator {
public:

	// Clones and owns pb. totalNumOfCoef coefficients are fitted per
	// (i,j) entry; only usingNumberOfCoef are used at evaluation time.
	explicit chebyshevAccelerator(	replacementModel* pb,
		const int alphanetSize=20,
		const int totalNumOfCoef=60,
		const int usingNumberOfCoef=13,
		const MDOUBLE rightRange=0,const MDOUBLE leftRange=2);
	chebyshevAccelerator(const chebyshevAccelerator& other); // deep copy (clones _pb)
	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
	const MDOUBLE freq(const int i) const {return _pb->freq(i);} // delegated to the model
	virtual pijAccelerator* clone() const { return new chebyshevAccelerator(*this); }
	virtual ~chebyshevAccelerator() {delete _pb;} // owns the cloned model
	virtual replacementModel* getReplacementModel() const {return (_pb);}
	virtual const int alphabetSize() const {return _pb->alphabetSize();}

private:
	// Chebyshev coefficient tables, indexed [from][to][coef]:
	VVVdouble chebi_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1];
	VVVdouble chebi_dervation_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1];
	VVVdouble chebi_sec_dervation_coff;//[N_ABC][N_ABC][NUMBER_OF_TOTAL_COFF+1];

	const int _alphabetSize;
	const int _totalNumOfCoef;     // number of coefficients fitted
	const int _usingNumberOfCoef;  // number of coefficients used per evaluation

	replacementModel* _pb;         // owned; exact model used for fitting and fallback

	// Numerical Recipes helpers: series fitting and term-wise differentiation.
	void chebft(Vdouble& c, int n, int from_aa, int to_aa);
	void chder(Vdouble &c, Vdouble &cder, int n);

	// Interval on which the approximation is valid.
	const MDOUBLE _rightRange;
	const MDOUBLE _leftRange;

};
+
+// This is an accelerator of Pij(t) calculation, using a proximity to polynomial.
+#endif
+
diff --git a/libs/phylogeny/checkcovFanctors.h b/libs/phylogeny/checkcovFanctors.h
new file mode 100644
index 0000000..b64cf3c
--- /dev/null
+++ b/libs/phylogeny/checkcovFanctors.h
@@ -0,0 +1,104 @@
+// $Id: checkcovFanctors.h 1732 2007-02-26 13:45:41Z itaymay $
+
+#ifndef ____CHECKCOV__FANCTORS
+#define ____CHECKCOV__FANCTORS
+#include "definitions.h"
+#include "tree.h"
+
+#include "likelihoodComputation.h"
+using namespace likelihoodComputation;
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "logFile.h"
+
+#include <cmath>
+
+//#define VERBOS
+
+#ifdef VERBOS
+#include <iostream>
+using namespace std;
+#endif
+
// Functor: minus the likelihood of position _pos given a rate r.
// Negated so that a one-dimensional *minimizer* finds the rate that
// maximizes the positional likelihood.
class Cevaluate_L_given_r{
public:
	explicit Cevaluate_L_given_r(	const sequenceContainer& sd,
					const tree& t1,
					const stochasticProcess& sp,
					const int pos)
		:_sd(sd),_t1(t1),_pos(pos), _sp(sp) {}
private:
	const sequenceContainer& _sd;  // alignment (held by reference)
	const tree& _t1;               // tree (held by reference)
	const int _pos;                // position whose likelihood is evaluated
	const stochasticProcess& _sp;  // substitution model + rate distribution
public:
	// Returns -L(_pos | r).
	MDOUBLE operator() (const MDOUBLE r) {

		MDOUBLE tmp1= convert(getLofPos(_pos,_t1,_sd,_sp,r));
#ifdef VERBOS
		LOG(5,<<" r = "<<r<<" l = "<<tmp1<<endl);
#else
		LOG(12,<<" r = "<<r<<" l = "<<tmp1<<endl);
#endif
		return -tmp1;
	}
};
+
+// THIS FUNCTION IS USED ONLY BY ITAY MAYROSE AND ONLY HE KNOWS WHAT IS INSIDE...
+// ONE DAY HE WILL WRITE .DOC FILES...
// THIS FUNCTION IS USED ONLY BY ITAY MAYROSE AND ONLY HE KNOWS WHAT IS INSIDE...
// ONE DAY HE WILL WRITE .DOC FILES...
// NOTE(review): from the formula, this appears to return minus the
// unnormalized posterior of rate r at one position:
// L(pos|r) * exp(-alpha*r) * r^(alpha-1), i.e. the likelihood times a
// gamma-density kernel (normalizing constant omitted) — confirm with author.
class Cevaluate_Posterior_given_r {
public:
	explicit Cevaluate_Posterior_given_r(	const sequenceContainer& seqContainer,
						const tree& t1,
						const stochasticProcess& sp,
						const MDOUBLE alpha,
						const int pos)
		:m_seqContainer(seqContainer), m_alpha(alpha),m_tree(t1), m_pos(pos), m_sp(sp) {}
public:
	// Returns -(likelihood * prior-kernel) at rate r.
	MDOUBLE operator() (const MDOUBLE r)
	{

		MDOUBLE l= convert(getLofPos(m_pos, m_tree, m_seqContainer, m_sp, r));
	#ifdef VERBOS
		LOG(5,<<" r = "<<r<<" l = "<<l<<endl);
	#endif
		// exp(-alpha*r) * r^(alpha-1): unnormalized gamma-density kernel
		MDOUBLE prior = exp((-m_alpha) * r) * pow(r, m_alpha - 1);
		return -(l * prior);
	}

private:
	const sequenceContainer& m_seqContainer;  // alignment (by reference)
	const MDOUBLE m_alpha;                    // gamma shape parameter
	const tree& m_tree;
	const int m_pos;                          // position evaluated
	const stochasticProcess& m_sp;

};
+
+// WHEN YOU WANT TWO TREE TO HAVE THE SAME RATE AT A SPECIFIC POSITION.
// WHEN YOU WANT TWO TREE TO HAVE THE SAME RATE AT A SPECIFIC POSITION.
// Functor: minus the PRODUCT of the positional likelihoods under two
// trees at the same rate r (despite "sum" in the name; a product of
// likelihoods corresponds to a sum of log-likelihoods).
// NOTE(review): _sp and _sd are stored BY VALUE (copied per functor)
// while the trees are stored by reference — possibly intentional, but
// worth confirming; a reference would avoid the copies.
class Cevaluate_L_sum_given_r{
public:
	explicit Cevaluate_L_sum_given_r(const stochasticProcess& sp,
					const sequenceContainer& sd,
					const tree &inLTree1,
					const tree &inLTree2,
					const int pos)
		:_sp(sp), _sd(sd), _tree1(inLTree1),_tree2(inLTree2), _pos(pos){};

private:
	const stochasticProcess _sp;   // copied
	const sequenceContainer _sd;   // copied
	const tree& _tree1;
	const tree& _tree2;
	const int _pos;                // position evaluated
public:
	// Returns -( L(pos|r, tree1) * L(pos|r, tree2) ).
	MDOUBLE operator() (const MDOUBLE r) {
		MDOUBLE tmp1= convert(getLofPos(_pos,_tree1,_sd,_sp,r));
		MDOUBLE tmp2= convert(getLofPos(_pos,_tree2,_sd,_sp,r));
		MDOUBLE tmp= tmp1*tmp2;
		return -tmp;
	}
};
+
+#endif
diff --git a/libs/phylogeny/checkcovFanctorsWithFactors.h b/libs/phylogeny/checkcovFanctorsWithFactors.h
new file mode 100644
index 0000000..9d585dc
--- /dev/null
+++ b/libs/phylogeny/checkcovFanctorsWithFactors.h
@@ -0,0 +1,47 @@
+// $Id: checkcovFanctorsWithFactors.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ____CHECKCOV__FANCTORS_WITH_FACTORS
+#define ____CHECKCOV__FANCTORS_WITH_FACTORS
+#include "definitions.h"
+#include "tree.h"
+#include "likelihoodComputation.h"
+#include "likelihoodComputationFactors.h" //<-new.
+using namespace likelihoodComputation;
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+
+//#define VERBOS
+#ifdef VERBOS
+#include <iostream>
+using namespace std;
+#endif
+
+// USING FACTORS: THE IDEA HERE IS THAT WHEN WE HAVE TOO MANY SEQUENCES,
+// WE MUST TAKE SPECIAL CARE TO USE "FACTORS" AT INTERNAL NODES, TO AVOID UNDERFLOW.
+// HERE WE ALSO RETURN LOG LIKELIHOOD OF A POSITION AND NOT THE LIKELIHOOD ITSELF.
// USING FACTORS: THE IDEA HERE IS THAT WHEN WE HAVE TOO MANY SEQUENCES,
// WE MUST TAKE SPECIAL CARE TO USE "FACTORS" AT INTERNAL NODES, TO AVOID UNDERFLOW.
// HERE WE ALSO RETURN LOG LIKELIHOOD OF A POSITION AND NOT THE LIKELIHOOD ITSELF.
// Functor: minus the LOG-likelihood of position _pos given rate r,
// computed with the underflow-safe factored routine getLOG_LofPos.
class Cevaluate_LOG_L_given_r{
public:
	explicit Cevaluate_LOG_L_given_r(	const sequenceContainer& sd,
					const tree& t1,
					const stochasticProcess& sp,
					const int pos)
		:_sd(sd),_t1(t1),_pos(pos), _sp(sp){}
private:
	const sequenceContainer& _sd;  // alignment (by reference)
	const tree& _t1;
	const int _pos;                // position evaluated
	const stochasticProcess& _sp;
public:
	// Returns -log L(_pos | r).
	MDOUBLE operator() (const MDOUBLE r) {

		MDOUBLE tmp1= getLOG_LofPos(_pos,_t1,_sd,_sp,r);
	#ifdef VERBOS
		LOG(5,<<" r = "<<r<<" l = "<<tmp1<<endl);
	#endif
		return -tmp1;
	}
};
+
+#endif
+
+
diff --git a/libs/phylogeny/clustalFormat.cpp b/libs/phylogeny/clustalFormat.cpp
new file mode 100644
index 0000000..471b461
--- /dev/null
+++ b/libs/phylogeny/clustalFormat.cpp
@@ -0,0 +1,158 @@
+// $Id: clustalFormat.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "clustalFormat.h"
+#include "codon.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+#include <map>
+
+sequenceContainer clustalFormat::read(istream &infile, const alphabet* alph) {
+ sequenceContainer mySeqData = readUnAligned(infile, alph);
+ mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
+ return mySeqData;
+}
+
+sequenceContainer clustalFormat::readUnAligned(istream &infile, const alphabet* alph) {
+ sequenceContainer mySequenceData;
+
+ vector<string> seqFileData;
+ map<string ,string> stringsToAdd; //map that holding for each name last
+ //one or two nucleotides (when reading codon
+ //alphabet) of the line in order to add it
+ //to the next line.
+ putFileIntoVectorStringArray(infile,seqFileData);
+ if (seqFileData.empty()){
+ errorMsg::reportError("unable to open file, or file is empty in clustal format");
+ }
+
+
+ vector<string>::const_iterator it1= seqFileData.begin();
+
+ // make sure that the first 7 chars in the first line is clustal
+ if (it1->size()<7) errorMsg::reportError("first word in clusltal sequence file format must be clustal",1);
+ if ( (( (*it1)[0] != 'C') && ((*it1)[0] != 'c'))
+ || (((*it1)[1] != 'L') && ((*it1)[1] != 'l'))
+ || (((*it1)[2] != 'U') && ((*it1)[2] != 'u'))
+ || (((*it1)[3] != 'S') && ((*it1)[3] != 's'))
+ || (((*it1)[4] != 'T') && ((*it1)[4] != 't'))
+ || (((*it1)[5] != 'A') && ((*it1)[5] != 'a'))
+ || (((*it1)[6] != 'L') && ((*it1)[6] != 'l')) ) {
+ errorMsg::reportError("first word in clusltal sequence file format must be clustal",1);
+ }
+ it1++;
+
+ int localid=0;
+ while (it1!= seqFileData.end()) {
+ if (it1->empty()) {++it1;continue; }// empty line continue
+ if ((it1->size() > 1) && ((*it1)[0]==' ')) {++it1;continue; }// remark line
+ string remark;
+ string name;
+
+// getFromLineAnameAndAsequence;
+ string name1;
+ string stringSeq1;
+ string::const_iterator it2 = (it1)->begin();
+ for (; it2 != (it1)->end();++it2) {
+ if ((*it2)==' ') break;
+ else name1+=(*it2);
+ }
+ if (stringsToAdd.find(name1)!=stringsToAdd.end()) //not new sequence
+ stringSeq1 = stringsToAdd[name1]; //init stringSeq1 with the nucleotide
+ //from the previous line
+ for (; it2 != (it1)->end();++it2) {
+ if ((*it2)==' ') continue;
+ else stringSeq1+=(*it2);
+ }
+
+ //when alphabet is codon stringSeq1 must be product of three.
+ // 1. save 1 or 2 last nucleotide in stringToAdd
+ // 2. substr the last or two last nucleotide for the next line.
+ // 3. keep stringToAdd in map (according the name).
+ string stringToAdd="";
+ // codon codonAlph;
+ if (alph->size()>=60){ // codon?
+ if ((stringSeq1.size()%3)==1){ //add the last nucleotide to the next line
+ stringToAdd+=stringSeq1[stringSeq1.size()-1];
+ stringSeq1 = stringSeq1.substr(0,stringSeq1.size()-1);
+ }
+ if ((stringSeq1.size()%3)==2){ //add the 2 last nucleotide to the next line
+ stringToAdd+=stringSeq1[stringSeq1.size()-2];
+ stringToAdd+=stringSeq1[stringSeq1.size()-1];
+ stringSeq1 = stringSeq1.substr(0,stringSeq1.size()-2);
+ }
+
+ }
+ stringsToAdd[name1] = stringToAdd; //update the map with the new stringToAdd
+ int id = mySequenceData.getId(name1,false);
+ if (id==-1) { // new sequence.
+ name = name1;
+ mySequenceData.add(sequence(stringSeq1,name,remark,localid,alph));
+ localid++;
+ } else {// the sequence is already there...
+ sequence tmp(stringSeq1,name,remark,id,alph);
+ mySequenceData[id].operator += (tmp);
+ }
+
+ it1++;
+ }
+
+ return mySequenceData;
+}
+
+void clustalFormat::write(ostream &out, const sequenceContainer& sd) {
+ // setting some parameters
+ const int numOfPositionInLine = 60;
+ int maxLengthOfSeqName =0;
+ for (sequenceContainer::constTaxaIterator p=sd.constTaxaBegin(); p != sd.constTaxaEnd(); ++p ) {
+ int nameLen = (*p).name().size();
+ if (nameLen>maxLengthOfSeqName) maxLengthOfSeqName=nameLen;
+ }
+ if (maxLengthOfSeqName<15) maxLengthOfSeqName=16;
+ else maxLengthOfSeqName=maxLengthOfSeqName+4; // all this maxLengthOfSeqName is the
+
+ out<<"CLUSTAL V"<<endl;
+ // num. of space after the name.
+ int currentPosition = 0;
+ int charLen = sd.seqLen();
+ //in case of codon alphabet the character length is : 3*(sequence_length)
+ // codon codonAlph;
+ if (sd.alphabetSize()>=60) charLen*=3;
+ out<<endl<<endl;
+ while (currentPosition < charLen ) {
+ out.flush();
+ //for (vector<const sequenceContainer::sequenceDatum*>::const_iterator it5= vec.begin(); it5!=vec.end(); ++ it5) {
+ for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
+ for (int iName = 0 ;iName<maxLengthOfSeqName; ++iName) {
+ if (iName<(*it5).name().size()) {
+ out<<(*it5).name()[iName];
+ out.flush();
+ }
+ else out<<" ";
+ }
+ out.flush();
+ out<<" ";
+
+ if (charLen<numOfPositionInLine)
+ out<<it5->toString()<<endl;
+ else {
+ for (int k=currentPosition; k < currentPosition+numOfPositionInLine; ++k) {
+ if (k>=charLen)
+ break;
+ out<<it5->toString()[k];
+ //in case of codon alphabet each position is three characters
+
+ if (sd.alphabetSize()>=60){
+ out<<it5->toString()[++k];
+ out<<it5->toString()[++k];
+ }
+ }
+ out<<endl;
+ }
+ }
+ currentPosition +=numOfPositionInLine;
+ out<<endl<<endl;
+ }
+
+ return;
+}
+
diff --git a/libs/phylogeny/clustalFormat.h b/libs/phylogeny/clustalFormat.h
new file mode 100644
index 0000000..d17ec6d
--- /dev/null
+++ b/libs/phylogeny/clustalFormat.h
@@ -0,0 +1,47 @@
+// $Id: clustalFormat.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___CLUSTAL_FORMAT
+#define ___CLUSTAL_FORMAT
+
+#include "sequenceContainer.h"
+
// Reader/writer for the CLUSTAL multiple-alignment file format
// (see the example at the bottom of this header).
class clustalFormat{
public:
	// read: parses an alignment and checks all sequences are the same length.
	static sequenceContainer read(istream &infile, const alphabet* alph);
	// write: emits sd as interleaved 60-column CLUSTAL blocks.
	static void write(ostream &out, const sequenceContainer& sd);
	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
};
+
+#endif
+
+/* EXAMPLE OF THE FORMAT:
+CLUSTAL V
+
+
+Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQIN
+Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQIN
+Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQIN
+Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQIN
+Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQIN
+Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLN
+
+
+Langur SRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVS
+Baboon SHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVS
+Human SRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVR
+Rat SRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLS
+Cow SKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVS
+Horse NKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLS
+
+
+Langur QYVKGCGV
+Baboon QYVQGCGV
+Human QYVQGCGV
+Rat GYIRNCGV
+Cow SYVEGCTL
+Horse EYLASCNL
+
+
+*/
+
diff --git a/libs/phylogeny/cmdline.ggo b/libs/phylogeny/cmdline.ggo
new file mode 100644
index 0000000..8ca19d8
--- /dev/null
+++ b/libs/phylogeny/cmdline.ggo
@@ -0,0 +1,83 @@
+# $Id: cmdline.ggo 962 2006-11-07 15:13:34Z privmane $
+
+purpose "structural EM based Phylogeny"
+package "semphy"
+version "1.0.a3"
+
+# test default values
+
+#files
+section "Basic Options"
+option "sequence" s "Sequence file name" string typestr="FILENAME" default="-" no
+option "format" f "Sequence format: [phylip], clustal, molphy, mase, fasta" string default="phylip" no
+option "tree" t "Tree file name" string typestr="FILENAME" no
+option "constraint" c "Constraint Tree file name" string typestr="FILENAME" no
+option "outputfile" o "Output tree file" string typestr="FILENAME" default="-" no
+# model options:
+section "Model Options"
+option "alphabet" a "Alphabet Size" int typestr="4|20" default="20" no
+option "ratio" z "Transition/Transversion ratio" float default="2" no
+option "ACGprob" p "User input nucleotide frequencies. String separated list for A,C,G" string typestr="A,C,G" default="0.25,0.25,0.25" no
+
+option "gamma" G "Use Gamma RVAS (4 bins) and set alpha" float default="0.3" no
+option "optimizeGamma" O "Optimize Gamma and use it" flag off
+
+
+defgroup "Model" groupdesc="Model type"
+
+groupoption "day" - "Use 'day' model" group="Model"
+groupoption "jtt" - "Use 'jtt' model (default)" group="Model"
+groupoption "rev" - "Use 'rev' model" group="Model"
+groupoption "wag" - "Use 'wag' model" group="Model"
+groupoption "cprev" - "Use 'cprev' model" group="Model"
+groupoption "nucjc" - "Use nucleic acid JC model" group="Model"
+groupoption "aaJC" - "Use amino acid JC model" group="Model"
+groupoption "k2p" - "Use 'k2p' model" group="Model"
+groupoption "hky" - "Use 'hky' model" group="Model"
+
+option "modelfile" - "Use user input file as model" string typestr="NAME" no
+
+
+section "Log Options"
+
+option "verbose" v "Log report level (verbose)" int default="1" no
+option "Logfile" l "Log output file name" string typestr="FILENAME" default="-" no
+
+
+section "Algorithm Options"
+
+# algorithm options
+defgroup "Run Options" groupdesc="Which algorithm to run"
+
+groupoption "SEMPHY" S "Do SEMPHY step (default)" group="Run Options"
+groupoption "bbl" n "Only optimize branch length" group="Run Options"
+groupoption "likelihood" L "Compute likelihood for fixed tree" group="Run Options"
+groupoption "NJ" J "compute NJ tree only" group="Run Options"
+option "rate" R "optimize rate of gene" flag off
+
+
+section "Other Algorithm Options"
+option "max-semphy-iter" M "Max number of SEM iterations" int default="100" no
+option "max-bbl-iter" b "Max number of BBL iterations" int default="1000" no
+option "min-improv" d "Minimum improvement" float default="0.001" no
+option "gaps" g "Remove positions with gaps" flag off
+option "dont-use-NJ" N "Do not Use NJ to break stars in treeRearrange" flag on
+option "exact" e "Compute exact counts" flag off
+option "maxDistance" x "'infinity' distance for sequence pairs" float default="2.0" no
+
+option "seed" r "Seed random number generator" long no
+
+
+#option "paramFile" f "Parameter file name" string no
+#option "cin" I "Get input sequence file from cin" flag off
+
+# annealing:
+#option "anneal" A "Do anneal step" flag off
+#option "ratchet" R "Do Ratchet step" flag off
+#option "start-temp" H "Starting temp" float no
+#option "cooling-factor" c "Variance decay factor for anneal noise" float default="1.1" no
+#option "final-temp" C "Final temperature of anneal noise" float default="0.1" no
+#option "adversarial" - "Use Adversarial Re-weighting" flag off
+#option "learning-rate" L "learning rate for Adversary" float default="1.0" no
+#option "Orig-dumping" D "Dumping to the original weights" float default="0.5" no
+#option "prev-dumping" X "Dumping to the previous weights" float default="0.5" no
diff --git a/libs/phylogeny/cmdline2EvolObjs.cpp b/libs/phylogeny/cmdline2EvolObjs.cpp
new file mode 100644
index 0000000..6d448a8
--- /dev/null
+++ b/libs/phylogeny/cmdline2EvolObjs.cpp
@@ -0,0 +1,2 @@
+// $Id: cmdline2EvolObjs.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "cmdline2EvolObjs.h"
diff --git a/libs/phylogeny/cmdline2EvolObjs.h b/libs/phylogeny/cmdline2EvolObjs.h
new file mode 100644
index 0000000..deb3bcf
--- /dev/null
+++ b/libs/phylogeny/cmdline2EvolObjs.h
@@ -0,0 +1,578 @@
+// $Id: cmdline2EvolObjs.h 5928 2009-02-25 16:30:50Z privmane $
+
+#ifndef ___CREATESPFROMARGSINFO_H
+#define ___CREATESPFROMARGSINFO_H
+
+#include <cstdlib>
+#include "amino.h"
+#include "nucleotide.h"
+#include "codon.h"
+#include "sequenceContainer.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "replacementModel.h"
+#include "uniDistribution.h"
+#include "trivialAccelerator.h"
+#include "alphaTrivialAccelerator.h"
+#include "chebyshevAccelerator.h"
+#include "talRandom.h"
+#include "nucJC.h"
+#include "aaJC.h"
+#include "hky.h"
+#include "tamura92.h"
+#include "gtrModel.h"
+#include "logFile.h"
+#include "readDatMatrix.h"
+#include "gammaDistribution.h"
+#include "recognizeFormat.h"
+#include "replacementModelSSRV.h"
+#include "stochasticProcessSSRV.h"
+#include "someUtil.h"
+
+#define DEFAULT_VALUE_FOR_ALPAH 1.0
+
+template <class args_infoT>
+class cmdline2EvolObjs {
+private:
+ args_infoT _args_info;
+public:
	// Read-only access to the parsed command-line structure.
	const args_infoT& getArgsInfo(void) {return(_args_info);}
	// constructors
	// Installs the parsed command line and validates cross-option constraints.
	cmdline2EvolObjs(args_infoT &args_info) : _args_info(args_info) {
		checkParameterConsistancy();
	}
	// Overload intended to skip the consistency check; NOTE the check call
	// is commented out, so DontChack is currently ignored entirely.
	cmdline2EvolObjs(args_infoT &args_info, bool DontChack) : _args_info(args_info) {
		//		if (!DontChack) checkParameterConsistancy();
	}
	explicit cmdline2EvolObjs(void){}; // do nothing: args installed later via installArgsInfo()
	// Late installation of the parsed command line, with consistency check.
	void installArgsInfo(args_infoT &args_info){
		_args_info = args_info;
		checkParameterConsistancy();
	}
+private:
	// Validates cross-option constraints of the parsed command line; any
	// conflict is reported via errorMsg::reportError. Also has a side
	// effect: giving --nu implies the SSRV model (sets ssrv_flag).
	void checkParameterConsistancy() {
		if (!_args_info.homogeneous_flag) { // using Gamma ASRV
			// Gamma ASRV needs either a fixed alpha or alpha optimization
			if (!_args_info.alpha_given && !_args_info.optimizeAlpha_flag)
				errorMsg::reportError("Must use either 'alpha' or 'optimizeAlpha' when using Gamma ASRV");
		} else { // using homogeneous rates
			if (_args_info.categories_given ||_args_info.alpha_given || _args_info.optimizeAlpha_given)
				errorMsg::reportError("Can't use 'categories' or 'alpha' or 'optimizeAlpha' with homogeneous rates model");
			// more tests may come here
		}

		// check compatibility of alphabet and model
		if (_args_info.alphabet_arg == 4
			&& !(_args_info.nucjc_given || _args_info.k2p_given || _args_info.tamura92_given || _args_info.gtr_given))
			errorMsg::reportError("Model type is not suitable for nucleotide alphabet");
		if (_args_info.alphabet_arg == 20
			&& (_args_info.nucjc_given || _args_info.k2p_given || _args_info.tamura92_given || _args_info.gtr_given))
			errorMsg::reportError("Model type is not suitable for amino-acid alphabet");

		// --nu only makes sense with SSRV, so turn the SSRV flag on
		if (_args_info.nu_given) {
			_args_info.ssrv_flag = true;
		}
	}
+
+public:
	// Seed the global RNG from --seed; leave the default seed otherwise.
	void initializeRandomSeed() {
		if (_args_info.seed_given) {
			talRandom::setSeed(_args_info.seed_arg);
		}
	}
	// Direct the global log to --Logfile at verbosity --verbose.
	void initializeLogFile() {
		myLog::setLog(_args_info.Logfile_arg, _args_info.verbose_arg);
	}
+
+ // NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj
+ // is returned and the user is responsible for doing delete. This is because
+ // alphabet is an abstract class, so we can't return it by value
	// NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj
	// is returned and the user is responsible for doing delete. This is because
	// alphabet is an abstract class, so we can't return it by value
	// Maps --alphabet (4/20/60-64) to nucleotide/amino/codon; with --ssrv
	// the result is wrapped in a mulAlphabet of --categories categories.
	alphabet* cmdline2Alphabet() {
		alphabet* alphPtr = NULL;
		switch (_args_info.alphabet_arg)
		{ // allwayes defined, with default
		case 4:
			alphPtr = new nucleotide;
			break;
		case 20:
			alphPtr = new amino;
			break;
		case 64: case 61: case 60: case 62:
			alphPtr = new codon;
			break;
		default: errorMsg::reportError("alphabet size not supported");
		}

		// Handle mulAlphabet needed in case we use an SSRV model
		if (_args_info.ssrv_flag) {
			alphabet* mulAlphPtr = new mulAlphabet(alphPtr, _args_info.categories_arg);
			delete alphPtr; // mulAlphabet keeps its own copy of the base alphabet
			alphPtr = mulAlphPtr;
		}

		return alphPtr;
	}
+
	// Read the sequences named by --sequence ("-" or empty means stdin)
	// using format auto-detection. For SSRV runs the file is read with the
	// base alphabet and then re-coded into the mulAlphabet.
	sequenceContainer cmdline2SequenceContainer(const alphabet * const alphPtr) {
		ifstream ins;
		istream* inPtr = &cin;	// default: read from stdin
		string sequenceFileName(_args_info.sequence_arg);
		if (sequenceFileName != "" && sequenceFileName != "-") {
			ins.open(sequenceFileName.c_str());
			if (! ins.is_open())
				errorMsg::reportError(string("Can not open sequence file ")+sequenceFileName);
			inPtr = &ins;
		}
		istream& in = *inPtr;

		sequenceContainer sc;
		if (!_args_info.ssrv_flag) {
			sc = recognizeFormat::read(in, alphPtr);
		} else {
			// read with the base alphabet, then convert to the multiplied one
			sequenceContainer scBase(recognizeFormat::read(in, (static_cast<const mulAlphabet*>(alphPtr))->getBaseAlphabet()));
			sc = sequenceContainer(scBase, alphPtr);
		}
		return sc;
	}
+
+ void takeCareOfGaps (sequenceContainer &sc) {
+ if (_args_info.gaps_flag) {
+ sc.removeGapPositions();
+ } else {
+ sc.changeGaps2MissingData();
+ }
+ }
+
+ // NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj
+ // is returned and the user is responsible for deleting it. This is because
+ // we need to return a NULL pointer if we are not given a tree
+ tree *cmdline2Tree() {
+ tree *treePtr = NULL;
+ if (_args_info.tree_given) { // did we get a tree
+ string treeFileName(_args_info.tree_arg);
+ treePtr = new tree(treeFileName);
+ }
+ return treePtr;
+ }
+
+ // NOTE: Unlike other cmdline2*** classes, here a pointer to an allocated obj
+ // is returned and the user is responsible for deleting it. This is because
+ // we need to return a NULL pointer if we are not given a tree
+ tree *cmdline2ConstraintTree() {
+ tree *constraintTreePtr = NULL;
+ if (_args_info.constraint_given) { // did we get a tree
+ string constraintTreeFileName(_args_info.constraint_arg);
+ constraintTreePtr = new tree(constraintTreeFileName);
+ }
+ return constraintTreePtr;
+ }
+
	// Build the replacement model selected on the command line (returned
	// pointer is owned by the caller). Nucleotide models use --ratio and
	// --ACGprob; the default, when no model flag is given, is JTT.
	replacementModel *cmdline2ReplacementModel() {
		replacementModel *probModPtr=NULL;
		MDOUBLE ratio =_args_info.ratio_arg;	// transition/transversion ratio
		// base frequencies: A,C,G from --ACGprob, T as the remainder
		MDOUBLE Ap(0.25), Cp(0.25), Gp(0.25), Tp(0.25);
		sscanf(_args_info.ACGprob_arg,"%lf,%lf,%lf", &Ap, &Cp, &Gp);
		Tp=1.0-(Ap+Cp+Gp);

		if (_args_info.day_given) {
			LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
			probModPtr=new pupAll(datMatrixHolder::dayhoff);
		} else if (_args_info.rev_given) {
			// NOTE: --rev selects the mtREV24 matrix
			LOG(5,<<"Using rev replacement matrix"<<endl);
			probModPtr=new pupAll(datMatrixHolder::mtREV24);
		} else if (_args_info.wag_given) {
			LOG(5,<<"Using wag replacement matrix"<<endl);
			probModPtr=new pupAll(datMatrixHolder::wag);
		} else if (_args_info.cprev_given) {
			LOG(5,<<"Using cprev replacement matrix"<<endl);
			probModPtr=new pupAll(datMatrixHolder::cpREV45);
		} else if (_args_info.nucjc_given) {
			LOG(5,<<"Using JC for nucleotide"<<endl);
			probModPtr=new nucJC;
		} else if (_args_info.aaJC_given) {
			LOG(5,<<"Using JC for amino acids"<<endl);
			probModPtr=new aaJC;
		} else if ((_args_info.hky_given) || (_args_info.k2p_given)) {
			// k2p is hky with uniform base frequencies (the defaults)
			LOG(5,<<"Using hky replacement matrix"<<endl);
			probModPtr=new hky(Ap,Cp,Gp,Tp,ratio);
		} else if (_args_info.tamura92_given) {
			LOG(5,<<"Using the Tamura 92 replacement matrix"<<endl);
			MDOUBLE theta = Cp+Gp;	// GC content
			probModPtr=new tamura92(theta, ratio);
		} else if (_args_info.gtr_given) {
			LOG(5,<<"Using the GTR replacement matrix"<<endl);
			// GTR starts from uniform frequencies here (not estimated from data)
			//Vdouble freqs = evaluateCharacterFreq(_sc);
			Vdouble freqs;
			freqs.push_back(0.25);
			freqs.push_back(0.25);
			freqs.push_back(0.25);
			freqs.push_back(0.25);
			probModPtr=new gtrModel(freqs);
		} else if ((_args_info.alphabet_arg == 20) &&
				   (_args_info.modelfile_given)) { // try to read the name as a file name
			LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
			probModPtr=new pupAll(_args_info.modelfile_arg);
		} else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
			probModPtr=new pupAll(datMatrixHolder::jones);
		}

		return probModPtr;
	}
+
+ // Amino-acid-only variant of cmdline2ReplacementModel: no nucleotide
+ // options, and --modelfile is honored without the alphabet-size guard.
+ // Returns a heap-allocated model the CALLER must delete; JTT by default.
+ replacementModel *cmdline2ReplacementModelAAOnly() {
+ replacementModel *probModPtr=NULL;
+
+ if (_args_info.day_given) {
+ LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::dayhoff);
+ } else if (_args_info.rev_given) {
+ LOG(5,<<"Using rev replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::mtREV24);
+ } else if (_args_info.wag_given) {
+ LOG(5,<<"Using wag replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::wag);
+ } else if (_args_info.cprev_given) {
+ LOG(5,<<"Using cprev replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::cpREV45);
+ } else if (_args_info.aaJC_given) {
+ LOG(5,<<"Using JC for amino acids"<<endl);
+ probModPtr=new aaJC;
+ } else if (_args_info.modelfile_given) { // try to read the name as a file name
+ LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
+ probModPtr=new pupAll(_args_info.modelfile_arg);
+ } else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
+ probModPtr=new pupAll(datMatrixHolder::jones);
+ }
+
+ return probModPtr;
+ }
+
+ // Gamma-distributed ASRV is used unless the --homogeneous flag was set.
+ bool useGamma()
+ {
+ if (_args_info.homogeneous_flag)
+ return false;
+ return true;
+ }
+
+ // this function is ment for cases where a "mature" stochastic Process
+ // can be produced. If there is a chance that the user may ask for
+ // alpha optimisation use the
+ // "cmdline2StochasticProcessThatRequiresAlphaOptimization" version
+ // instead
+ // Returns a heap-allocated stochasticProcess the CALLER must delete.
+ inline stochasticProcess *cmdline2StochasticProcess() {
+ distribution *distP = NULL;
+ if (useGamma()) {
+ if (_args_info.alpha_given)
+ distP = new gammaDistribution(_args_info.alpha_arg,_args_info.categories_arg);
+ else
+ // Gamma without a fixed alpha is only valid with alpha optimization.
+ errorMsg::reportError("Can not create stochastic process with ASRV if no alpha is given, when working without alpha optimization");
+ LOG(5,<<"Using Gamma ASRV with "<<_args_info.categories_arg<<" bins"<<endl);
+ } else {
+ distP = new uniDistribution;
+ LOG(5,<<"Using uniform rates"<<endl);
+ }
+ // NOTE(review): the local distribution is deleted right after the call,
+ // which presumes cmdline2StochasticProcessInternal/stochasticProcess
+ // clone it -- confirm against the stochasticProcess constructor.
+ stochasticProcess *spPtr = cmdline2StochasticProcessInternal(*distP);
+ if (distP) delete distP;
+ return(spPtr);
+ }
+
+ // Assuming that the user asked to optimize Alpha (by bestAlphaAndBBL)
+ // Builds a Gamma-ASRV process whose alpha is only a starting point:
+ // --alpha when given, otherwise DEFAULT_VALUE_FOR_ALPAH.
+ // Returns a heap-allocated stochasticProcess the CALLER must delete.
+ inline stochasticProcess *cmdline2StochasticProcessThatRequiresAlphaOptimization () {
+ distribution *distP = NULL;
+ if (!_args_info.optimizeAlpha_given)
+ errorMsg::reportError("Can't use function cmdline2StochasticProcessThatRequiresAlphaOptimization if the optimizeAlpha flag was not turned on - please inform the programmer of this error.");
+ // else
+ if (_args_info.alpha_given)
+ distP = new gammaDistribution(_args_info.alpha_arg,_args_info.categories_arg);
+ else
+ distP = new gammaDistribution(DEFAULT_VALUE_FOR_ALPAH,_args_info.categories_arg);
+ LOG(5,<<"Using Gamma ASRV with "<<_args_info.categories_arg<<" bins"<<endl);
+ // local distribution is released after the call -- same cloning
+ // assumption as in cmdline2StochasticProcess()
+ stochasticProcess *spPtr = cmdline2StochasticProcessInternal(*distP);
+ if (distP) delete distP;
+ return(spPtr);
+ }
+
+ // Build a stochastic process with a single (uniform) rate category.
+ // Returns a heap-allocated object owned by the caller.
+ inline stochasticProcess *cmdline2HomogenuisStochasticProcess() {
+ LOG(5,<<"Creating homogeneous rate based stochastic Process "<<endl);
+ uniDistribution flatRates;
+ return cmdline2StochasticProcessInternal(flatRates);
+ }
+
+ // Amino-acid-only variant of cmdline2HomogenuisStochasticProcess;
+ // note this one returns the process by value, not by pointer.
+ inline stochasticProcess cmdline2HomogenuisStochasticProcessAAOnly() {
+ LOG(5,<<"Creating homogeneous rate based stochastic Process "<<endl);
+ uniDistribution flatRates;
+ return cmdline2StochasticProcessInternalAAOnly(flatRates);
+ }
+
+ // Dispatch to the right stochastic-process builder for the given flags,
+ // validating that Gamma ASRV has either --alpha or --optimizeAlpha.
+ // Returns a heap-allocated process the CALLER must delete.
+ inline stochasticProcess *cmdline2StochasticProcessSafe()
+ {
+ if (_args_info.homogeneous_flag) {
+ return cmdline2StochasticProcess();
+ } else { // we use Gamma
+ if (_args_info.optimizeAlpha_flag) {
+ return cmdline2StochasticProcessThatRequiresAlphaOptimization();
+ } else if (_args_info.alpha_given) {
+ return cmdline2StochasticProcess();
+ } else {
+ // reportError with exit code 1 does not return
+ errorMsg::reportError("Gamma ASRV requiers either --alpha or --optimizeAlpha or both.",1);
+ }
+ }
+ exit(1); // should never be reached
+ }
+
+private:
+ // Core builder: pair the replacement model selected on the command line
+ // with a pij accelerator and wrap both, together with dist, in a
+ // stochasticProcess. Matrix-based (pupAll) models get the chebyshev
+ // accelerator, parametric models the trivial one.
+ // Returns a heap-allocated process the CALLER must delete.
+ stochasticProcess *cmdline2StochasticProcessInternal(distribution& dist) {
+ replacementModel *probModPtr=NULL;
+ pijAccelerator *pijAcc=NULL;
+ MDOUBLE ratio =_args_info.ratio_arg;
+ // parse "A,C,G" frequencies from --ACGprob; T gets the remainder
+ MDOUBLE Ap(0.25), Cp(0.25), Gp(0.25), Tp(0.25);
+ sscanf(_args_info.ACGprob_arg,"%lf,%lf,%lf", &Ap, &Cp, &Gp);
+ Tp=1.0-(Ap+Cp+Gp);
+
+ if (_args_info.day_given) {
+ LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::dayhoff);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else if (_args_info.rev_given) {
+ LOG(5,<<"Using rev replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::mtREV24);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else if (_args_info.wag_given) {
+ LOG(5,<<"Using wag replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::wag);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else if (_args_info.cprev_given) {
+ LOG(5,<<"Using cprev replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::cpREV45);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else if (_args_info.nucjc_given) {
+ LOG(5,<<"Using JC for nucleotide"<<endl);
+ probModPtr=new nucJC;
+ pijAcc = new trivialAccelerator(probModPtr);
+ } else if (_args_info.aaJC_given) {
+ LOG(5,<<"Using JC for amino acids"<<endl);
+ probModPtr=new aaJC;
+ pijAcc = new trivialAccelerator(probModPtr);
+ } else if ((_args_info.hky_given) || (_args_info.k2p_given)) {
+ LOG(5,<<"Using hky replacement matrix"<<endl);
+ probModPtr=new hky(Ap,Cp,Gp,Tp,ratio);
+ pijAcc = new trivialAccelerator(probModPtr);
+ } else if (_args_info.tamura92_given) {
+ LOG(5,<<"Using the Tamura 92 replacement matrix"<<endl);
+ MDOUBLE theta = Cp+Gp;
+ probModPtr=new tamura92(theta, ratio);
+ pijAcc = new trivialAccelerator(probModPtr);
+ } else if (_args_info.gtr_given) {
+ LOG(5,<<"Using the GTR replacement matrix"<<endl);
+ //Vdouble freqs = evaluateCharacterFreq(_sc);
+ // NOTE(review): frequencies are hard-coded to uniform here
+ Vdouble freqs;
+ freqs.push_back(0.25);
+ freqs.push_back(0.25);
+ freqs.push_back(0.25);
+ freqs.push_back(0.25);
+ probModPtr=new gtrModel(freqs);
+ pijAcc = new trivialAccelerator(probModPtr);
+ } else if ((_args_info.alphabet_arg == 20) &&
+ (_args_info.modelfile_given)) { // try to read the name as a file name
+ LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
+ probModPtr=new pupAll(_args_info.modelfile_arg);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
+ probModPtr=new pupAll(datMatrixHolder::jones);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ }
+
+ stochasticProcess *spPtr = NULL;
+ if (!_args_info.ssrv_flag) {
+ spPtr = new stochasticProcess(&dist, pijAcc);
+
+ } else {
+ // Using a Site-Specific Rate Variation model
+ // NOTE(review): probModSsrv is a stack local whose address is handed
+ // to the new accelerator -- this is only safe if trivialAccelerator
+ // (or stochasticProcessSSRV) clones the model; confirm before reuse.
+ replacementModelSSRV probModSsrv(&dist,probModPtr,_args_info.nu_arg);
+ if (pijAcc) delete pijAcc;
+ pijAcc = new trivialAccelerator(&probModSsrv);
+ spPtr = new stochasticProcessSSRV(pijAcc);
+ LOG(5,<<"cmdline2StochasticProcessInternal: Created stochasticProcessSSRV"<<endl);
+ }
+
+ // if rate is given in input, set it.
+ if (_args_info.inputRate_given)
+ spPtr->setGlobalRate(_args_info.inputRate_arg);
+
+ // the locals are deleted here, presuming the stochastic process keeps
+ // its own copies of the model and accelerator -- TODO confirm
+ if (probModPtr) delete probModPtr;
+ if (pijAcc) delete pijAcc;
+ return spPtr;
+ }
+
+ // Amino-acid-only core builder: selects the replacement model (all are
+ // pupAll except aaJC) plus an accelerator and returns the resulting
+ // stochasticProcess BY VALUE. No SSRV handling and no global-rate
+ // override here (the rate code below is deliberately disabled).
+ stochasticProcess cmdline2StochasticProcessInternalAAOnly(distribution& dist) {
+ replacementModel *probModPtr=NULL;
+ pijAccelerator *pijAcc=NULL;
+
+ if (_args_info.day_given) {
+ LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::dayhoff);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else if (_args_info.rev_given) {
+ LOG(5,<<"Using rev replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::mtREV24);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else if (_args_info.wag_given) {
+ LOG(5,<<"Using wag replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::wag);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else if (_args_info.cprev_given) {
+ LOG(5,<<"Using cprev replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::cpREV45);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else if (_args_info.aaJC_given) {
+ LOG(5,<<"Using JC for amino acids"<<endl);
+ probModPtr=new aaJC;
+ pijAcc = new trivialAccelerator(probModPtr);
+ } else if (_args_info.modelfile_given) { // try to read the name as a file name
+ LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
+ probModPtr=new pupAll(_args_info.modelfile_arg);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ } else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
+ probModPtr=new pupAll(datMatrixHolder::jones);
+ pijAcc = new chebyshevAccelerator(probModPtr);
+ }
+ stochasticProcess sp(&dist, pijAcc);
+
+ // if rate is given in input, set it.
+ // if (_args_info.inputRate_given)
+ // sp.setGlobalRate(_args_info.inputRate_arg);
+
+ // sp presumably keeps its own copies, so the locals are freed here
+ if (probModPtr) delete probModPtr;
+ if (pijAcc) delete pijAcc;
+ return sp;
+ }
+
+public:
+ // Build a process that integrates Gamma ASRV exactly through the
+ // alphaTrivialAccelerator (rather than discrete rate bins). Only the
+ // matrix-based pupAll models are supported; --alpha is mandatory.
+ // Returns the process BY VALUE.
+ stochasticProcess cmdline2ExactGammaStochasticProcess() {
+ uniDistribution dist;
+ LOG(5,<<"Creating exact Gamma based stochastic Process "<<endl);
+ if(!_args_info.alpha_given)
+ errorMsg::reportError("Using exact Gamma requires alpha to be set");
+ pupAll *probModPtr=NULL;
+ // pijAccelerator *pijAcc=NULL;
+ alphaTrivialAccelerator *pijAcc=NULL;
+
+ if (_args_info.day_given) {
+ LOG(5,<<"Using Dayhoff replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::dayhoff);
+ pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
+ } else if (_args_info.rev_given) {
+ LOG(5,<<"Using rev replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::mtREV24);
+ pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
+ } else if (_args_info.wag_given) {
+ LOG(5,<<"Using wag replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::wag);
+ pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
+ } else if (_args_info.cprev_given) {
+ LOG(5,<<"Using cprev replacement matrix"<<endl);
+ probModPtr=new pupAll(datMatrixHolder::cpREV45);
+ pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
+ } else if ((_args_info.alphabet_arg == 20) &&
+ (_args_info.modelfile_given)) { // try to read the name as a file name
+ LOG(5,<<"Using user supplied replacement matrix from the file "<<_args_info.modelfile_arg<<endl);
+ probModPtr=new pupAll(_args_info.modelfile_arg);
+ pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
+ } else if (_args_info.nucjc_given ||
+ _args_info.aaJC_given ||
+ _args_info.hky_given ||
+ _args_info.k2p_given ||
+ _args_info.tamura92_given ||
+ _args_info.gtr_given) {
+ // parametric models are rejected; reportError does not return
+ errorMsg::reportError("Exact Gamma stochastic process only works with pupAll model");
+ } else { /* default = if (strcmp(_args_info.model_arg,"jtt")==0) */
+ probModPtr=new pupAll(datMatrixHolder::jones);
+ pijAcc = new alphaTrivialAccelerator(probModPtr,_args_info.alpha_arg);
+ }
+ stochasticProcess sp(&dist, pijAcc);
+
+ // if rate is given in input, set it.
+ if (_args_info.inputRate_given)
+ sp.setGlobalRate(_args_info.inputRate_arg);
+
+ // sp presumably keeps its own copies, so the locals are freed here
+ if (probModPtr) delete probModPtr;
+ if (pijAcc) delete pijAcc;
+ return sp;
+ }
+
+public:
+ // NOTE: the user must check:
+ // if the returned stream is an ofstream object (an actual file) it should be deleted
+ // if the returned stream is an ostream object (cout) do nothing
+ // An empty --outputfile or "-" means standard output.
+ ostream *cmdline2OutputStream() {
+ string fileName(_args_info.outputfile_arg);
+ if (fileName.empty() || fileName == "-")
+ return &cout;
+ ostream *filePtr = new ofstream(fileName.c_str());
+ if (!filePtr->good())
+ errorMsg::reportError(string("Can't open for writing the file ")+fileName);
+ return filePtr;
+ }
+
+ // NOTE: the user must check:
+ // if the returned stream is an ofstream object (an actual file) it should be deleted
+ // if the returned stream is an ostream object (cout) do nothing
+ // An empty --treeoutputfile or "-" means standard output.
+ ostream *cmdline2TreeOutputStream() {
+ string fileName(_args_info.treeoutputfile_arg);
+ if (fileName.empty() || fileName == "-")
+ return &cout;
+ ostream *filePtr = new ofstream(fileName.c_str());
+ if (!filePtr->good())
+ errorMsg::reportError(string("Can't open for writing the file ")+fileName);
+ return filePtr;
+ }
+
+ // Intended to verify that the user-supplied starting tree satisfies the
+ // constraint tree. NOTE(review): the whole check is commented out, so
+ // this function currently performs NO validation at all.
+ void consistencyCheck (tree *treePtr, tree *constraintTreePtr) {
+ if (treePtr!=NULL) {
+ if (constraintTreePtr !=NULL) {
+ /* constraints c1(*constraintTreePtr);
+ c1.setTree(*treePtr);
+ if (!c1.fitsConstraints()){
+ LOG(1,<<"Input tree does not fit constraints!"<<endl);
+ LOGDO(1,c1.outputMissingClads(myLog::LogFile()));
+ errorMsg::reportError("Please enter a starting tree that fits the constraints");
+ }
+ */ }
+ }
+ }
+
+public:
+ // Read from file the posterior distribution of rates for each sequence site
+ // Format: one line per site, each with exactly categories_arg
+ // whitespace-separated numbers. Reports an error for a missing file or
+ // a short line; returns one row per parsed input line.
+ VVdoubleRep cmdline2PosteriorRates() {
+ if (!_args_info.posteriorRates_given)
+ errorMsg::reportError("cmdline2EvolObjs::cmdline2PosteriorRates: This method shouldn't be used if --posteriorRates was not given");
+ ifstream in(_args_info.posteriorRates_arg);
+ if (!in.is_open())
+ errorMsg::reportError(string("Can not open sequence file ")+string(_args_info.posteriorRates_arg));
+
+ string line, number, rest; // For splitting the line into separate numbers
+ VdoubleRep posterior(_args_info.categories_arg); // Current line
+ VVdoubleRep posteriorRates; // Accumulate all lines
+ getline(in, line);
+
+ // Each loop reads one line of numbers
+ while (in) {
+ // split line into numbers
+ for(int cat=0; cat<_args_info.categories_arg; ++cat) {
+ // peel the next token off the front of line; rest keeps the tail
+ splitString2(line, " ", number, rest);
+ if (number.size() == 0)
+ errorMsg::reportError(string("cmdline2EvolObjs::cmdline2PosteriorRates: Bad line with too few numbers in file ")
+ +_args_info.posteriorRates_arg+": "+line);
+ posterior[cat] = atof(number.c_str());
+ }
+ posteriorRates.push_back(posterior);
+ getline(in, line);
+ }
+ return posteriorRates;
+ }
+};
+
+#endif
diff --git a/libs/phylogeny/codon.cpp b/libs/phylogeny/codon.cpp
new file mode 100644
index 0000000..8a9cbb3
--- /dev/null
+++ b/libs/phylogeny/codon.cpp
@@ -0,0 +1,560 @@
+// $Id: codon.cpp 5981 2009-03-17 14:39:39Z rubi $
+
+#include "codon.h"
+#include "nucleotide.h"
+#include "amino.h"
+#include "logFile.h"
+#include "definitions.h"
+#include "someUtil.h"
+#include "matrixUtils.h"
+#include "sequenceContainer.h"
+#include <sstream>
+#include <cctype>
+#define INITIATION_CODON "i"
+
+vector<vector<codonUtility::diffType> > codonUtility::_trtvDiff;
+vector<vector<codonUtility::replacementType> > codonUtility::_synNonsynDiff;
+vector<vector<codonUtility::nucDiffPlaceType> > codonUtility::_nucDiffPlace;
+vector<vector<codonUtility::nucsDiffType> > codonUtility::_nucsDiff;
+
+
+ // Default constructor: load the standard nuclear genetic code.
+ codon::codon(){
+ init(geneticCodeHolder::nuclearStandard);
+ }
+
+ // Construct a codon alphabet from an explicit genetic-code table.
+ codon::codon(const geneticCodeString& matrixFileString){
+ init(matrixFileString);
+ }
+
+ // (Re)initialize all codon tables from the embedded genetic-code text.
+ void codon::init(const geneticCodeString& matrixFileString)
+ {
+ // Val holds the whole code table as one string, not a file name.
+ readMatrixFromFile(matrixFileString.Val);
+ }
+
+ // Parse a genetic-code table and fill _geneticCode, _codon2Int,
+ // _initiationIndex2codon and _alphabetSize.
+ // Despite the name, the argument is the table TEXT itself (wrapped in a
+ // stringstream), not a file name. The expected token stream alternates
+ // one-letter amino-acid codes (or "i" for initiation codons, "*" for
+ // stop) with the three-letter codons they translate; "#" starts a
+ // comment token. Parsing stops at the first token that is neither,
+ // after verifying that exactly 64 codons were read.
+ void codon::readMatrixFromFile(const string& matrixFileName){ //default value: "nuclearCode.txt"
+ // cout<<"in codon constructor"<<endl;
+ stringstream in(matrixFileName.c_str());
+ if (!in) {
+ errorMsg::reportError("in codon::readMatrixFromFile: unable to open matrix data file");
+ }
+
+ int aa = -1; //initialized as -1 so in first iteration will change to 0
+ int noOfCodons = 0;
+ string strAmino;
+ bool isInitCodon = false;
+ while (!in.eof()) { //20 amino acids and stop
+ string val;
+ in>>val;
+ if (val.size()==1) { //amino acid
+ if(val == INITIATION_CODON)
+ isInitCodon = true;
+ else{
+ aa++;
+ strAmino=val;
+ // "*" marks stop codons: everything read so far is a sense
+ // codon, so the alphabet size is fixed here.
+ if (strAmino=="*") { _alphabetSize=noOfCodons;}
+ isInitCodon = false;
+ }
+ }
+
+ else if (val.size()==3 && val[0]!='#'){ //codon, # symbolizes a comment
+ if(isInitCodon){
+ // initiation codons must repeat a codon already registered
+ map <string,int>::const_iterator iniItr =_codon2Int.find(val);
+ if(iniItr == _codon2Int.end())
+ errorMsg::reportError("Initiation codon with undefined index at codon::readMatrixFromFile");
+ else
+ _initiationIndex2codon[iniItr->second] = val;
+ }
+ else{
+ _geneticCode[val]=strAmino;
+ _codon2Int[val]=noOfCodons;
+ noOfCodons++;
+ }
+ }
+ else {
+ // any other token (including the empty read at end of input)
+ // terminates parsing; sanity-check the codon count first
+ if (noOfCodons!=64){
+ string err="in codon::readMatrixFromFile: total number of codons = "+int2string(noOfCodons);
+ errorMsg::reportError(err);
+ }
+ return;
+ }
+ }
+ }
+ // Assignment: copy the genetic-code tables and derived bookkeeping.
+ codon& codon::operator=(const codon& other) {
+ _alphabetSize = other._alphabetSize; // number of sense (non-stop) codons
+ _geneticCode = other._geneticCode; // codon string -> amino-acid string
+ _codon2Int = other._codon2Int; // codon string -> integer code
+ _initiationIndex2codon = other._initiationIndex2codon; // initiation codons keyed by code
+ return *this;
+ }
+// codon::codon(const codon& other):
+// _geneticCode(other._geneticCode), //key - codon, value - amino acid
+// _codon2Int(other._codon2Int),//key string of codon int= integer value of codon
+// _alphabetSize(other._alphabetSize){}
+
+
+//return -99 if not succeeds.
+ //Translate the three nucleotides starting at position pos of s into the
+ //integer codon code. Returns gap() if any position is a gap, unknown()
+ //for ambiguous/unknown nucleotides, and -99 when fewer than three
+ //characters remain (sequence length not divisible by three).
+ int codon::fromChar(const string& s, const int pos) const {
+ // cast avoids a signed/unsigned comparison between pos and size()
+ if (s.size() <= static_cast<string::size_type>(pos)+2) {
+ //errorMsg::reportError("Trying to read a codon pass the end of the string. The number of nucleotide may not be divisible by three");
+ string textToPrint("Trying to read a codon pass the end of the string. The number of nucleotide may not be divisible by three");
+ LOG(1,<<textToPrint<<endl);
+ return -99;
+ }
+
+ nucleotide nuc;
+ int p1,p2,p3;
+ p1 = nuc.fromChar(s[pos]);
+ p2 = nuc.fromChar(s[pos+1]);
+ p3 = nuc.fromChar(s[pos+2]);
+
+ if ((p1 <0) || (p2 <0) || (p3 <0))
+ return gap(); // at least one position holds a gap character
+ else if ((p1 ==15) || (p2 ==15) || (p3 ==15)) return unknown(); // unknown.
+ else if ((p1 >4) || (p2 >4) || (p3 >4)) return unknown(); //unknown.
+ string strCodon="";
+ //change U --> T so RNA input maps onto the DNA codon table
+ if (p1==4) strCodon+="T";
+ else strCodon+=toupper(s[pos]);
+ if (p2==4) strCodon+="T";
+ else strCodon+=toupper(s[pos+1]);
+ if (p3==4) strCodon+="T";
+ else strCodon+=toupper(s[pos+2]);
+ // Look the codon up directly in the member table. The previous code
+ // copied the whole _codon2Int map on every call just to obtain a
+ // non-const find(), then performed a second lookup via operator[];
+ // a const_iterator avoids both the copy and the double lookup.
+ map <string,int>::const_iterator it1 = _codon2Int.find(strCodon);
+ if (it1 == _codon2Int.end()){
+ string err="error in codon::fromChar cannot find codon "+strCodon;
+ errorMsg::reportError(err);
+ }
+ return it1->second;
+ }
+
+ //Translate a whole nucleotide string into a vector of codon codes.
+ //The string length must be a multiple of three.
+ vector<int> codon::fromString(const string &str) const {
+ vector<int> vec;
+ if (str.size()%3!=0) {
+ errorMsg::reportError("error in function codon::fromString. String length should be a multiplication of 3");
+ }
+ // use an unsigned index: comparing a signed int against str.size()
+ // (size_t) was a signed/unsigned mismatch
+ for (string::size_type i=0;i<str.size();i+=3)
+ vec.push_back(fromChar(str,static_cast<int>(i)));
+ return vec;
+ }
+
+ //Reverse lookup: return the three-letter codon string for an integer
+ //code ("XXX" for unknown, "---" for gap). Reports an error when no
+ //codon maps to in_id.
+ string codon::fromInt(const int in_id) const{
+ if (in_id == unknown())
+ return "XXX";
+ if (in_id == gap())
+ return "---";
+ // scan the member map directly with a const_iterator; the previous
+ // code copied the entire map on every call just to iterate it
+ map <string, int>::const_iterator it=_codon2Int.begin();
+ for (; it!=_codon2Int.end(); ++it){
+ if (it->second==in_id){
+ return it->first;
+ }
+ }
+ string err="error in function codon::fromInt: no codon found for the integer";
+ errorMsg::reportError(err);
+ return (string("we should never get here - the reportError above will exit"));
+ }
+
+ //Classify the change between two codons: identical, synonymous (same
+ //amino acid) or non-synonymous.
+ codonUtility::replacementType codonUtility::codonReplacement(const int c1, const int c2, const codon &cod){
+ if (c1 == c2)
+ return codonUtility::sameCodon;
+ const bool sameAminoAcid = (codonUtility::aaOf(c1,cod) == codonUtility::aaOf(c2,cod));
+ return sameAminoAcid ? codonUtility::synonymous : codonUtility::non_synonymous;
+ }
+
+ //Translate a codon code into the matching amino-acid code, preserving
+ //gap and unknown. Reports an error for codons missing from the genetic
+ //code table or mapped to more than a one-letter amino-acid code.
+ int codonUtility::aaOf(const int c1, const codon &cod){
+ amino a;
+ if (c1==cod.gap())
+ return a.gap();
+ if (c1==cod.unknown())
+ return a.unknown();
+ string strCodon=cod.fromInt(c1);
+ // bind by const reference: the previous code copied the whole genetic
+ // code map into a local on every call just to call non-const find()
+ const map <string,string>& geneticCode=cod.geneticCode();
+ map <string,string>::const_iterator pos = geneticCode.find(strCodon);
+ if (pos == geneticCode.end()){
+ string err="error in codonUtility::aaOf: cannot find codon "+strCodon;
+ errorMsg::reportError(err);
+ }
+ if (pos->second.size() > 1){
+ errorMsg::reportError("error in codonUtility::aaOf: amino acid 1 letter code > 1");
+ }
+ return a.fromChar(*pos->second.c_str());
+ }
+
+
+ //Classify how two codons differ: number of substituted positions and
+ //whether each substitution is a transition (tr) or transversion (tv).
+ //The parity trick below assumes the nucleotide integer codes make the
+ //two transition pairs (A/G and C/T) sum to an even number while any
+ //transversion pair sums odd -- TODO confirm against nucleotide::fromChar.
+ codonUtility::diffType codonUtility::codonDiff(const int c1, const int c2, const codon &cod){
+ if (c1==c2) return codonUtility::equal;
+ nucleotide n;
+ string s1 = cod.fromInt(c1);
+ string s2 = cod.fromInt(c2);
+
+ // per-position code sums; even sum == transition, odd == transversion
+ int pos1 = n.fromChar(s1[0])+n.fromChar(s2[0]);
+ int pos2 = n.fromChar(s1[1])+n.fromChar(s2[1]);
+ int pos3 = n.fromChar(s1[2])+n.fromChar(s2[2]);
+
+ if (s1[0]!=s2[0] && s1[1]!=s2[1] && s1[2]!=s2[2])
+ return codonUtility::threesub;
+
+ // exactly one differing position: classify it as tr or tv
+ if (s1[0]==s2[0] && s1[1]==s2[1] && s1[2]!=s2[2]) {
+ if (pos3%2==0) return codonUtility::tr;
+ else return codonUtility::tv;
+ }
+ if (s1[1]==s2[1] && s1[2]==s2[2] && s1[0]!=s2[0]) {
+ if (pos1%2==0) return codonUtility::tr;
+ else return codonUtility::tv;
+ }
+ if (s1[0]==s2[0] && s1[2]==s2[2] && s1[1]!=s2[1]) {
+ if (pos2%2==0) return codonUtility::tr;
+ else return codonUtility::tv;
+ }
+
+ // two differing positions: two transitions, two transversions,
+ // or (fall-through) one of each
+ if (s1[0]==s2[0] && pos2%2==0 && pos3%2==0)
+ return codonUtility::twoTrs;
+ if (s1[1]==s2[1] && pos1%2==0 && pos3%2==0)
+ return codonUtility::twoTrs;
+ if (s1[2]==s2[2] && pos1%2==0 && pos2%2==0)
+ return codonUtility::twoTrs;
+
+ if (s1[0]==s2[0] && pos2%2!=0 && pos3%2!=0)
+ return codonUtility::twoTvs;
+ if (s1[1]==s2[1] && pos1%2!=0 && pos3%2!=0)
+ return codonUtility::twoTvs;
+ if (s1[2]==s2[2] && pos1%2!=0 && pos2%2!=0)
+ return codonUtility::twoTvs;
+
+ return codonUtility::trtv;
+ }
+
+
+ //return the place (0, 1, or 2) that the two codons are different
+ //and the identity of the different nucleotide in the target codon.
+ //For example, nucDiffPlace(ATG, ACG) returns C2.
+ //Returns EQUAL for identical codons and MUL_SUB when more than one
+ //position differs (any single-position result computed on the way is
+ //then discarded).
+ codonUtility::nucDiffPlaceType codonUtility::nucDiffPlace(const int fromCodon, const int targetCodon, const codon &cod){
+ if (fromCodon == targetCodon)
+ return codonUtility::EQUAL;
+
+ codonUtility::nucDiffPlaceType res = A1;
+ nucleotide nuc;
+ string s1 = cod.fromInt(fromCodon);
+ string s2 = cod.fromInt(targetCodon);
+
+ int diffNum = 0;
+ if (s1[0] != s2[0]){
+ ++diffNum;
+ switch (s2[0])
+ {
+ case 'A': res = A1;
+ break;
+ case 'C': res = C1;
+ break;
+ case 'G': res = G1;
+ break;
+ case 'T': res = T1;
+ break;
+ default:
+ errorMsg::reportError("error in codonUtility::nucDiffPlace.");
+ break;
+ }
+ }
+ if (s1[1] != s2[1]){
+ ++diffNum;
+ switch (s2[1])
+ {
+ case 'A': res = A2;
+ break;
+ case 'C': res = C2;
+ break;
+ case 'G': res = G2;
+ break;
+ case 'T': res = T2;
+ break;
+ default:
+ errorMsg::reportError("error in codonUtility::nucDiffPlace.");
+ break;
+ }
+ }
+ if (s1[2] != s2[2]){
+ ++diffNum;
+ switch (s2[2])
+ {
+ case 'A': res = A3;
+ break;
+ case 'C': res = C3;
+ break;
+ case 'G': res = G3;
+ break;
+ case 'T': res = T3;
+ break;
+ default:
+ errorMsg::reportError("error in codonUtility::nucDiffPlace.");
+ break;
+ }
+ }
+ if (diffNum == 0)
+ errorMsg::reportError("error in codonUtility::nucDiffPlace. Can't find different nucleotide");
+ if (diffNum > 1)
+ res = MUL_SUB; // multiple substitutions override the per-position result
+ return res;
+ }
+
+ //return the different nucleotides between the from and target codons.
+ //For example, nucsDiff(ATG, ACG) returns TC.
+ //Returns SAME for identical codons and DIFF when more than one position
+ //differs. Note: with several differences, from/to keep the LAST
+ //differing position, but the pair classification below is then
+ //discarded by the final DIFF override.
+ codonUtility::nucsDiffType codonUtility::nucsDiff(const int fromCodon, const int targetCodon, const codon &cod){
+ if (fromCodon == targetCodon)
+ return codonUtility::SAME;
+
+ codonUtility::nucsDiffType res = AC;
+ nucleotide nuc;
+ string s1 = cod.fromInt(fromCodon);
+ string s2 = cod.fromInt(targetCodon);
+
+ int diffNum = 0;
+ int from = 0;
+ int to = 0;
+ if (s1[0] != s2[0])
+ {
+ ++diffNum;
+ from = s1[0];
+ to = s2[0];
+ }
+ if (s1[1] != s2[1])
+ {
+ ++diffNum;
+ from = s1[1];
+ to = s2[1];
+ }
+ if (s1[2] != s2[2])
+ {
+ ++diffNum;
+ from = s1[2];
+ to = s2[2];
+ }
+ // map the unordered nucleotide pair {from,to} onto the enum
+ switch(from)
+ {
+ case 'A':
+ switch(to)
+ {
+ case 'G':res = AG;break;
+ case 'T':res = AT;break;
+ case 'C':res = AC;break;
+ default:
+ errorMsg::reportError("error in codonUtility::nucsDiff.");
+ break;
+ }
+ break;
+ case 'G':
+ switch(to)
+ {
+ case 'A':res = AG;break;
+ case 'T':res = GT;break;
+ case 'C':res = CG;break;
+ default:
+ errorMsg::reportError("error in codonUtility::nucsDiff.");
+ break;
+ }
+ break;
+ case 'C':
+ switch(to)
+ {
+ case 'G':res = CG;break;
+ case 'T':res = CT;break;
+ case 'A':res = AC;break;
+ default:
+ errorMsg::reportError("error in codonUtility::nucsDiff.");
+ break;
+ }
+ break;
+ case 'T':
+ switch(to)
+ {
+ case 'G':res = GT;break;
+ case 'A':res = AT;break;
+ case 'C':res = CT;break;
+ default:
+ errorMsg::reportError("error in codonUtility::nucsDiff.");
+ break;
+ }
+ break;
+ default:
+ errorMsg::reportError("error in codonUtility::nucsDiff.");
+ break;
+ }
+
+ if (diffNum == 0)
+ errorMsg::reportError("error in codonUtility::nucsDiff. Can't find different nucleotide");
+ if (diffNum > 1)
+ res = DIFF; // multiple substitutions override the pair classification
+ return res;
+ }
+
+
+
+ //Precompute the four codon-by-codon lookup tables used by the inline
+ //accessors in codon.h. Idempotent: returns immediately when the tables
+ //already match the alphabet size. trtv and syn/nonsyn are symmetric so
+ //only the lower triangle is computed; nucDiffPlace and nucsDiff are
+ //directional so both (i,j) and (j,i) are filled explicitly.
+ void codonUtility::initSubMatrices(const codon& cod){
+
+ if ((_trtvDiff.size() == cod.size()) && (_synNonsynDiff.size() == cod.size()) && (_nucDiffPlace.size() == cod.size()) && (_nucsDiff.size() == cod.size()))
+ return;
+
+ _trtvDiff.resize(cod.size());
+ _synNonsynDiff.resize(cod.size());
+ _nucDiffPlace.resize(cod.size());
+ _nucsDiff.resize(cod.size());
+ for (int i = 0; i < _trtvDiff.size(); ++i)
+ {
+ _trtvDiff[i].resize(cod.size());
+ _synNonsynDiff[i].resize(cod.size());
+ _nucDiffPlace[i].resize(cod.size());
+ _nucsDiff[i].resize(cod.size());
+
+ }
+ //resizeMatrix<diffType>(_trtvDiff, cod.size(), cod.size());
+ //resizeMatrix<replacementType>(_synNonsynDiff, cod.size(), cod.size());
+ //resizeMatrix<nucDiffPlaceType>(_nucDiffPlace, cod.size(), cod.size());
+ for (int i = 0; i < cod.size(); ++i){
+ for (int j =0; j <= i; ++j){
+ _trtvDiff[i][j] = _trtvDiff[j][i] = codonDiff(i, j, cod);
+ _synNonsynDiff[i][j] = _synNonsynDiff[j][i] = codonReplacement(i, j, cod);
+ _nucDiffPlace[i][j] = nucDiffPlace(i, j, cod);
+ _nucDiffPlace[j][i] = nucDiffPlace(j, i, cod);
+ _nucsDiff[i][j] = nucsDiff(i,j,cod);
+ _nucsDiff[j][i] = nucsDiff(j,i,cod);
+ }
+ }
+ }
+
+ //returns the number (codonCounter) and frequency (codonUsage) of each codon in the sequnece container
+ //Only specific (unambiguous, non-gap) codons are counted; the container
+ //must use the 61-codon alphabet.
+ void codonUtility::getCodonUsage(const sequenceContainer& sc, Vint& codonCounter, Vdouble& codonUsage)
+ {
+ if (sc.getAlphabet()->size() != 61)
+ errorMsg::reportError("cannot calculate codon usage when alphabet is not codon");
+ codonCounter.resize(61, 0);
+ codonUsage.resize(61, 0.0);
+ codon alph;
+ int sum = 0;
+ for (int s = 0; s < sc.numberOfSeqs();++s) {
+ int id = sc.placeToId(s);
+ for (int pos = 0; pos < sc.seqLen(); ++pos)
+ {
+ int cod = sc[id][pos];
+ if (alph.isSpecific(cod))
+ {
+ ++sum;
+ ++codonCounter[cod];
+ }
+ }
+ }
+
+ // NOTE(review): if no specific codon was found, sum is 0 and the
+ // division below produces non-finite frequencies -- confirm callers
+ // never pass an all-ambiguous alignment.
+ for (int c = 0; c < codonCounter.size(); ++c)
+ codonUsage[c] = static_cast<MDOUBLE>(codonCounter[c]) / sum;
+ }
+
+
+ //in codonUsageFile: only 3-letter-codon and frequency seperated by "\t"
+ //Fills codonUsage (indexed by codon code) from the file; empty lines
+ //are skipped and a zero frequency is replaced by EPSILON so later
+ //ratios never divide by zero.
+ void codonUtility::readCodonUsage(const string& codonUsageFileName, Vdouble& codonUsage,const codon &alph)
+ {
+ codonUsage.resize(alph.size(), 0.0);
+ ifstream inFile(codonUsageFileName.c_str());
+ vector<string> inFileData;
+ putFileIntoVectorStringArray(inFile, inFileData);
+ inFile.close();
+ if (inFileData.empty()){
+ errorMsg::reportError("unable to open file, or file is empty in codonUtility::readCodonUsage");
+ }
+
+ vector<string>::const_iterator it = inFileData.begin();
+ for (; it!= inFileData.end(); ++it)
+ {
+ if (it->empty()) //empty line
+ continue;
+ // split "CODON<tab>freq" at the first tab; startFreq skips the
+ // separator run (tabs and spaces) before the number
+ int endCodon = it->find_first_of("\t", 0);
+ int startFreq = it->find_first_not_of("\t ", endCodon);
+ if (startFreq>0)
+ {
+ string codonStr = it->substr(0, endCodon);
+ string freqStr = it->substr(startFreq);
+ MDOUBLE freq = string2double(freqStr);
+ if(freq == 0.0) freq = EPSILON;
+ codonUsage[alph.fromChar(codonStr, 0)] = freq;
+ }
+ }
+ }
+
+ //calculates the CAI for the whole MSA and for each position.
+ //The calculation is based on a pre-calculated codonUsage vector.
+ //The calculation is based on Sharp & Li (1987) NAR, 15:1281-1295
+ //cai4site receives the per-column CAI; the return value is the SUM of
+ //the per-column values (an arithmetic aggregate, see note below).
+ MDOUBLE codonUtility::calcCodonAdaptationIndex(const sequenceContainer& sc, const Vdouble& codonUsage, Vdouble& cai4site)
+ {
+ //the returned value: calculated as the average CAI for the MSA, rather than the geometrical mean as in Sharp & Li
+ MDOUBLE wholeAlignmentCai = 0.0;
+ codon alph;
+ amino am;
+ //1. calculate Wk = the frequency of codon k relative to the frequency of the optimal codon for that amino acid.
+ Vdouble Wk(codonUsage.size(), 0.0);
+ int aaId;
+ for (aaId = 0; aaId < am.size(); ++aaId)
+ {
+ Vint codonsOfAa = aminoUtility::codonOf(aaId, alph);
+ //finding the most frequent codon for this aa
+ MDOUBLE mostFrequent = 0.0;
+ Vint::const_iterator iter;
+ for (iter = codonsOfAa.begin(); iter != codonsOfAa.end(); ++iter)
+ {
+ if (codonUsage[*iter] > mostFrequent)
+ mostFrequent = codonUsage[*iter];
+ }
+
+ //calculating Wk
+ for (iter = codonsOfAa.begin(); iter != codonsOfAa.end(); ++iter)
+ Wk[*iter] = codonUsage[*iter] / mostFrequent;
+ }
+
+ //2. calculate CAI
+ cai4site.resize(sc.seqLen(), 0.0);
+ int pos;
+ for (pos = 0; pos < sc.seqLen(); ++pos)
+ {
+ MDOUBLE cai = 0.0;
+ int informativeCodons = 0;
+ for (int s = 0; s < sc.numberOfSeqs();++s)
+ {
+ int id = sc.placeToId(s);
+ int cod = sc[id][pos];
+ if(!alph.isSpecific(cod))
+ continue;
+ cai += Wk[cod];
+ ++informativeCodons;
+ }
+
+ // NOTE(review): a column with no specific codon leaves
+ // informativeCodons at 0, so this division yields a non-finite
+ // value -- confirm callers filter such columns.
+ cai /= static_cast<MDOUBLE>(informativeCodons);
+ cai4site[pos] = cai;
+ wholeAlignmentCai += cai;
+ }
+ return wholeAlignmentCai;
+ }
+
+
+
+ //A stop codon is any id that is neither a gap, nor unknown, nor one of
+ //the _alphabetSize sense codons.
+ bool codon::isStopCodon(const int in_id) const
+ {
+ const bool isSense = (in_id >= 0) && (in_id < _alphabetSize);
+ const bool isSpecial = (in_id == unknown()) || (in_id == gap());
+ return !(isSense || isSpecial);
+ }
+
+ //True when in_id was listed as an initiation codon in the code table.
+ bool codon::isInitiationCodon(const int in_id) const
+ {
+ map <int,string>::const_iterator itr = _initiationIndex2codon.find(in_id);
+ return itr != _initiationIndex2codon.end();
+ }
+
+
diff --git a/libs/phylogeny/codon.h b/libs/phylogeny/codon.h
new file mode 100644
index 0000000..56e478f
--- /dev/null
+++ b/libs/phylogeny/codon.h
@@ -0,0 +1,107 @@
+// $Id: codon.h 5975 2009-03-17 08:00:37Z rubi $
+#ifndef ____CODON
+#define ____CODON
+
+#include <cassert>
+#include "definitions.h"
+#include "errorMsg.h"
+#include "someUtil.h"
+#include "alphabet.h"
+#include "geneticCodeHolder.h"
+#include <map>
+class codon;
+
+class sequenceContainer;
+class codonUtility {
+public:
+ enum diffType {equal =0, tr, tv, twoTrs, twoTvs ,trtv, threesub};
+ static diffType codonDiff(const int c1, const int c2, const codon &cod);
+ static diffType codonDiff(const int c1, const int c2) {return _trtvDiff[c1][c2];}
+
+ enum replacementType {sameCodon=0, synonymous, non_synonymous};
+ static replacementType codonReplacement(const int c1, const int c2, const codon &cod);
+ static replacementType codonReplacement(const int c1, const int c2) {return _synNonsynDiff[c1][c2];}
+
+ enum nucDiffPlaceType {A1=0, A2, A3,C1, C2, C3, G1,G2,G3,T1,T2,T3, EQUAL, MUL_SUB};
+ static nucDiffPlaceType nucDiffPlace(const int fromCodon, const int targetCodon, const codon &cod);
+ static nucDiffPlaceType nucDiffPlace(const int fromCodon, const int targetCodon) {return _nucDiffPlace[fromCodon][targetCodon];}
+
+ enum nucsDiffType {AC=0, AG, AT, CG, CT, GT, SAME, DIFF}; //The difference between two codons: For exampe nucsDiff(ACT, ACG) returns GT. DIFF = more than one change.
+ static nucsDiffType nucsDiff(const int fromCodon, const int targetCodon, const codon &cod);
+ static nucsDiffType nucsDiff(const int fromCodon, const int targetCodon) {return _nucsDiff[fromCodon][targetCodon];}
+
+ static int aaOf(const int c1, const codon &cod);
+ static void initSubMatrices(const codon& cod);
+
+ //returns the number (codonCounter) and frequency (codonUsage) of each codon in the sequnece container
+ static void getCodonUsage(const sequenceContainer& sc, Vint& codonCounter, Vdouble& codonUsage);
+ static void readCodonUsage(const string& codonUsageFileName, Vdouble& codonUsage,const codon &inCodonAlpa);
+ //calculates the CAI for the whole MSA and for each position.
+ //The calculation is based on a pre-calculated codonUsage vector.
+ static MDOUBLE calcCodonAdaptationIndex(const sequenceContainer& sc, const Vdouble& codonUsage, Vdouble& cai4site);
+
+private:
+ static vector<vector<diffType> > _trtvDiff;
+ static vector<vector<replacementType> > _synNonsynDiff;
+ static vector<vector<nucDiffPlaceType> > _nucDiffPlace;
+ static vector<vector<nucsDiffType> > _nucsDiff;
+};
+
+
// Codon alphabet built from a genetic code (read from file or string).
// Id conventions (as used by the members below): 0.._alphabetSize-1 are the
// sense codons, -1 is a gap, 64 is "unknown", and the remaining ids (e.g. the
// stop codons) fall outside [0,_alphabetSize) -- see isStopCodon().
class codon : public alphabet {
public:
	explicit codon(); //default constructor: reads "nuclearCode.txt"
	explicit codon(const geneticCodeString& matrixFileString);
	virtual ~codon() {}
	// explicit codon( codon& other);
	codon& operator=(const codon& other);
	virtual alphabet* clone() const { return new codon(*this); }
	// Re-initializes the genetic code tables from the given file.
	void readMatrixFromFile(const string& matrixFileName);
	const map <string,string> & geneticCode()const {return _geneticCode;}
	int unknown() const {return 64;}
	int gap() const {return -1;}
	int size() const {return _alphabetSize;} // 3 stop codon excluded
	int stringSize() const {return 3;} // 3 letter code.
	vector<int> fromString(const string& str) const;
	// Stop codons are the ids outside [0,_alphabetSize) other than unknown/gap.
	bool isStopCodon(const int in_id) const;
	bool isStopCodon(const string& str) const {return isStopCodon(fromChar(str));};
	// Initiation codons are exactly the keys of _initiationIndex2codon.
	bool isInitiationCodon(const int in_id) const;
	bool isInitiationCodon(const string& str) const {return isInitiationCodon(fromChar(str));};
	// Converts the 3-letter codon starting at position pos of s to its id.
	int fromChar(const string& s, const int pos=0) const;
	string fromInt(const int in_id) const;
	// "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
	bool isSpecific(const int id) const {return (id>=0 && id < size());}


	// Compatibility relation used by likelihood computations: returns 1 when
	// charInSeq could be charToCheck (identical, or charInSeq is unknown),
	// 0 otherwise. Gaps (-1) and ids >= _alphabetSize (other than unknown)
	// are treated as fatal input errors.
	int relations(const int charInSeq, const int charToCheck) const{
		if (charInSeq == -1) {
			errorMsg::reportError("gaps in the sequences. Either change gaps to ? or remove gap positions");
		}
		else if (charInSeq == unknown()) return 1;
		else if (charInSeq == charToCheck) return 1;
		if (charInSeq >= _alphabetSize)
		{
			string err= "";
			err+="charInSeq = ";
			err += int2string(charInSeq);
			err+= " _alphabetSize = ";
			err+=int2string(_alphabetSize);
			errorMsg::reportError(err);
		}
		assert(charInSeq < _alphabetSize);
		return 0;
	}
private:
	void init(const geneticCodeString& matrixFileString);
private:
	map <string,string> _geneticCode; //key - codon, value - amino acid
	map <string,int> _codon2Int;//key string of codon int= integer value of codon
	map <int,string> _initiationIndex2codon;//key: integer value of codon; value: string of initiation codon. the keys is an integer so that the value of the init codon can be found
	int _alphabetSize;
};
+
+
+
+
+#endif
diff --git a/libs/phylogeny/codonJC.cpp b/libs/phylogeny/codonJC.cpp
new file mode 100644
index 0000000..7304a0c
--- /dev/null
+++ b/libs/phylogeny/codonJC.cpp
@@ -0,0 +1,6 @@
+// $Id: codonJC.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "codonJC.h"
+
+
+
diff --git a/libs/phylogeny/codonJC.h b/libs/phylogeny/codonJC.h
new file mode 100644
index 0000000..d4d413d
--- /dev/null
+++ b/libs/phylogeny/codonJC.h
@@ -0,0 +1,47 @@
+// $Id: codonJC.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___CODON_JC
+#define ___CODON_JC
+
+#include "replacementModel.h"
+#include <cmath>
+using namespace std;
+
// Precomputed constants for the Jukes-Cantor model over the 61 sense codons.
namespace codonDef {
	const MDOUBLE Alp = 61.0; // alphabet size: 61 sense codons (3 stops excluded)
	const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet
	const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl;
	const MDOUBLE alDiv_omalp = Alp/(Alp-1.0); // Alp divided by (Alp-1)
	const MDOUBLE m_alDiv_omalp = -alDiv_omalp; // minus alDiv_omalp
}
+
+class codonJC : public replacementModel {
+public:
+
+ virtual replacementModel* clone() const { return new codonJC(*this); }// see note down:
+ const int alphabetSize() const {return 61;}
+
+ explicit codonJC(){};
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
+ return ((i==j) ? codonDef::odAl+codonDef::om_odAl*exp(codonDef::m_alDiv_omalp*d): codonDef::odAl-codonDef::odAl*exp(codonDef::m_alDiv_omalp*d));
+ }
+
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+ return ((i==j) ? -exp(codonDef::m_alDiv_omalp*d): exp(codonDef::m_alDiv_omalp*d)/(codonDef::Alp-1));
+ }
+ const MDOUBLE freq(const int i) const {return codonDef::odAl;};
+
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+ return ((i==j) ? codonDef::alDiv_omalp*exp(codonDef::m_alDiv_omalp*d): codonDef::m_alDiv_omalp*exp(codonDef::m_alDiv_omalp*d));
+ }
+
+};
+
+#endif
+
// note: according to the covariant-return-type C++ rules, the clone function should be like this:
// virtual codonJC* clone() const { return new codonJC(*this); }
// however, not all compilers support it yet. look at More Effective C++ page 126.
+
+
+
diff --git a/libs/phylogeny/codonUtils.cpp b/libs/phylogeny/codonUtils.cpp
new file mode 100644
index 0000000..555b9cc
--- /dev/null
+++ b/libs/phylogeny/codonUtils.cpp
@@ -0,0 +1,340 @@
+#include "codonUtils.h"
+#include "numRec.h"
+#include <algorithm>
+
+
+
// Prints the command-line usage screen (input options, model/optimization
// switches, parameter fixing, and output files) to stdout.
void printHelp(){

	cout <<"+-------------------------------------------------------+"<<endl;
	cout <<"Input:"<<endl;
	cout <<"---------------------------------------------------------"<<endl;
	cout <<"-i input codon-aligned sequence file "<<endl;
	cout <<" (accepted formats: Fasta, Clustal)"<<endl;
	cout <<"-q name of query sequence (default=1st in file)"<<endl;
	cout <<"---------------------------------------------------------"<<endl;
	cout <<"Advanced options:"<<endl;
	cout <<"---------------------------------------------------------"<<endl;
	cout <<"-u input user tree in Newick format"<<endl;
	cout <<"-g genetic code (default: nuc. standard)"<<endl;
	cout <<" Nuclear:"<<endl;
	cout <<" 0: standard , 1: Blepharisma"<<endl;
	cout <<" 2: Ciliate, 3: Euplotid"<<endl;
	cout <<" Mitochondria:"<<endl;
	cout <<" 4: Vertebrate, 5: Ascidian, 6: Echinoderm"<<endl;
	cout <<" 7: Flatworm, 8: Invertebrate"<<endl;
	cout <<" 9: Protozoan, 10: Yeast"<<endl;
	cout <<"-m method:"<<endl;
	cout <<" -mb bayesian (default)"<<endl;
	cout <<" -ml maximum likelihood"<<endl;
	cout <<"-d prior bayesian distribution"<<endl;
	cout <<" -db beta+w (default)"<<endl;
	cout <<" -dg gamma"<<endl;
	cout <<"-n No. of categories for discrete distr.(default=8)"<<endl;
	cout <<"-bn no branch length optimization"<<endl;
	cout <<"-e epsilon for likelihood optimization (default=0.1)"<<endl;
	cout <<" (the smaller the value, the higher the precision)"<<endl;
	cout <<"-j number of optimization iterations (default=5)"<<endl;
	cout <<"--------------------------------------------------------"<<endl;
	cout <<"For fixing the parameters, or running a specific model:"<<endl;
	cout <<"--------------------------------------------------------"<<endl;
	cout <<"(Note that if one of the below options is not used,"<<endl;
	cout <<"the default run will be of the M8 model)"<<endl;
	cout <<"-w initial value of additional w category (-w1=M8a model)"<<endl;
	cout <<"-p initial probability of beta distribution"<<endl;
	cout <<"-a initial alpha value"<<endl;
	cout <<"-x initial beta value"<<endl;
	cout <<"-k initial kappa value"<<endl;
	cout <<"-Fw fixed value of additional omega value"<<endl;
	cout <<"-Fp fixed probability of beta distribution"<<endl;
	cout <<"-Fa fixed alpha value"<<endl;
	cout <<"-Fx fixed beta value"<<endl;
	cout <<"-Fk fixed kappa"<<endl;
	cout <<"** For the M8a model, type -w1 -Fw"<<endl;
	cout <<"** For the M7 model, type -p1 -Fp"<<endl;
	cout <<"--------------------------------------------------------"<<endl;
	cout <<"Output files:"<<endl;
	cout <<"--------------------------------------------------------"<<endl;
	cout <<"-l log file"<<endl;
	cout <<"-r results output file with Ka/Ks, CI and posterior"<<endl;
	cout <<"-o output file with likelihood and optimized params"<<endl;
	cout <<"-s Rasmol script for coloring a 3D molecule, if available"<<endl;
	cout <<"-c output color bin file (site to color, according to web server colors)"<<endl;
	cout <<"-t output tree file"<<endl;
	cout <<"---------------------------------------------------------"<<endl;
	cout <<"-h or -? or -H help"<<endl;
	cout <<"lowercase and uppercase letters are both ok"<<endl;
	cout <<"---------------------------------------------------------+"<<endl;
}
+
+
+//check that the input sequences are divisable by 3
+void checkInputSeqLength(string codonFile){
+ nucleotide alph;
+ ifstream in(codonFile.c_str());
+ sequenceContainer inputSc = recognizeFormat::readUnAligned(in, &alph);
+ in.close();
+ int i;
+ for (i = 0; i < inputSc.numberOfSeqs(); ++i){
+ int seqLen = inputSc[i].seqLen();
+ if ((seqLen % 3) != 0){
+ string textToPrint = "USER ERROR: unable to read sequence: " + inputSc[i].name() + "\nSequence length is not divisable by three";
+ errorMsg::reportError(textToPrint);
+ }
+ }
+}
+
+//this function convert codon sequences to amino sequences.
+sequenceContainer convertCodonToAmino(sequenceContainer &codonSc,codon *codonAlph){
+ amino aaAlph;
+ sequenceContainer aaSc;
+ for (int i = 0; i < codonSc.numberOfSeqs(); ++i){
+ sequence codonSeq = codonSc[i];
+ sequence aaSeq("", codonSeq.name(), codonSeq .remark(), codonSeq.id(), &aaAlph);
+ for (int pos = 0; pos < codonSeq .seqLen(); ++pos)
+ aaSeq.push_back(codonUtility::aaOf(codonSeq[pos],*codonAlph));
+ aaSc.add(aaSeq);
+ }
+ if (codonSc.numberOfSeqs() != aaSc.numberOfSeqs())
+ errorMsg::reportError("RevTrans: number of codon and Amino sequences is not the same");
+
+ return aaSc;
+}
+
+// normalize the Q matrix so average rate of substitution = 1
+void normalizeMatrices(vector<stochasticProcess> & spVec,const distribution * forceDistr){
+ MDOUBLE sumPijQij=0.0;
+ int categor;
+ for ( categor=0; categor<forceDistr->categories();categor++)
+ sumPijQij+=forceDistr->ratesProb(categor)*static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->sumPijQij();
+ if (sumPijQij ==0){
+ errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0");
+ }
+ for (categor=0; categor<forceDistr->categories();categor++)
+ static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->norm(1/sumPijQij);
+
+}
+
// F3x4 codon frequencies: empirical nucleotide frequencies are computed
// separately for each of the three codon positions, each codon's frequency is
// the product of its three positional nucleotide frequencies, and the
// probability mass that would have gone to the (excluded) stop codons is
// redistributed uniformly over the sense codons.
Vdouble freqCodonF3x4(const sequenceContainer &nucSc, codon * coAlph){
	VVdouble nucFeqPos(3); // per-codon-position nucleotide counts, later frequencies
	int pos= 0;
	int nPos = 0;
	for (nPos=0;nPos<3;nPos++)
		nucFeqPos[nPos].resize(nucSc.alphabetSize(),0.0);

	// count nucleotides per codon position (pos%3) over all sequences
	sequenceContainer::constTaxaIterator tIt;
	sequenceContainer::constTaxaIterator tItEnd;
	tIt.begin(nucSc);
	tItEnd.end(nucSc);
	while (tIt!= tItEnd) {
		pos = 0;
		sequence::constIterator sIt;
		sequence::constIterator sItEnd;
		sIt.begin(*tIt);
		sItEnd.end(*tIt);
		while (sIt != sItEnd) {
			if ((*sIt >= 0) && (*sIt <nucFeqPos[pos%3].size())) ++nucFeqPos[pos%3][(*sIt)];
			if (*sIt == 4) ++nucFeqPos[pos%3][3]; //for T (4) to U (3)
			// NOTE(review): id 4 is folded into slot 3 -- presumably the alphabet
			// encodes T and U with distinct ids; confirm against nucleotide.h.
			++sIt;
			++pos;
		}
		++tIt;
	}
	for (nPos=0;nPos<3;nPos++)
		changeCountsToFreqs(nucFeqPos[nPos]);


	Vdouble freqCodon(coAlph->size(),0.0);

	// codon frequency = product of its positional nucleotide frequencies
	nucleotide n;
	for (int c = 0; c<freqCodon.size();c++){

		string s = coAlph->fromInt(c);
		int nuc0 = n.fromChar(s[0]);
		int nuc1 = n.fromChar(s[1]);
		int nuc2 = n.fromChar(s[2]);
		freqCodon[c] = nucFeqPos[0][nuc0]*nucFeqPos[1][nuc1]*nucFeqPos[2][nuc2];
	}

	// the sense-codon frequencies sum to less than 1 (the stop codons got no
	// entry); spread the missing mass evenly over all sense codons
	MDOUBLE sum=0;
	for (int i=0;i<coAlph->size();i++){
		sum+=freqCodon[i];
	}
	MDOUBLE stopFreq = 1.0 - sum;
	MDOUBLE ep = stopFreq/coAlph->size();
	for (int i=0;i<coAlph->size();i++){
		freqCodon[i]+=ep;
	}

	return freqCodon;


}
+
+
+/***********************************************
+ The following functions are useful for the selecton server, for creating a
+ Rasmol script and for setting the color value of each site
+ ***********************************************/
+
+
// Builds the 7 RGB color bins used by the selecton server: dark yellow for
// significant positive selection, light yellow for non-significant positive
// selection, then white through dark bordeaux for purifying selection.
vector<vector<int> > create7ColorValues(){
	// RGB triplets, ordered from bin 0 (strongest positive selection)
	// to bin 6 (strongest purifying selection).
	const int rgb[7][3] = {
		{255, 220,   0},  // yellow - positive significant
		{255, 255, 120},  // light yellow - positive, not significant
		{255, 255, 255},  // white - weakest purifying shade
		{252, 237, 244},
		{250, 201, 222},
		{240, 125, 171},
		{130,  67,  96}   // dark bordeaux - purifying significant
	};
	vector<vector<int> > colorsValue(7);
	for (int bin = 0; bin < 7; ++bin) {
		colorsValue[bin].resize(3);
		for (int channel = 0; channel < 3; ++channel)
			colorsValue[bin][channel] = rgb[bin][channel];
	}
	return colorsValue;
}
+
// Creates a Rasmol coloring script from the per-site color bins in color4Site
// (assumes alignment positions match the PDB residue numbering): residues are
// grouped by bin, selected in batches of 6 per "select" line, and spacefilled
// in the bin's RGB color. Unbinned residues get a default gray [200,200,200].
void outToRasmolFile(string fileName,vector<int>& color4Site){
	ofstream out(fileName.c_str());
	vector<vector<int> > colorsValue = create7ColorValues();
	int numberOfColor = colorsValue.size();
	vector<vector<int> > colors; //for each color (1-9/3) holds vector of sites.
	colors.resize(numberOfColor+1);
	int i;
	// bucket each site (1-based residue number) by its color bin
	for (i=0;i<color4Site.size();i++){
		int color=color4Site[i];
		if (color>numberOfColor){
			errorMsg::reportError("Error in outToColorFile - unknown color");
		}
		colors[color].push_back(i+1); //add site (position in the vector +1)
	}
	out<<"select all"<<endl;
	out<<"color [200,200,200]"<<endl<<endl;

	for (int c=1;c<numberOfColor+1;c++){
		out<<"select ";
		for (i=0;i<colors[c].size();i++){
			if (i==0)
				out<<colors[c][i];
			else if ((i+1)%6==0)
				// start a new line every 6 residues, extending the selection
				out<<endl<<"select selected or "<<colors[c][i];

			else out<<" , "<<colors[c][i];
		}
		out<<endl<<"select selected and :a"<<endl;
		out<<"color [" <<colorsValue[c-1][0]<<","<<colorsValue[c-1][1]<<","<<colorsValue[c-1][2]<<"]"<<endl;
		out<<"spacefill"<<endl<<endl;
	}

	out.close();
}
+
+
// Writes a file mapping each reference-sequence site to a color bin (1-7)
// derived from its Ka/Ks value: bin 1 = positive selection with confidence
// (CI lower bound > 1), bin 2 = Ka/Ks > 1 but not significant, bins 3-7 =
// Ka/Ks < 1 split into 5 percentile groups (7 = strongest purifying).
// Gap positions in the reference sequence are skipped and the site numbering
// is compacted accordingly. Output line format: site, amino acid, color bin.
void kaks2Color(const Vdouble & kaksVec, const Vdouble &lowerBoundV,
				const sequence & refSeq, string fileName,codon *co) {
	vector<int> colors;
	int numOfSitesinAln = kaksVec.size();
	Vdouble negativesKaksVec,negativesSite;
	negativesKaksVec.clear();
	negativesSite.clear();
	int i,gapsInRefSeq=0;

	// count reference-sequence gaps (aaOf == -1) to size the colors vector
	for (i=0;i<numOfSitesinAln;i++){
		if (codonUtility::aaOf(refSeq[i],*co) == -1) gapsInRefSeq++;
	}

	// first dealing with positive selection
	colors.resize(numOfSitesinAln-gapsInRefSeq);
	int gap=0;
	for (i=0;i<numOfSitesinAln;i++){
		if (codonUtility::aaOf(refSeq[i],*co) == -1){
			gap++;
			continue;
		}
		if (lowerBoundV[i]>1) // color 1 (positive selection) : if confidence interval lower bound > 1
			colors[i-gap]=1;
		else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant"
			colors[i-gap]=2;
		else {
			negativesKaksVec.push_back(kaksVec[i]); //add the value of kaks < 1
			negativesSite.push_back(i-gap); //add the number of site of the kaks
		}

	}

	// now dealing with purifying selection: sort the Ka/Ks < 1 values and
	// compute the lower boundary of each of the 5 percentile groups
	Vdouble orderVec = negativesKaksVec;
	if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly)
		sort(orderVec.begin(), orderVec.end()); //sort the kaks values to be divided to 5 groups
	MDOUBLE percentileNum = 5.0;
	int percentileNumInt = 5;
	Vdouble maxScoreForPercentile(percentileNumInt);
	if (orderVec.size()>0) {
		maxScoreForPercentile[0] = orderVec[0];
		for (int c = 1; c < percentileNumInt; ++c){
			int place = (int)((c / percentileNum) * negativesKaksVec.size());
			MDOUBLE maxScore = orderVec[place];
			maxScoreForPercentile[c] = maxScore;
		}
	}

	//loop over all the Ka/Ks < 1: assign bins 3 (mildest) .. 7 (strongest)
	for (int j=0; j < negativesKaksVec.size(); ++j){
		MDOUBLE r = negativesKaksVec[j]; //the kaks of the site.
		int s = (int)negativesSite[j]; //the site.
		if (r > maxScoreForPercentile[4])
			colors[s] = 3;
		else if (r > maxScoreForPercentile[3])
			colors[s] = 4;
		else if (r> maxScoreForPercentile[2])
			colors[s] = 5;
		else if (r > maxScoreForPercentile[1])
			colors[s] = 6;
		else if (r >= maxScoreForPercentile[0])
			colors[s] = 7;
	}
	//print to file: gap-compacted site number, amino acid, color bin
	ofstream out(fileName.c_str());
	gap=0;
	amino aminoAcid;
	LOG(5,<<"Printing selection color bins to file"<<endl);
	for (i=0;i<refSeq.seqLen();i++){
		int aa = codonUtility::aaOf(refSeq[i], *co);
		if (aa==-1){
			gap++;
			continue;
		}
		string aaStr = aminoAcid.fromInt(aa);
		out<<i+1-gap <<"\t"<<aaStr<<"\t"<<colors[i-gap];
		out<<endl;
	}
	out.close();
}
diff --git a/libs/phylogeny/codonUtils.h b/libs/phylogeny/codonUtils.h
new file mode 100644
index 0000000..63f3abc
--- /dev/null
+++ b/libs/phylogeny/codonUtils.h
@@ -0,0 +1,36 @@
+#ifndef CODON_UTILS_H
+#define CODON_UTILS_H
+
+#include <iostream>
+#include "nucleotide.h"
+#include "codon.h"
+#include "amino.h"
+#include "logFile.h"
+#include "fastaFormat.h"
+#include "clustalFormat.h"
+#include "recognizeFormat.h"
+#include "someUtil.h"
+#include "definitions.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "wYangModel.h"
+#include "evaluateCharacterFreq.h"
+#include "geneticCodeHolder.h"
+#include "codon.h"
+using namespace std;
+
+
+void printHelp();
+void checkInputSeqLength(string codonFile);
+sequenceContainer convertCodonToAmino(sequenceContainer &codonSc,codon *codonAlph);
+vector<vector<int> > create7ColorValues();
+void outToRasmolFile(string fileName,vector<int>& color4Site);
+
+void normalizeMatrices(vector<stochasticProcess> & spVec,const distribution * forceDistr);
+
+Vdouble freqCodonF3x4(const sequenceContainer &nucSc,codon *coAlph);
+
+void kaks2Color(const Vdouble & kaksVec,const Vdouble &lowerBoundV,
+ const sequence & refSeq, string fileName,codon *co);
+
+#endif
diff --git a/libs/phylogeny/computeCounts.cpp b/libs/phylogeny/computeCounts.cpp
new file mode 100644
index 0000000..a56fafb
--- /dev/null
+++ b/libs/phylogeny/computeCounts.cpp
@@ -0,0 +1,142 @@
+// $Id: computeCounts.cpp 4583 2008-08-05 15:02:26Z cohenofi $
+
+#include "computeCounts.h"
+void computeCounts::computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc,
+ const computePijHom& pi,
+ const stochasticProcess& sp,
+ const suffStatGlobalHomPos& cup,
+ const suffStatGlobalHomPos& cdown,
+ const MDOUBLE weight,
+ const doubleRep posProb,
+ const tree::nodeP nodeSon,
+ countTableComponentHom& _ctc,
+ const MDOUBLE rateCategorProb
+ )
+{
+ assert(posProb>0.0);
+ if (weight == 0) return;
+ int alph1,alph2;
+ for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) {
+ for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) {
+ doubleRep tmp = cup.get(nodeSon->id(),alph1) *
+ cdown.get(nodeSon->id(),alph2) *
+ pi.getPij(nodeSon->id(),alph1,alph2)*
+ sp.freq(alph1)
+ * rateCategorProb
+ /
+ posProb;
+ _ctc.addToCounts(alph1,alph2,convert(tmp)*weight);
+ }
+ }
+}
+
// Fixed-root variant: like the overload above, but the down statistics were
// computed conditional on a specific root letter, so freq(letterInRoot) is
// used instead of freq(alph1).
void computeCounts::computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc,
								const computePijHom& pi,
								const stochasticProcess& sp,
								const suffStatGlobalHomPos& cup,
								const suffStatGlobalHomPos& cdown,
								const MDOUBLE weight,
								const doubleRep posProb,
								const tree::nodeP nodeSon,
								countTableComponentHom& _ctc,
								const MDOUBLE rateCategorProb,
								const int letterInRoot
								)
{
	assert(posProb>0.0);
	if (weight == 0) return;
	int alph1,alph2;
	for (alph1 =0; alph1< pi.alphabetSize(); ++alph1) {
		for (alph2 =0; alph2< pi.alphabetSize(); ++alph2) {
			// NOTE(review): sp.freq(letterInRoot) is deliberately multiplied in
			// TWICE (once as the fixed-root prior, once "to account for the
			// additional letterAtRoot loop" in the caller) -- confirm against
			// the calling code before changing.
			doubleRep tmp = cup.get(nodeSon->id(),alph1) *
				cdown.get(nodeSon->id(),alph2) * // down was given with specific root
				pi.getPij(nodeSon->id(),alph1,alph2)*
				sp.freq(letterInRoot) // fixed root
				* rateCategorProb
				* sp.freq(letterInRoot) // to account for the additional letterAtRoot loop
				/posProb;
			_ctc.addToCounts(alph1,alph2,convert(tmp)*weight);
		}
	}
}
+
+
+
+void computeCounts::fillCountTableComponentGam(countTableComponentGam& ctcGam,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const computePijGam& pij0,
+ const suffStatGlobalGam& cup,
+ const suffStatGlobalGam& cdown,
+ const Vdouble * weights,
+ tree::nodeP nodeSon,
+ const VdoubleRep& posProbVec) {
+ ctcGam.countTableComponentAllocatePlace(sp.alphabetSize(),sp.categories());
+ for (int rateCat =0; rateCat< sp.categories(); ++ rateCat) {
+ fillCountTableComponentGamSpecRateCategor(rateCat,ctcGam[rateCat],sp,
+ sc,pij0[rateCat],
+ cup,cdown,weights,posProbVec,nodeSon);
+ }
+}
+
+void computeCounts::fillCountTableComponentGamSpecRateCategor(const int rateCategor,
+ countTableComponentHom& ctcHom,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const computePijHom& pi,
+ const suffStatGlobalGam& cup,
+ const suffStatGlobalGam& cdown,
+ const Vdouble * weights,
+ const VdoubleRep& posProbVec, //prob of the position with gamma
+ tree::nodeP nodeSon) {
+ computeCounts cc;
+ for (int pos = 0; pos < sc.seqLen(); ++pos) {
+ MDOUBLE weig = (weights ? (*weights)[pos] : 1.0);
+ cc.computeCountsNodeFatherNodeSonHomPos(sc,pi,sp,cup[pos][rateCategor],
+ cdown[pos][rateCategor],
+ weig,posProbVec[pos],nodeSon,
+ ctcHom,sp.ratesProb(rateCategor));
+ }
+}
+/*
+void computeCounts::computeCountsNodeXNodeYHomPos(
+ const tree::nodeP nodeX,
+ const tree::nodeP nodeY) {
+
+ const tree::nodeP nodeFather = nodeSon->father();
+ _ctc.zero();
+ if (_weight!=NULL) { // this is one of the MAIN LOOPS. no "if"s deep inside it!
+ for (int pos=0; pos< _pi.seqLen(); ++pos) {
+ if ((*_weight)[pos] == 0) continue;
+ for (int alph1 =0; alph1< _pi.alphabetSize(); ++alph1) {
+ for (int alph2 =0; alph2< _pi.alphabetSize(); ++alph2) {
+ for (int rate =0; rate< _pi.categories(); ++rate) {
+ MDOUBLE tmp = _cup.get(nodeSon->id(),pos,rate,alph1) *
+ _cdown.get(nodeSon->id(),pos,rate,alph2) *
+ _pi.pij(pos)->getPij(nodeSon->id(),alph1,alph2,rate)*
+ _pi.stocProcessFromPos(pos)->freq(alph1)/
+ _cprobAtEachPos.getProb(pos);
+ _ctc.addToCounts(alph1,alph2,rate,tmp*(*_weight)[pos]);
+ }
+ }
+ }
+ }
+ }
+ else {
+ for (int pos=0; pos< _pi.seqLen(); ++pos) {
+ for (int alph1 =0; alph1< _pi.alphabetSize(); ++alph1) {
+ for (int alph2 =0; alph2< _pi.alphabetSize(); ++alph2) {
+ for (int rate =0; rate< _pi.categories(); ++rate) {
+ MDOUBLE tmp = _cup.get(nodeSon->id(),pos,rate,alph1) *
+ _cdown.get(nodeSon->id(),pos,rate,alph2) *
+ _pi.pij(pos)->getPij(nodeSon->id(),alph1,alph2,rate)*
+ _pi.stocProcessFromPos(pos)->freq(alph1)/
+ _cprobAtEachPos.getProb(pos);
+ _ctc.addToCounts(alph1,alph2,rate,tmp);
+ }
+ }
+ }
+ }
+ }
+ */
+
diff --git a/libs/phylogeny/computeCounts.h b/libs/phylogeny/computeCounts.h
new file mode 100644
index 0000000..7de0c5e
--- /dev/null
+++ b/libs/phylogeny/computeCounts.h
@@ -0,0 +1,68 @@
+// $Id: computeCounts.h 4545 2008-07-30 18:37:25Z cohenofi $
+
+// version 1.00
+// last modified 3 Nov 2002
+
+#ifndef ___COMPUTE_COUNTS
+#define ___COMPUTE_COUNTS
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "sequenceContainer.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+
+// things included for the function "fillCountTableComponentGam"
+#include "sequenceContainer.h"
+
// Computes expected substitution-count tables for the branch above a node
// from the "up" and "down" conditional-likelihood sufficient statistics
// (used by the BBL-EM branch-length optimization).
class computeCounts {
public:
	explicit computeCounts() {};
	// Per-position expected (father,son) state counts; root state integrated
	// out via sp.freq().
	void computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc,
								const computePijHom& pi,
								const stochasticProcess& sp,
								const suffStatGlobalHomPos& cup,
								const suffStatGlobalHomPos& cdown,
								const MDOUBLE weight,
								const doubleRep posProb,
								const tree::nodeP nodeSon,
								countTableComponentHom& _ctc,
								const MDOUBLE rateCategorProb = 1.0); //CODE_RED
	// Fixed-root variant: cdown was computed conditional on letterInRoot.
	void computeCountsNodeFatherNodeSonHomPos(const sequenceContainer& sc,
								const computePijHom& pi,
								const stochasticProcess& sp,
								const suffStatGlobalHomPos& cup,
								const suffStatGlobalHomPos& cdown,
								const MDOUBLE weight,
								const doubleRep posProb,
								const tree::nodeP nodeSon,
								countTableComponentHom& _ctc,
								const MDOUBLE rateCategorProb,
								const int letterInRoot);



	// Fills the full per-rate-category count table for the branch above nodeSon.
	void fillCountTableComponentGam(countTableComponentGam& ctcGam,
						const stochasticProcess& sp,
						const sequenceContainer& sc,
						const computePijGam& pij0,
						const suffStatGlobalGam& cup,
						const suffStatGlobalGam& cdown,
						const Vdouble * weights,
						tree::nodeP nodeSon,
						const VdoubleRep& posProbVec);

	// Accumulates the counts of a single rate category over all positions.
	void fillCountTableComponentGamSpecRateCategor(const int rateCategor,
						countTableComponentHom& ctcHom,
						const stochasticProcess& sp,
						const sequenceContainer& sc,
						const computePijHom& pi,
						const suffStatGlobalGam& cup,
						const suffStatGlobalGam& cdown,
						const Vdouble * weights,
						const VdoubleRep& posProbVec, //prob of the position with gamma
						tree::nodeP nodeSon);
};
+
+
+#endif
diff --git a/libs/phylogeny/computeDownAlg.cpp b/libs/phylogeny/computeDownAlg.cpp
new file mode 100644
index 0000000..c7dfa0b
--- /dev/null
+++ b/libs/phylogeny/computeDownAlg.cpp
@@ -0,0 +1,221 @@
+// $Id: computeDownAlg.cpp 4585 2008-08-05 15:02:58Z cohenofi $
+
+#include "definitions.h"
+#include "computeDownAlg.h"
+#include "treeIt.h"
+
+
// Fills ssc with the "down" conditional likelihoods for one alignment
// position, using the precomputed branch transition probabilities in pi.
// From the formulas below, ssc(node, letter) conditions on the FATHER of
// `node` being in state `letter`: it combines the father's own down values
// (through the father's branch) with the "up" values of the node's brothers.
void computeDownAlg::fillComputeDown(const tree& et,
					const sequenceContainer& sc,
					const int pos,
					const computePijHom& pi,
					suffStatGlobalHomPos& ssc,
					const suffStatGlobalHomPos& cup){
	ssc.allocatePlace(et.getNodesNum(), pi.alphabetSize());
	treeIterTopDownConst tIt(et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		int letter,letterInFather,bro,letterInSon;
		if (mynode->father()==NULL) {// if root
			// the root has no branch above it: all its down values are 1.0.
			// The iterator is then advanced IN PLACE so the remainder of this
			// same loop iteration processes the next (non-root) node.
			for(letter=0; letter<pi.alphabetSize();letter++) {
				ssc.set(mynode->id(),letter,1.0);
			}
			mynode = tIt.next(); //continue
		}
		tree::nodeP fatherNode=mynode->father();
		const int n_bro=fatherNode->getNumberOfSons();
		for(letter=0; letter<pi.alphabetSize();letter++) {//alpha
			doubleRep totalProb=1.0;
			doubleRep fatherTerm=0;
			if (fatherNode->father()!=NULL) {
				// sum over the grandfather's states: Pij * down(father)
				for(letterInFather=0; letterInFather<pi.alphabetSize();letterInFather++)
					fatherTerm += pi.getPij(fatherNode->id(),letter,letterInFather)*
					ssc.get(fatherNode->id(),letterInFather);
			}
			else {
				fatherTerm=1.0; // father is the root: nothing above contributes
			}
			// product over brothers: sum over each brother's states of Pij * up(brother)
			doubleRep brotherTerm=1.0;
			for(bro = 0; bro < n_bro; bro++) {
				tree::nodeP brother = fatherNode->getSon(bro);
				if (brother != mynode) {
					doubleRep tmp_bro=0.0;
					for(letterInSon=0; letterInSon<pi.alphabetSize();letterInSon++) {
						tmp_bro+=pi.getPij(fatherNode->getSon(bro)->id(),letter,letterInSon)*
						cup.get(brother->id(),letterInSon);
					}
					brotherTerm *=tmp_bro;
				}
			}
			totalProb = fatherTerm * brotherTerm;
			ssc.set(mynode->id(),letter,totalProb);
		}
	}
}
+
+
//use Pij(t) from the stochastic process instead of precomputed probabilities (via the computePijHom class)
// Same recursion as the overload above, but transition probabilities are
// evaluated on the fly as sp.Pij_t(i, j, branchLength * globalRate).
void computeDownAlg::fillComputeDown(const tree& et,
					const sequenceContainer& sc,
					const int pos,
					const stochasticProcess& sp,
					suffStatGlobalHomPos& ssc,
					const suffStatGlobalHomPos& cup){
	ssc.allocatePlace(et.getNodesNum(), sp.alphabetSize());
	treeIterTopDownConst tIt(et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		int letter, letterInFather, bro, letterInSon;
		if (mynode->isRoot()) {// if root: set all values to 1.0
			for(letter = 0; letter < sp.alphabetSize(); letter++) {
				ssc.set(mynode->id(), letter, 1.0);
			}
			// advance in place; the rest of this iteration handles the next node
			mynode = tIt.next(); //continue
		}
		tree::nodeP fatherNode = mynode->father();
		const int n_bro = fatherNode->getNumberOfSons();
		for(letter = 0; letter < sp.alphabetSize(); letter++) {
			doubleRep totalProb=1.0;
			doubleRep fatherTerm=0;
			if (fatherNode->isRoot())
			{
				fatherTerm = 1.0; // father is the root: nothing above contributes
			}
			else
			{
				for(letterInFather = 0; letterInFather < sp.alphabetSize(); letterInFather++)
				{
					// branch length above the father, scaled by the global rate
					MDOUBLE dist = fatherNode->dis2father() * sp.getGlobalRate();
					fatherTerm += sp.Pij_t(letter, letterInFather, dist)
						* ssc.get(fatherNode->id(), letterInFather);
				}
			}
			doubleRep brotherTerm = 1.0;
			for(bro = 0; bro < n_bro; bro++) {
				tree::nodeP brother = fatherNode->getSon(bro);
				if (brother != mynode) {
					doubleRep tmp_bro=0.0;
					for(letterInSon = 0; letterInSon < sp.alphabetSize(); letterInSon++)
					{
						MDOUBLE dist = brother->dis2father() * sp.getGlobalRate();
						tmp_bro += sp.Pij_t(letter, letterInSon, dist)
							* cup.get(brother->id(), letterInSon);
					}
					brotherTerm *= tmp_bro;
				}
			}
			totalProb = fatherTerm * brotherTerm;
			ssc.set(mynode->id(), letter, totalProb);
		}
	}
}
+
+
//compute probabilities with a site-specific rate
// Same recursion as fillComputeDown(sp), with every branch length additionally
// multiplied by the site-specific rate gRate.
void computeDownAlg::fillComputeDownSpecificRate(const tree& et,
					const sequenceContainer& sc,
					const int pos,
					const stochasticProcess& sp,
					suffStatGlobalHomPos& ssc,
					const suffStatGlobalHomPos& cup,
					const MDOUBLE gRate){
	ssc.allocatePlace(et.getNodesNum(), sp.alphabetSize());
	treeIterTopDownConst tIt(et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		int letter, letterInFather, bro, letterInSon;
		if (mynode->isRoot()) {// if root: set all values to 1.0
			for(letter = 0; letter < sp.alphabetSize(); letter++) {
				ssc.set(mynode->id(), letter, 1.0);
			}
			// advance in place; the rest of this iteration handles the next node
			mynode = tIt.next(); //continue
		}
		tree::nodeP fatherNode = mynode->father();
		const int n_bro = fatherNode->getNumberOfSons();
		for(letter = 0; letter < sp.alphabetSize(); letter++) {
			doubleRep totalProb=1.0;
			doubleRep fatherTerm=0;
			if (fatherNode->isRoot())
			{
				fatherTerm = 1.0; // father is the root: nothing above contributes
			}
			else
			{
				for(letterInFather = 0; letterInFather < sp.alphabetSize(); letterInFather++)
				{
					// branch length scaled by both the site rate and the global rate
					MDOUBLE dist = fatherNode->dis2father() * gRate * sp.getGlobalRate();
					fatherTerm += sp.Pij_t(letter, letterInFather, dist)
						* ssc.get(fatherNode->id(), letterInFather);
				}
			}
			doubleRep brotherTerm = 1.0;
			for(bro = 0; bro < n_bro; bro++) {
				tree::nodeP brother = fatherNode->getSon(bro);
				if (brother != mynode) {
					doubleRep tmp_bro=0.0;
					for(letterInSon = 0; letterInSon < sp.alphabetSize(); letterInSon++)
					{
						MDOUBLE dist = brother->dis2father() * gRate * sp.getGlobalRate();
						tmp_bro += sp.Pij_t(letter, letterInSon, dist)
							* cup.get(brother->id(), letterInSon);
					}
					brotherTerm *= tmp_bro;
				}
			}
			totalProb = fatherTerm * brotherTerm;
			ssc.set(mynode->id(), letter, totalProb);
		}
	}
}
+
// The filled sscGivenRoot is using the "Gam" class (over all rate categories) for placing letter at root hidden state
// For a non-reversible model the down values cannot integrate the root state
// out, so every entry carries an extra root-state index:
// sscGivenRoot(letterAtRoot, node, letter).
void computeDownAlg::fillComputeDownNonReversible(const tree& et,
					const sequenceContainer& sc,
					const int pos,
					const computePijHom& pi,
					suffStatGlobalGamPos& sscGivenRoot,
					const suffStatGlobalHomPos& cup)
{
	sscGivenRoot.allocatePlace(pi.alphabetSize(),et.getNodesNum(), pi.alphabetSize());
	treeIterTopDownConst tIt(et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		int letter,letterInFather,bro,letterInSon;
		if (mynode->father()==NULL) {//root
			// at the root the down value is an indicator of the conditioned root
			// state; the iterator is then advanced in place so the remainder of
			// this iteration processes the next (non-root) node.
			for (int letterAtRoot=0; letterAtRoot<pi.alphabetSize();letterAtRoot++){
				for(letter=0; letter<pi.alphabetSize();letter++) {
					MDOUBLE ind = (letterAtRoot==letter?1.0:0.0);
					sscGivenRoot.set(letterAtRoot,mynode->id(),letter,ind);
				}
			}
			mynode = tIt.next(); //continue
		}
		tree::nodeP fatherNode=mynode->father();
		const int n_bro=fatherNode->getNumberOfSons();
		for(int letterAtRoot=0; letterAtRoot<pi.alphabetSize();letterAtRoot++) {//root state
			for(letter=0; letter<pi.alphabetSize();letter++) {//letter for current down calc (at father of node)
				doubleRep totalProb=1.0;
				doubleRep fatherTerm=0;
				//down of father
				if (fatherNode->father()!=NULL) { // not son of root
					// note the transposed getPij argument order relative to the
					// reversible version -- the model is non-reversible.
					for(letterInFather=0; letterInFather<pi.alphabetSize();letterInFather++)//father of father
						fatherTerm += pi.getPij(fatherNode->id(),letterInFather,letter)*
						sscGivenRoot.get(letterAtRoot,fatherNode->id(),letterInFather);
				}
				else {//son of root
					fatherTerm=(letterAtRoot==letter?1.0:0.0);
				}
				doubleRep brotherTerm=1.0;
				for(bro = 0; bro < n_bro; bro++) {
					tree::nodeP brother = fatherNode->getSon(bro);
					if (brother != mynode) {
						doubleRep tmp_bro=0.0;
						for(letterInSon=0; letterInSon<pi.alphabetSize();letterInSon++) {
							tmp_bro+=pi.getPij(fatherNode->getSon(bro)->id(),letter,letterInSon)*
							cup.get(brother->id(),letterInSon);
						}
						brotherTerm *=tmp_bro;
					}
				}
				totalProb = fatherTerm * brotherTerm;
				sscGivenRoot.set(letterAtRoot,mynode->id(),letter,totalProb);
			}
		}
	}
}
\ No newline at end of file
diff --git a/libs/phylogeny/computeDownAlg.h b/libs/phylogeny/computeDownAlg.h
new file mode 100644
index 0000000..258d839
--- /dev/null
+++ b/libs/phylogeny/computeDownAlg.h
@@ -0,0 +1,49 @@
+// $Id: computeDownAlg.h 3107 2007-12-27 12:38:05Z adist $
+
+#ifndef ___COMPUTE_DOWN_ALG
+#define ___COMPUTE_DOWN_ALG
+
+#include "definitions.h"
+#include "tree.h"
+#include "suffStatComponent.h"
+#include "sequenceContainer.h"
+#include "computePijComponent.h"
+
+
+// Fills the "down" sufficient statistics of the pruning algorithm for a single
+// alignment position: for each node, the likelihood of the data outside the
+// subtree below it. The "up" statistics (cup) must already be filled.
+class computeDownAlg {
+public:
+ // Variant taking precomputed transition-probability tables (pi);
+ // results are written into ssc.
+ void fillComputeDown(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalHomPos& ssc,
+ const suffStatGlobalHomPos& cup);
+
+ // Variant computing transition probabilities from the stochastic process sp.
+ void fillComputeDown(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const stochasticProcess& sp,
+ suffStatGlobalHomPos& ssc,
+ const suffStatGlobalHomPos& cup);
+
+ // As above, but with all branch lengths scaled by the fixed rate gRate.
+ void fillComputeDownSpecificRate(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const stochasticProcess& sp,
+ suffStatGlobalHomPos& ssc,
+ const suffStatGlobalHomPos& cup,
+ const MDOUBLE gRate);
+
+/** compute the down computation for a non-reversible model:
+ each down computation is conditioned on the state at the root.
+ This means that the vector field is of one additional dimension (the alphabet at the root)
+ and hence the use of the suffStatGlobalGamPos (=vector<suffStatGlobalHomPos>)
+**/
+ void fillComputeDownNonReversible(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalGamPos& sscGivenRoot,
+ const suffStatGlobalHomPos& cup);
+};
+#endif
diff --git a/libs/phylogeny/computeJumps.cpp b/libs/phylogeny/computeJumps.cpp
new file mode 100644
index 0000000..032d5f0
--- /dev/null
+++ b/libs/phylogeny/computeJumps.cpp
@@ -0,0 +1,166 @@
+#include "computeJumps.h"
+#include "talRandom.h"
+#include "someUtil.h"
+#include "matrixUtils.h"
+#include <algorithm>
+
+
+computeJumps::computeJumps(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r)
+: _Lambda1(Lambda1), _Lambda2(Lambda2)
+{
+ _gFuncStart0 = gFunc(Lambda1, Lambda2, r);
+ _gFuncStart0MinusR = gFunc(Lambda1, Lambda2, -r);
+ _gFuncStart1 = gFunc(Lambda2, Lambda1, r);
+ _gFuncStart1MinusR = gFunc(Lambda2, Lambda1, -r);
+}
+computeJumps::~computeJumps()
+{
+}
+
+
+/********************************************************************************************
+getExpectation
+*********************************************************************************************/
+MDOUBLE computeJumps::getExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId)
+{
+ if(fromId==0 && toId==1 && BranchLength>=0){ // Gain
+ if(terminalStart==0 && terminalEnd==1)
+ return gainExpGiven01(BranchLength);
+ if(terminalStart==0 && terminalEnd==0)
+ return gainExpGiven00(BranchLength);
+ if(terminalStart==1 && terminalEnd==1)
+ return gainExpGiven11(BranchLength);
+ else //(terminalStart==1 && terminalEnd==0)
+ return gainExpGiven10(BranchLength);
+ }
+ else
+ return 0;
+
+}
+//////////////////////////////////////////////////////////////////////////
+// Expected number of gain (0->1) jumps along a branch, conditioned on the
+// terminal states at its two ends; the 01/00/11/10 suffix names the
+// (start,end) pair. Each is half the corresponding moment m_xy; for 0->1
+// terminals the direct transition probability is added as well.
+MDOUBLE computeJumps::gainExpGiven01(MDOUBLE BranchLength){
+ return 0.5*(m01(BranchLength) +Pij_t(0,1,BranchLength));
+}
+MDOUBLE computeJumps::gainExpGiven00(MDOUBLE BranchLength){
+ return 0.5*(m00(BranchLength));
+}
+// NOTE(review): the two cases below were marked "???" by the original author;
+// the chosen moments have not been verified -- confirm before relying on them.
+MDOUBLE computeJumps::gainExpGiven11(MDOUBLE BranchLength){
+ return 0.5*(m11(BranchLength) ); //???
+}
+MDOUBLE computeJumps::gainExpGiven10(MDOUBLE BranchLength){
+ return 0.5*(m10(BranchLength) ); //???
+}
+
+
+// Expected number of loss (1->0) jumps, same conditioning as above.
+// NOTE(review): all four are author-marked "???" and none is called from the
+// code visible in this file; in particular lossExpGiven00 returning m11
+// (rather than an m00-based term) looks asymmetric -- verify.
+MDOUBLE computeJumps::lossExpGiven01(MDOUBLE BranchLength){
+ return 0.5*(m01(BranchLength) ); //???
+}
+MDOUBLE computeJumps::lossExpGiven00(MDOUBLE BranchLength){
+ return 0.5*(m11(BranchLength) ); //???
+}
+MDOUBLE computeJumps::lossExpGiven11(MDOUBLE BranchLength){
+ return 0.5*(m11(BranchLength) ); //???
+}
+MDOUBLE computeJumps::lossExpGiven10(MDOUBLE BranchLength){
+ return 0.5*(m10(BranchLength) + Pij_t(1,0,BranchLength) ); //???
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// Moments m_xy: (G(r) -/+ G(-r))/2 extracts the odd/even part of the
+// derivative of the generating function (the +r/-r pair is built in the
+// constructor); start-0 chains feed m0x, start-1 chains feed m1x.
+MDOUBLE computeJumps::m01(MDOUBLE BranchLength){
+ return 0.5 *( _gFuncStart0.gFunc_dr(BranchLength) - _gFuncStart0MinusR.gFunc_dr(BranchLength));
+}
+MDOUBLE computeJumps::m00(MDOUBLE BranchLength){
+ return 0.5 *( _gFuncStart0.gFunc_dr(BranchLength) + _gFuncStart0MinusR.gFunc_dr(BranchLength));
+}
+MDOUBLE computeJumps::m11(MDOUBLE BranchLength){
+ return 0.5 *( _gFuncStart1.gFunc_dr(BranchLength) - _gFuncStart1MinusR.gFunc_dr(BranchLength));
+}
+MDOUBLE computeJumps::m10(MDOUBLE BranchLength){
+ return 0.5 *( _gFuncStart1.gFunc_dr(BranchLength) + _gFuncStart1MinusR.gFunc_dr(BranchLength));
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+MDOUBLE computeJumps::gFunc_dr(MDOUBLE BranchLength){
+ return _gFuncStart0.g1Func_dr(BranchLength) + _gFuncStart0.g2Func_dr(BranchLength);
+}
+MDOUBLE computeJumps::gFunc::gFunc_dr(MDOUBLE BranchLength){
+ return g1Func_dr(BranchLength) + g2Func_dr(BranchLength);
+}
+
+
+MDOUBLE computeJumps::gFunc::g1Func_dr(MDOUBLE BranchLength){
+ return _g1Part_dr*g1Exp(BranchLength) + _g1Part*g1Exp(BranchLength)*BranchLength*_Alpha1_dr;
+}
+MDOUBLE computeJumps::gFunc::g2Func_dr(MDOUBLE BranchLength){
+ return _g2Part_dr*g2Exp(BranchLength) + _g2Part*g2Exp(BranchLength)*BranchLength*_Alpha2_dr;
+}
+//////////////////////////////////////////////////////////////////////////
+MDOUBLE computeJumps::gFunc::g1Exp(MDOUBLE BranchLength){
+ return exp(_Alpha1*BranchLength);
+}
+MDOUBLE computeJumps::gFunc::g2Exp(MDOUBLE BranchLength){
+ return exp(_Alpha2*BranchLength);
+}
+
+
+//MDOUBLE computeJumps::gainExp(MDOUBLE BranchLength,MDOUBLE prob01,MDOUBLE prob11){
+// return gainExpGiven01(BranchLength)*prob01 + gainExpGiven00(BranchLength)*prob11;
+//}
+
+
+/********************************************************************************************
+Pij_t - Based on Analytic solution
+*********************************************************************************************/
+MDOUBLE computeJumps::Pij_t(const int i,const int j, const MDOUBLE d) {
+ MDOUBLE gain = _Lambda1;
+ MDOUBLE loss = _Lambda2;
+ MDOUBLE eigenvalue = -(gain + loss);
+ bool withHGT = true;
+
+ MDOUBLE noHGTfactor = 0.0001;
+
+ VVdouble Pt;
+ int AlphaSize = 2;
+ resizeMatrix(Pt,AlphaSize,AlphaSize);
+ int caseNum = i + j*2;
+ switch (caseNum) {
+ case 0 : Pt[0][0] = loss/(-eigenvalue) + exp(eigenvalue*d)*(1 - loss/(-eigenvalue)); break;
+ case 1 : Pt[1][0] = loss/(-eigenvalue) - exp(eigenvalue*d)*(1 - gain/(-eigenvalue)); break;
+ case 2 : if(withHGT)
+ { Pt[0][1] = gain/(-eigenvalue) - exp(eigenvalue*d)*(1 - loss/(-eigenvalue));}
+ else
+ { Pt[0][1] = (gain/(-eigenvalue) - exp(eigenvalue*d)*(1 - loss/(-eigenvalue)))*noHGTfactor;} break;
+ case 3 : Pt[1][1] = gain/(-eigenvalue) + exp(eigenvalue*d)*(1 - gain/(-eigenvalue)); break;
+ }
+ MDOUBLE val = (Pt[i][j]);
+ return val;
+}
+
+
+
+/********************************************************************************************
+gFunc constructor: precomputes the eigenvalue-like quantities Alpha1/Alpha2 of
+the generating function, the coefficients _g1Part/_g2Part of its two
+exponential terms, and the r-derivatives of all of them (quotient/chain rule),
+so that gFunc_dr can be evaluated cheaply per branch length.
+*********************************************************************************************/
+computeJumps::gFunc::gFunc(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r)
+: _Lambda1(Lambda1), _Lambda2(Lambda2), _r(r)
+{
+ // discriminant of the characteristic equation, and its d/dr
+ _delta = sqrt( pow((_Lambda1+_Lambda2),2) + 4*(_r*_r - 1)*_Lambda1*_Lambda2 );
+ _delta_dr = (4*_r*_Lambda1*_Lambda2)/_delta;
+
+ _Alpha1 = 0.5*(-_Lambda1-_Lambda2 +_delta);
+ _Alpha2 = 0.5*(-_Lambda1-_Lambda2 -_delta);
+
+ _Alpha1_dr = 0.5*_delta_dr;
+ _Alpha2_dr = -0.5*_delta_dr;
+
+ _Alpha1_2 = _delta; //= _Alpha1-_Alpha2;
+ _Alpha1_2_dr = _delta_dr; //= _Alpha1_dr - _Alpha2_dr;
+
+ // coefficients of the two exponential terms of g
+ _g1Part = ( (_r-1)*_Lambda1 - _Alpha2)/_Alpha1_2;
+ _g2Part = (-(_r-1)*_Lambda1 + _Alpha1)/_Alpha1_2;
+
+ // quotient rule: (D*N' - N*D')/D^2 with D = _Alpha1_2
+ _g1Part_dr = ( _Alpha1_2*( _Lambda1-_Alpha2_dr) - ( (_r-1)*_Lambda1 - _Alpha2)*_Alpha1_2_dr )/(_Alpha1_2*_Alpha1_2);
+ _g2Part_dr = ( _Alpha1_2*(-_Lambda1+_Alpha1_dr) - (-(_r-1)*_Lambda1 + _Alpha1)*_Alpha1_2_dr )/(_Alpha1_2*_Alpha1_2);
+}
diff --git a/libs/phylogeny/computeJumps.h b/libs/phylogeny/computeJumps.h
new file mode 100644
index 0000000..4ed4405
--- /dev/null
+++ b/libs/phylogeny/computeJumps.h
@@ -0,0 +1,103 @@
+#ifndef ___COMPUTE_JUMPS__
+#define ___COMPUTE_JUMPS__
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "alphabet.h"
+
+#include <map>
+#include <vector>
+// NOTE(review): a using-directive in a header leaks std into every includer;
+// kept here for compatibility with the rest of the library.
+using namespace std;
+
+/******************************************************************
+This class computes jumps (events), following Suchard's equations, along
+branches of differing lengths (according to a given tree), with the aim of
+giving the expectation of the number of jumps from state a to state b given
+that the terminal states at the two ends of the branch are x and y.
+*******************************************************************/
+
+class computeJumps {
+public:
+ // Lambda1 = gain (0->1) rate, Lambda2 = loss (1->0) rate, r = dummy variable
+ // of the jump-counting generating function.
+ computeJumps(const MDOUBLE Lambda1, const MDOUBLE Lambda2, const MDOUBLE r=1);
+ virtual ~computeJumps();
+
+ //////////////////////////////////////////////////////////////////////////
+ // Generating function of the jump process and its derivative with respect
+ // to r; all coefficients are precomputed in the constructor.
+ class gFunc {
+ public:
+ gFunc(const MDOUBLE Lambda1, const MDOUBLE Lambda2 , const MDOUBLE r);
+ // NOTE(review): the default constructor leaves every member uninitialized;
+ // it exists only so computeJumps can hold gFunc members and assign them.
+ gFunc(){};
+ ~gFunc(){};
+
+ MDOUBLE gFunc_dr(MDOUBLE BranchLength);
+ MDOUBLE g1Func_dr(MDOUBLE BranchLength);
+ MDOUBLE g2Func_dr(MDOUBLE BranchLength);
+
+ MDOUBLE g1Exp(MDOUBLE BranchLength);
+ MDOUBLE g2Exp(MDOUBLE BranchLength);
+
+ private:
+ MDOUBLE _r;
+ MDOUBLE _Lambda1;
+ MDOUBLE _Lambda2;
+
+ MDOUBLE _Alpha1;
+ MDOUBLE _Alpha2;
+ MDOUBLE _Alpha1_dr;
+ MDOUBLE _Alpha2_dr;
+
+ MDOUBLE _Alpha1_2;
+ MDOUBLE _Alpha1_2_dr;
+
+ MDOUBLE _delta;
+ MDOUBLE _delta_dr;
+
+ MDOUBLE _g1Part;
+ MDOUBLE _g2Part;
+ MDOUBLE _g1Part_dr;
+ MDOUBLE _g2Part_dr;
+
+ };
+ //////////////////////////////////////////////////////////////////////////
+
+ // Expected number of fromId->toId jumps given branch length and terminal states.
+ MDOUBLE getExpectation(const MDOUBLE BranchLength, int terminalStart, int terminalEnd, int fromId, int toId);
+ // NOTE(review): declared but its definition in computeJumps.cpp is commented
+ // out -- code that calls it will fail to link.
+ MDOUBLE gainExp(MDOUBLE BranchLength,MDOUBLE prob01,MDOUBLE prob11);
+
+ MDOUBLE gainExpGiven01(MDOUBLE BranchLength);
+ MDOUBLE gainExpGiven00(MDOUBLE BranchLength);
+ MDOUBLE gainExpGiven11(MDOUBLE BranchLength);
+ MDOUBLE gainExpGiven10(MDOUBLE BranchLength);
+
+ MDOUBLE lossExpGiven01(MDOUBLE BranchLength);
+ MDOUBLE lossExpGiven00(MDOUBLE BranchLength);
+ MDOUBLE lossExpGiven11(MDOUBLE BranchLength);
+ MDOUBLE lossExpGiven10(MDOUBLE BranchLength);
+
+
+ MDOUBLE gFunc_dr(MDOUBLE BranchLength);
+
+private:
+ // moments of the jump count for each (start,end) terminal-state pair
+ MDOUBLE m01(MDOUBLE BranchLength);
+ MDOUBLE m00(MDOUBLE BranchLength);
+ MDOUBLE m11(MDOUBLE BranchLength);
+ MDOUBLE m10(MDOUBLE BranchLength);
+
+
+ //MDOUBLE g1Func_dr(MDOUBLE BranchLength);
+ //MDOUBLE g2Func_dr(MDOUBLE BranchLength);
+ //MDOUBLE g1Exp(MDOUBLE BranchLength);
+ //MDOUBLE g2Exp(MDOUBLE BranchLength);
+
+ // analytic two-state transition probability P(i->j | t=d)
+ MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d);
+
+
+ MDOUBLE _Lambda1;
+ MDOUBLE _Lambda2;
+ // chains started at 0/1, each at dummy variable +r and -r
+ gFunc _gFuncStart0;
+ gFunc _gFuncStart0MinusR;
+ gFunc _gFuncStart1;
+ gFunc _gFuncStart1MinusR;
+
+};
+
+#endif
diff --git a/libs/phylogeny/computeMarginalAlg.cpp b/libs/phylogeny/computeMarginalAlg.cpp
new file mode 100644
index 0000000..c6682df
--- /dev/null
+++ b/libs/phylogeny/computeMarginalAlg.cpp
@@ -0,0 +1,100 @@
+// $Id: computeMarginalAlg.cpp 1735 2007-02-26 13:46:37Z itaymay $
+
+#include "definitions.h"
+#include "treeIt.h"
+#include "computeMarginalAlg.h"
+#include <iostream>
+#include <cassert>
+using namespace std;
+
+
+// Fills, for every node and letter, the marginal posterior P(letter | DATA)
+// at alignment position pos, using the already-computed up (cup) and down
+// (cdown) sufficient statistics and the precomputed Pij tables (pi).
+// Results go into ssc; posProb receives the position likelihood P(DATA)
+// (the un-normalized sum at the root). sc is unused here; it is kept for
+// interface uniformity with the other fill* algorithms.
+void computeMarginalAlg::fillComputeMarginal(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalHomPos& ssc,
+ const suffStatGlobalHomPos& cup,
+ const suffStatGlobalHomPos& cdown,
+ doubleRep & posProb){
+
+ // filling the exact probs.
+ tree::nodeP mynode = NULL;
+ ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize());
+ treeIterTopDownConst tIt(et);
+ for (mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ assert (mynode != NULL);
+ int letter;
+ if (mynode->isLeaf()) {
+ // Leaf: indicator of a non-zero up-likelihood (the ternary applies to
+ // convert(...), not to the assignment).
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ doubleRep val=convert(cup.get(mynode->id(),letter))?1.0:0.0;
+ ssc.set(mynode->id(),letter,val);
+ }
+ continue;
+ }
+ doubleRep sumProb =0;
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ doubleRep prob=0.0;
+ if (mynode->father()==NULL) prob=1.0; // special case of the root.
+ else {
+ // marginalize the down statistic over the letter at the father
+ for(int letter_in_f=0; letter_in_f<pi.alphabetSize();letter_in_f++) {
+ prob +=cdown.get(mynode->id(),letter_in_f)*
+ pi.getPij(mynode->id(),letter,letter_in_f);
+ }
+ }
+
+ // joint P(DATA, letter at this node) = down * freq * up
+ prob = prob*sp.freq(letter)*
+ cup.get(mynode->id(),letter);
+ ssc.set(mynode->id(),letter,prob);
+ sumProb += prob;
+ }
+ // normalize the joints into a posterior distribution over letters
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ doubleRep getV = ssc.get(mynode->id(),letter);
+ ssc.set(mynode->id(),letter,getV/sumProb);
+ }
+
+
+
+ // CHECKING:
+/* LOG(5,<<" checking marginal of node: "<<mynode->name()<<endl);
+ MDOUBLE SSum =0;
+ for (int u=0; u < pi.alphabetSize(); ++u) {
+ LOG(5,<<ssc.get(mynode->id(),u)<<" ");
+ SSum +=ssc.get(mynode->id(),u);
+ }
+ LOG(5,<<"\nsum of marginals = "<<SSum<<endl);
+*/
+ // at the root the un-normalized sum is the position likelihood
+ if (mynode->isRoot()) posProb = convert(sumProb);
+ }
+}
+
+
+
+
+/*
+if (val>1) {
+ LOG(5,<<"x val = " << val<<endl);
+ LOG(5,<<" my node = " << mynode->name()<<endl);
+ LOG(5,<<" let = " << let << endl);
+ LOG(5,<<" up = " << cup.get(mynode->id(),let));
+ LOG(5,<< "pos prob = " << posProb<<endl);
+ LOG(5,<<" root of tree = " << et.getRoot()->name()<<endl);
+ errorMsg::reportError(" error in compute marginal >1 ");
+ }
+if (val>1) {
+ LOG(5,<<" val = " << val<<endl);
+ LOG(5,<<" pos = " << pos<<endl);
+ LOG(5,<<" my node = " << mynode->name()<<endl);
+ LOG(5,<<" let = " << let << endl);
+ LOG(5,<<" up = " << cup.get(mynode->id(),let)<<endl);
+ LOG(5,<<" down[sameLetter] = " << cdown.get(mynode->id(),let)<<endl);
+ LOG(5,<<" pij[sameLetter] = " << pi.getPij(mynode->id(),let,let)<<endl);
+ LOG(5,<< "pos prob = " << posProb<<endl);
+ LOG(5,<<" root of tree = " << et.getRoot()->name()<<endl);
+ LOG(5,<<"sp.freq(letter) = "<<sp.freq(let)<<endl);
+ errorMsg::reportError(" error in compute marginal >1 ");
+ }
+
+
+ */
+
diff --git a/libs/phylogeny/computeMarginalAlg.h b/libs/phylogeny/computeMarginalAlg.h
new file mode 100644
index 0000000..653f3e4
--- /dev/null
+++ b/libs/phylogeny/computeMarginalAlg.h
@@ -0,0 +1,29 @@
+// $Id: computeMarginalAlg.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___COMPUTE_MARGINAL_ALG
+#define ___COMPUTE_MARGINAL_ALG
+
+#include "definitions.h"
+#include "suffStatComponent.h"
+#include "sequenceContainer.h"
+#include "computePijComponent.h"
+
+// This function will give one (for DNA, for example)
+// P(A | DATA), P (C | DATA), ... etc, for each node.
+// This is the case in the homogenous model only.
+// for the Gamma case, the marginal in a specific node, is in fact
+// p(A | DATA, r), P( C | DATA, r), ... etc.
+
+class computeMarginalAlg {
+public:
+ // Fills ssc with per-node, per-letter marginal posteriors at position pos;
+ // cup/cdown are the precomputed up/down statistics and posProb receives the
+ // position likelihood (see the .cpp for details).
+ void fillComputeMarginal(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalHomPos& ssc,
+ const suffStatGlobalHomPos& cup,
+ const suffStatGlobalHomPos& cdown,
+ doubleRep & posProb);
+};
+#endif
diff --git a/libs/phylogeny/computePijComponent.cpp b/libs/phylogeny/computePijComponent.cpp
new file mode 100644
index 0000000..9e4bbf7
--- /dev/null
+++ b/libs/phylogeny/computePijComponent.cpp
@@ -0,0 +1,109 @@
+
+// $Id: computePijComponent.cpp 5249 2008-11-17 12:24:49Z adist $
+
+#include "definitions.h"
+#include "treeIt.h"
+#include "computePijComponent.h"
+#include "logFile.h"
+
+// Fills the alphabet x alphabet table _V for one branch of length dis with
+// P(i->j), dP/dt or d2P/dt2 according to derivationOrder (0/1/2).
+// If both the caller and the process agree the model is reversible, the lower
+// triangle is derived from the upper one via detailed balance
+// (pi_i * P(i->j) = pi_j * P(j->i)); otherwise each direction is computed.
+void computePijHomSpec::fillPij(const MDOUBLE dis, const stochasticProcess& sp, int derivationOrder, bool isReversible)
+{
+
+ if (!(isReversible && sp.isReversible())) // if one is false
+ isReversible = false;
+ resize(sp.alphabetSize());
+ int i,j;
+ for (i=0; i<sp.alphabetSize(); i++) {
+ // diagonal entry
+ switch (derivationOrder) {
+ case 0:
+ _V[i][i] = sp.Pij_t(i,i,dis);
+ break;
+ case 1:
+ _V[i][i] = sp.dPij_dt(i,i,dis);
+ break;
+ case 2:
+ _V[i][i] = sp.d2Pij_dt2(i,i,dis);
+ break;
+ default:
+ errorMsg::reportError("error in function fillPij - derivationOrder must be 0, 1 or 2");
+ }
+
+ // upper triangle (and, below, the matching lower-triangle entry)
+ for (j=i+1; j<sp.alphabetSize(); j++) {
+ switch (derivationOrder) {
+ case 0:
+ _V[i][j] = sp.Pij_t(i,j,dis);
+ // a probability of exactly 0 on a non-zero branch would poison
+ // downstream log-likelihoods, so it is patched to EPSILON
+ if ((_V[i][j] == 0 )&& (dis !=0)){
+
+ _V[i][j] = EPSILON;
+ }
+
+ break;
+ case 1:
+ _V[i][j] = sp.dPij_dt(i,j,dis);
+ break;
+ case 2:
+ _V[i][j] = sp.d2Pij_dt2(i,j,dis);
+ break;
+ default:
+ errorMsg::reportError("error in function fillPij - derivationOrder must be 0, 1 or 2");
+ }
+ // detailed balance needs freq(j) as a divisor; a zero frequency makes
+ // the reversible shortcut impossible
+ if (sp.freq(j) == 0.0) {
+ if (isReversible) {
+ errorMsg::reportError("error in function fillPij");
+ }
+
+ }
+// else {
+ if (isReversible){
+ _V[j][i] = _V[i][j]* sp.freq(i)/sp.freq(j);
+ }
+ else {
+ switch (derivationOrder) {
+ case 0:
+ _V[j][i] = sp.Pij_t(j,i,dis);
+ if ((_V[j][i] == 0 )&& (dis !=0))
+ _V[j][i] = EPSILON;
+ break;
+ case 1:
+ _V[j][i] = sp.dPij_dt(j,i,dis);
+ break;
+ case 2:
+ _V[j][i] = sp.d2Pij_dt2(j,i,dis);
+ break;
+ default:
+ errorMsg::reportError("error in function fillPij - derivationOrder must be 0, 1 or 2");
+ }
+ }
+// }
+ }
+ }
+}
+
+
+void computePijHom::fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder, bool isReversible) {
+ _V.resize(et.getNodesNum());
+ treeIterTopDownConst tIt(et);
+ tree::nodeP myNode = tIt.first();
+ {// skipping the root, but allocating place for the root pij even if they are not use
+ // to maintain that all arrays have the same size.
+ _V[myNode->id()].resize(sp.alphabetSize());
+ }
+ LOGDO(50,et.output(myLog::LogFile(),tree::ANCESTOR));
+ LOGDO(50,et.output(myLog::LogFile(),tree::PHYLIP));
+ for (; myNode != tIt.end(); myNode = tIt.next()) {
+ if (!(myNode->isRoot()))
+ _V[myNode->id()].fillPij(myNode->dis2father()*sp.getGlobalRate(),sp,derivationOrder,isReversible);
+// else
+// myLog::LogFile()<<"ROOT IS "<<myNode->name()<<endl;
+ }
+}
+
+
+void computePijGam::fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder, bool isReversible) {
+ _V.resize(sp.categories());
+ for (int i=0; i < _V.size(); ++i) {
+ tree cp = et;
+ cp.multipleAllBranchesByFactor(sp.rates(i)/sp.getGlobalRate());// the global rate is taken care of in the hom pij.
+ _V[i].fillPij(cp,sp,derivationOrder,isReversible);
+ }
+}
diff --git a/libs/phylogeny/computePijComponent.h b/libs/phylogeny/computePijComponent.h
new file mode 100644
index 0000000..fac2225
--- /dev/null
+++ b/libs/phylogeny/computePijComponent.h
@@ -0,0 +1,54 @@
+// $Id: computePijComponent.h 3064 2007-12-23 10:23:57Z cohenofi $
+
+#ifndef ___COMPUTE_PIJ_COMPONENT
+#define ___COMPUTE_PIJ_COMPONENT
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+
+class computePijHomSpec {//specific node, no rate variation
+public:
+ virtual ~computePijHomSpec(){};
+ // Fill _V with P(i->j) (or its 1st/2nd time derivative) for a branch of
+ // length dis; see the .cpp for the reversibility shortcut.
+ void fillPij(const MDOUBLE dis, const stochasticProcess& sp, int derivationOrder = 0, bool isReversible =true);
+ void resize(const int alphabetSize) {
+ _V.resize(alphabetSize);
+ for (int z=0;z<alphabetSize;++z) _V[z].resize(alphabetSize);
+ }
+
+ // NOTE(review): size() is unsigned and is narrowed to int here (and in the
+ // classes below); fine for realistic alphabet sizes.
+ int alphabetSize() const {return _V.size();}
+ MDOUBLE getPij(const int let1,const int let2)const{
+ return _V[let1][let2];
+ }
+ VVdouble _V; // let, let
+};
+
+class computePijHom {//all nodes, no rate variation
+public:
+ virtual ~computePijHom(){};
+ // One computePijHomSpec per node, filled for the branch above that node.
+ void fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder = 0, bool isReversible =true);
+ int alphabetSize() const {return _V[0].alphabetSize();}
+ int getNodesNum() const {return _V.size();}
+ MDOUBLE getPij(const int nodeId,const int let1,const int let2)const{
+ return _V[nodeId].getPij(let1,let2);
+ }
+ vector<computePijHomSpec> _V; // let, let
+};
+
+class computePijGam {// all nodes, one table per rate category
+public:
+ virtual ~computePijGam(){};
+ void fillPij(const tree& et, const stochasticProcess& sp, int derivationOrder = 0, bool isReversible =true);
+ int categories() const {return _V.size();}
+ int alphabetSize() const {return _V[0].alphabetSize();}
+ int getNodesNum() const {return _V[0].getNodesNum();}
+
+ MDOUBLE getPij(const int rateCategor,const int nodeId,const int let1,const int let2)const{
+ return _V[rateCategor].getPij(nodeId,let1,let2);
+ }
+ computePijHom& operator[] (int i) {return _V[i];}
+ const computePijHom& operator[] (int i) const {return _V[i];}
+ vector<computePijHom> _V; // each rate category
+};
+
+#endif
diff --git a/libs/phylogeny/computePosteriorExpectationOfSubstitutions.cpp b/libs/phylogeny/computePosteriorExpectationOfSubstitutions.cpp
new file mode 100644
index 0000000..44fb384
--- /dev/null
+++ b/libs/phylogeny/computePosteriorExpectationOfSubstitutions.cpp
@@ -0,0 +1,202 @@
+#include "computePosteriorExpectationOfSubstitutions.h"
+#include "definitions.h"
+#include "computeDownAlg.h"
+#include "computeUpAlg.h"
+#include "matrixUtils.h"
+#include "treeIt.h"
+#include "likelihoodComputation.h"
+
+using namespace std;
+
+/********************************************************************************************
+computePosteriorExpectationOfSubstitutions
+*********************************************************************************************/
+computePosteriorExpectationOfSubstitutions::computePosteriorExpectationOfSubstitutions(const tree &tr, const sequenceContainer &sc, const stochasticProcess *sp):
+_tr(tr), _sc(sc){
+ if(!sp){
+ errorMsg::reportError("error in the constructor computePosteriorExpectationOfSubstitutions sp argument is NULL");
+ }
+ else{
+ _sp = sp;
+ }
+}
+/********************************************************************************************
+Expectation of number of substitutions from character u to v --- =
+sum over all substitutions x,y:
+Posterior(Node=x,Father=y|D)*Exp(substitutions u to v|Node=x,Father=y)
+The second term is given to the function as input (can be obtained via simulations)
+*********************************************************************************************/
+VVdouble computePosteriorExpectationOfSubstitutions::computeExpectationAcrossTree(
+ simulateJumpsAbstract &sim, //input given from simulation studies
+ const VVVdouble &posteriorProbs,
+ VVVdouble &expForBranch)
+{
+ //int numNodes = _tr.getNodesNum();
+ int alphabetSize = _sp->alphabetSize();
+ VVdouble res;
+ resizeMatrix(res,alphabetSize,alphabetSize);
+ treeIterTopDownConst tIt(_tr);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ for (int fromState=0;fromState<alphabetSize;++fromState)
+ {
+ for (int toState=0;toState<alphabetSize;++toState)
+ {
+ if (fromState==toState)
+ continue;
+ expForBranch[mynode->id()][fromState][toState] = computeExpectationOfChangePerBranch(sim,posteriorProbs,mynode,fromState,toState);
+ res[fromState][toState] +=expForBranch[mynode->id()][fromState][toState];
+
+ }
+ }
+ }
+ return res;
+}
+/********************************************************************************************
+Posterior probabilities computed across entire tree, for all substitutions from character u to v
+*********************************************************************************************/
+VVdouble computePosteriorExpectationOfSubstitutions::computePosteriorAcrossTree(
+ simulateJumpsAbstract &sim, //input given from simulation studies
+ const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch)
+{
+ //int numNodes = _tr.getNodesNum();
+ int alphabetSize = _sp->alphabetSize();
+ // N: resized before
+ //probsForBranch.resize(numNodes);
+ //for (int n=0;n<numNodes;++n)
+ // resizeMatrix(probsForBranch[n],alphabetSize,alphabetSize);
+
+ VVdouble res;
+ resizeMatrix(res,alphabetSize,alphabetSize);
+ treeIterTopDownConst tIt(_tr);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ for (int fromState=0;fromState<alphabetSize;++fromState)
+ {
+ for (int toState=0;toState<alphabetSize;++toState)
+ {
+ if (fromState==toState)
+ continue;
+ probsForBranch[mynode->id()][fromState][toState]= computePosteriorOfChangePerBranch(sim,posteriorProbsGivenTerminals,mynode,fromState,toState);
+ res[fromState][toState] +=probsForBranch[mynode->id()][fromState][toState];
+
+ }
+ }
+ }
+ return res;
+}
+/********************************************************************************************
+*********************************************************************************************/
+MDOUBLE computePosteriorExpectationOfSubstitutions::computePosteriorOfChangePerBranch(simulateJumpsAbstract &sim, //input given from simulation studies
+ const VVVdouble &posteriorProbs,
+ tree::nodeP node,
+ int fromState, int toState)
+{
+ int alphabetSize = _sp->alphabetSize();
+ MDOUBLE res = 0;
+
+ for (int x=0;x<alphabetSize;++x)
+ {
+ for (int y=0;y<alphabetSize;++y)
+ {
+ res+=sim.getProb(node->name(),x,y,fromState,toState)*posteriorProbs[node->id()][x][y];
+ }
+ }
+ return res;
+}
+
+/********************************************************************************************
+Posterior of observing a certain state substitution along a branch:
+P(Node=x,Father=y|D) = P(D,Node=x,Father=y)/P(D)
+usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
+*********************************************************************************************/
+void computePosteriorExpectationOfSubstitutions::computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos){
+ int numNodes = _tr.getNodesNum();
+ int alphabetSize = _sp->alphabetSize();
+ posteriorPerNodePer2States.resize(numNodes);
+ for (int n=0;n<posteriorPerNodePer2States.size();++n)
+ resizeMatrix(posteriorPerNodePer2States[n],alphabetSize,alphabetSize);
+ suffStatGlobalHomPos sscUp;
+ suffStatGlobalHomPos sscDown; //for a reversible model
+ sscUp.allocatePlace(numNodes,alphabetSize);
+ computePijHom pi;
+ pi.fillPij(_tr,*_sp);
+
+ computeUpAlg comp_Up;
+ computeDownAlg comp_Down;
+ comp_Up.fillComputeUp(_tr,_sc,pos,pi,sscUp);
+ comp_Down.fillComputeDown(_tr,_sc,pos,pi,sscDown,sscUp);
+ treeIterTopDownConst tIt(_tr);
+ MDOUBLE ll = convert(likelihoodComputation::getLofPos(pos,_tr,_sc,pi,*_sp));
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ for (int sonState = 0; sonState<alphabetSize; ++sonState){
+ for (int fatherState = 0; fatherState<alphabetSize; ++fatherState){
+ posteriorPerNodePer2States[mynode->id()][fatherState][sonState]= computePosterioGivenTerminalsPerBranch(mynode->id(),sonState,fatherState,sscUp,sscDown, pi,ll,mynode->name());
+ }
+ }
+ }
+}
+/********************************************************************************************
+Posterior of observing a certain state substitution along a branch:
+P(Node=sonState,Father=fatherState|D) = P(D,Node=sonState,Father=fatherState)/P(D)
+usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
+NOTE(review): the sanity checks below look inverted in severity -- deviations
+larger than 1e-4 are only logged and clamped, while deviations between 1e-6
+and 1e-4 survive the clamps and then trigger the fatal reportError. Confirm
+whether the fatal check was meant to run before the clamping.
+*********************************************************************************************/
+MDOUBLE computePosteriorExpectationOfSubstitutions::computePosterioGivenTerminalsPerBranch
+ (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
+ suffStatGlobalHomPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName)
+{
+ MDOUBLE res, Down, Up, pij;
+ Down = convert(sscDown.get(nodeId,fatherState));
+ Up = convert(sscUp.get(nodeId,sonState));
+ pij = pi.getPij(nodeId,fatherState,sonState);
+ // joint P(D, son=sonState, father=fatherState), then normalize by P(D)
+ res=_sp->freq(fatherState)*Down*Up*pij;
+ res/=LLData;
+// if(gainLossOptions::_printDEBUGinfo)
+// LOG(3,<<nodeName<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<LLData<<" prob "<<res<<endl);
+
+ // clamp numeric overshoot beyond [0,1] (logged, not fatal)
+ if (res > 1 + 1e-4){
+ LOGnOUT(3,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" res "<<res<<" LLData "<<LLData<<endl);
+ res = 1;
+ }
+ if (res<-1e-4){
+ LOGnOUT(3,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" res "<<res<<" LLData "<<LLData<<endl);
+ res = 0;
+ }
+ // fatal check on the (possibly clamped) value -- see NOTE above
+ if ((res > 1 + 0.000001) || (res<-0.000001)){
+ string err = "Error in computePosteriorExpectationOfSubstitutions::computePosterioGivenTerminalsPerBranch, non probability value ";
+ err+=double2string(res);
+ err+=" at node ";
+ err+=int2string(nodeId);
+ err+= " sonState ";
+ err+= int2string(sonState);
+ err+= " fatherState ";
+ err+= int2string(fatherState);
+ errorMsg::reportError(err);
+ }
+ return res;
+}
+/********************************************************************************************
+*********************************************************************************************/
+MDOUBLE computePosteriorExpectationOfSubstitutions::computeExpectationOfChangePerBranch(
+ simulateJumpsAbstract &sim, //input given from simulation studies
+ const VVVdouble &posteriorProbsGivenTerminals,
+ tree::nodeP node,int fromState, int toState)
+{
+ int alphabetSize = _sp->alphabetSize();
+
+
+ MDOUBLE nodeExpectation = 0;
+ for (int x = 0; x<alphabetSize; ++x){
+ for (int y = 0; y<alphabetSize; ++y){
+ nodeExpectation+=(posteriorProbsGivenTerminals[node->id()][x][y]*
+ sim.getExpectation(node->name(),x,y,fromState,toState));
+ //DEBUG
+ LOG(6,<<"node "<<node->id()<<endl);
+ LOG(6,<<"from "<<fromState<<" to "<<toState<<" given "<<x<<" and "<<y
+ <<" post= "<<posteriorProbsGivenTerminals[node->id()][x][y]<<" sim= "<< sim.getExpectation(node->name(),x,y,fromState,toState)<<endl);
+ }
+ }
+ return nodeExpectation;
+}
+
+
+
+
diff --git a/libs/phylogeny/computePosteriorExpectationOfSubstitutions.h b/libs/phylogeny/computePosteriorExpectationOfSubstitutions.h
new file mode 100644
index 0000000..6a43363
--- /dev/null
+++ b/libs/phylogeny/computePosteriorExpectationOfSubstitutions.h
@@ -0,0 +1,60 @@
+
+#ifndef ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS
+#define ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS
+
+
+/*
+This is the parent class: it implements the computePosteriorExpectationOfSubstitutions
+procedure for a reversible stochastic process. Its subclass,
+computePosteriorExpectationOfSubstitutions_nonReversibleSp, implements the same
+procedure for a non-reversible stochastic process. The implementation differs
+in two functions: computePosteriorOfChangeGivenTerminals and
+computePosterioGivenTerminalsPerBranch.
+*/
+
+#include "definitions.h"
+#include "simulateJumps.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "suffStatComponent.h"
+#include "computePijComponent.h"
+#include "simulateJumpsAbstract.h"
+
+class computePosteriorExpectationOfSubstitutions {
+
+public:
+ explicit computePosteriorExpectationOfSubstitutions(const tree &tr, const sequenceContainer &sc, const stochasticProcess *sp);
+ virtual ~computePosteriorExpectationOfSubstitutions(){};
+
+
+ // Tree-wide expected substitution counts / posterior substitution
+ // probabilities; per-branch results are returned via the last argument.
+ VVdouble computeExpectationAcrossTree(simulateJumpsAbstract &sim, //input given from simulation studies
+ const VVVdouble &posteriorProbs, VVVdouble &expForBranch);
+ VVdouble computePosteriorAcrossTree(simulateJumpsAbstract &sim, //input given from simulation studies
+ const VVVdouble &posteriorProbsGivenTerminals,VVVdouble &probsForBranch);
+
+ // virtual: overridden by the non-reversible subclass
+ virtual void computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos);
+
+private:
+ MDOUBLE computePosteriorOfChangePerBranch(
+ simulateJumpsAbstract &sim, //input given from simulation studies
+ const VVVdouble &posteriorProbs,
+ tree::nodeP node,
+ int fromState, int toState);
+
+ MDOUBLE computeExpectationOfChangePerBranch(
+ simulateJumpsAbstract &sim, //input given from simulation studies
+ const VVVdouble &posteriorProbsGivenTerminals,
+ tree::nodeP node,
+ int fromState, int toState);
+
+ MDOUBLE computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
+ suffStatGlobalHomPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName);
+
+
+protected:
+ // protected so the non-reversible subclass can reuse them
+ const tree &_tr;
+ const sequenceContainer &_sc;
+ const stochasticProcess *_sp;
+};
+
+
+#endif
diff --git a/libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp b/libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp
new file mode 100644
index 0000000..0f575f2
--- /dev/null
+++ b/libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp
@@ -0,0 +1,91 @@
+#include "definitions.h"
+#include "computeDownAlg.h"
+#include "computeUpAlg.h"
+#include "matrixUtils.h"
+#include "treeIt.h"
+#include "likelihoodComputation.h"
+#include "computePosteriorExpectationOfSubstitutions_nonReversibleSp.h"
+
+using namespace std;
+
+
+
+/********************************************************************************************
+Posterior of observing a certain state substitution along a branch:
+P(Node=x,Father=y|D) = P(D,Node=x,Father=y)/P(D)
+usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
+*********************************************************************************************/
+void computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos){
+ int numNodes = _tr.getNodesNum();
+ int alphabetSize = _sp->alphabetSize();
+ // Allocate the output: one [alphabetSize x alphabetSize] matrix per node.
+ posteriorPerNodePer2States.resize(numNodes);
+ for (int n=0;n<posteriorPerNodePer2States.size();++n)
+ resizeMatrix(posteriorPerNodePer2States[n],alphabetSize,alphabetSize);
+ suffStatGlobalHomPos sscUp;
+ suffStatGlobalGamPos sscDownNonRev; // The "Gam" is used for the letter at father - sscGivenRoot
+ sscUp.allocatePlace(numNodes,alphabetSize);
+ computePijHom pi;
+ pi.fillPij(_tr,*_sp);
+
+ // Up/down sufficient statistics. For a non-reversible process the down pass
+ // keeps an extra dimension for the state at the root (fillComputeDownNonReversible).
+ computeUpAlg comp_Up;
+ computeDownAlg comp_Down;
+ comp_Up.fillComputeUp(_tr,_sc,pos,pi,sscUp);
+ comp_Down.fillComputeDownNonReversible(_tr,_sc,pos,pi,sscDownNonRev,sscUp);
+ treeIterTopDownConst tIt(_tr);
+ // Likelihood of this position: the normalizing constant P(D).
+ MDOUBLE ll = convert(likelihoodComputation::getLofPos(pos,_tr,_sc,pi,*_sp));
+ // Fill the posterior for every (father,son) state pair on every branch.
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ for (int sonState = 0; sonState<alphabetSize; ++sonState){
+ for (int fatherState = 0; fatherState<alphabetSize; ++fatherState){
+ posteriorPerNodePer2States[mynode->id()][fatherState][sonState]= computePosterioGivenTerminalsPerBranch(mynode->id(),sonState,fatherState,sscUp,sscDownNonRev, pi,ll,mynode->name());
+ }
+ }
+ }
+}
+
+/********************************************************************************************
+Posterior of observing a certain state substitution along a branch:
+P(Node=sonState,Father=fatherState|D) = P(D,Node=sonState,Father=fatherState)/P(D)
+usage: posteriorPerNodePer2States[mynode->id()][fatherState][sonState]
+*********************************************************************************************/
+// Returns the posterior probability of the (fatherState -> sonState) pair on
+// the branch above nodeId, marginalizing over the state at the root (needed
+// for a non-reversible process). LLData is the position likelihood P(D).
+MDOUBLE computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosterioGivenTerminalsPerBranch
+ (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
+ suffStatGlobalGamPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName)
+{
+ MDOUBLE res=0.0;
+ MDOUBLE resDXY, Down, Up, pij;
+ // Sum over root states:
+ // P(D, node=son, father=father) = sum_root freq(root) * down(root,father) * up(son) * Pij(father->son).
+ for (int stateAtRoot = 0; stateAtRoot<_sp->alphabetSize(); ++stateAtRoot){
+ Down = convert(sscDown.get(stateAtRoot,nodeId,fatherState));
+ Up = convert(sscUp.get(nodeId,sonState));
+ pij = pi.getPij(nodeId,fatherState,sonState);
+
+ res+=(_sp->freq(stateAtRoot)*
+ Down*
+ Up*
+ pij);
+ }
+ resDXY = res;
+ res/=LLData; // normalize the joint by P(D) to get the posterior
+// if(gainLossOptions::_printDEBUGinfo)
+// LOG(3,<<nodeName<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<LLData<<" prob "<<res<<endl);
+
+ // Numerical-noise guards: values slightly outside [0,1] are logged and
+ // clamped. Note the logged Down/Up/pij hold only the values from the last
+ // root-state iteration of the loop above.
+ if (res > 1 + 1e-4){
+ LOGnOUT(3,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<LLData<<" prob "<<res<<endl);
+ res = 1;
+ }
+ if (res<-1e-4){
+ LOGnOUT(3,<<nodeId<<" son "<<sonState<<" Down "<<Down<<" father "<<fatherState<<" Up "<<Up<<" pij "<<pij<<" resDXY "<<resDXY<<" LLData "<<LLData<<" prob "<<res<<endl);
+ res = 0;
+ }
+ // NOTE(review): unreachable as written -- res was already clamped into
+ // [0,1] above with looser (1e-4) thresholds, so these tighter (1e-6)
+ // bounds can never fire. Possibly this check was meant to precede the
+ // clamping; confirm against upstream FastML before changing behavior.
+ if ((res > 1 + 0.000001) || (res<-0.000001)){
+ string err = "Error in computePosteriorExpectationOfSubstitutions_nonReversibleSp::computePosterioGivenTerminalsPerBranch, non probability value ";
+ err+=double2string(res);
+ err+=" at node ";
+ err+=int2string(nodeId);
+ err+= " sonState ";
+ err+= int2string(sonState);
+ err+= " fatherState ";
+ err+= int2string(fatherState);
+ errorMsg::reportError(err);
+ }
+ return res;
+}
\ No newline at end of file
diff --git a/libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.h b/libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.h
new file mode 100644
index 0000000..fea0ff4
--- /dev/null
+++ b/libs/phylogeny/computePosteriorExpectationOfSubstitutions_nonReversibleSp.h
@@ -0,0 +1,22 @@
+#ifndef ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS_NONREVERSIBLESP
+#define ___COMPUTE_POSTERIOR_EXPECTATION_OF_SUBSTITUTIONS_NONREVERSIBLESP
+
+#include "computePosteriorExpectationOfSubstitutions.h"
+
+// Non-reversible specialization of computePosteriorExpectationOfSubstitutions.
+// For a non-reversible process the posterior must condition on the state at
+// the root, so the "down" sufficient statistics carry an extra root-state
+// dimension (suffStatGlobalGamPos instead of suffStatGlobalHomPos).
+class computePosteriorExpectationOfSubstitutions_nonReversibleSp:public computePosteriorExpectationOfSubstitutions {
+public:
+ // Takes the process by const pointer, matching the base-class constructor
+ // (the original non-const pointer was an inconsistency; callers passing a
+ // non-const pointer still compile).
+ explicit computePosteriorExpectationOfSubstitutions_nonReversibleSp(const tree &tr, const sequenceContainer &sc, const stochasticProcess *sp):computePosteriorExpectationOfSubstitutions(tr,sc,sp){}
+ virtual ~computePosteriorExpectationOfSubstitutions_nonReversibleSp(){};
+
+ // Overrides the base implementation; fills
+ // posteriorPerNodePer2States[nodeId][fatherState][sonState] for position pos.
+ void computePosteriorOfChangeGivenTerminals(VVVdouble &posteriorPerNodePer2States, int pos);
+
+private:
+ // Hides (does not override) the base-class private helper: here the "down"
+ // statistics are per-root-state (suffStatGlobalGamPos).
+ MDOUBLE computePosterioGivenTerminalsPerBranch (int nodeId,int sonState, int fatherState,suffStatGlobalHomPos &sscUp,
+ suffStatGlobalGamPos &sscDown,computePijHom &pi, MDOUBLE &LLData, const string nodeName);
+
+};
+
+#endif
+
+
+
diff --git a/libs/phylogeny/computeSubstitutionCounts.cpp b/libs/phylogeny/computeSubstitutionCounts.cpp
new file mode 100644
index 0000000..e83c6a7
--- /dev/null
+++ b/libs/phylogeny/computeSubstitutionCounts.cpp
@@ -0,0 +1,378 @@
+#include "computeSubstitutionCounts.h"
+#include "computePosteriorExpectationOfSubstitutions.h"
+#include "computePosteriorExpectationOfSubstitutions_nonReversibleSp.h"
+#include "multipleStochasticProcess.h"
+#include "matrixUtils.h"
+#include "simulateJumps.h"
+#include "simulateCodonsJumps.h"
+#include "simulateJumpsAbstract.h"
+#include "treeIt.h"
+#include "treeUtil.h"
+
+/********************************************************************************************
+computeSubstitutionCounts
+Validates the multiple-stochastic-process container and caches the alphabet
+size of its first process. MultSpPtr must be non-NULL and non-empty.
+*********************************************************************************************/
+computeSubstitutionCounts::computeSubstitutionCounts(const sequenceContainer& sc, const tree& tr, multipleStochasticProcess* MultSpPtr, string& outDir, VVVdouble& LpostPerSpPerCat, const int simulationsIterNum, const MDOUBLE probCutOffSum, bool isSilent):
+// Initializer list reordered to match member declaration order (silences
+// -Wreorder); _refSeq is now explicitly NULL-initialized instead of being
+// left indeterminate.
+_tr(tr),_sc(sc),_pMSp(MultSpPtr),_refSeq(NULL),_outDir(outDir),_isSilent(isSilent),_simulationsIterNum(simulationsIterNum),_probCutOffSum(probCutOffSum),_LpostPerSpPerCat(LpostPerSpPerCat)
+{
+ // Check the pointer itself before dereferencing it (the original crashed
+ // on a NULL MultSpPtr instead of reporting the error).
+ if(!_pMSp || !_pMSp->getSPVecSize()){
+ errorMsg::reportError("Trying to call computeSubstitutionCounts with an empty multipleStochasticProcess object at computeSubstitutionCounts::computeSubstitutionCounts");
+ }
+ _alphabetSize = _pMSp->getSp(0)->alphabetSize();
+}
+
+// NOTE(review): this assignment operator copies nothing -- the class holds
+// const members (_tr, _sc) that cannot be reassigned, so only the
+// self-assignment guard remains. Consequently the copy constructor (which
+// forwards here) leaves the new object default-initialized. Verify that no
+// caller relies on copying these objects.
+computeSubstitutionCounts& computeSubstitutionCounts::operator=(const computeSubstitutionCounts &other){
+ if (this != &other) { // Check for self-assignment
+ }
+ return *this;
+}
+
+
+/********************************************************************************************
+Allocates all per-position result containers, then launches the
+per-process/per-category posterior computation.
+*********************************************************************************************/
+void computeSubstitutionCounts::run()
+{
+ const int seqLen = _sc.seqLen();
+ // One per-position accumulator vector for every (father,son) state pair.
+ for(int father = 0; father < _alphabetSize; ++father){
+ for(int son = 0; son < _alphabetSize; ++son){
+ //if(son == father) continue;
+ _expMap_father2son[father][son].resize(seqLen,0);
+ _probMap_father2son[father][son].resize(seqLen,0);
+ }
+ }
+
+ // [pos][nodeId][fatherState][sonState] result tensors.
+ resize_VVVV(seqLen,_tr.getNodesNum(),_alphabetSize,_alphabetSize,_jointProb_PosNodeXY);
+ resize_VVVV(seqLen,_tr.getNodesNum(),_alphabetSize,_alphabetSize,_probChanges_PosNodeXY);
+ resize_VVVV(seqLen,_tr.getNodesNum(),_alphabetSize,_alphabetSize,_expChanges_PosNodeXY);
+
+ computePosteriorOfChangeGivenTerminalsPerSpPerCat(); // GLM - multiple SPs
+}
+
+/********************************************************************************************
+Main work loop: for every stochastic process and every rate category, simulate
+jump statistics, compute per-position posteriors of change, and accumulate the
+expectation/probability results weighted by the posterior probability of the
+category at that position (_LpostPerSpPerCat[sp][cat][pos]).
+*********************************************************************************************/
+void computeSubstitutionCounts::computePosteriorOfChangeGivenTerminalsPerSpPerCat()
+{
+ int numOfSPs = _pMSp->getSPVecSize();
+
+ // per Sp
+ for (int spIndex=0; spIndex < numOfSPs; ++spIndex) {
+ // Per RateCategory -- All the computations are done while looping over rate categories
+ stochasticProcess * currentSp = _pMSp->getSp(spIndex);
+ int numOfRateCategories = currentSp->categories();
+ for (int rateCategIndex=0 ; rateCategIndex < numOfRateCategories;++rateCategIndex)
+ {
+ // Work on a copy of the tree scaled by the category rate; the rate is
+ // floored to avoid collapsing all branch lengths to zero.
+ tree copy_et = _tr;
+ MDOUBLE rateCategVal = currentSp->rates(rateCategIndex);
+ MDOUBLE minimumRateCategVal = 0.0000001;
+ MDOUBLE rate2multiply = max(rateCategVal,minimumRateCategVal);
+ if(rateCategVal < minimumRateCategVal){
+ LOGnOUT(4, <<" >>> NOTE: the rate category "<<rateCategVal<<" is too low for computePosteriorExpectationOfChangePerSite"<<endl); }
+ copy_et.multipleAllBranchesByFactor(rate2multiply);
+ //if(!_isSilent)
+ //LOGnOUT(4, <<"running "<<gainLossOptions::_numOfSimulationsForPotExp<<" simulations for rate "<<rate2multiply<<endl);
+ // Alphabet size 61 indicates a codon model: use the codon-specific simulator.
+ simulateJumpsAbstract* simPerRateCategory;
+ if(_alphabetSize == 61)
+ simPerRateCategory = new simulateCodonsJumps(copy_et,*currentSp,_alphabetSize);
+ else
+ simPerRateCategory = new simulateJumps(copy_et,*currentSp,_alphabetSize);
+
+ simPerRateCategory->runSimulation(_simulationsIterNum);
+ if(!_isSilent)
+ LOGnOUT(4,<<"finished simulations"<<endl);
+
+ // Per POS
+ for (int pos = 0; pos <_sc.seqLen(); ++pos)
+ {
+ LOG(6,<<"pos "<<pos+1<<endl);
+ // I) computePosteriorOfChangeGivenTerminals
+ VVVdouble posteriorsGivenTerminalsPerRateCategoryPerPos;
+ computePosteriorExpectationOfSubstitutions* cpesPerRateCategoryPerPos ;
+ // The non-reversible variant additionally conditions on the root state.
+ if(currentSp->isReversible())
+ cpesPerRateCategoryPerPos = new computePosteriorExpectationOfSubstitutions(copy_et,_sc,currentSp); // Per POS,CAT
+ else
+ cpesPerRateCategoryPerPos = new computePosteriorExpectationOfSubstitutions_nonReversibleSp(copy_et,_sc,currentSp); // Per POS,CAT
+ cpesPerRateCategoryPerPos->computePosteriorOfChangeGivenTerminals(posteriorsGivenTerminalsPerRateCategoryPerPos,pos);
+
+ // II) Exp - take in account both: 1) simulations 2) posteriorsGivenTerminal
+ VVVdouble expChangesForBranchPerRateCategoryPerPos; // Sim+Exp
+ resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,expChangesForBranchPerRateCategoryPerPos);
+
+ VVdouble expVV = cpesPerRateCategoryPerPos->computeExpectationAcrossTree(*simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,
+ expChangesForBranchPerRateCategoryPerPos); // Per POS
+ // Weight this category's contribution by its posterior at the position.
+ for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
+ for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
+ if(sonStateIndex == fatherStateIndex) continue;
+ _expMap_father2son[fatherStateIndex][sonStateIndex][pos] += expVV[fatherStateIndex][sonStateIndex]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
+ }
+ }
+
+ // III) Sim - take in account both: 1) simulations 2) posteriorsGivenTerminal
+ VVVdouble probChangesForBranchPerRateCategoryPerPos; // Sim+Prob
+ resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,probChangesForBranchPerRateCategoryPerPos);
+ VVdouble probVV = cpesPerRateCategoryPerPos->computePosteriorAcrossTree(*simPerRateCategory,posteriorsGivenTerminalsPerRateCategoryPerPos,probChangesForBranchPerRateCategoryPerPos);
+ for(int fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
+ for(int sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
+ if(sonStateIndex == fatherStateIndex) continue;
+ _probMap_father2son[fatherStateIndex][sonStateIndex][pos] += probVV[fatherStateIndex][sonStateIndex]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
+ }
+ }
+ // Store all information PerCat,PerPOS
+ for(int i=0;i<_probChanges_PosNodeXY[pos].size();++i){ // nodeId
+ for(int j=0;j<_probChanges_PosNodeXY[pos][i].size();++j){ // fatherState
+ for(int k=0;k<_probChanges_PosNodeXY[pos][i][j].size();++k){ // sonState
+ _jointProb_PosNodeXY[pos][i][j][k] += posteriorsGivenTerminalsPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
+ _probChanges_PosNodeXY[pos][i][j][k] += probChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
+ _expChanges_PosNodeXY[pos][i][j][k] += expChangesForBranchPerRateCategoryPerPos[i][j][k]*_LpostPerSpPerCat[spIndex][rateCategIndex][pos];
+ }
+ }
+ }
+ delete(cpesPerRateCategoryPerPos);
+ }
+ delete(simPerRateCategory);
+ // Per POS
+ }
+ // per rateCat
+ }
+ // Per Sp
+}
+
+
+
+/********************************************************************************************
+printProbExp()
+Writes two per-position tables -- the expectation and the probability of change
+summed over all branches -- from the accumulated _expMap/_probMap members.
+*********************************************************************************************/
+void computeSubstitutionCounts::printProbExp()
+{
+
+ const string expPath = _outDir + "//" + "posteriorExpectationOfChange.txt";
+ ofstream expStream(expPath.c_str());
+ const string probPath = _outDir + "//" + "posteriorProbabilityOfChange.txt";
+ ofstream probStream(probPath.c_str());
+
+ // Header row: one column per (father,son) event.
+ expStream<<"#POS"<<"\t";
+ probStream<<"#POS"<<"\t";
+ for (int father = 0; father < _alphabetSize; ++father){
+ for (int son = 0; son < _alphabetSize; ++son){
+ if(son == father) continue;
+ expStream<<_sc.getAlphabet()->fromInt(father)<<"->"<<_sc.getAlphabet()->fromInt(son)<<"\t";
+ probStream<<_sc.getAlphabet()->fromInt(father)<<"->"<<_sc.getAlphabet()->fromInt(son)<<"\t";
+ }
+ }
+ expStream<<endl;
+ probStream<<endl;
+
+ // One row per alignment position (1-based in the output).
+ for (int pos = 0; pos < _sc.seqLen(); ++pos){
+ expStream<<pos+1<<"\t";
+ probStream<<pos+1<<"\t";
+ for (int father = 0; father < _alphabetSize; ++father){
+ for (int son = 0; son < _alphabetSize; ++son){
+ if(son == father) continue;//ofir, note the change in print format
+ expStream<<_expMap_father2son[father][son][pos]<<"\t";
+ probStream<<_probMap_father2son[father][son][pos]<<"\t";
+ }
+ }
+ expStream<<endl;
+ probStream<<endl;
+ }
+ expStream.close();
+ probStream.close();
+}
+
+
+/********************************************************************************************
+printProbabilityPerPosPerBranch 1
+Opens the two output files (detailed per-branch listing and per-position
+summary), writes their headers, and delegates every position to the
+per-position overload.
+*********************************************************************************************/
+void computeSubstitutionCounts::printProbabilityPerPosPerBranch()
+{
+ const string detailedPath = _outDir + "//" + "probabilityPerPosPerBranch.txt";
+ ofstream detailedOut(detailedPath.c_str());
+ detailedOut<<"# print values over probCutOff "<<_probCutOffSum<<endl;
+ detailedOut<<"#Event"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"probability"<<endl;
+
+ const string summaryPath = _outDir + "//" + "probabilityPerPos.txt";
+ ofstream summaryOut(summaryPath.c_str());
+ summaryOut<<"# print values over probCutOff "<<_probCutOffSum<<endl;
+ summaryOut<<"#POS"<<"\t";
+ for(int father = 0; father < _alphabetSize; ++father){
+ for(int son = 0; son < _alphabetSize; ++son){
+ if(son == father) continue;
+ summaryOut<<"prob"<<_sc.getAlphabet()->fromInt(father)<<"->"<<_sc.getAlphabet()->fromInt(son)<<"\t";
+ }
+ }
+ summaryOut<<endl;
+
+ for (int pos = 0; pos < _sc.seqLen(); ++pos)
+ printProbabilityPerPosPerBranch(pos, _probChanges_PosNodeXY[pos], detailedOut, summaryOut);
+}
+/********************************************************************************************
+printGainLossProbabilityPerPosPerBranch 1.1
+Prints, for position pos, every branch/event whose probability exceeds
+_probCutOffSum (detailed stream `out`), plus a per-event total over all
+branches (summary stream `outCount`).
+*********************************************************************************************/
+void computeSubstitutionCounts::printProbabilityPerPosPerBranch(int pos, VVVdouble& probChanges, ostream& out, ostream& outCount)
+{
+ VVdouble countFromFather2Son;
+ // Allocate the accumulator once up front. The original resized the rows
+ // inside the per-node loop, which was a no-op after the first node and
+ // obscured the intent.
+ countFromFather2Son.resize(_alphabetSize);
+ int fatherStateIndex,sonStateIndex;
+ for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex)
+ countFromFather2Son[fatherStateIndex].resize(_alphabetSize,0);
+ treeIterTopDownConst tIt(_tr);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
+ for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
+ if(sonStateIndex == fatherStateIndex) continue;
+ if(probChanges[mynode->id()][fatherStateIndex][sonStateIndex] > _probCutOffSum){//NIM
+ out<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<<pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistanceFromNode2ROOT(mynode)<<"\t"<<probChanges[mynode->id()][fatherStateIndex][sonStateIndex]<<endl;
+ countFromFather2Son[fatherStateIndex][sonStateIndex] += probChanges[mynode->id()][fatherStateIndex][sonStateIndex];
+ }
+ }
+ }
+ }
+ // Summary row: accumulated above-cutoff probability per event.
+ outCount<<pos+1<<"\t";
+ for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
+ for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
+ if(sonStateIndex == fatherStateIndex) continue;
+ //if(countFromFather2Son[fatherStateIndex][sonStateIndex] == 0) continue;//NIMROD
+ outCount<<countFromFather2Son[fatherStateIndex][sonStateIndex]<<"\t";
+ }
+ }
+ outCount<<endl;
+}
+
+
+
+/********************************************************************************************
+Sums the per-position expectation matrices over all positions and prints the
+per-branch totals to ExpectationPerBranch.txt.
+*********************************************************************************************/
+void computeSubstitutionCounts::printExpectationPerBranch()
+{
+ // ExpectationPerBranch
+ VVVdouble totalPerNode;
+ resize_VVV(_tr.getNodesNum(),_alphabetSize,_alphabetSize,totalPerNode);
+ for (int pos = 0; pos < _sc.seqLen(); ++pos){
+ const VVVdouble& perNode = _expChanges_PosNodeXY[pos];
+ for(int node = 0; node < perNode.size(); ++node)
+ for(int father = 0; father < perNode[node].size(); ++father)
+ for(int son = 0; son < perNode[node][father].size(); ++son)
+ totalPerNode[node][father][son] += perNode[node][father][son];
+ }
+ const string outPath = _outDir + "//" + "ExpectationPerBranch.txt";
+ ofstream outStream(outPath.c_str());
+ printExpectationPerBranch(totalPerNode,outStream);
+}
+/********************************************************************************************
+Writes one line per (branch, father->son) event with branch metadata and the
+accumulated expectation value.
+*********************************************************************************************/
+void computeSubstitutionCounts::printExpectationPerBranch(VVVdouble& expectChanges, ostream& out)
+{
+ out<<"#Event"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"expectation"<<endl;
+ treeIterTopDownConst treeIter(_tr);
+ for (tree::nodeP node = treeIter.first(); node != treeIter.end(); node = treeIter.next()) {
+ for(int father = 0; father < _alphabetSize; ++father){
+ for(int son = 0; son < _alphabetSize; ++son){
+ if(son == father) continue;
+ out<<_sc.getAlphabet()->fromInt(father)<<"->"<<_sc.getAlphabet()->fromInt(son)<<"\t"<<
+ node->name()<<"\t"<<node->dis2father()<<"\t"<<getDistanceFromNode2ROOT(node)<<"\t"<<expectChanges[node->id()][father][son]<<endl;
+ }
+ }
+ }
+}
+
+
+/********************************************************************************************
+Writes, for every alignment position, a tree file whose branch "bootstrap"
+values are the accumulated substitution expectations of that position.
+*********************************************************************************************/
+void computeSubstitutionCounts::printTreesWithExpectationValuesAsBP(int from,int to)
+{
+ // ExpectationPerPosPerBranch - Print Trees
+ Vstring nodeNames;
+ fillAllNodesNames(nodeNames,_tr);
+ createDir(_outDir, "TreesWithExpectationValuesAsBP");
+ for (int pos = 0; pos < _sc.seqLen(); ++pos){
+ const string treePath = _outDir + "//" + "TreesWithExpectationValuesAsBP" + "//" + "expTree" + int2string(pos+1) + ".ph";
+ ofstream treeStream(treePath.c_str());
+ printTreeWithValuesAsBP(treeStream,_tr,nodeNames,&_expChanges_PosNodeXY[pos],from,to);
+ }
+}
+
+/********************************************************************************************
+Writes, for every alignment position, a tree file whose branch "bootstrap"
+values are the accumulated substitution probabilities of that position.
+*********************************************************************************************/
+void computeSubstitutionCounts::printTreesWithProbabilityValuesAsBP(int from,int to)
+{
+ // ProbabilityPerPosPerBranch - Print Trees
+ Vstring nodeNames;
+ fillAllNodesNames(nodeNames,_tr);
+ createDir(_outDir, "TreesWithProbabilityValuesAsBP");
+ for (int pos = 0; pos < _sc.seqLen(); ++pos){
+ const string treePath = _outDir + "//" + "TreesWithProbabilityValuesAsBP"+ "//" + "probTree" + int2string(pos+1) + ".ph";
+ ofstream treeStream(treePath.c_str());
+ printTreeWithValuesAsBP(treeStream,_tr,nodeNames,&_probChanges_PosNodeXY[pos],from,to);
+ }
+}
+
+/********************************************************************************************
+printProbExpPerPosPerBranch 1
+produce 2 print files:
+1. print detailed file (out)
+2. print summary over all branches (outSum)
+*********************************************************************************************/
+// Opens the detailed and summary output files, writes their headers, and
+// delegates every alignment position to the per-position overload below.
+void computeSubstitutionCounts::printProbExpPerPosPerBranch(MDOUBLE probCutOff, MDOUBLE countsCutOff)
+{
+ string probExpPerPosPerBranch = _outDir + "//" + "expPerPosPerBranch.txt";
+ ofstream probExpPerPosPerBranchStream(probExpPerPosPerBranch.c_str());
+ probExpPerPosPerBranchStream<<"# print values over probCutOff "<<probCutOff<<endl;
+ probExpPerPosPerBranchStream<<"#Event"<<"\t"<<"POS"<<"\t"<<"branch"<<"\t"<<"branchLength"<<"\t"<<"distance2root"<<"\t"<<"probability"<<"\t"<<"expectation"<<endl;
+
+ string probExpPerPos = _outDir + "//" + "probExpCountPerPos.txt";
+ ofstream countProbPerPosStream(probExpPerPos.c_str());
+ countProbPerPosStream<<"# print count over probCutOff "<<countsCutOff<<endl;
+ countProbPerPosStream<<"#POS"<<"\t"<<"Event"<<"\t"<<"EventProb"<<"\t"<<"EventExp"<<"\t"<<"EventCount"<<endl;
+
+ for (int pos = 0; pos <_sc.seqLen(); ++pos){
+ printProbExpPerPosPerBranch(pos, probCutOff,countsCutOff, _probChanges_PosNodeXY[pos],_expChanges_PosNodeXY[pos],probExpPerPosPerBranchStream,countProbPerPosStream);
+ }
+}
+/********************************************************************************************
+printGainLossProbExpPerPosPerBranch 1.1
+Get pos, and iterate over all branches:
+1. print detailed file (out)
+2. print summary over all branches (outSum)
+*********************************************************************************************/
+// Prints one detailed line per (branch, father->son) event, then one summary
+// line per event aggregated over all branches. countCutOff selects which
+// per-branch probabilities count as an "event" in the count column.
+// NOTE(review): probCutOff is echoed in the caller's header but never applied
+// here -- every event line is printed; confirm this is intended.
+void computeSubstitutionCounts::printProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outSum)
+{
+ VVdouble probFather2Son,expFather2Son;
+ VVint countFather2Son;
+ // Allocate the accumulators once before walking the tree. The original
+ // re-resized them inside the node loop (a no-op after the first node) and
+ // would have read unallocated rows in the summary loop on an empty tree.
+ probFather2Son.resize(_alphabetSize);
+ expFather2Son.resize(_alphabetSize);
+ countFather2Son.resize(_alphabetSize);
+ int fatherStateIndex,sonStateIndex;
+ for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
+ probFather2Son[fatherStateIndex].resize(_alphabetSize,0);
+ expFather2Son[fatherStateIndex].resize(_alphabetSize,0);
+ countFather2Son[fatherStateIndex].resize(_alphabetSize,0);
+ }
+
+ treeIterTopDownConst tIt(_tr);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
+ for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
+ if(sonStateIndex == fatherStateIndex) continue;
+ out<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<<
+ pos+1<<"\t"<<mynode->name()<<"\t"<<mynode->dis2father()<<"\t"<<getDistanceFromNode2ROOT(mynode)<<"\t"<<probChanges[mynode->id()][fatherStateIndex][sonStateIndex]<<"\t"<<expChanges[mynode->id()][fatherStateIndex][sonStateIndex]<<endl;
+ probFather2Son[fatherStateIndex][sonStateIndex] += probChanges[mynode->id()][fatherStateIndex][sonStateIndex];
+ expFather2Son[fatherStateIndex][sonStateIndex] += expChanges[mynode->id()][fatherStateIndex][sonStateIndex];
+ if (probChanges[mynode->id()][fatherStateIndex][sonStateIndex] > countCutOff)
+ countFather2Son[fatherStateIndex][sonStateIndex] += 1;
+ }
+ }
+ }
+ // Summary: per-event totals across all branches.
+ for(fatherStateIndex = 0;fatherStateIndex < _alphabetSize;++fatherStateIndex){
+ for(sonStateIndex = 0;sonStateIndex < _alphabetSize;++sonStateIndex){
+ if(sonStateIndex == fatherStateIndex) continue;
+ outSum<<pos+1<<"\t"<<_sc.getAlphabet()->fromInt(fatherStateIndex)<<"->"<<_sc.getAlphabet()->fromInt(sonStateIndex)<<"\t"<<
+ probFather2Son[fatherStateIndex][sonStateIndex]<<"\t"<<expFather2Son[fatherStateIndex][sonStateIndex]<<"\t"<<countFather2Son[fatherStateIndex][sonStateIndex]<<endl;
+ }
+ }
+}
+
diff --git a/libs/phylogeny/computeSubstitutionCounts.h b/libs/phylogeny/computeSubstitutionCounts.h
new file mode 100644
index 0000000..719db1a
--- /dev/null
+++ b/libs/phylogeny/computeSubstitutionCounts.h
@@ -0,0 +1,71 @@
+#ifndef ___COMPUTE_SUBSTITUTION_COUNTS
+#define ___COMPUTE_SUBSTITUTION_COUNTS
+
+#include "definitions.h"
+#include "replacementModel.h"
+#include "sequenceContainer.h"
+#include "tree.h"
+#include <map>
+
+class multipleStochasticProcess;
+// Computes, for every alignment position and branch, the posterior
+// probability and expectation of substitutions between states (combining
+// analytic posteriors with simulations; see computeSubstitutionCounts.cpp),
+// and provides assorted text/tree report writers.
+class computeSubstitutionCounts{
+public:
+ explicit computeSubstitutionCounts(const sequenceContainer& sc, const tree& tr, multipleStochasticProcess* MultSpPtr, string& outDir, VVVdouble& LpostPerSpPerCat, const int simulationsIterNum=1000, const MDOUBLE probCutOffSum=0.5, bool isSilent=false);//DEBUG: Change simulationsIterNum back to 10000
+
+ // NOTE(review): operator= copies nothing (see .cpp; the const members
+ // cannot be reassigned), so this copy constructor leaves the new object
+ // default-initialized. Avoid copying these objects.
+ computeSubstitutionCounts(const computeSubstitutionCounts& other) {*this = other;}
+ computeSubstitutionCounts& operator=(const computeSubstitutionCounts &other);
+ virtual ~computeSubstitutionCounts() {}
+ // Allocates all result containers and runs the full computation.
+ void run();
+ void computePosteriorOfChangeGivenTerminalsPerSpPerCat();
+
+ // Report writers; run() must have been called first.
+ void printProbExp();
+ void printProbabilityPerPosPerBranch();
+ void printProbExpPerPosPerBranch(MDOUBLE probCutOff =0.5,MDOUBLE countsCutOff= 0.2);
+ void printExpectationPerBranch();
+
+ void printTreesWithExpectationValuesAsBP(int from,int to);
+ void printTreesWithProbabilityValuesAsBP(int from,int to);
+
+ void printProbabilityPerPosPerBranch(int pos, VVVdouble& probChanges, ostream& out, ostream& outCount);
+ void printExpectationPerBranch(VVVdouble& expectChanges, ostream& out);
+ void printProbExpPerPosPerBranch(int pos, MDOUBLE probCutOff, MDOUBLE countCutOff, VVVdouble& probChanges, VVVdouble& expChanges, ostream& out, ostream& outCount);
+
+
+ // Accessors return copies of the accumulated results.
+ map<int,map<int,vector<double> > > get_expMap_father2son() {return _expMap_father2son;};
+ map<int,map<int,vector<double> > > get_probMap_father2son() {return _probMap_father2son;};
+
+ VVVVdouble getExpChanges(){return _expChanges_PosNodeXY;}; // expChanges_PosNodeXY[pos][nodeID][x][y]
+ VVVVdouble getProbChanges(){return _probChanges_PosNodeXY;}; // probChangesForBranch[pos][nodeID][x][y]
+ VVVVdouble getJointProb(){return _jointProb_PosNodeXY;}; // _jointProb_PosNodeXY[pos][nodeID][x][y]
+
+
+protected:
+//members
+ int _alphabetSize;
+ const tree _tr;
+ const sequenceContainer _sc;
+
+ multipleStochasticProcess* _pMSp;
+
+ // NOTE(review): _refSeq is not initialized by the constructor in the .cpp
+ // and is not assigned anywhere in this translation unit; verify before use.
+ sequence* _refSeq; // the reference sequence
+ string _outDir;
+ bool _isSilent;
+ int _simulationsIterNum;
+ MDOUBLE _probCutOffSum;
+
+ VVdouble _LpostPerCat; // the posterior probability for each position for each rate category
+ VVVdouble _LpostPerSpPerCat; // _LpostPerSpPerCat[sp][rateCat][pos]
+
+
+ map<int,map<int,vector<double> > > _expMap_father2son;
+
+ map<int,map<int,vector<double> > > _probMap_father2son;
+
+ //VVVVdouble _posteriorsGivenTerminals; // posteriorsGivenTerminals[pos][nodeID][x][y]
+ VVVVdouble _probChanges_PosNodeXY; // probChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations
+ VVVVdouble _expChanges_PosNodeXY; // expChanges_PosNodeXY[pos][nodeID][fatherState][sonState] - after simulations and postProb
+ VVVVdouble _jointProb_PosNodeXY; // probJoint_PosNodeXY[pos][nodeID][fatherState][sonState] - after computePosteriorOfChangeGivenTerminals
+
+};
+
+#endif
diff --git a/libs/phylogeny/computeUpAlg.cpp b/libs/phylogeny/computeUpAlg.cpp
new file mode 100644
index 0000000..fdbd062
--- /dev/null
+++ b/libs/phylogeny/computeUpAlg.cpp
@@ -0,0 +1,157 @@
+// $Id: computeUpAlg.cpp 5988 2009-03-18 18:20:05Z itaymay $
+
+#include "definitions.h"
+#include "computeUpAlg.h"
+#include "treeIt.h"
+#include "seqContainerTreeMap.h"
+#include "logFile.h"
+#include <iostream>
+#include <cassert>
+using namespace std;
+
+// Gamma-model wrapper: runs the homogeneous up algorithm once per
+// (position, rate-category) pair, filling ssc[pos][category].
+void computeUpAlg::fillComputeUp(const tree& et,
+	const sequenceContainer & sc,
+	const computePijGam& pi,
+	suffStatGlobalGam& ssc) {
+	ssc.allocatePlace(sc.seqLen(),pi.categories(),et.getNodesNum(),pi.alphabetSize());
+	computeUpAlg homAlg;
+	for (int position = 0; position < sc.seqLen(); ++position) {
+		for (int category = 0; category < pi.categories(); ++category) {
+			homAlg.fillComputeUp(et,sc,position,pi[category],ssc[position][category]);
+		}
+	}
+}
+
+// Felsenstein pruning ("up") algorithm for one alignment position under a
+// homogeneous model with precomputed Pij tables. After the call,
+// ssc(node,letter) holds the probability of the observed data in the subtree
+// of `node` given that `node` is in state `letter`.
+void computeUpAlg::fillComputeUp(const tree& et,
+	const sequenceContainer& sc,
+	const int pos,
+	const computePijHom& pi,
+	suffStatGlobalHomPos& ssc) {
+
+	seqContainerTreeMap sctm(sc,et);
+
+	ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize());
+	treeIterDownTopConst tIt(et);
+	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+		int letter;
+		if (mynode->isLeaf()) {
+			// The node-to-sequence lookup does not depend on the letter;
+			// hoisted out of the loop instead of repeating it per letter.
+			const int seqID = sctm.seqIdOfNodeI(mynode->id());
+			for(letter=0; letter<pi.alphabetSize();letter++) {
+				doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
+				ssc.set(mynode->id(),letter,val);
+			}
+		}
+		else {
+			// Internal node: product over sons of the per-son likelihoods.
+			for(letter=0; letter<pi.alphabetSize();letter++) {
+				doubleRep total_prob=1.0;
+				for(int i=0; i < mynode->getNumberOfSons();++i){
+					doubleRep prob=0.0;
+					for(int letInSon=0; letInSon<pi.alphabetSize();letInSon++) {
+						prob += ssc.get(mynode->getSon(i)->id(), letInSon)*
+							pi.getPij(mynode->getSon(i)->id(),letter,letInSon);
+					}
+					total_prob*=prob;
+				}
+				ssc.set(mynode->id(),letter,total_prob);
+			}
+		}
+	}
+}
+/*
+void computeUpAlg::fillComputeUp(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const stochasticProcess& sp,
+ suffStatGlobalHomPos& ssc) {
+
+ seqContainerTreeMap sctm(sc,et);
+
+ ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize());
+ treeIterDownTopConst tIt(et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ int letter;
+ if (mynode->isLeaf()) {// leaf
+ for(letter=0; letter<sp.alphabetSize();letter++) {
+ const int seqID = sctm.seqIdOfNodeI(mynode->id());
+ MDOUBLE val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
+ ssc.set(mynode->id(),letter,val);
+ }
+ }
+ else {
+ for(letter=0; letter<sp.alphabetSize();letter++) {
+ MDOUBLE total_prob=1.0;
+ for(int i=0; i < mynode->getNumberOfSons();++i){
+ MDOUBLE prob=0.0;
+ for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
+ prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
+ sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*sp.getGlobalRate());// taking care of the glubal is new.
+ }
+ assert(prob>=0.0);
+ total_prob*=prob;
+ }
+ ssc.set(mynode->id(),letter,total_prob);
+ }
+ }
+ }
+}
+*/
+// Like the homogeneous fillComputeUp, but evaluates the stochastic process
+// directly with a single global rate gRate instead of precomputed Pij tables.
+// Refuses gamma (multi-category) models because branch lengths are not scaled
+// per category here. If every letter at some internal node ends with total
+// probability 0, diagnostics are logged and the function returns early,
+// leaving ssc only partially filled.
+void computeUpAlg::fillComputeUpSpecificGlobalRate(const tree& et,
+	const sequenceContainer& sc,
+	const int pos,
+	const stochasticProcess& sp,
+	suffStatGlobalHomPos& ssc,
+	const MDOUBLE gRate) {
+	if (sp.categories() >1) {// because we do not multiply all branch lengths by the rate[categories])
+		errorMsg::reportError("the function fillComputeUpSpecificGlobalRate should not be used with a gamma model");
+	}
+
+	seqContainerTreeMap sctm(sc,et);
+
+	ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize());
+	treeIterDownTopConst tIt(et);
+	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+#ifdef VERBOS
+		LOG(15,<<endl<<endl<<"doing node: "<<mynode->name()<<endl);
+#endif
+		int letter;
+		if (mynode->isLeaf()) {
+			// The node-to-sequence lookup is letter-independent; look it up
+			// once per node instead of once per letter.
+			const int seqID = sctm.seqIdOfNodeI(mynode->id());
+			for(letter=0; letter<sp.alphabetSize();letter++) {
+				doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
+				ssc.set(mynode->id(),letter,val);
+			}
+		}
+		else {
+			int letterWithTotalProbEqZero =0;
+			for(letter=0; letter<sp.alphabetSize();letter++) {
+				doubleRep total_prob=1.0;
+				for(int i=0; i < mynode->getNumberOfSons();++i){
+					doubleRep prob=0.0;
+					for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
+						assert(ssc.get(mynode->getSon(i)->id(),letInSon)>=0);
+						assert(sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate)>=0);
+						prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
+							sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate);
+					}
+					assert(prob>=0.0);
+					total_prob*=prob;
+				}
+				if (total_prob==0.0) ++letterWithTotalProbEqZero;
+
+				ssc.set(mynode->id(),letter,total_prob);
+			} // end of else
+			// Underflow diagnostic: all letters vanished at this node.
+			if (letterWithTotalProbEqZero == sp.alphabetSize() && (mynode->getNumberOfSons() > 0)) {
+				LOG(5,<<" total prob =0");
+				for (int z=0; z <mynode->getNumberOfSons(); ++z) {
+					LOG(5,<<"son "<<z<<" is "<<mynode->getSon(z)->name()<<endl);
+					LOG(5,<<"dis2father is "<<mynode->getSon(z)->dis2father()<<endl);
+					for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
+						LOG(5,<<"let = "<<letInSon<<endl);
+						LOG(5,<<"ssc.get(mynode->getSon(z)->id(),letInSon) = "<<convert(ssc.get(mynode->getSon(z)->id(),letInSon))<<endl);
+					}
+				}
+				return;
+			}
+		}
+	}
+}
diff --git a/libs/phylogeny/computeUpAlg.h b/libs/phylogeny/computeUpAlg.h
new file mode 100644
index 0000000..000b2cc
--- /dev/null
+++ b/libs/phylogeny/computeUpAlg.h
@@ -0,0 +1,67 @@
+// $Id: computeUpAlg.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___COMPUTE_UP_ALG
+#define ___COMPUTE_UP_ALG
+
+#include "definitions.h"
+#include "tree.h"
+#include "suffStatComponent.h"
+#include "sequenceContainer.h"
+#include "computePijComponent.h"
+
+
// Implements the "up" part of Felsenstein's pruning algorithm: for every
// node and letter (and rate category in the Gam variants) it computes the
// probability of the observed data in the subtree below that node, given
// the letter at the node. Results are stored in the suffStat* containers.
class computeUpAlg {
public:
	// Single position; per-branch transition probabilities precomputed in pi.
	void fillComputeUp(const tree& et,
		const sequenceContainer& sc,
		const int pos,
		const computePijHom& pi,
		suffStatGlobalHomPos& ssc);

	// All positions and all rate categories at once.
	void fillComputeUp(const tree& et,
		const sequenceContainer & sc,
		const computePijGam& pi,
		suffStatGlobalGam& ssc);

	/*void fillComputeUp(const tree& et, // not to be used at all. problematic in case of a gamma function.
		const sequenceContainer& sc,
		const int pos,
		const stochasticProcess& sp,
		suffStatGlobalHomPos& ssc);*/

	/*void fillComputeUp(const tree& et, // not to be used, accept for debuging (very slow func.)
		const sequenceContainer& sc,
		const stochasticProcess& sp,
		suffStatGlobalGam& ssc);*/

	// Single position with an explicit global rate multiplier applied to every
	// branch length (the .cpp rejects gamma-category stochastic processes).
	void fillComputeUpSpecificGlobalRate(const tree& et,
		const sequenceContainer& sc,
		const int pos,
		const stochasticProcess& sp,
		suffStatGlobalHomPos& ssc,
		const MDOUBLE gRate);

// my attemp to add factors
	// The *Factors variants additionally rescale each node's probabilities by
	// a power of ten (recorded per node in 'factors') to avoid numerical
	// underflow on large trees.
	void fillComputeUpWithFactors(const tree& et,
		const sequenceContainer& sc,
		const int pos,
		const computePijHom& pi,
		suffStatGlobalHomPos& ssc,
		vector<MDOUBLE>& factors);
	void fillComputeUpWithFactors(const tree& et,
		const sequenceContainer& sc,
		const int pos,
		const stochasticProcess& sp,
		suffStatGlobalHomPos& ssc,
		vector<MDOUBLE>& factors);
	void fillComputeUpSpecificGlobalRateFactors(const tree& et,
		const sequenceContainer& sc,
		const int pos,
		const stochasticProcess& sp,
		suffStatGlobalHomPos& ssc,
		const MDOUBLE gRate,
		vector<MDOUBLE>& factors);
};
+#endif
+
+
diff --git a/libs/phylogeny/computeUpAlgFactors.cpp b/libs/phylogeny/computeUpAlgFactors.cpp
new file mode 100644
index 0000000..829d951
--- /dev/null
+++ b/libs/phylogeny/computeUpAlgFactors.cpp
@@ -0,0 +1,190 @@
+// $Id: computeUpAlgFactors.cpp 1738 2007-02-26 13:49:16Z itaymay $
+
+#include "definitions.h"
+#include "computeUpAlg.h"
+#include "seqContainerTreeMap.h"
+#include "logFile.h"
+#include <iostream>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+using namespace std;
+
+void computeNodeFactorAndSetSsc(MDOUBLE & minFactor,suffStatGlobalHomPos& ssc, int nodeId, const int alphSize){
+ // given a number = probability (val), it is changed to a new number which is 10 to the power of factor + val.
+ // for example if val = 0.001, it is changed to 0.1 and factor 2.
+ minFactor=100000;
+ for (int i=0; i < alphSize; ++i) {
+ MDOUBLE tmpfactor=0;
+ doubleRep val = ssc.get(nodeId,i);
+ if (val >0) {
+ while (val < 0.1) {
+ val *=10;
+ tmpfactor++;
+ }
+ }
+ else tmpfactor=minFactor;
+ if (tmpfactor<minFactor) minFactor=tmpfactor;
+ }
+ for (int j=0; j < alphSize; ++j) {
+ doubleRep tmp = ssc.get(nodeId,j);
+ tmp = tmp * pow(static_cast<MDOUBLE>(10.0),minFactor);
+ ssc.set(nodeId,j,tmp);
+ }
+}
+
+void computeUpAlg::fillComputeUpWithFactors(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalHomPos& ssc,
+ vector<MDOUBLE>& factors) {
+ factors.resize(et.getNodesNum(),0.0);
+ seqContainerTreeMap sctm(sc,et);
+
+ ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize());
+ treeIterDownTopConst tIt(et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ int letter;
+ if (mynode->getNumberOfSons() == 0) {// leaf
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ const int seqID = sctm.seqIdOfNodeI(mynode->id());
+ doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
+ ssc.set(mynode->id(),letter,val);
+ }
+ computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),pi.alphabetSize());
+ }
+ else {
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ doubleRep total_prob=1.0;
+ for(int i=0; i < mynode->getNumberOfSons(); ++i){
+ doubleRep prob=0.0;
+ for(int letInSon=0; letInSon<pi.alphabetSize();letInSon++) {
+ prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
+ pi.getPij(mynode->getSon(i)->id(),letter,letInSon);
+ }
+ total_prob*=prob;
+ }
+ ssc.set(mynode->id(),letter,total_prob);
+ }
+ computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),pi.alphabetSize());
+ for(int k=0; k < mynode->getNumberOfSons();++k) {
+ factors[mynode->id()]+=factors[mynode->getSon(k)->id()];
+ }
+ }
+ }
+}
+
void computeUpAlg::fillComputeUpWithFactors(const tree& et,
	const sequenceContainer& sc,
	const int pos,
	const stochasticProcess& sp,
	suffStatGlobalHomPos& ssc,
	vector<MDOUBLE>& factors) {
	// Felsenstein pruning for one position with per-node power-of-ten
	// rescaling (accumulated into factors). Unlike the computePijHom overload,
	// transition probabilities are computed on the fly from sp.
	factors.resize(et.getNodesNum(),0.0);
	seqContainerTreeMap sctm(sc,et);

	ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize());
	treeIterDownTopConst tIt(et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		int letter;
		if (mynode->getNumberOfSons() == 0) {// leaf
			for(letter=0; letter<sp.alphabetSize();letter++) {
				const int seqID = sctm.seqIdOfNodeI(mynode->id());
				// relations() maps the observed (possibly ambiguous) character
				// to its compatibility with 'letter'.
				doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
				ssc.set(mynode->id(),letter,val);
			}
			computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize());
		}
		else {
			for(letter=0; letter<sp.alphabetSize();letter++) {
				doubleRep total_prob=1.0;
				for(int i=0; i < mynode->getNumberOfSons();++i){
					doubleRep prob=0.0;
					for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
						prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
							sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*sp.getGlobalRate());// branch length is scaled by the process's global rate
					}
					assert(prob>=0);
					total_prob*=prob;
				}
				ssc.set(mynode->id(),letter,total_prob);
			}
			// rescale this node, then fold in the factors already applied below it
			computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize());
			for(int k=0; k < mynode->getNumberOfSons();++k) {
				factors[mynode->id()]+=factors[mynode->getSon(k)->id()];
			}
		}
	}
}
+
void computeUpAlg::fillComputeUpSpecificGlobalRateFactors(const tree& et,
	const sequenceContainer& sc,
	const int pos,
	const stochasticProcess& sp,
	suffStatGlobalHomPos& ssc,
	const MDOUBLE gRate,
	vector<MDOUBLE>& factors) {
	// Felsenstein pruning for one position where every branch length is
	// multiplied by the caller-supplied rate gRate, with per-node
	// power-of-ten rescaling accumulated into factors.
	factors.resize(et.getNodesNum(),0.0);
	seqContainerTreeMap sctm(sc,et);

	ssc.allocatePlace(et.getNodesNum(),sp.alphabetSize());
	treeIterDownTopConst tIt(et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
#ifdef VERBOS
		LOG(5,<<endl<<endl<<"doing node: "<<mynode->name()<<endl);
#endif
		int letter;
		if (mynode->getNumberOfSons() == 0) {// leaf
			for(letter=0; letter<sp.alphabetSize();letter++) {
				const int seqID = sctm.seqIdOfNodeI(mynode->id());
				// relations() maps the observed (possibly ambiguous) character
				// to its compatibility with 'letter'.
				doubleRep val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
				ssc.set(mynode->id(),letter,val);
			}
			computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize());
		}
		else {
			// counts letters whose whole-subtree probability is exactly zero
			int letterWithTotalProbEqZero =0;
			for(letter=0; letter<sp.alphabetSize();letter++) {
				doubleRep total_prob=1.0;
				for(int i=0; i < mynode->getNumberOfSons();++i){
					doubleRep prob=0.0;
					for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
						assert(ssc.get(mynode->getSon(i)->id(),letInSon)>=0);
						assert(sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate)>=0);
						prob += ssc.get(mynode->getSon(i)->id(),letInSon)*
							sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate);
					}
					assert(prob>=0);
					total_prob*=prob;
				}
				if (total_prob ==0) ++letterWithTotalProbEqZero;

				ssc.set(mynode->id(),letter,total_prob);
			} // end of letter loop
			computeNodeFactorAndSetSsc(factors[mynode->id()],ssc,mynode->id(),sp.alphabetSize());
			for(int k=0; k < mynode->getNumberOfSons();++k) {
				factors[mynode->id()]+=factors[mynode->getSon(k)->id()];
			}
			if (letterWithTotalProbEqZero == sp.alphabetSize() && (mynode->getNumberOfSons() > 0)) {
				// Even with rescaling, every letter underflowed to zero:
				// dump diagnostics for the offending node and its sons.
				LOG(5,<<" total prob =0");
				for (int z=0; z <mynode->getNumberOfSons(); ++z) {
					LOG(5,<<"son "<<z<<" is "<<mynode->getSon(z)->name()<<endl);
					LOG(5,<<"dis2father is "<<mynode->getSon(z)->dis2father()<<endl);
					for(int letInSon=0; letInSon<sp.alphabetSize();letInSon++) {
						LOG(5,<<"let = "<<letInSon<<endl);
						LOG(5,<<"ssc.get(mynode->sons[z]->id(),letInSon) = "<<convert(ssc.get(mynode->getSon(z)->id(),letInSon))<<endl);
//						LOG(5,<<"sp.Pij_t(letter,letInSon,mynode->getSon(i)->dis2father()*gRate) = "<<sp.Pij_t(letter,letInSon,mynode->sons[i]->dis2father()*gRate)<<endl);
//						LOG(5,<<"mynode->getSon(i)->dis2father() = "<<mynode->getSon(i)->dis2father()<<endl);




					}
				}
				// NOTE(review): exit(3) aborts the whole process from library
				// code; the non-factor sibling fillComputeUpSpecificGlobalRate
				// merely returns in this situation. Consider reporting via
				// errorMsg instead of exiting.
				exit(3);
			}
		}
	}
}
diff --git a/libs/phylogeny/countTableComponent.cpp b/libs/phylogeny/countTableComponent.cpp
new file mode 100644
index 0000000..d77caa4
--- /dev/null
+++ b/libs/phylogeny/countTableComponent.cpp
@@ -0,0 +1,35 @@
+// $Id: countTableComponent.cpp 962 2006-11-07 15:13:34Z privmane $
+
+// version 1.00
+// last modified 3 Nov 2002
+
+#include "countTableComponent.h"
+#include "logFile.h"
+
+void countTableComponentHom::zero() {
+ for (int i=0; i < _countValues.size() ;++i) {
+ for (int j=0; j < _countValues[0].size() ;++j) {
+ _countValues[i][j] = 0;
+ }
+ }
+}
+
+void countTableComponentHom::countTableComponentAllocatePlace(
+ const int alphabetSize) {
+ int i;
+ _countValues.resize(alphabetSize);
+ for (i=0; i < alphabetSize;++i) _countValues[i].resize(alphabetSize);
+}
+
+void countTableComponentHom::printTable(ostream& out) const {
+ MDOUBLE sumCheck = 0.0;
+ for (int i=0; i < _countValues.size();++i) {
+ for (int k=0; k < _countValues.size();++k) {
+ out<<"counts["<<i<<"]["<<k<<"]"<<_countValues[i][k];
+ sumCheck += _countValues[i][k];
+ out<<endl;
+ }
+ }
+ out<<"sum is: "<<sumCheck<<endl;
+}
+
diff --git a/libs/phylogeny/countTableComponent.h b/libs/phylogeny/countTableComponent.h
new file mode 100644
index 0000000..6f41bea
--- /dev/null
+++ b/libs/phylogeny/countTableComponent.h
@@ -0,0 +1,84 @@
+// $Id: countTableComponent.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___COUNT_TABLE_COMPONENT
+#define ___COUNT_TABLE_COMPONENT
+
+#include "definitions.h"
+#include <cassert>
+
+class countTableComponentHom{
+public:
+
+ void setCount( const int letter1,
+ const int letter2,
+ const MDOUBLE val) {
+ _countValues[letter1][letter2]=val;
+ }
+ int alphabetSize() const {return _countValues.size();}
+ void zero();
+ MDOUBLE getCounts( const int letter1,
+ const int letter2) const {
+ return _countValues[letter1][letter2];
+ }
+ void addToCounts(const int let1,const int let2,const MDOUBLE val) {
+ _countValues[let1][let2]+=val;
+ }
+ bool isEmpty (){return (_countValues.empty());};
+ void countTableComponentAllocatePlace(const int alphabetSize);
+ void printTable(ostream & out) const;
+private:
+ VVdouble _countValues;//letter1,letter2
+
+};
+
+class countTableComponentGam{
+public:
+
+ void setCount( const int letter1,
+ const int letter2,
+ const int rateCategor,
+ const MDOUBLE val) {
+ _countValues[rateCategor].setCount(letter1,letter2,val);
+ }
+
+ int alphabetSize() const {return _countValues.empty()?0:_countValues[0].alphabetSize();}
+ void zero(){
+ for (int i=0; i < _countValues.size(); ++i ) _countValues[i].zero();
+ }
+
+
+ MDOUBLE getCounts( const int letter1,
+ const int letter2,
+ const int rateCategor) const {
+ assert(_countValues[rateCategor].getCounts(letter1,letter2)>=0);
+ return _countValues[rateCategor].getCounts(letter1,letter2);
+ }
+
+ void addToCounts(const int let1,const int let2,
+ const int rate,const MDOUBLE val) {
+ _countValues[rate].addToCounts(let1,let2,val);
+ }
+
+ bool isEmpty (){return (_countValues.empty());};
+
+ void countTableComponentAllocatePlace(const int alphabetSize,
+ const int numberOfrateCategories) {
+ _countValues.resize(numberOfrateCategories);
+ for (int i=0; i < _countValues.size(); ++i ){
+ _countValues[i].countTableComponentAllocatePlace(alphabetSize);
+ }
+ }
+ void printTable(ostream & out) const {
+ for (int i=0; i < _countValues.size(); ++i) {
+ _countValues[i].printTable(out);
+ }
+ }
+ countTableComponentHom& operator[] (int i) {return _countValues[i];}
+ const countTableComponentHom& operator[] (int i) const {return _countValues[i];}
+private:
+ vector<countTableComponentHom> _countValues;//letter1,letter2,rateCategor
+
+};
+
+#endif
+
diff --git a/libs/phylogeny/cpREV45.dat.q b/libs/phylogeny/cpREV45.dat.q
new file mode 100644
index 0000000..60adfe2
--- /dev/null
+++ b/libs/phylogeny/cpREV45.dat.q
@@ -0,0 +1,22 @@
+" 105 "
+" 227 357 "
+" 175 43 4435 "
+" 669 823 538 10 "
+" 157 1745 768 400 10 "
+" 499 152 1055 3691 10 3122 "
+" 665 243 653 431 303 133 379 "
+" 66 715 1405 331 441 1269 162 19 "
+" 145 136 168 10 280 92 148 40 29 "
+" 197 203 113 10 396 286 82 20 66 1745 "
+" 236 4482 2430 412 48 3313 2629 263 305 345 218 "
+" 185 125 61 47 159 202 113 21 10 1772 1351 193 "
+" 68 53 97 22 726 10 145 25 127 454 1268 72 327 "
+" 490 87 173 170 285 323 185 28 152 117 219 302 100 43 "
+" 2440 385 2085 590 2331 396 568 691 303 216 516 868 93 487 1202 "
+" 1340 314 1393 266 576 241 369 92 32 1040 156 918 645 148 260 2151 "
+" 14 230 40 18 435 53 63 82 69 42 159 10 86 468 49 73 29 "
+" 56 323 754 281 1466 391 142 10 1971 89 189 247 215 2370 97 522 71 346 "
+" 968 92 83 75 592 54 200 91 25 4797 865 249 475 317 122 167 760 10 119 "
+" 0.076 0.062 0.041 0.037 0.009 0.038 0.049 0.084 0.025 0.081 "
+" 0.101 0.050 0.022 0.051 0.043 0.062 0.054 0.018 0.031 0.066 "
+" cpREV45 model "
diff --git a/libs/phylogeny/datMatrixHolder.cpp b/libs/phylogeny/datMatrixHolder.cpp
new file mode 100644
index 0000000..6043e7f
--- /dev/null
+++ b/libs/phylogeny/datMatrixHolder.cpp
@@ -0,0 +1,32 @@
+// $Id: datMatrixHolder.cpp 5804 2009-01-20 09:18:05Z adido $
+
+#include "datMatrixHolder.h"
+
// Each built-in matrix lives in a .dat.q file as a sequence of quoted string
// literals (PAML dat format: lower-triangle exchangeabilities followed by
// amino-acid frequencies). The #include pastes those literals directly as the
// datMatrixString constructor argument, where adjacent string literals are
// concatenated by the compiler.
const datMatrixString datMatrixHolder::cpREV45(
#include "cpREV45.dat.q"
);
const datMatrixString datMatrixHolder::dayhoff(
#include "dayhoff.dat.q"
);
const datMatrixString datMatrixHolder::jones(
#include "jones.dat.q"
);
const datMatrixString datMatrixHolder::mtREV24(
#include "mtREV24.dat.q"
);
const datMatrixString datMatrixHolder::wag(
#include "wag.dat.q"
);
const datMatrixString datMatrixHolder::HIVb(
#include "HIVb.dat.q"
);
const datMatrixString datMatrixHolder::HIVw(
#include "HIVw.dat.q"
);
const datMatrixString datMatrixHolder::empiriCodon(
#include "adrianCodon.dat.q"
);
const datMatrixString datMatrixHolder::lg(
#include "LG.dat.q"
);
diff --git a/libs/phylogeny/datMatrixHolder.h b/libs/phylogeny/datMatrixHolder.h
new file mode 100644
index 0000000..0c21cf9
--- /dev/null
+++ b/libs/phylogeny/datMatrixHolder.h
@@ -0,0 +1,31 @@
+// $Id: datMatrixHolder.h 5804 2009-01-20 09:18:05Z adido $
+
+#ifndef ___DATMATRIXHOLDER
+#define ___DATMATRIXHOLDER
+
+#include <string>
+using namespace std;
+
+// THIS CONSTRUCT IS USED TO KEEP A STRING THAT IS THE AA SUBSTITUTION MATRIX
+// THE datMatrixString IS TO BE USED WHENEVER WE USE ONE OF THE BUILD-IN AA SUBSTITUTION MATRICES.
+
// Wraps one built-in amino-acid replacement matrix stored verbatim as a
// string in PAML .dat format. A distinct type (rather than a bare string)
// keeps matrix tables from being confused with ordinary strings.
class datMatrixString {
public:
	const std::string Val; // the full text of the .dat matrix
	// explicit: a const char* must never silently convert to a matrix.
	// (stray ';' after the constructor body removed; std:: qualified so the
	// header does not depend on an earlier 'using namespace std'.)
	explicit datMatrixString(const char * str): Val(str) {}
};
+
// Registry of the built-in empirical replacement matrices. The actual string
// contents are defined in datMatrixHolder.cpp by #including the .dat.q files.
class datMatrixHolder {
public:
	static const datMatrixString cpREV45;  // chloroplast proteins
	static const datMatrixString dayhoff;
	static const datMatrixString jones; // This is JTT
	static const datMatrixString mtREV24;  // mitochondrial proteins
	static const datMatrixString wag;
	static const datMatrixString HIVb;
	static const datMatrixString HIVw;
	static const datMatrixString lg;
	static const datMatrixString empiriCodon; //This is the empirical matrix for codon by gina and adrian
};
+
+#endif // ___DATMATRIXHOLDER
diff --git a/libs/phylogeny/dayhoff.dat.q b/libs/phylogeny/dayhoff.dat.q
new file mode 100644
index 0000000..a57a8c6
--- /dev/null
+++ b/libs/phylogeny/dayhoff.dat.q
@@ -0,0 +1,79 @@
+" 27 "
+" 98 32 "
+" 120 0 905 "
+" 36 23 0 0 "
+" 89 246 103 134 0 "
+" 198 1 148 1153 0 716 "
+" 240 9 139 125 11 28 81 "
+" 23 240 535 86 28 606 43 10 "
+" 65 64 77 24 44 18 61 0 7 "
+" 41 15 34 0 0 73 11 7 44 257 "
+" 26 464 318 71 0 153 83 27 26 46 18 "
+" 72 90 1 0 0 114 30 17 0 336 527 243 "
+" 18 14 14 0 0 0 0 15 48 196 157 0 92 "
+" 250 103 42 13 19 153 51 34 94 12 32 33 17 11 "
+" 409 154 495 95 161 56 79 234 35 24 17 96 62 46 245 "
+" 371 26 229 66 16 53 34 30 22 192 33 136 104 13 78 550 "
+" 0 201 23 0 0 0 0 0 27 0 46 0 0 76 0 75 0 "
+" 24 8 95 0 96 0 22 0 127 37 28 13 0 698 0 34 42 61 "
+" 208 24 15 18 49 35 37 54 44 889 175 10 258 12 48 30 157 0 28 "
+" 0.087127 0.040904 0.040432 0.046872 0.033474 0.038255 0.049530 "
+" 0.088612 0.033618 0.036886 0.085357 0.080482 0.014753 0.039772 "
+" 0.050680 0.069577 0.058542 0.010494 0.029916 0.064718 "
+" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
+" S_ij = S_ji and PI_i for the Dayhoff model, with the rate Q_ij=S_ij*PI_j "
+" The rest of the file is not used. "
+" Prepared by Z. Yang, March 1995. "
+" See the following reference for notation used here: "
+" Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and "
+" applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611. "
+" ----------------------------------------------------------------------- "
+" "
+" 30 "
+" 109 17 "
+" 154 0 532 "
+" 33 10 0 0 "
+" 93 120 50 76 0 "
+" 266 0 94 831 0 422 "
+" 579 10 156 162 10 30 112 "
+" 21 103 226 43 10 243 23 10 "
+" 66 30 36 13 17 8 35 0 3 "
+" 95 17 37 0 0 75 15 17 40 253 "
+" 57 477 322 85 0 147 104 60 23 43 39 "
+" 29 17 0 0 0 20 7 7 0 57 207 90 "
+" 20 7 7 0 0 0 0 17 20 90 167 0 17 "
+" 345 67 27 10 10 93 40 49 50 7 43 43 4 7 "
+" 772 137 432 98 117 47 86 450 26 20 32 168 20 40 269 "
+" 590 20 169 57 10 37 31 50 14 129 52 200 28 10 73 696 "
+" 0 27 3 0 0 0 0 0 3 0 13 0 0 10 0 17 0 "
+" 20 3 36 0 30 0 10 0 40 13 23 10 0 260 0 22 23 6 "
+" 365 20 13 17 33 27 37 97 30 661 303 17 77 10 50 43 186 0 17 "
+" A R N D C Q E G H I L K M F P S T W Y V "
+" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
+" Accepted point mutations (x10) Figure 80 (Dayhoff 1978) "
+" ------------------------------------------------------- "
+" A 100 /* Ala */ A 0.087 /* Ala */ "
+" R 65 /* Arg */ R 0.041 /* Arg */ "
+" N 134 /* Asn */ N 0.040 /* Asn */ "
+" D 106 /* Asp */ D 0.047 /* Asp */ "
+" C 20 /* Cys */ C 0.033 /* Cys */ "
+" Q 93 /* Gln */ Q 0.038 /* Gln */ "
+" E 102 /* Glu */ E 0.050 /* Glu */ "
+" G 49 /* Gly */ G 0.089 /* Gly */ "
+" H 66 /* His */ H 0.034 /* His */ "
+" I 96 /* Ile */ I 0.037 /* Ile */ "
+" L 40 /* Leu */ L 0.085 /* Leu */ "
+" K 56 /* Lys */ K 0.081 /* Lys */ "
+" M 94 /* Met */ M 0.015 /* Met */ "
+" F 41 /* Phe */ F 0.040 /* Phe */ "
+" P 56 /* Pro */ P 0.051 /* Pro */ "
+" S 120 /* Ser */ S 0.070 /* Ser */ "
+" T 97 /* Thr */ T 0.058 /* Thr */ "
+" W 18 /* Trp */ W 0.010 /* Trp */ "
+" Y 41 /* Tyr */ Y 0.030 /* Tyr */ "
+" V 74 /* Val */ V 0.065 /* Val */ "
+" scale factor = SUM_OF_PRODUCT = 75.246 "
+" Relative Mutability The equilibrium freqs. "
+" (Table 21) Table 22 "
+" (Dayhoff 1978) Dayhoff (1978) "
+" ---------------------------------------------------------------- "
diff --git a/libs/phylogeny/definitions.h b/libs/phylogeny/definitions.h
new file mode 100644
index 0000000..ad4e94f
--- /dev/null
+++ b/libs/phylogeny/definitions.h
@@ -0,0 +1,83 @@
+// $Id: definitions.h 4452 2008-07-17 14:23:41Z cohenofi $
+
+#ifndef ___DEFINITIONS_H
+#define ___DEFINITIONS_H
+
+#ifdef _MSC_VER
+#define LIMITS_WORKING
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4786)
+#pragma warning (disable: 4267)
+#pragma warning (disable: 4018)
+#pragma warning (disable: 4305) //truncation from 'double' to 'float'
+#endif
+
+
+#include <vector>
+#include <string>
+
+#ifdef LIMITS_WORKING
+ #include <limits>
+#endif
+using namespace std;
+
// MDOUBLE is the library-wide floating-point type; swap the two lines to
// build the library in single precision.
#define MDOUBLE double
//#define MDOUBLE float

// Shorthand vector/matrix typedefs used throughout the library.
typedef vector<MDOUBLE> Vdouble;
typedef vector<int> Vint;
typedef vector<Vint> VVint;
typedef vector<VVint> VVVint;
typedef vector<char> Vchar;
typedef vector<Vdouble> VVdouble;
typedef vector<VVdouble> VVVdouble;
typedef vector<VVVdouble> VVVVdouble;
typedef vector<VVVVdouble> VVVVVdouble;
typedef vector<string> Vstring;

#ifdef LIMITS_WORKING
	const MDOUBLE VERYBIG = numeric_limits<MDOUBLE>::max();
	const MDOUBLE VERYSMALL = -VERYBIG;
	const MDOUBLE EPSILON = numeric_limits<MDOUBLE>::epsilon();
#else
// IF <limits> is not recognized, and MDOUBLE is double.
// Hard-coded fallbacks matching IEEE-754 double limits.
	const MDOUBLE VERYBIG = 1.79769e+308;
	const MDOUBLE VERYSMALL = -VERYBIG;
	const MDOUBLE EPSILON = 2.22045e-016;
#endif

//The maximum value for type float is: 3.40282e+038
//The maximum value for type double is: 1.79769e+308
//::epsilon() returns the difference between 1 and the smallest value greater than 1 that is representable for the data type.
//epsilon float 1.19209e-007
//epsilon double 2.22045e-016

// doubleRep is the type that holds likelihood values. By default it is a
// plain MDOUBLE; compiling with -DLOGREP or -DDOUBLEREP swaps in a log-space
// or mantissa+exponent representation to postpone underflow on large trees.
#ifdef LOGREP
	class logRep;
	typedef vector<logRep> VlogRep;
	typedef vector <vector<logRep> > VVlogRep;
	typedef logRep doubleRep;
	typedef VlogRep VdoubleRep;
	typedef VVlogRep VVdoubleRep;
	#include "logRep.h"
#elif defined (DOUBLEREP)
	class doubleRepMantisa;
	typedef vector<doubleRepMantisa> VdoubleRepMantisa;
	typedef vector <vector<doubleRepMantisa> > VVdoubleRepMantisa;
	typedef vector <VVdoubleRepMantisa > VVVdoubleRepMantisa;
	typedef doubleRepMantisa doubleRep;
	typedef VdoubleRepMantisa VdoubleRep;
	typedef VVdoubleRepMantisa VVdoubleRep;
	#include "doubleRep.h"
#else
	typedef MDOUBLE doubleRep;
	typedef Vdouble VdoubleRep;
	typedef VVdouble VVdoubleRep;
	// trivial conversion for the plain-double build; the LOGREP/DOUBLEREP
	// classes provide their own convert().
	inline MDOUBLE convert (MDOUBLE d) {return (d);}
#endif
+
+#endif
+
+
diff --git a/libs/phylogeny/distanceBasedSeqs2Tree.cpp b/libs/phylogeny/distanceBasedSeqs2Tree.cpp
new file mode 100644
index 0000000..43898b1
--- /dev/null
+++ b/libs/phylogeny/distanceBasedSeqs2Tree.cpp
@@ -0,0 +1,554 @@
+// $Id: distanceBasedSeqs2Tree.cpp 6002 2009-03-20 19:39:03Z privmane $
+
+#include "distanceBasedSeqs2Tree.h"
+#include "uniDistribution.h"
+#include "distanceTable.h"
+#include "bestAlpha.h"
+#include "siteSpecificRate.h"
+#include "someUtil.h"
+#include "bblEM.h"
+#include "tamura92.h"
+#include "bestTamura92param.h"
+#include "bestGtrModelParams.h"
+#include <float.h>
+#include "replacementModelSSRV.h"
+#include "trivialAccelerator.h"
+
+// **********************************************************************
+// *** The basic non-iterative versions *********************************
+// **********************************************************************
+
// Non-iterative reconstruction: compute one pairwise-distance table with the
// configured distance method, then build a tree from it with the configured
// distances2Tree algorithm (optionally constrained by constraintTreePtr).
tree distanceBasedSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
	_constraintTreePtr=constraintTreePtr;
	_weights = weights;

	// Calculate distance table
	tree et;
	VVdouble distTable;
	vector<string> vNames;
	giveDistanceTable(_distM,sc,distTable,vNames,_weights);

	// Build tree from the distance table
	et = _dist2et->computeTree(distTable, vNames, _constraintTreePtr);

	LOG(6,<<"# distanceBasedSeqs2Tree::seqs2Tree: The reconsructed tree:"<<endl);
	LOGDO(6,et.output(myLog::LogFile()));

	return et;
}
+
// For the non-iterative method a bootstrap replicate is simply a plain
// reconstruction on the resampled data.
tree distanceBasedSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
	return seqs2Tree(sc, weights, constraintTreePtr);
}
+
+// **********************************************************************
+// *** iterativeDistanceSeqs2Tree ***************************************
+// **********************************************************************
+
// Sets up the iterative reconstruction: epsilons control when the outer
// iteration, the alpha optimization and the branch-length (BBL) optimization
// are considered converged.
iterativeDistanceSeqs2Tree::iterativeDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights,
						   const MDOUBLE epsilonLikelihoodImprovement,
						   const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz,
						   const MDOUBLE epsilonLikelihoodImprovement4BBL,
						   const int maxIterationsBBL)
	: distanceBasedSeqs2Tree(distM, dist2et, weights),
	  _epsilonLikelihoodImprovement		    ( epsilonLikelihoodImprovement	      ),
	  _epsilonLikelihoodImprovement4alphaOptimiz( epsilonLikelihoodImprovement4alphaOptimiz),
	  _epsilonLikelihoodImprovement4BBL	    ( epsilonLikelihoodImprovement4BBL	      ),
	  _maxIterationsBBL			    ( maxIterationsBBL			      )
{
	// Check that the stochasticProcess in likeDist is not const
	if (distM.isTheInternalStochasticProcessConst()) {
		errorMsg::reportError("iterativeDistanceSeqs2Tree::iterativeDistanceSeqs2Tree: The stochasticProcess in the given likeDist object is const. A non-const stochasticProcess is required.");
	}

	// Keep a pointer to the stochasticProcess in distM, so that we will be able to change its alpha, etc.
	_spPtr = &(distM.getNonConstStochasticProcess());
	// NOTE(review): >1 categories is assumed to imply a gammaDistribution;
	// the static_cast is unchecked — confirm no other multi-category
	// distribution can reach this constructor.
	if (_spPtr->categories() >1)
		_alpha = (static_cast<gammaDistribution*>(_spPtr->distr()))->getAlpha();
	else
		_alpha=-99.9;           // sentinel: homogeneous process, alpha is meaningless and should never be read

}
+
+// *** Iterative tree building ******************************************
// Bootstraps the iteration: builds a first tree (with homogeneous rates
// unless side info was supplied), then hands over to the init-tree-given
// variant to iterate until the likelihood stops improving.
tree iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternal(const sequenceContainer &sc, bool initSideInfoGiven) {
	LOGDO(3,printTime(myLog::LogFile()));
	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternal:"<<endl<<"# Initial tree:"<<endl);
	seqs2TreeOneIterationInternal(sc, initSideInfoGiven);

	return seqs2TreeIterativeInternalInitTreeGiven(sc, true, _newTree, _newAlpha);
}
+
+// *** Iterative tree building, given an initial tree and alpha *********
+// *** Optimize branch lengths and sideInfo for the given tree topology
// Starts the iteration from a user-supplied topology: first optimizes branch
// lengths and side info (e.g. alpha) for that topology, then delegates to the
// variant that also receives the optimized alpha.
tree iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, const tree &initTree) {
	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven: Started optimizeSideInfo. ");
	LOGDO(7,printTime(myLog::LogFile()));
	_newTree=initTree;
	_newTreeLogLikelihood=optimizeSideInfo(sc, _newTree);
	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven: Finished optimizeSideInfo. ");
	LOGDO(7,printTime(myLog::LogFile()));

	return seqs2TreeIterativeInternalInitTreeGiven(sc, true, _newTree, _newAlpha);
}
+
+// *** Iterative tree building, given an initial tree and alpha *********
+// *** If sideInfo is not given - calculate it for the fixed tree and alpha
// Core iteration loop: starting from initTree/initAlpha, alternately keeps
// the best tree found so far and rebuilds a new tree from side-info-aware
// distances, until an iteration fails to improve the log-likelihood by more
// than _epsilonLikelihoodImprovement. Returns the best tree (_et).
tree iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, bool initSideInfoGiven, const tree &initTree, MDOUBLE initAlpha) {
	_newTree=initTree;
	_newAlpha=initAlpha;

	LOGDO(3,printTime(myLog::LogFile()));
	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven"<<endl);
	if (!initSideInfoGiven) {
		// fixed tree and alpha: only the side info itself needs computing
		_newTreeLogLikelihood=calcSideInfoGivenTreeAndAlpha(sc, initTree, initAlpha);
	}
	int iterationNum = 0;
	LOGDO(3,printTime(myLog::LogFile()));
	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven:"<<endl<<"# The given initial tree:"<<endl);
	LOGDO(3,_newTree.output(myLog::LogFile()));

	do {
		++iterationNum;
		LOGDO(5,printTime(myLog::LogFile()));
		LOG(3,<<"# Iteration "<<iterationNum<<":"<<endl);

		// save the best tree so far, and its likelihood and the sideInfo that was calculated for it
		_et=_newTree;
		_treeLogLikelihood=_newTreeLogLikelihood;
		acceptSideInfo();
		LOG(7,<<"# Side info for the tree"<<endl);
		LOGDO(7,printSideInfo(myLog::LogFile()));

		// rebuild a candidate tree using the accepted side info
		seqs2TreeOneIterationInternal(sc, true);

	} while (_newTreeLogLikelihood > _treeLogLikelihood + _epsilonLikelihoodImprovement);

	LOGDO(3,printTime(myLog::LogFile()));
	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeIterativeInternalInitTreeGiven:"<<endl<<"# Finished iterative distance-based tree reconstruction, done "<<iterationNum<<" iterations"<<endl);
	return _et;
}
+
+// *** Tree building procedure that is called iteratively **********************
// One reconstruction pass: (1) distance table (homogeneous rates if no side
// info is available yet, otherwise side-info-aware distances), (2) tree from
// distances, (3) branch-length + side-info optimization on the new topology.
// Results are left in _newTree / _newTreeLogLikelihood.
void iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal(const sequenceContainer &sc, const bool sideInfoSet) {

	// 1. Calculate distance table
	VVdouble distTable;
	vector<string> vNames;
	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Started giveDistanceTable. ");
	LOGDO(7,printTime(myLog::LogFile()));
	if (!sideInfoSet) { // Then use homogeneous rates

		// Create homogeneous likeDist
		_alpha = 1.5;	// Since no ASRV side info is known yet, we set an initial alpha for bestAlphaAndBBL optimizations
		uniDistribution distribution;
		stochasticProcess* uniDistSp = NULL;
		// For an SSRV process, strip the rate-variation wrapper and use the
		// base replacement model with a trivial accelerator.
		replacementModelSSRV* rmSSRV =
			dynamic_cast<replacementModelSSRV*>(_spPtr->getPijAccelerator()->getReplacementModel());
		if (!rmSSRV) {
			uniDistSp = new stochasticProcess(&distribution, _spPtr->getPijAccelerator());
		} else {
			// NOTE(review): pijAcc is local to this else-block; this relies on
			// stochasticProcess copying/cloning the accelerator — confirm,
			// otherwise uniDistSp holds a dangling pointer below.
			trivialAccelerator pijAcc(rmSSRV->getBaseRM());
			uniDistSp = new stochasticProcess(&distribution, &pijAcc);
		}
		likeDist homogeneousDist(*uniDistSp,static_cast<likeDist*>(_distM)->getToll());

		giveDistanceTable(&homogeneousDist,sc,distTable,vNames,_weights);
		delete uniDistSp;

	} else {		// use the side information
		utilizeSideInfo();
		giveDistanceTable(_distM,sc,distTable,vNames,_weights);
	}
	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished giveDistanceTable, started distances2Tree::computeTree. ");
	LOGDO(7,printTime(myLog::LogFile()));

	// 2. Build tree from the distance table
	_newTree = _dist2et->computeTree(distTable, vNames, _constraintTreePtr);
	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished distances2Tree::computeTree, started optimizeSideInfo. ");
	LOGDO(7,printTime(myLog::LogFile()));

	// 3. Optimize branch lengths and side info for the tree topology
	_newTreeLogLikelihood=optimizeSideInfo(sc, _newTree);
	LOG(7,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal: Finished distances2Tree::optimizeSideInfo. ");
	LOGDO(7,printTime(myLog::LogFile()));

	if (!sideInfoSet) {
		LOG(5,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal:"<<endl<<"# Homogeneous rates tree"<<endl);
	} else {
		LOG(5,<<"# iterativeDistanceSeqs2Tree::seqs2TreeOneIterationInternal:"<<endl<<"# Tree based on alpha"<<endl);
	}
	LOGDO(5,_newTree.output(myLog::LogFile()));
	LOG(5,<<"# Log likelihood:"<<endl<<_newTreeLogLikelihood<<endl);
}
+
+// Perform one bootstrap iteration, assuming that side info has been set (as if acceptSideInfo has been called)
// One bootstrap replicate for the iterative method: reuse the already-accepted
// side info (no re-optimization) to compute distances on the resampled data
// and build a single tree from them.
tree iterativeDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeBootstrap: Started a single bootstrap iteration. ");
	LOGDO(3,printTime(myLog::LogFile()));
	_constraintTreePtr=constraintTreePtr;
	_weights = weights;

	// Calculate distance table
	tree localScopeEt;
	VVdouble distTable;
	vector<string> vNames;
	utilizeSideInfo();
	giveDistanceTable(_distM,sc,distTable,vNames,_weights);

	// Build tree from the distance table
	localScopeEt = _dist2et->computeTree(distTable,vNames, _constraintTreePtr);

	LOG(3,<<"# iterativeDistanceSeqs2Tree::seqs2TreeBootstrapInternal:"<<endl<<"# Bootstrap tree based on alpha, without optimizations"<<endl);
	LOGDO(3,localScopeEt.output(myLog::LogFile()));

	return localScopeEt;
}
+
+/********************************
+ * commonAlphaDistanceSeqs2Tree *
+ ********************************/
// Iterative reconstruction starting from a user-supplied alpha.
tree commonAlphaDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const Vdouble *weights, const tree* constraintTreePtr) {
	_constraintTreePtr=constraintTreePtr;
	_alpha = initAlpha;
	_weights = weights;
	return seqs2TreeIterativeInternal(sc, true);
}

// Iterative reconstruction with no prior alpha: the first pass uses
// homogeneous rates and alpha is estimated along the way.
tree commonAlphaDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
	_constraintTreePtr=constraintTreePtr;
	_weights = weights;
	return seqs2TreeIterativeInternal(sc, false);
}

// Iterative reconstruction starting from a given topology; alpha is optimized
// for that topology before the iterations begin.
tree commonAlphaDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights, const tree* constraintTreePtr) {
	_constraintTreePtr=constraintTreePtr;
	_weights = weights;
	return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree);
}

// Iterative reconstruction starting from both a topology and an alpha.
tree commonAlphaDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights, const tree* constraintTreePtr) {
	_alpha = initAlpha;
	_weights = weights;

	_constraintTreePtr=constraintTreePtr;
	return seqs2TreeIterativeInternalInitTreeGiven(sc, true, initTree, initAlpha);
}

// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
tree commonAlphaDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, const Vdouble *weights, const tree* constraintTreePtr) {
	_weights = weights;
	_alpha = alpha;
	_constraintTreePtr=constraintTreePtr;
	seqs2TreeOneIterationInternal(sc, true);
	return _newTree;
}

// Bootstrap replicate with a fixed, user-supplied alpha; delegates to the
// base-class bootstrap after storing the side info.
tree commonAlphaDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, const Vdouble *weights, const tree* constraintTreePtr) {
	_weights = weights;
	_alpha = alpha;
	return static_cast<iterativeDistanceSeqs2Tree *>(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr);
}

// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
tree commonAlphaDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
	return seqs2TreeIterative(sc,weights,constraintTreePtr);
}
+
// Optimize alpha (and, depending on the replacement model, additional
// model parameters) together with the branch lengths of et. The optimized
// alpha -- the side info of this class -- is cached in _newAlpha.
// Returns the best log-likelihood found by the optimizer.
MDOUBLE commonAlphaDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et)
{
	if (dynamic_cast<tamura92*>(_spPtr->getPijAccelerator()->getReplacementModel())) {
		// Optimizing params of the tamura92 model
		bestTamura92ParamAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement/*0.05*/,
						       _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
						       _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
						       _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
						       _epsilonLikelihoodImprovement4BBL/*0.01*/,
						       5.0, _maxIterationsBBL, _alpha, 5.0 );
		_newAlpha=optimizer.getBestAlpha();
		return(optimizer.getBestL());

	} else if (dynamic_cast<gtrModel*>(_spPtr->getPijAccelerator()->getReplacementModel())) {
		// Optimizing params of the gtr model
		bestGtrModel optimizer(et, sc, *_spPtr, _weights, 5,
				       _epsilonLikelihoodImprovement,
				       _epsilonLikelihoodImprovement4alphaOptimiz,
				       true, true);
		_newAlpha=optimizer.getBestAlpha();
		return(optimizer.getBestL());

	} else {
		// Any other replacement model: optimize only alpha and branch lengths
		bestAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, _alpha, 5.0,
					  _epsilonLikelihoodImprovement4BBL/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz,
					  _maxIterationsBBL);
		_newAlpha=optimizer.getBestAlpha();
		return(optimizer.getBestL());
	}
}
+
+MDOUBLE commonAlphaDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha)
+{
+ _newAlpha = alpha;
+ (static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(alpha);
+ return likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(et, sc, *_spPtr, _weights);
+}
+
// Promote the candidate alpha from the latest optimization to be the
// current accepted side info.
void commonAlphaDistanceSeqs2Tree::acceptSideInfo()
{
	_alpha = _newAlpha;
}

// Install the accepted alpha into the stochastic process used by _distM,
// so subsequent distance computations use it.
void commonAlphaDistanceSeqs2Tree::utilizeSideInfo()
{
	// set new alpha value in the sp that is used in _distM
	(static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(_alpha);
	LOG(10,<<"# utilizing alpha"<<endl<<_alpha<<endl<<endl);

}

// Human-readable dump of the side info (the current alpha).
void commonAlphaDistanceSeqs2Tree::printSideInfo(ostream& out) const
{
	out<<"Alpha: "<<_alpha<<endl;
}

// non virtual
// Directly set the side info (alpha) without any optimization.
void commonAlphaDistanceSeqs2Tree::setSideInfo(const MDOUBLE alpha)
{
	_alpha=alpha;
}

// Accessor for the current accepted alpha.
MDOUBLE commonAlphaDistanceSeqs2Tree::getSideInfo() const
{
	return _alpha;
}
+
+/******************************
+ * rate4siteDistanceSeqs2Tree *
+ ******************************/
+tree rate4siteDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble &initRates, const Vdouble *weights, const tree* constraintTreePtr) {
+ _rates = initRates;
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternal(sc, true);
+}
+
+tree rate4siteDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternal(sc, false);
+}
+
+tree rate4siteDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree);
+}
+
+tree rate4siteDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternalInitTreeGiven(sc, false, initTree, initAlpha);
+}
+
+// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
+tree rate4siteDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble &rates, const Vdouble *weights, const tree* constraintTreePtr) {
+ _weights = weights;
+ _rates = rates;
+ _constraintTreePtr=constraintTreePtr;
+
+ seqs2TreeOneIterationInternal(sc, true);
+ return _newTree;
+}
+
+tree rate4siteDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble &rates, const Vdouble *weights, const tree* constraintTreePtr) {
+ _weights = weights;
+ _rates = rates;
+ return static_cast<iterativeDistanceSeqs2Tree *>(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr);
+}
+
+// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
+tree rate4siteDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
+ return seqs2TreeIterative(sc,weights,constraintTreePtr);
+}
+
// Optimize branch lengths and then compute ML rates per site -- the side
// info of this class -- caching them in _newRates. Returns the tree
// log-likelihood of the ML-rates computation.
MDOUBLE rate4siteDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et)
{
	// NOTE(review): the bblEM object is only constructed, never queried --
	// presumably its constructor optimizes the branch lengths of et as a
	// side effect; confirm against bblEM.h.
	bblEM optimizer(et, sc, *_spPtr, _weights, _maxIterationsBBL, _epsilonLikelihoodImprovement4BBL);

	// Note: this version of ML rates computation can only use a uniDistribution stochasticProcess
	Vdouble likelihoods;
	MDOUBLE treeLogLikelihood = computeML_siteSpecificRate(_newRates, likelihoods, sc, *_spPtr, et,20,_epsilonLikelihoodImprovement);
	//computeEB_EXP_siteSpecificRate
	return(treeLogLikelihood);
}
+
// Compute ML rates per site for a fixed tree. alpha is only cached in
// _newAlpha -- it is the rates, not alpha, that drive this class's
// distance method.
MDOUBLE rate4siteDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha)
{
	_newAlpha = alpha;
	Vdouble likelihoods;
	MDOUBLE treeLogLikelihood = computeML_siteSpecificRate(_newRates, likelihoods, sc, *_spPtr, et,20,_epsilonLikelihoodImprovement);
	//computeEB_EXP_siteSpecificRate
	return(treeLogLikelihood);
}
+
// Promote the candidate alpha and rates from the latest optimization to
// be the current accepted side info.
void rate4siteDistanceSeqs2Tree::acceptSideInfo()
{
	_alpha = _newAlpha;
	_rates = _newRates;
}

// Install the accepted rates into the distance method, so subsequent
// distance computations use them.
void rate4siteDistanceSeqs2Tree::utilizeSideInfo()
{
	(static_cast<givenRatesMLDistance*>(_distM))->setRates(_rates);
	LOG(10,<<"# utilizing rates"<<endl<<_rates<<endl<<endl);

	// set new alpha value in the sp that is used in _distM
	// (static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(_alpha);
}

// Human-readable dump of the side info (the ML rates), if any.
void rate4siteDistanceSeqs2Tree::printSideInfo(ostream& out) const
{
	if (_rates.size())
		out<<"ML rates: "<<_rates<<endl;
}

// non virtual
// Directly set the side info (rates) without any optimization.
void rate4siteDistanceSeqs2Tree::setSideInfo(const Vdouble &rates)
{
	_rates = rates;
}

// Accessor for the current accepted rates.
const Vdouble& rate4siteDistanceSeqs2Tree::getSideInfo() const
{
	return _rates;
}
+
+/******************************
+ * posteriorDistanceSeqs2Tree *
+ ********************************/
+tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble *weights, const tree* constraintTreePtr) {
+ _alpha = initAlpha;
+ _posterior = initPosterior;
+ _weights = weights;
+ _constraintTreePtr=constraintTreePtr;
+ return seqs2TreeIterativeInternal(sc, true);
+}
+
+tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternal(sc, false);
+}
+
+tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree);
+}
+
+tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternalInitTreeGiven(sc, false, initTree, initAlpha);
+}
+
+tree posteriorDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble *weights, const tree* constraintTreePtr) {
+ _alpha = initAlpha;
+ _posterior = initPosterior;
+ _weights = weights;
+ _constraintTreePtr=constraintTreePtr;
+ return seqs2TreeIterativeInternalInitTreeGiven(sc, true, initTree, initAlpha);
+}
+
+// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
+tree posteriorDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble *weights, const tree* constraintTreePtr) {
+ _weights = weights;
+ _posterior = posterior;
+ _constraintTreePtr=constraintTreePtr;
+ seqs2TreeOneIterationInternal(sc, true);
+ return _newTree;
+}
+
+tree posteriorDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble *weights, const tree* constraintTreePtr) {
+ _weights = weights;
+ _posterior = posterior;
+ return static_cast<iterativeDistanceSeqs2Tree *>(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr);
+}
+
+// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
+tree posteriorDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
+ return seqs2TreeIterative(sc, weights, constraintTreePtr);
+}
+
+MDOUBLE posteriorDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et)
+{
+ if (dynamic_cast<tamura92*>(_spPtr->getPijAccelerator()->getReplacementModel())) {
+ // Optimizing params of the tamura92 model
+ bestTamura92ParamAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, 5, _epsilonLikelihoodImprovement/*0.05*/,
+ _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
+ _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
+ _epsilonLikelihoodImprovement4alphaOptimiz/*0.01*/,
+ _epsilonLikelihoodImprovement4BBL/*0.01*/,
+ 5.0, _maxIterationsBBL, _alpha, 5.0 );
+ _newAlpha=optimizer.getBestAlpha();
+ return(optimizer.getBestL());
+
+ } else if (dynamic_cast<gtrModel*>(_spPtr->getPijAccelerator()->getReplacementModel())) {
+ // Optimizing params of the gtr model
+ bestGtrModel optimizer(et, sc, *_spPtr, _weights, 5,
+ _epsilonLikelihoodImprovement,
+ _epsilonLikelihoodImprovement4alphaOptimiz,
+ true, true);
+ _newAlpha=optimizer.getBestAlpha();
+ return(optimizer.getBestL());
+
+ } else {
+ bestAlphaAndBBL optimizer(et, sc, *_spPtr, _weights, _alpha, 5.0,
+ _epsilonLikelihoodImprovement4BBL/*0.01*/, _epsilonLikelihoodImprovement4alphaOptimiz,
+ _maxIterationsBBL);
+ _newAlpha=optimizer.getBestAlpha(); // cached only to make alpha optimization faster
+ }
+
+ // Compute posterior probabilities of rates per site
+ return likelihoodComputation::getPosteriorOfRates(et, sc, *_spPtr, _newPosterior);
+}
+
+MDOUBLE posteriorDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha)
+{
+ _newAlpha = alpha;
+ (static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(alpha);
+ // Compute posterior probabilities of rates per site
+ return likelihoodComputation::getPosteriorOfRates(et, sc, *_spPtr, _newPosterior);
+}
+
// Promote the candidate alpha and posterior from the latest optimization
// to be the current accepted side info.
void posteriorDistanceSeqs2Tree::acceptSideInfo()
{
	_alpha = _newAlpha;
	_posterior = _newPosterior;
}

// Install the accepted posterior into the distance method, so subsequent
// distance computations use it.
void posteriorDistanceSeqs2Tree::utilizeSideInfo()
{
	(static_cast<posteriorDistance*>(_distM))->setPosterior(_posterior);
	LOG(10,<<"# utilizing posterior"<<endl<<_posterior<<endl<<endl);
	// set new alpha value in the sp that is used in _distM
	// (static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(_alpha);
}

// Human-readable dump of the side info (the posterior matrix), if any.
void posteriorDistanceSeqs2Tree::printSideInfo(ostream& out) const
{
	if (_posterior.size())
		out<<_posterior<<endl;
}

// non virtual
// Directly set the side info (posterior) without any optimization.
void posteriorDistanceSeqs2Tree::setSideInfo(const VVdoubleRep &posterior)
{
	_posterior = posterior;
}

// Accessor for the current accepted posterior.
const VVdoubleRep& posteriorDistanceSeqs2Tree::getSideInfo() const
{
	return _posterior;
}
+
diff --git a/libs/phylogeny/distanceBasedSeqs2Tree.h b/libs/phylogeny/distanceBasedSeqs2Tree.h
new file mode 100644
index 0000000..9f84bfc
--- /dev/null
+++ b/libs/phylogeny/distanceBasedSeqs2Tree.h
@@ -0,0 +1,195 @@
+// $Id: distanceBasedSeqs2Tree.h 5989 2009-03-19 09:27:26Z privmane $
+
+#ifndef ___DISTANCE_BASED_SEQS2TREE
+#define ___DISTANCE_BASED_SEQS2TREE
+
+#include "distanceMethod.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "likeDist.h"
+#include "distances2Tree.h"
+#include "givenRatesMLDistance.h"
+#include "posteriorDistance.h"
+#include "float.h"
+
+// NOTE: These modules take sequenceContainer as argument, and do not
+// manipulate it. If you want to take care of gaps do it yourself!
+class distanceBasedSeqs2Tree {
+public:
+ distanceBasedSeqs2Tree(distanceMethod &distM, distances2Tree &dist2et, const Vdouble *weights = NULL)
+ : _distM(distM.clone()), _dist2et(dist2et.clone()), _weights(weights), _treeLogLikelihood(VERYBIG) {}
+ virtual ~distanceBasedSeqs2Tree() {delete (_distM);delete (_dist2et);}
+ virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
+ // Does one bootstrap iteration
+ virtual tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
+ virtual MDOUBLE getLogLikelihood() {return _treeLogLikelihood;}
+
+protected:
+ distanceMethod *_distM;
+ distances2Tree *_dist2et;
+ const Vdouble * _weights;
+ MDOUBLE _treeLogLikelihood;
+ const tree* _constraintTreePtr;
+};
+
// Abstract base for iterative distance-based reconstruction: alternate
// between (1) building a tree from pairwise distances and (2) optimizing
// model "side info" (alpha, ML rates, rate posteriors, ...) on that tree,
// until the likelihood improvement drops below _epsilonLikelihoodImprovement.
class iterativeDistanceSeqs2Tree : public distanceBasedSeqs2Tree {
public:
	iterativeDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
				   const MDOUBLE epsilonLikelihoodImprovement = 0.001,
				   const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001,
				   const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
				   const int maxIterationsBBL = 10);
	virtual ~iterativeDistanceSeqs2Tree() {}
	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL) = 0; // iterative
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL) = 0;
	// Start from optimization of branch length and side info for a given initial topology
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL) = 0;
	// Start from calculating side info for a given tree and alpha
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL) = 0;
	// Does one bootstrap iteration
	virtual tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	tree getTree() {return _et;}

	// *** handling side info ***

	// Optimize nj tree (optimize alpha, branch lengths, etc.) and produce
	// side info based on the optimized tree
	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et) = 0;
	// Calculate side info without changing the given tree and alpha
	// (Optimization should be done in here for side info that includes other optimizable parameters
	// e.g. ML rates, Nu...)
	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha) = 0;
	// Copy new side info (based on the new tree) to the "current" side info variable, before the next iteration
	virtual void acceptSideInfo() = 0;
	// Apply the optimized side info into _optimizedSp
	virtual void utilizeSideInfo() = 0;
	virtual void printSideInfo(ostream& out) const = 0;
	MDOUBLE getAlpha() const { return _alpha; }


protected:
	// Shared iteration engines used by all subclasses
	tree seqs2TreeIterativeInternal(const sequenceContainer &sc, bool initSideInfoGiven=false);
	tree seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, const tree &initTree);
	tree seqs2TreeIterativeInternalInitTreeGiven(const sequenceContainer &sc, bool initSideInfoGiven, const tree &initTree, MDOUBLE initAlpha);
	void seqs2TreeOneIterationInternal(const sequenceContainer &sc, const bool sideInfoSet);

	MDOUBLE _newTreeLogLikelihood;                      // log-likelihood of the latest candidate tree
	MDOUBLE _epsilonLikelihoodImprovement;              // outer-iteration convergence threshold
	MDOUBLE _epsilonLikelihoodImprovement4alphaOptimiz; // alpha-optimization convergence threshold
	MDOUBLE _epsilonLikelihoodImprovement4BBL;          // branch-length-optimization convergence threshold
	int _maxIterationsBBL;

	MDOUBLE _alpha;    // current accepted alpha
	MDOUBLE _newAlpha; // candidate alpha from the latest optimization

	// NOTE(review): ownership of _spPtr is not visible in this header;
	// confirm who allocates/frees it before changing lifetime handling.
	stochasticProcess *_spPtr;
	tree _et, _newTree; // current accepted tree / latest candidate tree
};
+
// Iterative seqs2tree whose side info is a single gamma-shape parameter
// (alpha) shared by all sites.
class commonAlphaDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree {
public:
	// Given likeDist is assumed to hold a gamma-distribution stochasticProcess
	commonAlphaDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
				     const MDOUBLE epsilonLikelihoodImprovement = 0.001,
				     const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001,
				     const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
				     const int maxIterationsBBL = 50)
		: iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {}
	virtual ~commonAlphaDistanceSeqs2Tree() {}

	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	tree seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	// Does one bootstrap iteration
	tree seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, const Vdouble *weights, const tree* constraintTreePtr=NULL);
	// Explicitly ask for iterations
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // homogenous rates will be used for first iteration
	tree seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);

	// handling side info (the side info here is _alpha, stored in the base class)
	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et);
	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha);
	virtual void acceptSideInfo();
	virtual void utilizeSideInfo();
	virtual void printSideInfo(ostream& out) const;
	void setSideInfo(const MDOUBLE alpha);
	MDOUBLE getSideInfo() const;
};
+
// Iterative seqs2tree whose side info is a vector of per-site ML rates
// (rate4site), fed into a givenRatesMLDistance distance method.
class rate4siteDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree {
public:
	rate4siteDistanceSeqs2Tree(givenRatesMLDistance &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
				   const MDOUBLE epsilonLikelihoodImprovement = 0.001,
				   const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001,
				   const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
				   const int maxIterationsBBL = 50)
		: iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {}
	virtual ~rate4siteDistanceSeqs2Tree() {}

	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights = NULL, const tree* constraintTreePtr=NULL);
	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	tree seqs2Tree(const sequenceContainer &sc, const Vdouble &rates, const Vdouble *weights = NULL, const tree* constraintTreePtr=NULL);
	// Does one bootstrap iteration
	tree seqs2TreeBootstrap(const sequenceContainer &sc, const Vdouble &rates, const Vdouble *weights, const tree* constraintTreePtr=NULL);
	// Explicitly ask for iterations
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // homogenous rates will be used for first iteration
	tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble &initRates, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);

	// handling side info (the side info here is _rates)
	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et);
	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha);
	virtual void acceptSideInfo();
	virtual void utilizeSideInfo();
	virtual void printSideInfo(ostream& out) const;
	void setSideInfo(const Vdouble &rates);
	const Vdouble& getSideInfo() const;

private:
	Vdouble _rates;    // current accepted per-site ML rates
	Vdouble _newRates; // candidate rates from the latest optimization
};
+
// Iterative seqs2tree whose side info is the per-site posterior
// probability of each rate category, fed into a posteriorDistance method.
class posteriorDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree {
public:
	posteriorDistanceSeqs2Tree(posteriorDistance &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
				   const MDOUBLE epsilonLikelihoodImprovement = 0.001,
				   const MDOUBLE epsilonLikelihoodImprovement4alphaOptimiz = 0.001,
				   const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
				   const int maxIterationsBBL = 50)
		: iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4alphaOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {}
	virtual ~posteriorDistanceSeqs2Tree() {}

	// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
	virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights = NULL, const tree* constraintTreePtr=NULL);
	// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
	tree seqs2Tree(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble *weights = NULL, const tree* constraintTreePtr=NULL);
	// Does one bootstrap iteration
	tree seqs2TreeBootstrap(const sequenceContainer &sc, const VVdoubleRep &posterior, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	// Explicitly ask for iterations
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // homogenous rates will be used for first iteration
	tree seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
	tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const VVdoubleRep &initPosterior, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);

	// handling side info (the side info here is _posterior)
	virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et);
	virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha);
	virtual void acceptSideInfo();
	virtual void utilizeSideInfo();
	virtual void printSideInfo(ostream& out) const;
	void setSideInfo(const VVdoubleRep &posterior);
	const VVdoubleRep& getSideInfo() const;

private:
	VVdoubleRep _posterior;    // current accepted per-site rate posterior
	VVdoubleRep _newPosterior; // candidate posterior from the latest optimization
};
+
+#endif
diff --git a/libs/phylogeny/distanceMethod.h b/libs/phylogeny/distanceMethod.h
new file mode 100644
index 0000000..d9e9d6a
--- /dev/null
+++ b/libs/phylogeny/distanceMethod.h
@@ -0,0 +1,24 @@
+// $Id: distanceMethod.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___DISTANCE_METHOD
+#define ___DISTANCE_METHOD
+#include "definitions.h"
+#include "sequence.h"
+
+/*********************************************************
+Distance method is a class for computing pairwise distance
+between 2 different sequences
+*******************************************************/
class distanceMethod {
public:
	// Returns the distance between s1 and s2. weights, if non-NULL, gives
	// per-position weights. If score is non-NULL, implementations may write
	// an auxiliary value into it (semantics are implementation-defined).
	virtual const MDOUBLE giveDistance(const sequence& s1,
					   const sequence& s2,
					   const vector<MDOUBLE> * weights=NULL,
					   MDOUBLE* score=NULL) const=0;
	virtual distanceMethod* clone(void) const=0;
	virtual ~distanceMethod() {}
};
+
+
+#endif
+
diff --git a/libs/phylogeny/distanceTable.cpp b/libs/phylogeny/distanceTable.cpp
new file mode 100644
index 0000000..3520898
--- /dev/null
+++ b/libs/phylogeny/distanceTable.cpp
@@ -0,0 +1,21 @@
+// $Id: distanceTable.cpp 1740 2007-02-26 13:53:10Z itaymay $
+
+#include "definitions.h"
+#include "distanceTable.h"
+
+void giveDistanceTable(const distanceMethod* dis,
+ const sequenceContainer& sc,
+ VVdouble& res,
+ vector<string>& names,
+ const vector<MDOUBLE> * weights){
+ res.resize(sc.numberOfSeqs());
+ for (int z=0; z< sc.numberOfSeqs();++z) res[z].resize(sc.numberOfSeqs(),0.0);
+
+ for (int i=0; i < sc.numberOfSeqs();++i) {
+ for (int j=i+1; j < sc.numberOfSeqs();++j) {
+ res[i][j] = dis->giveDistance(sc[sc.placeToId(i)],sc[sc.placeToId(j)],weights,NULL);
+ //LOG(5,<<"res["<<i<<"]["<<j<<"] ="<<res[i][j]<<endl);
+ }
+ names.push_back(sc[sc.placeToId(i)].name());
+ }
+}
diff --git a/libs/phylogeny/distanceTable.h b/libs/phylogeny/distanceTable.h
new file mode 100644
index 0000000..5a908d9
--- /dev/null
+++ b/libs/phylogeny/distanceTable.h
@@ -0,0 +1,17 @@
+// $Id: distanceTable.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___DISTANCE_TABLE
+#define ___DISTANCE_TABLE
+
+#include "definitions.h"
+#include "distanceMethod.h"
+#include "sequenceContainer.h"
+
// Compute the pairwise distance table for all sequences in sc using the
// given distance method. Fills res (only the upper triangle; see
// distanceTable.cpp) and the matching sequence names.
void giveDistanceTable(const distanceMethod* dis,
		       const sequenceContainer& sc,
		       VVdouble& res,
		       vector<string>& names,
		       const vector<MDOUBLE> * weights = NULL);
+
+
+#endif
diff --git a/libs/phylogeny/distances2Tree.h b/libs/phylogeny/distances2Tree.h
new file mode 100644
index 0000000..6722ed0
--- /dev/null
+++ b/libs/phylogeny/distances2Tree.h
@@ -0,0 +1,18 @@
+// $Id: distances2Tree.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___DISTANCES2TREE
+#define ___DISTANCES2TREE
+
+#include "definitions.h"
+#include "tree.h"
+#include <string>
+using namespace std;
+
// Interface for algorithms that reconstruct a tree from a matrix of
// pairwise distances (e.g. neighbor joining).
class distances2Tree {
public:
	virtual ~distances2Tree() {}
	virtual distances2Tree* clone() const =0;
	// NOTE(review): `distances` is taken by value, so every call copies the
	// whole matrix; a const reference would avoid this, but changing a
	// virtual signature affects all implementers -- confirm before touching.
	virtual tree computeTree(VVdouble distances, const vector<string>& names, const tree * const constriantTree = NULL) = 0;
};
+
+#endif
diff --git a/libs/phylogeny/distribution.cpp b/libs/phylogeny/distribution.cpp
new file mode 100644
index 0000000..ba5ce75
--- /dev/null
+++ b/libs/phylogeny/distribution.cpp
@@ -0,0 +1,13 @@
+// $Id: distribution.cpp 2709 2007-11-19 14:49:21Z itaymay $
+
+#include "distribution.h"
+#include "errorMsg.h"
+
// Out-of-line definition of the pure virtual destructor.
distribution::~distribution(){}
// this must be here. see Effective c++ page 63 (item 14, constructors, destructors,
// assignment

// Default implementation: subclasses that support a variable number of
// rate categories override this; all others land on this error.
void distribution::change_number_of_categories(int in_number_of_categories)
{
	errorMsg::reportError("not implemented: distribution::change_number_of_categories()!");
}
\ No newline at end of file
diff --git a/libs/phylogeny/distribution.h b/libs/phylogeny/distribution.h
new file mode 100644
index 0000000..2578ac3
--- /dev/null
+++ b/libs/phylogeny/distribution.h
@@ -0,0 +1,31 @@
+// $Id: distribution.h 2709 2007-11-19 14:49:21Z itaymay $
+
+// version 2.00
+// last modified 21 Mar 2004
+
+/************************************************************
+ This is a virtual class from which all types of distribution classes inherit from.
+************************************************************/
+
+#ifndef ___DISTRIBUTION
+#define ___DISTRIBUTION
+
+#include "definitions.h"
+
// Abstract interface for a discrete rate distribution: a fixed number of
// rate categories, each with a rate value (rates) and a probability
// (ratesProb), plus a global rate multiplier.
class distribution {
public:
	virtual distribution* clone() const = 0;
	virtual ~distribution() = 0; // pure virtual; defined out-of-line in distribution.cpp

	virtual const int categories() const=0; // @@@@ there is no need to return a const int.
	virtual void change_number_of_categories(int in_number_of_categories);
	virtual const MDOUBLE rates(const int i) const=0; // @@@@ there is no need to return a const MDOUBLE.
	virtual const MDOUBLE ratesProb(const int i) const=0; // @@@@ there is no need to return a const MDOUBLE.
	virtual void setGlobalRate(const MDOUBLE x)=0;
	virtual MDOUBLE getGlobalRate()const=0; // @@@@ there is no need to return a const MDOUBLE.
	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const = 0; // @@@@ there is no need to return a const MDOUBLE.

};
+#endif
+
+
diff --git a/libs/phylogeny/distributionPlusCategory.cpp b/libs/phylogeny/distributionPlusCategory.cpp
new file mode 100644
index 0000000..c2df3a6
--- /dev/null
+++ b/libs/phylogeny/distributionPlusCategory.cpp
@@ -0,0 +1,100 @@
+#include "distributionPlusCategory.h"
+
+distributionPlusCategory::distributionPlusCategory(const distribution* pBaseDist, MDOUBLE baseDistProb,MDOUBLE categoryVal,MDOUBLE globalRate)
+:
+_globalRate(globalRate),
+_categoryVal(categoryVal),
+_baseDistProb(baseDistProb)
+{
+ if (pBaseDist!= NULL)
+ _pBaseDist = pBaseDist->clone();
+}
+
// Default construction: no base distribution, unit rates, and zero
// probability mass on the (absent) base distribution.
distributionPlusCategory::distributionPlusCategory()
:
_globalRate(1.0),
_pBaseDist(NULL),
_categoryVal(1.0),
_baseDistProb(0.0)
{
}
+
+distributionPlusCategory::distributionPlusCategory(const distributionPlusCategory& other)
+{
+ (*this) = other;
+}
+
+distributionPlusCategory& distributionPlusCategory::operator=(const distributionPlusCategory &other)
+{
+ if (this != &other)
+ {
+ _globalRate = other._globalRate;
+ if (other._pBaseDist) {
+ _pBaseDist = other._pBaseDist->clone();
+ }
+ else {
+ _pBaseDist = NULL;
+ }
+ _categoryVal = other._categoryVal;
+ _baseDistProb = other._baseDistProb;
+
+ }
+ return *this;
+}
+
+distributionPlusCategory::~distributionPlusCategory()
+{
+ if (_pBaseDist)
+ delete _pBaseDist;
+}
+
// Total category count: all base-distribution categories plus the one
// extra category.
const int distributionPlusCategory::categories() const
{
	return _pBaseDist->categories()+1;
}
+
+
+const MDOUBLE distributionPlusCategory::rates(const int category) const
+{
+ if (category < _pBaseDist->categories())
+ return _pBaseDist->rates(category);
+ else
+ return _categoryVal;
+}
+
+
+const MDOUBLE distributionPlusCategory::ratesProb(const int category) const
+{
+ if (category < _pBaseDist->categories())
+ return _pBaseDist->ratesProb(category) * _baseDistProb;
+ else
+ return (1-_baseDistProb); //category prob
+}
+
+
//gets cumulative probability till a certain point
// P(rate <= x): the extra category contributes its full mass 1-_baseDistProb
// once x reaches _categoryVal (within EPSILON); the base distribution's
// CDF is scaled by _baseDistProb.
const MDOUBLE distributionPlusCategory::getCumulativeProb(const MDOUBLE x) const
{
	MDOUBLE res(0.0);
	if (x < 0)
		errorMsg::reportError("x < 0 in distributionPlusCategory::getCumulativeProb()");
	if (x > _categoryVal - EPSILON)
		res += 1-_baseDistProb;
	res += _baseDistProb * _pBaseDist->getCumulativeProb(x);
	return res;
}
+
+
// Forward to the wrapped distribution; the extra category is unaffected.
void distributionPlusCategory::change_number_of_categories(int in_number_of_categories)
{
	_pBaseDist->change_number_of_categories(in_number_of_categories);
}
+
+
+void distributionPlusCategory::setBaseDistProb(MDOUBLE baseDistProb)
+{
+ if ((baseDistProb < 0.0) || (baseDistProb>1.0) )
+ errorMsg::reportError("illegal baseDistProb in distributionPlusCategory::setBaseDistProb");
+
+ _baseDistProb = baseDistProb;
+}
\ No newline at end of file
diff --git a/libs/phylogeny/distributionPlusCategory.h b/libs/phylogeny/distributionPlusCategory.h
new file mode 100644
index 0000000..48f4bef
--- /dev/null
+++ b/libs/phylogeny/distributionPlusCategory.h
@@ -0,0 +1,43 @@
+
+#ifndef ___DIST_PLUS_CATEGORY
+#define ___DIST_PLUS_CATEGORY
+
+#include "definitions.h"
+#include "distribution.h"
+#include "logFile.h"
+#include "errorMsg.h"
+
+// A distribution decorator: wraps an arbitrary base distribution (deep-owned
+// via clone) and appends one extra rate category of value _categoryVal.
+// The base categories carry total mass _baseDistProb; the extra category
+// carries 1-_baseDistProb.
+class distributionPlusCategory : public distribution {
+
+public:
+	explicit distributionPlusCategory(const distribution* pBaseDist, MDOUBLE baseDistProb,MDOUBLE categoryVal,MDOUBLE globalRate=1);
+	explicit distributionPlusCategory();
+	explicit distributionPlusCategory(const distributionPlusCategory& other);
+	virtual ~distributionPlusCategory();
+	virtual distributionPlusCategory& operator=(const distributionPlusCategory &other);
+	virtual distribution* clone() const { return new distributionPlusCategory(*this); }
+
+	// non-owning access to the wrapped distribution
+	distribution* getBaseDistribution() {return _pBaseDist;}
+	virtual const int categories() const; // base categories + 1
+	virtual const MDOUBLE rates(const int category) const;
+	virtual const MDOUBLE ratesProb(const int category) const;
+
+	virtual void setGlobalRate(const MDOUBLE x) {_globalRate=x;}
+	virtual MDOUBLE getGlobalRate()const {return _globalRate;}
+	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
+	virtual void change_number_of_categories(int in_number_of_categories);
+
+	virtual MDOUBLE getCategoryVal() const {return _categoryVal;}
+	virtual MDOUBLE getBaseDistProb() const {return _baseDistProb;}
+	virtual void setCategoryVal(MDOUBLE categoryVal) { _categoryVal = categoryVal;}
+	virtual void setBaseDistProb(MDOUBLE baseDistProb); // validates [0,1]
+
+protected:
+	MDOUBLE _globalRate;       // overall rate scaler
+	distribution* _pBaseDist;  // owned clone of the base distribution (may be NULL)
+	MDOUBLE _categoryVal;      // rate of the appended category
+	MDOUBLE _baseDistProb;     // mass of the base distribution, in [0,1]
+
+};
+
+#endif // ___DIST_PLUS_CATEGORY
diff --git a/libs/phylogeny/distributionPlusInvariant.cpp b/libs/phylogeny/distributionPlusInvariant.cpp
new file mode 100644
index 0000000..2b7c225
--- /dev/null
+++ b/libs/phylogeny/distributionPlusInvariant.cpp
@@ -0,0 +1,77 @@
+#include "definitions.h"
+#include "distributionPlusInvariant.h"
+#include "errorMsg.h"
+#include "logFile.h"
+
+//#define RATE_INVARIANT 1e-10
+
+
+// Construct from a base distribution (deep-copied via clone) plus an
+// invariant category of probability pInv and rate rateInvariantVal.
+distributionPlusInvariant::distributionPlusInvariant(
+	distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate, MDOUBLE rateInvariantVal)
+{
+	_globalRate = globalRate;
+	_Pinv = pInv;
+	_rateInvariantVal = rateInvariantVal;
+	_pBaseDist = (pDist == NULL) ? NULL : pDist->clone();
+}
+
+// Default construction: no base distribution, zero invariant mass.
+distributionPlusInvariant::distributionPlusInvariant()
+{
+	_pBaseDist = NULL;
+	_Pinv = 0;
+	_rateInvariantVal = 0;
+	_globalRate = 1.0;
+}
+
+
+// Assignment: deep-copies the base distribution and the invariant parameters.
+distributionPlusInvariant& distributionPlusInvariant::operator=(const distributionPlusInvariant& other)
+{
+	if (this == &other) // fix: self-assignment formerly nulled _pBaseDist and then cloned NULL
+		return *this;
+	_globalRate = other._globalRate;
+	_Pinv = other._Pinv;
+	_rateInvariantVal = other._rateInvariantVal;
+	delete _pBaseDist; // fix: release the previously owned base distribution (was leaked)
+	_pBaseDist = NULL;
+	if (other._pBaseDist != NULL)
+		_pBaseDist = other._pBaseDist->clone();
+	return *this;
+}
+
+// Destructor: release the owned base distribution (delete of NULL is a no-op).
+distributionPlusInvariant::~distributionPlusInvariant()
+{
+	delete _pBaseDist;
+}
+
+
+//gets cumulative probability till a certain point
+const MDOUBLE distributionPlusInvariant::getCumulativeProb(const MDOUBLE x) const
+{
+ if (x < 0)
+ errorMsg::reportError("x < 0 in distributionPlusInvariant::getCumulativeProb()");
+ return (_Pinv + (1 -_Pinv) * _pBaseDist->getCumulativeProb(x));
+}
+
+
+// Probability of a category: the last category is the invariant one (_Pinv);
+// base-distribution probabilities are scaled by the remaining 1-_Pinv.
+const MDOUBLE distributionPlusInvariant::ratesProb(const int category) const
+{
+	const bool isInvariantCategory = (category == categories()-1);
+	return isInvariantCategory ? _Pinv : (1 - _Pinv) * _pBaseDist->ratesProb(category);
+}
+
+// Rate of a category: the last category returns the (near-zero) invariant
+// rate; all others delegate to the base distribution.
+const MDOUBLE distributionPlusInvariant::rates(const int category) const
+{
+	const bool isInvariantCategory = (category == categories()-1);
+	return isInvariantCategory ? _rateInvariantVal : _pBaseDist->rates(category);
+}
+
+// Total number of rate categories: base categories plus the invariant one.
+const int distributionPlusInvariant::categories() const
+{
+	return _pBaseDist->categories() + 1;
+}
+
+
diff --git a/libs/phylogeny/distributionPlusInvariant.h b/libs/phylogeny/distributionPlusInvariant.h
new file mode 100644
index 0000000..5201ad2
--- /dev/null
+++ b/libs/phylogeny/distributionPlusInvariant.h
@@ -0,0 +1,41 @@
+#ifndef __DISTPLUSINV
+#define __DISTPLUSINV
+/************************************************************
+This class describes a combination of a predefined distribution,
+with an additional invariant category of probability _Pinv.
+This category is always the last rate category, and its rate is the near-zero _rateInvariantVal
+************************************************************/
+#include "definitions.h"
+#include "distribution.h"
+
+// Decorator distribution: a base distribution (deep-owned via clone) plus an
+// invariant category with probability _Pinv and rate _rateInvariantVal,
+// always placed last among the categories.
+class distributionPlusInvariant : public distribution {
+public:
+	explicit distributionPlusInvariant(
+		distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10);
+	explicit distributionPlusInvariant();
+	// copy: init the pointer to NULL first so operator= can safely overwrite it
+	distributionPlusInvariant(const distributionPlusInvariant& other): _pBaseDist(NULL){(*this) = other;}
+	virtual distributionPlusInvariant& operator=(const distributionPlusInvariant& other);
+	distributionPlusInvariant* clone() const {return new distributionPlusInvariant(*this);}
+
+	virtual ~distributionPlusInvariant();
+
+	// non-owning access to the wrapped distribution
+	distribution* getBaseDistribution(){return _pBaseDist;}
+	//get/set the parameters of the mixture
+	const int categories() const; // base categories + 1
+	void setGlobalRate(const MDOUBLE r) {_globalRate = r;}
+	MDOUBLE getGlobalRate() const {return _globalRate;}
+	virtual void setInvProb(const MDOUBLE p) {_Pinv = p;}
+	const MDOUBLE getInvProb() const {return _Pinv;}
+
+	//get distribution statistics
+	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
+	virtual const MDOUBLE rates(const int category) const;
+	virtual const MDOUBLE ratesProb(const int i) const;
+
+protected:
+	MDOUBLE _globalRate;       // overall rate scaler
+	MDOUBLE _Pinv;             // probability of the invariant category, in [0,1]
+	MDOUBLE _rateInvariantVal; // rate assigned to the invariant category (near zero)
+	distribution* _pBaseDist;  // owned clone of the base distribution (may be NULL)
+};
+#endif
diff --git a/libs/phylogeny/doubleRep.cpp b/libs/phylogeny/doubleRep.cpp
new file mode 100644
index 0000000..219ab97
--- /dev/null
+++ b/libs/phylogeny/doubleRep.cpp
@@ -0,0 +1,73 @@
+#ifdef DOUBLEREP
+#include "doubleRep.h"
+#include <cmath>
+
+
+
+// Build from an explicit mantissa/exponent pair, then normalize the mantissa.
+doubleRepMantisa::doubleRepMantisa(MDOUBLE mantissa, int expon)
+	: _mantissa(mantissa), _expon(expon) {
+	fixParams();
+}
+
+
+// Decompose a plain double into mantissa*2^expon via frexp; frexp already
+// returns a normalized mantissa, so no fixParams() call is needed.
+doubleRepMantisa::doubleRepMantisa(MDOUBLE a){
+	_mantissa = frexp(a, &_expon);
+}
+
+
+
+// Copy construction: member-wise duplication of mantissa and exponent.
+doubleRepMantisa::doubleRepMantisa(const doubleRepMantisa& other) {
+	_mantissa = other._mantissa;
+	_expon = other._expon;
+}
+
+
+//make sure 0.5<=mantissa<1, as a matter of convention
+void doubleRepMantisa::fixParams(){
+ while (_mantissa>=1){
+ _expon++;
+ _mantissa/=2.0;
+ }
+ while ((_mantissa<0.5) && (_mantissa>0)){
+ _expon--;
+ _mantissa*=2.0;
+ }
+ while (_mantissa<=-1){
+ _expon++;
+ _mantissa/=2.0;
+ }
+ while ((_mantissa>-0.5) && (_mantissa<0)){
+ _expon--;
+ _mantissa*=2.0;
+ }
+}
+
+// Recompose the ordinary double value: mantissa * 2^expon (may overflow/underflow).
+MDOUBLE convert(const doubleRepMantisa& a){
+	return ldexp(a._mantissa, a._expon);
+}
+
+//switches from base 2 to base e
+const MDOUBLE doubleRepMantisa::d_log() const{
+ static const MDOUBLE log2(log(2.0));
+ return log(_mantissa)+log2*_expon;
+}
+
+
+// Stream a doubleRepMantisa in its "m * 2^e" textual form.
+ostream& operator<<(ostream &out, const doubleRepMantisa& a){
+	a.output(out);
+	return out;
+}
+
+// Read an ordinary double and convert it to mantissa/exponent form.
+istream& operator>>(istream &in, doubleRepMantisa& a) {
+	MDOUBLE value;
+	in >> value;
+	a = doubleRepMantisa(value);
+	return in;
+}
+#endif
diff --git a/libs/phylogeny/doubleRep.h b/libs/phylogeny/doubleRep.h
new file mode 100644
index 0000000..2920977
--- /dev/null
+++ b/libs/phylogeny/doubleRep.h
@@ -0,0 +1,316 @@
+#ifndef __DOUBLE_REP_H
+#define __DOUBLE_REP_H
+
+#ifdef DOUBLEREP
+#include "definitions.h"
+
+#include <iostream>
+#include <cmath>
+using namespace std;
+
+/* doubleRepMantisa: enables working with much larger or smaller numbers than normally possible
+by the regular double representation
+ * Representation of a double x as x=_mantissa*2^_expon
+ Note: Base is 2!!
+ */
+
+// Extended-range floating point: stores a value as _mantissa * 2^_expon with
+// the mantissa kept normalized in [0.5,1) (or (-1,-0.5] for negatives), so
+// magnitudes far beyond double range can be represented.
+class doubleRepMantisa{
+public:
+
+ doubleRepMantisa(){};
+ explicit doubleRepMantisa(MDOUBLE mantissa, int expon);
+ doubleRepMantisa(MDOUBLE a); // implicit: allows mixing with plain doubles
+ doubleRepMantisa(const doubleRepMantisa& other);
+ doubleRepMantisa* clone() {return new doubleRepMantisa(*this);}
+
+ // print as "m * 2^e" (no trailing newline)
+ void output(ostream &out) const{ out<<_mantissa<<string(" * 2^")<<_expon;}
+ // void output0x(ostream &out) const{ double e0x=_expon*0.3010299956639; // log_10(2)
+ // int e=(int)(trunc(e0x))-1;
+ // double m=_mantissa*pow(10,e0x-e);
+ // out<<m;
+ // if (e<0)
+ // out<<"e"<<e;
+ // else
+ // out<<"e+"<<e;
+ //}
+ // print as "m * 2^e" followed by a newline
+ void outputn(ostream &out) { out<<_mantissa<<string(" * 2^")<<_expon<<endl;}
+
+ friend MDOUBLE convert(const doubleRepMantisa& a); // back to plain double
+ inline doubleRepMantisa& operator=(const doubleRepMantisa& a);
+ inline doubleRepMantisa& operator+=(doubleRepMantisa a);
+ inline doubleRepMantisa& operator++();
+ inline doubleRepMantisa operator++(int);
+ inline doubleRepMantisa& operator--();
+ inline doubleRepMantisa operator--(int);
+ friend inline doubleRepMantisa operator+(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ inline doubleRepMantisa& operator-=(const doubleRepMantisa& a);
+ friend inline doubleRepMantisa operator-(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ inline doubleRepMantisa& operator*=(const doubleRepMantisa& a);
+ friend inline doubleRepMantisa operator*(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ inline doubleRepMantisa& operator/=(const doubleRepMantisa& a);
+ friend inline doubleRepMantisa operator/(const doubleRepMantisa& a, const doubleRepMantisa& b);
+
+ friend inline bool operator==(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ friend inline bool operator!=(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ friend inline bool operator<(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ friend inline bool operator<=(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ friend inline bool operator>(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ friend inline bool operator>=(const doubleRepMantisa& a, const doubleRepMantisa& b);
+ friend inline doubleRepMantisa abs(const doubleRepMantisa& d);
+
+
+ const MDOUBLE d_log() const; // natural log without overflow
+// friend ostream& operator<<(ostream &out, const doubleRepMantisa& a);
+
+ const MDOUBLE mantissa() const {return _mantissa;}
+ const int expon() const {return _expon;}
+
+private:
+ // renormalize so the mantissa magnitude lies in [0.5,1)
+ void fixParams();
+
+
+private:
+ MDOUBLE _mantissa; // normalized significand, sign-carrying
+ int _expon;        // base-2 exponent
+};
+
+// Assignment: member-wise copy (friend/member access, no accessors needed).
+inline doubleRepMantisa& doubleRepMantisa::operator=(const doubleRepMantisa& a){
+	_mantissa = a._mantissa;
+	_expon = a._expon;
+	return *this;
+}
+
+
+// Pre-increment: add one and return self.
+inline doubleRepMantisa& doubleRepMantisa::operator++() {
+	(*this) += 1;
+	return *this;
+}
+
+// matan:
+// Post-increment: snapshot, bump, return the snapshot.
+inline doubleRepMantisa doubleRepMantisa::operator++(int) {
+	doubleRepMantisa before(*this);
+	++(*this);
+	return before;
+}
+
+// matan:
+// Pre-decrement: subtract one and return self.
+inline doubleRepMantisa& doubleRepMantisa::operator--() {
+	(*this) -= 1;
+	return *this;
+}
+
+// matan:
+// Post-decrement: snapshot, decrement, return the snapshot.
+inline doubleRepMantisa doubleRepMantisa::operator--(int) {
+	doubleRepMantisa before(*this);
+	--(*this);
+	return before;
+}
+
+
+// Original version by Adi Stern
+inline doubleRepMantisa& doubleRepMantisa::operator+=(doubleRepMantisa a){
+ //ensuring that (*this) is bigger than 'a' for sake of convenience
+ if (a.expon()>_expon || ((a.expon()==_expon) && (a.mantissa()>_mantissa))){
+ MDOUBLE tmpMant=0.0; int tmpExp=0;
+ tmpMant=_mantissa;
+ tmpExp=_expon;
+ _mantissa=a.mantissa();
+ a._mantissa=tmpMant;
+ tmpExp=_expon;
+ _expon=a.expon();
+ a._expon=tmpExp;
+ }
+ if (a.mantissa()==0)
+ return *this;
+ if (_mantissa==0){
+ _mantissa=a.mantissa();
+ _expon=a.expon();
+ return *this;
+ }
+ if (abs(_expon-a.expon())>51){ //limit of epsilon difference
+ return *this;
+ }
+ _mantissa+=a.mantissa()*pow(2.0,(a.expon()-_expon)*1.0);
+ fixParams();
+ return *this;
+}
+
+// Binary + in terms of +=.
+inline doubleRepMantisa operator+(const doubleRepMantisa& a, const doubleRepMantisa& b){
+	doubleRepMantisa sum(a);
+	sum += b;
+	return sum;
+}
+
+// Subtraction: add the operand with its mantissa negated.
+inline doubleRepMantisa& doubleRepMantisa::operator-=(const doubleRepMantisa& a){
+	const doubleRepMantisa negated(-a.mantissa(), a.expon());
+	(*this) += negated;
+	return *this;
+}
+
+// Binary - in terms of -=.
+inline doubleRepMantisa operator-(const doubleRepMantisa& a, const doubleRepMantisa& b){
+	doubleRepMantisa diff(a);
+	diff -= b;
+	return diff;
+}
+
+// Unary negation: zero minus the operand.
+inline doubleRepMantisa operator-(const doubleRepMantisa& a) {
+	doubleRepMantisa result(0);
+	result -= a;
+	return result;
+}
+
+// Multiplication: multiply mantissas, add exponents, renormalize.
+inline doubleRepMantisa& doubleRepMantisa::operator*=(const doubleRepMantisa& a){
+	_expon += a.expon();
+	_mantissa *= a.mantissa();
+	fixParams();
+	return *this;
+}
+
+// Binary * in terms of *=.
+inline doubleRepMantisa operator*(const doubleRepMantisa& a, const doubleRepMantisa& b){
+	doubleRepMantisa product(a);
+	product *= b;
+	return product;
+}
+
+// Division: divide mantissas, subtract exponents, renormalize.
+inline doubleRepMantisa& doubleRepMantisa::operator/=(const doubleRepMantisa& a){
+	_expon -= a.expon();
+	_mantissa /= a.mantissa();
+	fixParams();
+	return *this;
+}
+
+// Binary / in terms of /=.
+inline doubleRepMantisa operator/(const doubleRepMantisa& a, const doubleRepMantisa& b){
+	doubleRepMantisa quotient(a);
+	quotient /= b;
+	return quotient;
+}
+
+/************************
+ * Comparison operators *
+ ************************/
+// Equality: normalized representations are unique, so compare member-wise.
+inline bool operator==(const doubleRepMantisa& a, const doubleRepMantisa& b){
+	return (a._expon==b._expon) && (a._mantissa==b._mantissa);
+}
+// Inequality: negation of ==.
+inline bool operator!=(const doubleRepMantisa& a, const doubleRepMantisa& b){
+	return (a._mantissa!=b._mantissa) || (a._expon!=b._expon);
+}
+
+// Less-than over the mantissa/exponent representation. Case analysis:
+// opposite signs -> the sign decides; differing exponents -> handle zero
+// mantissas specially, otherwise a larger exponent means larger magnitude
+// (direction depends on sign); equal exponents -> compare mantissas.
+inline bool operator<(const doubleRepMantisa& a, const doubleRepMantisa& b){
+ // if the numbers have opposite signs
+ if (a._mantissa*b._mantissa<0.0){
+ if (a._mantissa<b._mantissa) {return true;}
+ else {return false;}
+ }
+ // if the expon values are different
+ if (a._expon!=b._expon) {
+ // special case where one number is zero
+ if (a._mantissa == 0.0) {
+ if (b._mantissa > 0.0) {return true;}
+ else {return false;}
+ }
+ if (b._mantissa == 0.0) {
+ if (a._mantissa < 0.0) {return true;}
+ else {return false;}
+ }
+
+ if (a._expon<b._expon) {
+ if (a._mantissa > 0.0) {return true;}
+ else {return false;}
+ } else {
+ if (a._mantissa < 0.0) {return true;}
+ else {return false;}
+ }
+ // expon values are identical
+ } else {
+ return (a._mantissa < b._mantissa);
+ }
+}
+
+// Greater-than: mirror image of operator< (same case analysis with the
+// comparison directions flipped).
+inline bool operator>(const doubleRepMantisa& a, const doubleRepMantisa& b){
+ // if the numbers have opposite signs
+ if (a._mantissa*b._mantissa<0.0){
+ if (a._mantissa>b._mantissa) {return true;}
+ else {return false;}
+ }
+ // if the expon values are different
+ if (a._expon!=b._expon) {
+ // special case where one number is zero
+ if (a._mantissa == 0.0) {
+ if (b._mantissa < 0.0) {return true;}
+ else {return false;}
+ }
+ if (b._mantissa == 0.0) {
+ if (a._mantissa > 0.0) {return true;}
+ else {return false;}
+ }
+
+ if (a._expon>b._expon) {
+ if (a._mantissa > 0.0) {return true;}
+ else {return false;}
+ } else {
+ if (a._mantissa < 0.0) {return true;}
+ else {return false;}
+ }
+ // expon values are identical
+ } else {
+ return (a._mantissa > b._mantissa);
+ }
+}
+
+// a <= b iff not (a > b).
+inline bool operator<=(const doubleRepMantisa& a, const doubleRepMantisa& b){
+	const bool strictlyGreater = (a > b);
+	return !strictlyGreater;
+}
+
+// a >= b iff not (a < b).
+inline bool operator>=(const doubleRepMantisa& a, const doubleRepMantisa& b){
+	const bool strictlyLess = (a < b);
+	return !strictlyLess;
+}
+
+
+
+
+ostream& operator<<(ostream &out, const doubleRepMantisa& a);
+istream& operator>>(istream &in, doubleRepMantisa& a);
+
+inline MDOUBLE log(const doubleRepMantisa& d) {return d.d_log();}
+
+// Stream a vector of doubleRepMantisa, space-separated, newline-terminated.
+inline ostream &operator<<(ostream &out, const VdoubleRepMantisa &v){
+	for (size_t j=0;j<v.size();++j) // fix: size_t index avoids signed/unsigned mismatch
+		out<< v[j]<<" ";
+	out <<endl;
+	return(out);
+}
+
+// Stream a matrix of doubleRepMantisa, one row per line.
+inline ostream &operator<<(ostream &out, const VVdoubleRepMantisa &m){
+	for (size_t i=0;i<m.size();++i) // fix: size_t index avoids signed/unsigned mismatch
+		out<<m[i];
+	out <<endl;
+	return(out);
+}
+
+// pow: fall back to double-precision pow on the converted values.
+inline doubleRepMantisa pow(const doubleRepMantisa& d1, const doubleRepMantisa& d2) {
+	const MDOUBLE base = convert(d1);
+	const MDOUBLE exponent = convert(d2);
+	return doubleRepMantisa(pow(base, exponent));
+}
+
+// Absolute value: negate only the mantissa; the exponent carries no sign.
+inline doubleRepMantisa abs(const doubleRepMantisa& d) {
+	const MDOUBLE magnitude = abs(d._mantissa);
+	return doubleRepMantisa(magnitude, d._expon);
+}
+
+// fabs is provided for cmath-style call sites; identical to abs().
+inline doubleRepMantisa fabs(const doubleRepMantisa& d) {
+ return abs(d);
+}
+
+// exp: computed in double precision on the converted value.
+inline doubleRepMantisa exp(const doubleRepMantisa& d) {
+	const MDOUBLE plain = convert(d);
+	return doubleRepMantisa(exp(plain));
+}
+
+// sqrt: computed in double precision on the converted value.
+inline doubleRepMantisa sqrt(const doubleRepMantisa& d) {
+	const MDOUBLE plain = convert(d);
+	return doubleRepMantisa(sqrt(plain));
+}
+
+
+
+
+
+//inline const MDOUBLE convert (const MDOUBLE d) const {return(d);}
+
+#endif
+#endif
diff --git a/libs/phylogeny/errorMsg.cpp b/libs/phylogeny/errorMsg.cpp
new file mode 100644
index 0000000..e8a67b1
--- /dev/null
+++ b/libs/phylogeny/errorMsg.cpp
@@ -0,0 +1,45 @@
+// $Id: errorMsg.cpp 6066 2009-04-14 19:11:10Z itaymay $
+
+// version 1.01
+// last modified 1 Jan 2004
+#include "definitions.h"
+#include <cassert>
+#include "errorMsg.h"
+#include "logFile.h"
+#include <errno.h>
+#include <string.h> //for strerror
+#include <stdlib.h> //for exit()
+
+ostream *errorMsg::_errorOut= NULL;
+
+void errorMsg::reportError(const vector<string>& textToPrint, const int exitCode) {
+ for (int i =0 ; i < textToPrint.size() ; ++i) {
+ LOG(1,<<textToPrint[i]<<endl);
+ cerr<<textToPrint[i]<<endl;
+ if (_errorOut != NULL && *_errorOut != cerr) {
+ (*_errorOut)<<textToPrint[i]<<endl;
+ }
+ }
+ if (errno!=0){
+ LOG(1,<<"System Error: "<<strerror(errno)<<endl);
+ cerr<<"System Error: "<<strerror(errno)<<endl;
+ }
+ assert(0); // always stop here if in DEBUG mode.
+ exit(exitCode);
+}
+
+// Single-string overload: same behavior as the vector overload — log, print
+// to cerr and the optional error stream, append errno text, then abort.
+void errorMsg::reportError(const string& textToPrint, const int exitCode) {
+	LOG(1,<<endl<<textToPrint<<endl);
+	cerr<<endl<<textToPrint<<endl;
+	// fix: compare stream identity (addresses); "*_errorOut != cerr" compared
+	// stream state via operator void* and does not compile under C++11
+	if (_errorOut != NULL && _errorOut != &cerr) {
+		(*_errorOut)<<textToPrint<<endl;
+	}
+	if (errno!=0){
+		LOG(1,<<"System Error: "<<strerror(errno)<<endl);
+		cerr<<"System Error: "<<strerror(errno)<<endl;
+	}
+	assert(0); // always stop here if in DEBUG mode.
+	exit(exitCode);
+}
+
+
diff --git a/libs/phylogeny/errorMsg.h b/libs/phylogeny/errorMsg.h
new file mode 100644
index 0000000..28bea0d
--- /dev/null
+++ b/libs/phylogeny/errorMsg.h
@@ -0,0 +1,33 @@
+// $Id: errorMsg.h 962 2006-11-07 15:13:34Z privmane $
+
+// version 1.01
+// last modified 1 Jan 2004
+
+#ifndef ___ERROR_MSG_H
+#define ___ERROR_MSG_H
+
+#include <string>
+#include <vector>
+#include <iostream>
+
+using namespace std;
+
+// The error is always sent to cerr. _errorOut is NULL, unless setErrorOstream is called.
+
+
+// Static utility for fatal error reporting: prints to cerr (and optionally a
+// user-registered stream) and terminates the process.
+class errorMsg {
+public:
+ static void reportError(const vector<string>& textToPrint, const int exitCode=1); // one line per entry
+ static void reportError(const string& textToPrint, const int exitCode=1);
+ // register an additional stream for error copies (not owned)
+ static void setErrorOstream(ostream* errorOut) {_errorOut = errorOut;}
+private:
+ static ostream* _errorOut; // NULL until setErrorOstream is called
+};
+
+// example of how to output to a file called error.txt
+// ofstream f("error.txt");
+// errorMsg::setErrorOstream(&f);
+// errorMsg::reportError("cheers");
+
+#endif
+
diff --git a/libs/phylogeny/evaluateCharacterFreq.cpp b/libs/phylogeny/evaluateCharacterFreq.cpp
new file mode 100644
index 0000000..f9a5832
--- /dev/null
+++ b/libs/phylogeny/evaluateCharacterFreq.cpp
@@ -0,0 +1,151 @@
+// $Id: evaluateCharacterFreq.cpp 3895 2008-04-21 07:38:32Z itaymay $
+
+#include "evaluateCharacterFreq.h"
+#include "someUtil.h"
+#include <cassert>
+
+// Count how many times each alphabet character appears over all sequences in
+// sc. Returns a vector of length alphabetSize(); negative character codes
+// (gaps/unknowns) and out-of-range codes are ignored.
+vector<MDOUBLE> sumAlphabetCounts(const sequenceContainer & sc) {
+	vector<MDOUBLE> charFreq(sc.alphabetSize(),0.0);
+	sequenceContainer::constTaxaIterator tIt;
+	sequenceContainer::constTaxaIterator tItEnd;
+	tIt.begin(sc);
+	tItEnd.end(sc);
+	while (tIt!= tItEnd) {
+		sequence::constIterator sIt;
+		sequence::constIterator sItEnd;
+		sIt.begin(*tIt);
+		sItEnd.end(*tIt);
+		while (sIt != sItEnd) {
+			// fix: cast size() to int so the signed character code is not
+			// promoted to unsigned in the comparison
+			if ((*sIt >= 0) && (*sIt < (int)charFreq.size())) ++charFreq[(*sIt)];
+			++sIt;
+		}
+		++tIt;
+	}
+	return charFreq;
+}
+
+// Normalize a vector of counts in place so its entries sum to 1.
+void changeCountsToFreqs(vector<MDOUBLE>& charFreq){
+	MDOUBLE sumA = 0;
+	size_t i=0;
+	for (i=0; i < charFreq.size(); ++i) {
+		sumA+=charFreq[i] ;
+	}
+	if (sumA == 0.0) // fix: avoid division by zero (would fill the vector with NaN)
+		errorMsg::reportError("error in changeCountsToFreqs(): total character count is zero");
+	for (i=0; i < charFreq.size(); ++i) {
+		charFreq[i] /= sumA;
+	}
+}
+
+// CORRECT SO THAT THERE ARE NO ZERO FREQUENCIES.
+// ALL FREQS THAT WERE ZERO ARE CHANGED
+// Each zero entry is set to ZERO_FREQ and the remaining (nonzero) entries are
+// scaled down by scaleFactor so the vector still sums to ~1. Requires the
+// input to already sum to 1 (within DEQUAL tolerance), else reports an error.
+void makeSureNoZeroFreqs(vector<MDOUBLE> & charFreq){
+ MDOUBLE ZERO_FREQ = 0.00001;
+ MDOUBLE sumB=0;
+ int charWithZeroFreq = 0;
+ int i=0;
+ for (i=0; i < charFreq.size(); ++i) {
+ if (DEQUAL(charFreq[i], 0.0)) {
+ charFreq[i] = ZERO_FREQ;
+ ++charWithZeroFreq;
+ }
+ else sumB +=charFreq[i];
+ }
+ if (!DEQUAL(sumB, 1.0))
+ {
+ cerr<<"sumFreq = "<<sumB<<endl;
+ errorMsg::reportError("error in makeSureNoZeroFreqs(). Input frequencies must sum to 1.0");
+ }
+ MDOUBLE scaleFactor = sumB - (charWithZeroFreq * ZERO_FREQ);
+ for (i=0; i < charFreq.size(); ++i) {
+ // exact-equality test identifies the entries we just replaced above
+ if (charFreq[i] != ZERO_FREQ)
+ charFreq[i] *= scaleFactor;
+ }
+}
+
+
+// Empirical character frequencies of sc: count, normalize, then nudge any
+// zero frequencies away from zero.
+vector<MDOUBLE> evaluateCharacterFreq(const sequenceContainer & sc) {
+	vector<MDOUBLE> frequencies = sumAlphabetCounts(sc);
+	changeCountsToFreqs(frequencies);
+	makeSureNoZeroFreqs(frequencies);
+	return frequencies;
+}
+
+// Per-gene character frequencies: one frequency vector per sequence container.
+VVdouble evaluateCharacterFreqOneForEachGene(const vector<sequenceContainer> & scVec){
+	VVdouble freqsPerGene;
+	freqsPerGene.reserve(scVec.size());
+	for (size_t gene = 0; gene < scVec.size(); ++gene)
+		freqsPerGene.push_back(evaluateCharacterFreq(scVec[gene]));
+	return freqsPerGene;
+}
+
+
+
+
+// Pooled character frequencies over several genes: counts are accumulated
+// across all containers before normalizing.
+vector<MDOUBLE> evaluateCharacterFreqBasedOnManyGenes(const vector<sequenceContainer> & scVec) {
+ // note: all alphabets have to be the same!
+ vector<MDOUBLE> charFreq(scVec[0].alphabetSize(),0.0);
+ for (int i=0; i < scVec.size();++i) {
+ assert(scVec[0].getAlphabet()->size()==scVec[i].getAlphabet()->size());
+ vector<MDOUBLE> charFreqTmp=sumAlphabetCounts(scVec[i]);
+ for (int z=0; z < charFreq.size();++z) charFreq[z]+=charFreqTmp[z];
+ }
+ changeCountsToFreqs(charFreq);
+ makeSureNoZeroFreqs(charFreq);
+ return charFreq;
+}
+
+//returns the number of each character in each position.
+//NOTE: also returns the number of unknown characters in the last place of each vector, so that the actual vector size for each position is alphabetSize()+1
+// Fill counts4pos[pos][char] with per-position character counts. Each inner
+// vector has alphabetSize()+1 entries: the last slot accumulates characters
+// the alphabet does not consider "specific" (gaps/ambiguities/unknowns).
+void getCharacterCounts(const sequenceContainer & sc, VVint& counts4pos)
+{
+ const alphabet* pAlph = sc.getAlphabet();
+ int alphSize = sc.alphabetSize();
+ int pos;
+ counts4pos.resize(sc.seqLen());
+ for (pos = 0; pos < sc.seqLen(); ++pos)
+ counts4pos[pos].resize(alphSize + 1, 0);
+
+ for (int seq = 0; seq < sc.numberOfSeqs();++seq)
+ {
+ int id = sc.placeToId(seq);
+ for (pos = 0; pos < sc.seqLen(); ++pos)
+ {
+ int charType = sc[id][pos];
+ if (pAlph->isSpecific(charType))
+ {
+ ++counts4pos[pos][charType];
+ }
+ else
+ ++counts4pos[pos][alphSize]; // unknown/ambiguous bucket
+ }
+ }
+}
+
+//returns the number of different character types in each position
+void getCharacterType4pos(const sequenceContainer & sc, Vint& charactersType4pos)
+{
+ VVint counts4Pos;
+ getCharacterCounts(sc, counts4Pos);
+ charactersType4pos.resize(sc.seqLen(), 0);
+ for (int pos = 0; pos < sc.seqLen(); ++pos)
+ {
+ for (int c = 0; c < counts4Pos[pos].size()-1; ++c)
+ {
+ if (counts4Pos[pos][c] > 0)
+ ++charactersType4pos[pos];
+ }
+ }
+}
+
+//returns the distribution of the different character types in each position along the whole alignment
+void getCharacterTypeDistribution(const sequenceContainer & sc, Vint& charactersTypeDist)
+{
+ Vint charactersType4pos;
+ getCharacterType4pos(sc, charactersType4pos);
+ charactersTypeDist.resize(sc.numberOfSeqs()+1, 0);
+ for (int pos = 0; pos < sc.seqLen(); ++pos)
+ {
+ int count = charactersType4pos[pos];
+ ++charactersTypeDist[count];
+ }
+
+}
diff --git a/libs/phylogeny/evaluateCharacterFreq.h b/libs/phylogeny/evaluateCharacterFreq.h
new file mode 100644
index 0000000..86a1224
--- /dev/null
+++ b/libs/phylogeny/evaluateCharacterFreq.h
@@ -0,0 +1,26 @@
+// $Id: evaluateCharacterFreq.h 3895 2008-04-21 07:38:32Z itaymay $
+
+#ifndef __Evaluate_Character_Freq_h
+#define __Evaluate_Character_Freq_h
+
+#include <iostream>
+using namespace std;
+
+#include "sequenceContainer.h"
+#include "definitions.h"
+
+vector<MDOUBLE> sumAlphabetCounts(const sequenceContainer & sc);
+vector<MDOUBLE> evaluateCharacterFreq(const sequenceContainer & sc);
+VVdouble evaluateCharacterFreqOneForEachGene(const vector<sequenceContainer> & scVec);
+vector<MDOUBLE> evaluateCharacterFreqBasedOnManyGenes(const vector<sequenceContainer> & scVec);
+
+void changeCountsToFreqs(vector<MDOUBLE>& charFreq);
+void makeSureNoZeroFreqs(vector<MDOUBLE> & charFreq);
+
+//returns the number of each character in each position
+void getCharacterCounts(const sequenceContainer & sc, VVint& counts4pos);
+//returns the number of different character types in each position
+void getCharacterType4pos(const sequenceContainer & sc, Vint& charactersType4pos);
+//returns the distribution of the different character types in each position along the whole alignment
+void getCharacterTypeDistribution(const sequenceContainer & sc, Vint& charactersTypeDist);
+#endif
diff --git a/libs/phylogeny/fastStartTree.cpp b/libs/phylogeny/fastStartTree.cpp
new file mode 100644
index 0000000..13c2fce
--- /dev/null
+++ b/libs/phylogeny/fastStartTree.cpp
@@ -0,0 +1,145 @@
+// $Id: fastStartTree.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "tree.h"
+#include "treeUtil.h"
+#include "fastStartTree.h"
+#include "bblEM.h"
+#include "likeDist.h"
+#include "likelihoodComputation.h"
+#include "getRandomWeights.h"
+#include "distanceTable.h"
+#include "nj.h"
+#include "logFile.h"
+
+#include <algorithm>
+
+using namespace std;
+using namespace likelihoodComputation;
+
+
+// Score every candidate tree by likelihood (with up to maxIterEM rounds of
+// branch-length EM) and keep only the trees at or above the median score.
+// Returns the surviving trees; guaranteed non-empty.
+vector<tree> eliminateHalf(vector<tree>& tVec,
+ sequenceContainer& orginal,
+ stochasticProcess& sp,
+ ostream& out,
+ const int maxIterEM){
+ vector<MDOUBLE> likeScore(tVec.size(),0.0);
+ int i;
+ for (i=0; i < tVec.size(); ++i) {
+ bblEM bblEM1(tVec[i],orginal,sp,NULL,maxIterEM,0.01);
+ likeScore[i] = bblEM1.getTreeLikelihood();
+
+ LOG(5,<<"~"); // progress tick, one per tree
+ }
+
+ vector<MDOUBLE> sortedL = likeScore;
+ sort(sortedL.begin(),sortedL.end());
+ MDOUBLE median = sortedL[sortedL.size()/2];
+
+ // printing the top ten with their scores;
+// int toPrint = sortedL.size()>10? 10 : sortedL.size();
+// MDOUBLE treshToPrint = sortedL[sortedL.size()-toPrint];
+// out<<"current best 10 (or less) trees: "<<endl;
+// for (int h=0; h < likeScore.size(); ++h) {
+// if (likeScore[h]>treshToPrint) {
+// out<<"likelihood of tree: "<<h<<" = "<<likeScore[h]<<endl;
+// tVec[h].output(out);
+// }
+// }
+
+ for (int p=0; p < sortedL.size(); ++p ){
+ out<<"L["<<p<<"]= "<<sortedL[p]<<endl;
+ }
+ out<<endl;
+
+ vector<tree> newTreeVec;
+ for (i=0;i < tVec.size(); ++i) {
+ if (likeScore[i]>=median) newTreeVec.push_back(tVec[i]); // ok this is a heck to mark trees
+ }
+ if (newTreeVec.size() == 0 ) newTreeVec.push_back(tVec[0]); // in case for example that all have the same L
+ return newTreeVec;
+}
+
+
+
+
+
+
+
+
+
+
+//------------------ get N starting different NJ trees --------------------
+
+// Build up to numOfNJtrees topologically distinct NJ starting trees (the
+// first from unweighted distances, the rest from gamma-randomized position
+// weights), then repeatedly halve the candidate set by likelihood until one
+// tree remains, and polish it with a full branch-length EM. Returns the
+// winning tree; progress and the final likelihood are written to 'out'.
+tree getBestMLTreeFromManyNJtrees(sequenceContainer & allTogether,
+ stochasticProcess& sp,
+ const int numOfNJtrees,
+ const MDOUBLE tmpForStartingTreeSearch,
+ const MDOUBLE epslionWeights,
+ ostream& out) {
+
+
+ likeDist pd1(sp,0.01);
+ vector<tree> tVec;
+ int treeTries = 0;
+ while (tVec.size() < numOfNJtrees) {
+ ++treeTries;
+ if (treeTries == 5000) break; // safety cap on attempts
+
+ Vdouble startingTreeWeights(allTogether.seqLen(),1.0);
+ if (treeTries>1) {// the first is the regular NJ tree
+ getRandomWeights::randomWeightsGamma(startingTreeWeights,
+ tmpForStartingTreeSearch);
+ }
+ // zero-out negligible weights so they do not perturb the distances
+ for (int p=0; p < startingTreeWeights.size(); ++p){
+ if (startingTreeWeights[p]<epslionWeights) startingTreeWeights[p]=0.0;
+ }
+ #ifdef VERBOS
+ if (treeTries ==2){ LOG(5,<<" weights for the 25 positions"<<endl);
+ for (int h=0; h < 25; ++h) LOG(5,<<startingTreeWeights[h]<<" ");
+ }
+ #endif
+ VVdouble disTab;
+ vector<string> vNames;
+ giveDistanceTable(&pd1,
+ allTogether,
+ disTab,
+ vNames,
+ &startingTreeWeights);
+ NJalg nj1;
+ tree et = nj1.computeTree(disTab,vNames);
+
+ // keep only topologically new trees
+ bool treeAlreadyThere = false;
+ for (int z=0; z< tVec.size();++z) {
+ if (sameTreeTolopogy(tVec[z],et)) treeAlreadyThere=true;
+ }
+ if (treeAlreadyThere == false) {
+ tVec.push_back(et);
+ }
+ }
+ LOG(5,<<"from number of tree tried: "<<treeTries<<" got: "<<numOfNJtrees<<" trees"<<endl);
+ out<<"from number of tree tried: "<<treeTries<<" got: "<<numOfNJtrees<<" trees"<<endl;
+
+ int numOfTreesToPrint = tVec.size()<10?tVec.size():10;
+ out<<"starting with: "<<tVec.size()<<" trees! "<<endl;
+ for (int g=0; g < numOfTreesToPrint; ++g) tVec[g].output(out);
+
+//------------------ choosing the ML tree from these NJ trees --------------------
+ int maxIterEM=0;
+ while (tVec.size() > 1) {
+ LOG(5,<<" current size = "<<tVec.size()<<endl);
+ tVec = eliminateHalf(tVec,allTogether,sp,out,maxIterEM);
+ maxIterEM=1; // first round without bbl at all.
+ }
+ LOG(5,<<" final size = "<<tVec.size()<<endl);
+
+ // final polish: full branch-length EM on the surviving tree
+ bblEM bblEM1(tVec[0],allTogether,sp,NULL,100,0.01);
+ MDOUBLE res = bblEM1.getTreeLikelihood();
+
+
+ LOGDO(5,tVec[0].output(myLog::LogFile()));
+ LOG(5,<<"likelihood = "<<res<<endl);
+ tVec[0].output(out);
+ out<<"likelihood = "<<res<<endl;
+ return tVec[0];
+}
diff --git a/libs/phylogeny/fastStartTree.h b/libs/phylogeny/fastStartTree.h
new file mode 100644
index 0000000..856cb8c
--- /dev/null
+++ b/libs/phylogeny/fastStartTree.h
@@ -0,0 +1,24 @@
+// $Id: fastStartTree.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___FAST_START_TREE
+#define ___FAST_START_TREE
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include <iostream>
+
+using namespace std;
+
+
+
+tree getBestMLTreeFromManyNJtrees(sequenceContainer & allTogether,
+ stochasticProcess& sp,
+ const int numOfNJtrees,
+ const MDOUBLE tmpForStartingTreeSearch,
+ const MDOUBLE epslionWeights,
+ ostream& out);
+
+
+#endif
diff --git a/libs/phylogeny/fastaFormat.cpp b/libs/phylogeny/fastaFormat.cpp
new file mode 100644
index 0000000..fd4fc86
--- /dev/null
+++ b/libs/phylogeny/fastaFormat.cpp
@@ -0,0 +1,75 @@
+// $Id: fastaFormat.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "fastaFormat.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+#include <algorithm>
+using namespace std;
+
+// Read an aligned FASTA file: parse, then verify all sequences share one length.
+sequenceContainer fastaFormat::read(istream &infile, const alphabet* alph) {
+	sequenceContainer parsed = readUnAligned(infile, alph);
+	parsed.makeSureAllSeqAreSameLengthAndGetLen();
+	return parsed;
+}
+
+
+// Parse FASTA from a stream: each record is a ">name" line followed by one or
+// more sequence lines (blank lines skipped, spaces stripped). Sequences need
+// not share a length. Reports an error on an empty file or a record that does
+// not start with '>'.
+sequenceContainer fastaFormat::readUnAligned(istream &infile, const alphabet* alph) {
+	sequenceContainer mySeqData;
+
+	vector<string> seqFileData;
+	putFileIntoVectorStringArray(infile,seqFileData);
+	if (seqFileData.empty()){
+		errorMsg::reportError("unable to open file, or file is empty in fasta format");
+	}
+
+	vector<string>::const_iterator it1;
+	int localid=0;
+	for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ) {
+		if (it1->empty()) {++it1;continue; }// empty line continue
+
+		string remark;
+		string name;
+
+		if ((*it1)[0] == '>') {
+			name = it1->substr(1); // everything after the '>'
+			++it1;
+		} else {
+			LOG(0,<<"problem in line: "<<*it1<<endl);
+			errorMsg::reportError("Error reading fasta file, error finding sequence name starting with >",1);
+		}
+		// fix: bound the skip so a name line at end-of-file cannot advance
+		// the iterator past end() (undefined behavior)
+		while (it1 != seqFileData.end() && it1->empty()) ++it1;
+
+		string str;
+		while (it1!= seqFileData.end()) {
+			if ((*it1)[0] == '>') break;
+			str+=*it1;
+			++it1;
+		}
+		// remove spaces form str;
+		str.erase(
+			std::remove(str.begin(),str.end(),' '),str.end()
+		);
+
+		mySeqData.add(sequence(str,name,remark,localid,alph));
+		localid++;
+	}
+
+	return mySeqData;
+}
+
+
+// Write sd in FASTA: a ">name" header line followed by the sequence string.
+void fastaFormat::write(ostream &out, const sequenceContainer& sd) {
+	sequenceContainer::constTaxaIterator taxon = sd.constTaxaBegin();
+	for (; taxon != sd.constTaxaEnd(); ++taxon) {
+		out << ">" << taxon->name() << endl;
+		out << taxon->toString() << endl;
+	}
+}
+
diff --git a/libs/phylogeny/fastaFormat.h b/libs/phylogeny/fastaFormat.h
new file mode 100644
index 0000000..c7ddf21
--- /dev/null
+++ b/libs/phylogeny/fastaFormat.h
@@ -0,0 +1,35 @@
+// $Id: fastaFormat.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___FASTA_FORMAT
+#define ___FASTA_FORMAT
+
+#include "sequenceContainer.h"
+
+// Reader/writer for the FASTA sequence format (see the example at the end of
+// this file). All members are static; the class is a namespace-like holder.
+class fastaFormat{
+public:
+ // read: parses an aligned FASTA stream; all sequences must share one length.
+ static sequenceContainer read(istream &infile, const alphabet* alph);
+ //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
+ static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
+ // write: emits each sequence as a '>name' line followed by its characters.
+ static void write(ostream &out, const sequenceContainer& sd);
+};
+
+#endif
+
+/* EXAMPLE OF FASTA FORMAT:
+>Langur
+KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV
+>Baboon
+KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV
+>Human
+KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV
+>Rat
+KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV
+>Cow
+KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL
+>Horse
+KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL
+
+
+*/
+
+
diff --git a/libs/phylogeny/findRateOfGene.cpp b/libs/phylogeny/findRateOfGene.cpp
new file mode 100644
index 0000000..59f4c6a
--- /dev/null
+++ b/libs/phylogeny/findRateOfGene.cpp
@@ -0,0 +1,81 @@
+// $Id: findRateOfGene.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "findRateOfGene.h"
+#include "computeUpAlg.h"
+
+//#define VERBOS
+
+// Functor passed to the one-dimensional 'brent' optimizer: maps a candidate
+// global-rate factor to MINUS the tree log-likelihood (brent minimizes, so
+// the negation turns likelihood maximization into minimization).
+// NOTE: operator() mutates the shared stochasticProcess via setGlobalRate,
+// so the caller's sp reflects the last factor evaluated.
+class findRateOfGene{
+public:
+ explicit findRateOfGene(const tree &t,
+ const sequenceContainer& sc,
+ stochasticProcess& sp,
+ const Vdouble * weights): _t(t), _sc(sc),
+ _sp(sp),_weights(weights){};
+private:
+ const tree& _t; // tree topology + branch lengths (not modified)
+ const sequenceContainer& _sc; // aligned sequences
+ stochasticProcess& _sp; // mutated: global rate set per evaluation
+ const Vdouble * _weights; // optional per-position weights (may be NULL)
+public:
+ // Evaluate -logL(tree | sequences) with the global rate set to 'fac'.
+ MDOUBLE operator() (const MDOUBLE fac) {
+#ifdef VERBOS
+ LOG(5,<<"factor = "<<fac<<endl);
+#endif
+ _sp.setGlobalRate(fac);
+ MDOUBLE tmp = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_t,_sc,_sp,_weights);
+#ifdef VERBOS
+ LOG(5,<<"likelihood = "<<tmp<<endl);
+#endif
+ return -tmp;
+ }
+};
+
+// Optimize the gene's global rate factor by Brent's method over [0, 4]
+// (initial bracket point 1.0, tolerance 0.01). On return the stochastic
+// process carries the optimal rate, logLresults holds the log-likelihood
+// at the optimum, and the optimal factor is returned.
+MDOUBLE findTheBestFactorFor(const tree &t,
+ const sequenceContainer& sc,
+ stochasticProcess& sp,
+ const Vdouble * weights,
+ MDOUBLE & logLresults) {
+#ifdef VERBOS
+ LOG(5,<<"xxx in funtion findTheNestFactorFor xxxxxxxxx"<<endl);
+ LOG(5,<<"xxx b4 optimization xxxxxxxxx"<<endl);
+ MDOUBLE myL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(t,sc,sp);
+ LOG(5,<<" likelihod is: "<<myL<<endl);
+ LOG(5,<<" global rate is: "<<sp.getGlobalRate()<<endl);
+ LOG(5,<<"\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx \n");
+#endif
+
+ // Brent bracket [ax, cx] with interior point bx; tol is the stop tolerance.
+ const MDOUBLE ax=0,bx=1.0,cx=4.0,tol=0.01f;
+ MDOUBLE res=-1.0;
+ // brent minimizes -logL (see findRateOfGene::operator()), hence the negation.
+ logLresults =-brent(ax,bx,cx,
+ findRateOfGene(t,sc,sp,weights),
+ tol,
+ &res);
+#ifdef VERBOS
+ LOG(5,<<"rate of gene = "<<res<<endl);
+ LOG(5,<<"xxx in funtion findTheNestFactorFor xxxxxxxxx"<<endl);
+ LOG(5,<<"xxx after optimization xxxxxxxxx"<<endl);
+ myL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(t,sc,sp);
+ LOG(5,<<" likelihod is: "<<myL<<"\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx \n");
+#endif
+ sp.setGlobalRate(res); // leave the process at the optimal rate
+ return res;}
+
+// Rescale the global rates of all stochastic processes so their average is 1,
+// compensating by multiplying every branch length by the old average rate
+// (the product rate*branch-length, and hence the likelihood, is preserved).
+void makeAverageRateEqOne(tree& et,vector<stochasticProcess> & spVec){
+ if (spVec.empty()) return; // avoid division by zero below
+ MDOUBLE sumGlobalRates=0.0;
+ for (size_t k=0; k < spVec.size(); ++k) { // size_t: avoid signed/unsigned mismatch
+ sumGlobalRates+=spVec[k].getGlobalRate();
+ }
+ for (size_t j=0; j < spVec.size(); ++j) {
+ MDOUBLE newGlobalRate = spVec[j].getGlobalRate();
+ newGlobalRate*=(spVec.size()/sumGlobalRates);
+ spVec[j].setGlobalRate(newGlobalRate);
+ }
+ et.multipleAllBranchesByFactor(sumGlobalRates/spVec.size());
+}
+
+
+
+
diff --git a/libs/phylogeny/findRateOfGene.h b/libs/phylogeny/findRateOfGene.h
new file mode 100644
index 0000000..6118d11
--- /dev/null
+++ b/libs/phylogeny/findRateOfGene.h
@@ -0,0 +1,24 @@
+// $Id: findRateOfGene.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ____FIND_RATE_OF_GENE
+#define ____FIND_RATE_OF_GENE
+
+
+#include "numRec.h"
+#include "errorMsg.h"
+#include "likelihoodComputation.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "suffStatComponent.h"
+#include "definitions.h"
+
+MDOUBLE findTheBestFactorFor(const tree &t,
+ const sequenceContainer& sc,
+ stochasticProcess& sp,
+ const Vdouble * weights,
+ MDOUBLE & logLresults);
+
+void makeAverageRateEqOne(tree& et,vector<stochasticProcess> & spVec);
+
+#endif
diff --git a/libs/phylogeny/fromCountTableComponentToDistance.cpp b/libs/phylogeny/fromCountTableComponentToDistance.cpp
new file mode 100644
index 0000000..fbe3450
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistance.cpp
@@ -0,0 +1,23 @@
+// $Id: fromCountTableComponentToDistance.cpp 4742 2008-08-19 17:40:56Z cohenofi $
+
+#include "fromCountTableComponentToDistance.h"
+#include "likeDist.h"
+#include <cassert>
+
+// Construct a distance estimator over a pre-computed count table.
+// ctc               : counts per rate category used as sufficient statistics.
+// sp                : substitution model / rate distribution.
+// toll              : tolerance for the distance optimization.
+// brLenIntialGuess  : starting branch length for the optimizer.
+// unObservableData_p: optional correction object (may be NULL).
+fromCountTableComponentToDistance::fromCountTableComponentToDistance(
+ const countTableComponentGam& ctc,
+ const stochasticProcess &sp,
+ const MDOUBLE toll,
+ const MDOUBLE brLenIntialGuess,
+ unObservableData* unObservableData_p) : _sp(sp), _ctc(ctc),_unObservableData_p(unObservableData_p) {
+ _distance = brLenIntialGuess ;//0.03;
+ _toll = toll;
+}
+
+// Run the likelihood-based distance optimization (likeDist) starting from the
+// current _distance; caches the optimized distance and its likelihood.
+void fromCountTableComponentToDistance::computeDistance() {
+ MDOUBLE maxPairwiseDistance = 5.0; // The default
+ likeDist likeDist1(_sp,_toll,maxPairwiseDistance,_unObservableData_p);
+ MDOUBLE initGuess = _distance;
+ _distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess);
+ assert(_distance>=0);
+}
diff --git a/libs/phylogeny/fromCountTableComponentToDistance.h b/libs/phylogeny/fromCountTableComponentToDistance.h
new file mode 100644
index 0000000..9237799
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistance.h
@@ -0,0 +1,37 @@
+// $Id: fromCountTableComponentToDistance.h 4742 2008-08-19 17:40:56Z cohenofi $
+
+#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE
+#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "stochasticProcess.h"
+#include "unObservableData.h"
+
+static const MDOUBLE startingGuessForTreeBrLen = 0.029;
+
+// Turns a count table (sufficient statistics gathered elsewhere, e.g. by EM)
+// into an optimized branch length via likeDist. Usage: construct, call
+// computeDistance(), then query getDistance()/getLikeDistance().
+class fromCountTableComponentToDistance {
+
+public:
+ explicit fromCountTableComponentToDistance(
+ const countTableComponentGam& ctc,
+ const stochasticProcess &sp,
+ const MDOUBLE toll,
+ const MDOUBLE brLenIntialGuess, // =startingGuessForTreeBrLen
+ unObservableData* unObservableData_p = NULL); // a class used to for presence/absence
+
+ void computeDistance();// return the likelihood
+ MDOUBLE getDistance() { return _distance;} // return the distance.
+ MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance.
+private:
+ const stochasticProcess & _sp; // substitution model (not owned)
+ const countTableComponentGam& _ctc; // count table (not owned)
+ MDOUBLE _toll; // optimization tolerance
+ MDOUBLE _distance; // last optimized distance
+ MDOUBLE _likeDistance; // likelihood at that distance
+ unObservableData* _unObservableData_p; // optional correction (not owned, may be NULL)
+ int alphabetSize() {return _ctc.alphabetSize();}
+};
+
+#endif
+
diff --git a/libs/phylogeny/fromCountTableComponentToDistance2Codon.cpp b/libs/phylogeny/fromCountTableComponentToDistance2Codon.cpp
new file mode 100644
index 0000000..f2d7052
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistance2Codon.cpp
@@ -0,0 +1,22 @@
+// $Id: fromCountTableComponentToDistance2Codon.cpp 950 2006-10-19 12:12:34Z eyalprivman $
+
+#include "fromCountTableComponentToDistance2Codon.h"
+#include "likeDist2Codon.h"
+#include "likeDist.h"
+#include <cassert>
+
+// Construct a distance estimator over a count table, for a vector of
+// stochastic processes (codon-model variant).
+// toll / brLenIntialGuess: optimization tolerance and starting branch length.
+fromCountTableComponentToDistance2Codon::fromCountTableComponentToDistance2Codon(
+ const countTableComponentGam& ctc,
+ const vector<stochasticProcess> &spVec,
+ const MDOUBLE toll,
+ const MDOUBLE brLenIntialGuess ) : _spVec(spVec), _ctc(ctc) {
+ _distance =brLenIntialGuess ;//0.03;
+ _toll = toll;
+}
+
+// Optimize the distance via likeDist2Codon starting from the current
+// _distance; caches the optimized distance and its likelihood.
+void fromCountTableComponentToDistance2Codon::computeDistance() {
+ likeDist2Codon likeDist1(_spVec,_toll);
+ MDOUBLE initGuess = _distance;
+ _distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess);
+ assert(_distance>=0);
+}
diff --git a/libs/phylogeny/fromCountTableComponentToDistance2Codon.h b/libs/phylogeny/fromCountTableComponentToDistance2Codon.h
new file mode 100644
index 0000000..4db1498
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistance2Codon.h
@@ -0,0 +1,34 @@
+// $Id: fromCountTableComponentToDistance2Codon.h 950 2006-10-19 12:12:34Z eyalprivman $
+
+#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_CODON
+#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_CODON
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "stochasticProcess.h"
+
+static const MDOUBLE startingGuessForTreeBrLen = 0.029;
+
+// Codon-model sibling of fromCountTableComponentToDistance: optimizes a
+// branch length from a count table using a vector of stochastic processes.
+// Usage: construct, computeDistance(), then getDistance()/getLikeDistance().
+class fromCountTableComponentToDistance2Codon {
+
+public:
+ explicit fromCountTableComponentToDistance2Codon(
+ const countTableComponentGam& ctc,
+ const vector<stochasticProcess> &spVec,
+ const MDOUBLE toll,
+ const MDOUBLE brLenIntialGuess);// =startingGuessForTreeBrLen
+
+ void computeDistance();// return the likelihood
+ MDOUBLE getDistance() { return _distance;} // return the distance.
+ MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance.
+private:
+ const vector<stochasticProcess> & _spVec; // per-category processes (not owned)
+ const countTableComponentGam& _ctc; // count table (not owned)
+ MDOUBLE _toll; // optimization tolerance
+ MDOUBLE _distance; // last optimized distance
+ MDOUBLE _likeDistance; // likelihood at that distance
+ int alphabetSize() {return _ctc.alphabetSize();}
+};
+
+#endif
+
diff --git a/libs/phylogeny/fromCountTableComponentToDistance2USSRV.cpp b/libs/phylogeny/fromCountTableComponentToDistance2USSRV.cpp
new file mode 100644
index 0000000..a83c307
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistance2USSRV.cpp
@@ -0,0 +1,22 @@
+// $Id: fromCountTableComponentToDistance2USSRV.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "fromCountTableComponentToDistance2USSRV.h"
+#include "likeDist.h"
+#include <cassert>
+
+// Construct a distance estimator for the USSRV model from two count tables:
+// one for the base (gamma) component and one for the SSRV component.
+fromCountTableComponentToDistance2USSRV::fromCountTableComponentToDistance2USSRV(
+ const countTableComponentGam& ctcBase,
+ const countTableComponentHom& ctcSSRV,
+ const ussrvModel &model,
+ MDOUBLE toll,
+ MDOUBLE brLenIntialGuess ) : _model(model), _ctcBase(ctcBase), _ctcSSRV(ctcSSRV) {
+ _distance = brLenIntialGuess ;//0.03;
+ _toll = toll;
+}
+
+// Optimize the distance via likeDist2USSRV over both count tables starting
+// from the current _distance; caches the distance and its likelihood.
+void fromCountTableComponentToDistance2USSRV::computeDistance() {
+ likeDist2USSRV likeDist1(_model,_toll);
+ MDOUBLE initGuess = _distance;
+ _distance = likeDist1.giveDistance(_ctcBase,_ctcSSRV,_likeDistance,initGuess);
+ assert(_distance>=0);
+}
diff --git a/libs/phylogeny/fromCountTableComponentToDistance2USSRV.h b/libs/phylogeny/fromCountTableComponentToDistance2USSRV.h
new file mode 100644
index 0000000..c22db04
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistance2USSRV.h
@@ -0,0 +1,39 @@
+// $Id: fromCountTableComponentToDistance2USSRV.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV
+#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "stochasticProcess.h"
+#include "ussrvModel.h"
+#include "likeDist2USSRV.h"
+
+static const MDOUBLE startingGuessForTreeBrLen = 0.029;
+
+// USSRV sibling of fromCountTableComponentToDistance: optimizes a branch
+// length from two count tables (base/gamma component + SSRV component).
+// Usage: construct, computeDistance(), then getDistance()/getLikeDistance().
+class fromCountTableComponentToDistance2USSRV {
+
+public:
+ explicit fromCountTableComponentToDistance2USSRV(
+ const countTableComponentGam& ctcBase,
+ const countTableComponentHom& ctcSSRV,
+ const ussrvModel& model,
+ MDOUBLE toll,
+ MDOUBLE brLenIntialGuess);// =startingGuessForTreeBrLen
+
+ void computeDistance();// return the likelihood
+ MDOUBLE getDistance() { return _distance;} // return the distance.
+ MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance.
+
+private:
+ const ussrvModel & _model; // USSRV model (not owned)
+ const countTableComponentGam& _ctcBase; // base-component counts (not owned)
+ const countTableComponentHom& _ctcSSRV; // SSRV-component counts (not owned)
+ MDOUBLE _toll; // optimization tolerance
+ MDOUBLE _distance; // last optimized distance
+ MDOUBLE _likeDistance; // likelihood at that distance
+// int alphabetSize() {return _ctc.alphabetSize();}
+};
+
+#endif //___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_2_USSRV
+
diff --git a/libs/phylogeny/fromCountTableComponentToDistanceProp.cpp b/libs/phylogeny/fromCountTableComponentToDistanceProp.cpp
new file mode 100644
index 0000000..ade5afa
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistanceProp.cpp
@@ -0,0 +1,18 @@
+// $Id: fromCountTableComponentToDistanceProp.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "fromCountTableComponentToDistanceProp.h"
+#include "likeDistProp.h"
+
+// Construct a distance estimator over a vector of count tables (one per
+// gene/partition) sharing a proportional branch length.
+fromCountTableComponentToDistanceProp::fromCountTableComponentToDistanceProp(
+ const vector<countTableComponentGam>& ctc,
+ const vector<stochasticProcess> &sp,
+ const MDOUBLE toll,
+ const MDOUBLE brLenIntialGuess ) : _sp(sp), _ctc(ctc) {
+ _distance =brLenIntialGuess;
+ _toll = toll;
+}
+
+// Optimize the shared distance via likeDistProp over all count tables;
+// caches the distance and its likelihood. (No initial guess is passed here,
+// unlike the sibling classes.)
+void fromCountTableComponentToDistanceProp::computeDistance() {
+ likeDistProp likeDist1(alphabetSize(),_sp,_toll);
+ _distance = likeDist1.giveDistance(_ctc,_likeDistance);
+}
diff --git a/libs/phylogeny/fromCountTableComponentToDistanceProp.h b/libs/phylogeny/fromCountTableComponentToDistanceProp.h
new file mode 100644
index 0000000..f321516
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistanceProp.h
@@ -0,0 +1,33 @@
+// $Id: fromCountTableComponentToDistanceProp.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP
+#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE_PROP
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "stochasticProcess.h"
+
+
+// Multi-partition sibling of fromCountTableComponentToDistance: one count
+// table and one stochastic process per partition, optimized jointly for a
+// single (proportional) branch length via likeDistProp.
+class fromCountTableComponentToDistanceProp {
+
+public:
+ explicit fromCountTableComponentToDistanceProp(
+ const vector<countTableComponentGam>& ctc,
+ const vector<stochasticProcess> &sp,
+ const MDOUBLE toll,
+ const MDOUBLE brLenIntialGuess = 0.029);// =startingGuessForTreeBrLen
+
+ void computeDistance();// return the likelihood
+ MDOUBLE getDistance() { return _distance;} // return the distance.
+ MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance.
+private:
+ const vector<stochasticProcess> & _sp; // one process per partition (not owned)
+ const vector<countTableComponentGam>& _ctc; // one count table per partition (not owned)
+ MDOUBLE _toll; // optimization tolerance
+ MDOUBLE _distance; // last optimized distance
+ MDOUBLE _likeDistance; // likelihood at that distance
+ // safe even when _ctc is empty (returns 0)
+ int alphabetSize() {return (_ctc.empty()?0:_ctc[0].alphabetSize());}
+};
+
+#endif
+
diff --git a/libs/phylogeny/fromCountTableComponentToDistancefixRoot.cpp b/libs/phylogeny/fromCountTableComponentToDistancefixRoot.cpp
new file mode 100644
index 0000000..faf6992
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistancefixRoot.cpp
@@ -0,0 +1,26 @@
+// $Id: fromCountTableComponentToDistance.cpp 4471 2008-07-17 15:38:50Z cohenofi $
+
+#include "fromCountTableComponentToDistancefixRoot.h"
+#include "likeDistfixRoot.h"
+#include <cassert>
+
+// Construct a distance estimator for the fixed-root variant: one count table
+// per possible letter at the root (ctc is indexed [letterAtRoot]).
+fromCountTableComponentToDistancefixRoot::fromCountTableComponentToDistancefixRoot(
+ const vector<countTableComponentGam>& ctc,
+ const stochasticProcess &sp,
+ const MDOUBLE toll,
+ const MDOUBLE brLenIntialGuess,
+ unObservableData* unObservableData_p)
+ : _sp(sp), _ctc(ctc) {
+ _distance =brLenIntialGuess ;//0.03;
+ _toll = toll;
+ _unObservableData_p = unObservableData_p;
+
+}
+
+// Optimize the distance via likeDistfixRoot starting from the current
+// _distance; caches the optimized distance and its likelihood.
+void fromCountTableComponentToDistancefixRoot::computeDistance() {
+ MDOUBLE maxPairwiseDistance = 5.0; // The default
+ likeDistfixRoot likeDist1(_sp,_toll,maxPairwiseDistance,_unObservableData_p);
+ MDOUBLE initGuess = _distance;
+ _distance = likeDist1.giveDistance(_ctc,_likeDistance,initGuess);
+ assert(_distance>=0);
+}
diff --git a/libs/phylogeny/fromCountTableComponentToDistancefixRoot.h b/libs/phylogeny/fromCountTableComponentToDistancefixRoot.h
new file mode 100644
index 0000000..a5f6d35
--- /dev/null
+++ b/libs/phylogeny/fromCountTableComponentToDistancefixRoot.h
@@ -0,0 +1,39 @@
+// $Id: fromCountTableComponentToDistance.h 4471 2008-07-17 15:38:50Z cohenofi $
+
+#ifndef ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE__FIX_ROOT
+#define ___FROM_COUNT_TABLE_COMPONENT_TO_DISTANCE__FIX_ROOT
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "stochasticProcess.h"
+#include "unObservableData.h"
+
+static const MDOUBLE startingGuessForTreeBrLen = 0.029;
+
+// Fixed-root sibling of fromCountTableComponentToDistance: the count table
+// is a vector indexed by the letter fixed at the root.
+// Usage: construct, computeDistance(), then getDistance()/getLikeDistance().
+class fromCountTableComponentToDistancefixRoot {
+
+public:
+ explicit fromCountTableComponentToDistancefixRoot(
+ const vector<countTableComponentGam>& ctc,
+ const stochasticProcess &sp,
+ const MDOUBLE toll,
+ const MDOUBLE brLenIntialGuess, // =startingGuessForTreeBrLen
+ unObservableData* unObservableData_p);
+
+ void computeDistance();// return the likelihood
+ MDOUBLE getDistance() { return _distance;} // return the distance.
+ MDOUBLE getLikeDistance() { return _likeDistance;} // return the distance.
+private:
+ const stochasticProcess & _sp; // substitution model (not owned)
+ const vector<countTableComponentGam>& _ctc; //_ctc[letterAtRoot][rate][alph][alph]
+ MDOUBLE _toll; // optimization tolerance
+ MDOUBLE _distance; // last optimized distance
+ MDOUBLE _likeDistance; // likelihood at that distance
+ unObservableData* _unObservableData_p; // optional correction (not owned)
+
+ // BUG FIX: guard against an empty table before indexing _ctc[0]
+ // (mirrors the guard used in fromCountTableComponentToDistanceProp).
+ int alphabetSize() {return (_ctc.empty()?0:_ctc[0].alphabetSize());}
+};
+
+#endif
+
diff --git a/libs/phylogeny/fromInstructionFile.cpp b/libs/phylogeny/fromInstructionFile.cpp
new file mode 100644
index 0000000..8055c17
--- /dev/null
+++ b/libs/phylogeny/fromInstructionFile.cpp
@@ -0,0 +1,555 @@
+// $Id: fromInstructionFile.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "fromInstructionFile.h"
+#include "treeUtil.h"
+#include "nucleotide.h"
+#include "amino.h"
+#include "uniDistribution.h"
+#include "gammaDistribution.h"
+#include "readDatMatrix.h"
+#include "aaJC.h"
+#include "nucJC.h"
+#include "hky.h"
+#include "trivialAccelerator.h"
+#include "chebyshevAccelerator.h"
+#include "phylipFormat.h"
+#include "maseFormat.h"
+#include "fastaFormat.h"
+#include "clustalFormat.h"
+#include "molphyFormat.h"
+#include "datMatrixHolder.h"
+#include "someUtil.h"
+
+#include <iostream>
+#include <fstream>
+#include <memory>
+#include <iterator>
+#include <cstdio>
+using namespace std;
+
+//#define VERBOS
+
+// Load the instruction file into the _lines map. Each line has the form
+// "key value..."; keys are lower-cased and the value's leading whitespace
+// is trimmed. Later occurrences of a key overwrite earlier ones.
+void fromInstructionFile::readInstructionFile(const string& str){
+ ifstream f;
+ f.open(str.c_str());
+ if (!f.is_open()) { // BUG FIX: comparing a stream to NULL is non-standard; test the open state
+ string tmp = "Unable to open the instraction file : \""+str+"\"";
+ errorMsg::reportError(tmp);
+ }
+ string key, value;
+ while (f >> key){ // BUG FIX: loop on the extraction itself; !eof() could process a stale key once more
+ if (!key.empty()){
+ toLower(key);// put the key in lower case.
+ getline(f,value);
+ value.erase(0,value.find_first_not_of(" \t")); // clear leading white space
+ _lines[key]=value;
+ }
+ }
+ f.close();
+}
+
+// Construct by parsing the given instruction file; up to _maxNumOfFiles
+// (1000) numbered entries (seqfile1, seqfile2, ...) are supported.
+fromInstructionFile::fromInstructionFile(const string& str):_maxNumOfFiles(1000){
+ readInstructionFile(str);
+}
+
+// THIS IS NOT WORKING ON SOME OLD VERSIONS OF g++
+//string I2A(const int & v)
+//{
+// stringstream s("");
+// s<<v;
+// return(s.str());
+//}
+//
+//string F2A(const float & v)
+//{
+// stringstream s("");
+// s<<v;
+// return(s.str());
+//}
+
+// I2A: integer -> decimal string. Kept sprintf-style because stringstream
+// was broken on some old g++ versions (see comment above); snprintf bounds
+// the write even though any int's decimal form fits in 100 chars.
+string I2A(const int & v)
+{
+ char buf[100];
+ snprintf(buf,sizeof(buf),"%d",v);
+ return buf;
+}
+
+// F2A: float -> string via "%f" (six decimal places). snprintf bounds the
+// write into the local buffer (sprintf had no bound).
+string F2A(const float & v)
+{
+ char buf[100];
+ snprintf(buf,sizeof(buf),"%f",v);
+ return buf;
+}
+
+
+
+
+// True when the given (lower-case) key appeared in the instruction file.
+bool fromInstructionFile::doesWordExistInLines(const string& key) const{
+ return _lines.find(key) != _lines.end();
+}
+
+// Return the value stored for 'key', or a reference to a shared empty string
+// when the key is absent (the static lives for the program's lifetime, so
+// returning a reference to it is safe).
+const string & fromInstructionFile::searchStringInLines(const string& key) const
+{
+#ifdef VERBOS
+ map<string, string>::const_iterator pos;
+ pos = _lines.begin();
+ for (; pos != _lines.end(); ++pos) {
+ cout << "key: \"" << pos->first << "\" "
+ << "value: " << pos->second << endl;
+ }
+#endif
+
+
+
+ static const string emptystr("");
+ if (_lines.count(key) > 0)
+ return(_lines.find(key)->second);
+ else
+ return(emptystr);
+}
+
+// Indexed lookup: returns the value for "key<index>" (e.g. "seqfile2"), or a
+// reference to a shared empty string when absent.
+const string& fromInstructionFile::searchStringInLines(const string& key, const int index) const
+{
+ static const string emptystr("");
+
+ string realKey(key+int2string(index));
+
+ if (_lines.count(realKey) > 0)
+ return(_lines.find(realKey)->second);
+ else
+ return(emptystr);
+}
+
+// Configure the global log sink and level from the "logfile" / "loglvl"
+// instruction entries. "-" (or no entry) logs to stdout; default level is 3.
+void fromInstructionFile::setLogFile() {
+ string logfilename(searchStringInLines("logfile"));
+ if (logfilename == "") logfilename = "-";
+
+ if (logfilename == "-") {
+ myLog::setLogOstream(&cout);
+ }
+ else{
+ ofstream* outLF = new ofstream(logfilename.c_str());
+ if (!outLF->is_open()) { // BUG FIX: 'new' never yields NULL; test the stream's open state
+ errorMsg::reportError("unable to open file for reading");
+ }
+ myLog::setLogOstream(outLF); // ownership intentionally handed to myLog (stream must outlive logging)
+ }
+ string loglvl(searchStringInLines("loglvl"));
+ if (loglvl=="") myLog::setLogLvl(3); // default value
+ else myLog::setLogLvl(atoi(loglvl.c_str()));
+ LOG(3,<<"START OF LOG FILE\n\n");
+}
+
+// Parse the value of 'wordToSearch' as an int into 'val'.
+// Returns false (val untouched) when the key is absent; note atof/atoi
+// semantics: a non-numeric value yields 0 with no error.
+bool fromInstructionFile::getIntValueConnectedWithWord(const string& wordToSearch,
+ int & val){
+ string p(searchStringInLines(wordToSearch));
+ if (p == "") {
+ return false;
+ }
+ val=atoi(p.c_str());
+ return true;
+}
+
+// Return the configured output file name, or "-" (meaning stdout) when the
+// "outfile" entry is absent.
+string fromInstructionFile::getOutFile() {
+ string outName(searchStringInLines("outfile"));
+ if (outName.empty()) {
+ outName = "-";
+ }
+ return outName;
+}
+
+// Allocate one alphabet object per "alphabet<i>" entry (4 -> nucleotide,
+// 20 -> amino). The caller owns (and must delete) the allocated objects.
+void fromInstructionFile::getAlphabets(vector<alphabet* >& _alphabets) {
+ if (_alphabets.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getAlphabetSize");}
+ for (int i=1; i < _maxNumOfFiles; ++i ) {
+ string p(searchStringInLines("alphabet",i));
+ if (p == "") return;
+ int alphRes = atoi(p.c_str());
+ if (alphRes == 4) {
+ alphabet* alp = new nucleotide;
+ _alphabets.push_back(alp);
+ }
+ else if (alphRes == 20) {
+ alphabet* alp = new amino;
+ _alphabets.push_back(alp);
+ }
+ else errorMsg::reportError("No relaven number after the word alphabet in the instruction file.");
+ }
+ // NOTE(review): this compares POINTER identity, and each entry above is a
+ // separate 'new', so any input with 2+ alphabets triggers the error below.
+ // The intent is presumably to compare alphabet types/sizes — TODO confirm
+ // against the alphabet interface and fix.
+ for (size_t z=1; z< _alphabets.size(); ++z) {
+ if (_alphabets[z]!= _alphabets[0]) {
+ errorMsg::reportError("currently all seq. must be of the same alphabet size");
+ }
+ }
+}
+
+// Allocate a single alphabet from the "alphabet" (or "alphabet1") entry:
+// 4 -> nucleotide, 20 -> amino. The caller owns the returned object.
+// Aborts via errorMsg when no entry or an unsupported size is found.
+alphabet* fromInstructionFile::getOneAlphabet( ) {
+ alphabet* _alphabet = NULL;
+ int alphRes;
+
+ bool ok = getIntValueConnectedWithWord("alphabet",alphRes);
+ if (!ok) {
+ ok = getIntValueConnectedWithWord("alphabet1",alphRes);
+
+ if (!ok) errorMsg::reportError("didn't find alphabet size in instruction file");
+ }if (ok==true) {
+ if (alphRes == 4) {
+ _alphabet = new nucleotide;
+ }
+ else if (alphRes == 20) {
+ _alphabet = new amino;
+ }
+ else errorMsg::reportError("No number after the word alphabet in the instruction file.");
+ }
+ return _alphabet;
+}
+
+// Build one stochastic process from the "model"/"model1" entry.
+// Rate distribution: gamma(1,4 categories) when "gamma" is present, else
+// uniform. Amino models (day/jtt/rev/wag/cprev) use a Chebyshev-accelerated
+// pupAll matrix, optionally with user frequencies; nucjc/aaJC/hky are exact.
+// The temporaries are deleted because stochasticProcess copies its parts.
+void fromInstructionFile::getOneStartingStochasticProcess(stochasticProcess& sp, Vdouble * freqs){
+ bool useGamma = doesWordExistInLines("gamma");
+ distribution *dist = NULL;
+ if (!useGamma) dist = new uniDistribution;
+ else dist = new gammaDistribution(1,4);
+
+ replacementModel *probMod=NULL;
+ pijAccelerator *pijAcc=NULL;
+
+ string wordUse = "model";
+ bool usemodel1 = doesWordExistInLines("model1");
+ if (usemodel1 == true) wordUse="model1";
+
+ string modelName(searchStringInLines(wordUse));// we can use model or model1
+ if (modelName == "") {
+ errorMsg::reportError("could not find model name in instruction file");
+ }
+
+ if (strcmp(modelName.c_str(),"day")==0) {
+ (freqs==NULL)? probMod=new pupAll(datMatrixHolder::dayhoff) : probMod=new pupAll(datMatrixHolder::dayhoff,*freqs);
+ pijAcc = new chebyshevAccelerator(probMod);
+ }
+ else if (strcmp(modelName.c_str(),"jtt")==0) {
+ (freqs==NULL)? probMod=new pupAll(datMatrixHolder::jones):probMod=new pupAll(datMatrixHolder::jones,*freqs) ;
+ pijAcc =new chebyshevAccelerator(probMod);
+ }
+ else if (strcmp(modelName.c_str(),"rev")==0) {
+ (freqs==NULL)? probMod=new pupAll(datMatrixHolder::mtREV24) : probMod=new pupAll(datMatrixHolder::mtREV24,*freqs);
+ pijAcc = new chebyshevAccelerator(probMod);
+ }
+ else if (strcmp(modelName.c_str(),"wag")==0) {
+ (freqs==NULL)? probMod=new pupAll(datMatrixHolder::wag) : probMod=new pupAll(datMatrixHolder::wag, *freqs);
+ pijAcc = new chebyshevAccelerator(probMod);
+ }
+ else if (strcmp(modelName.c_str(),"cprev")==0) {
+ (freqs==NULL)? probMod=new pupAll(datMatrixHolder::cpREV45) : probMod=new pupAll(datMatrixHolder::cpREV45, *freqs);
+ pijAcc = new chebyshevAccelerator(probMod);
+ }
+ else if (strcmp(modelName.c_str(),"nucjc")==0) {
+ probMod=new nucJC; pijAcc = new trivialAccelerator(probMod);
+ }
+ else if (strcmp(modelName.c_str(),"aaJC")==0) {
+ probMod=new aaJC; pijAcc = new trivialAccelerator(probMod);
+ }
+ else if (modelName=="hky"||modelName=="k2p") {
+ // hky needs a transition/transversion ratio and base frequencies
+ // (from "ACGprob" as "a,c,g"; T gets the remainder).
+ MDOUBLE ratio (atof(searchStringInLines("ratio").c_str())); // get alpha
+ MDOUBLE Ap(0.25), Cp(0.25), Gp(0.25), Tp(0.25);
+ sscanf(searchStringInLines("ACGprob").c_str(),"%lf,%lf,%lf", &Ap, &Cp, &Gp);
+ Tp=1.0-(Ap+Cp+Gp);
+ probMod=new hky(Ap,Cp,Gp,Tp,ratio); pijAcc = new trivialAccelerator(probMod);
+ }
+ else {
+ errorMsg::reportError("This replacement model is not yet available");
+ }
+
+ stochasticProcess s1s(dist, pijAcc);
+ if (probMod) delete probMod;
+ if (pijAcc) delete pijAcc;
+ if (dist) delete dist;
+ sp = s1s;
+}
+
+// Build one stochastic process per "model<i>" entry (stops at the first
+// missing entry). Unlike getOneStartingStochasticProcess, the amino models
+// here use a trivialAccelerator (the chebyshev calls are commented out).
+// freqs, when given, supplies per-file equilibrium frequencies.
+void fromInstructionFile::getStartingStochasticProcess(vector<stochasticProcess>& spPtrVec, VVdouble* freqs) {
+ if (spPtrVec.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingSequenceData");}
+ bool useGamma = doesWordExistInLines("gamma");
+ for (int i=0; i < _maxNumOfFiles; ++i) {
+ Vdouble* freq_i = (freqs==NULL) ? NULL: &((*freqs)[i]);
+
+ distribution *dist = NULL;
+ if (!useGamma) dist = new uniDistribution;
+ else dist = new gammaDistribution(1,4);
+
+
+ replacementModel *probMod=NULL;
+ pijAccelerator *pijAcc=NULL;
+ string model(searchStringInLines("model",i+1));
+ if (model == "") return;
+ if (model=="day") {
+ if (freq_i == NULL) {
+ probMod=new pupAll(datMatrixHolder::dayhoff);//pijAcc = new chebyshevAccelerator(probMod);
+ } else {
+ probMod=new pupAll(datMatrixHolder::dayhoff,*freq_i);//pijAcc = new chebyshevAccelerator(probMod);
+ }
+ pijAcc = new trivialAccelerator(probMod);
+ }
+ else if (model=="jtt") {
+ if (freq_i == NULL) {
+ probMod=new pupAll(datMatrixHolder::jones) ; //pijAcc =new chebyshevAccelerator(probMod);
+ }
+ else {
+ probMod=new pupAll(datMatrixHolder::jones,*freq_i) ; //pijAcc =new chebyshevAccelerator(probMod);
+ }
+ pijAcc = new trivialAccelerator(probMod);
+ }
+ else if (model=="rev") {
+ if (freq_i == NULL) {
+ probMod=new pupAll(datMatrixHolder::mtREV24);//pijAcc = new chebyshevAccelerator(probMod);
+ } else {
+ probMod=new pupAll(datMatrixHolder::mtREV24,*freq_i);//pijAcc = new chebyshevAccelerator(probMod);
+ }
+ pijAcc = new trivialAccelerator(probMod);
+ } else if (model=="wag") {
+ if (freq_i == NULL) {
+ probMod=new pupAll(datMatrixHolder::wag);//pijAcc = new chebyshevAccelerator(probMod);
+ } else {
+ probMod=new pupAll(datMatrixHolder::wag,*freq_i);//pijAcc = new chebyshevAccelerator(probMod);
+ }
+ pijAcc = new trivialAccelerator(probMod);
+ } else if (model=="cprev") {
+ if (freq_i == NULL) {
+ probMod=new pupAll(datMatrixHolder::cpREV45);//pijAcc = new chebyshevAccelerator(probMod);
+ } else {
+ probMod=new pupAll(datMatrixHolder::cpREV45,*freq_i);//pijAcc = new chebyshevAccelerator(probMod);
+ }
+ pijAcc = new trivialAccelerator(probMod);
+ }
+ else if (model == "nucjc") {
+ probMod=new nucJC; pijAcc = new trivialAccelerator(probMod);
+ }
+ else if (model == "aaJC") {
+ probMod=new aaJC; pijAcc = new trivialAccelerator(probMod);
+ }
+ else {errorMsg::reportError("This replacement model is not yet available");
+ }
+
+ // stochasticProcess copies dist/pijAcc, so the temporaries are freed here.
+ stochasticProcess s1s(dist, pijAcc);
+ spPtrVec.push_back(s1s);
+ if (probMod) delete probMod;
+ if (pijAcc) delete pijAcc;
+ if (dist) delete dist;
+ }
+}
+
+// Read the starting trees (plus topology constraints for the first tree)
+// from the "treefile" entry. Trees without branch lengths get a flat 0.05.
+bool fromInstructionFile::getStartingEvolTrees(vector<tree>& vtree,vector<char>& constraintsOfT0){
+ if (vtree.size() !=0) {
+ errorMsg::reportError("error in fromInstructionFile::getStartingEvolTrees");
+ }
+ string oneTreeFileName(searchStringInLines("treefile"));
+ if (oneTreeFileName =="" ) {
+ errorMsg::reportError("The tree file name must be given in the instruction file");
+ }
+ getStartingTreeVecFromFile(oneTreeFileName,vtree,constraintsOfT0);
+ for (size_t k=0;k<vtree.size();++k) {
+ if (!vtree[k].withBranchLength()) vtree[k].createFlatLengthMatrix(0.05);
+ }
+ return true;
+}
+
+
+// Read the starting trees from the "treefile" entry (no constraints variant).
+// Trees without branch lengths get a flat 0.05. Always returns true; absence
+// of the entry aborts via errorMsg.
+bool fromInstructionFile::getStartingEvolTrees(vector<tree>& vtree){
+ if (vtree.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingEvolTrees");}
+// for (int i=1; i < _maxNumOfFiles; ++i ) {
+// auto_ptr<string> treeFileName(searchStringInFile("treefile",i,_instructionFile));
+// if ((treeFileName.get() == NULL) && (i==1)) {
+ string oneTreeFileName(searchStringInLines("treefile"));
+ if (oneTreeFileName=="" ) {
+ errorMsg::reportError("The tree file name must be given in the instruction file");
+ }
+ vtree = getStartingTreeVecFromFile(oneTreeFileName);
+ //tree tmpT(*oneTreeFileName);
+ //vtree.push_back(tmpT);
+ for (size_t k=0;k<vtree.size();++k) {
+ if (!vtree[k].withBranchLength())
+ vtree[k].createFlatLengthMatrix(0.05);
+ }
+ return true;
+// }
+// if (treeFileName.get() == NULL) return true;// found some trees
+// tree t1(*treeFileName);
+// if (!t1.WithBranchLength()) t1.create_flat_length_matrix(0.05);
+// vtree.push_back(t1);
+// }
+// errorMsg::reportError("error in function fromInstructionFile::getStartingEvolTrees");
+// return false;
+}
+
+// Read one sequenceContainer per "seqfile<i>" entry ("-" or a missing first
+// entry means stdin). The format comes from "format<i>" (falling back to
+// "format1"); gaps are converted to missing data before storing.
+void fromInstructionFile::getStartingSequenceData(vector<sequenceContainer>& sdPtrVec,
+ const vector<alphabet* >& _alphabets){
+ if (sdPtrVec.size() !=0) {errorMsg::reportError("error in fromInstructionFile::getStartingSequenceData");}
+ for (int i=1; i <= _maxNumOfFiles; ++i ) {
+ string sequenceFileName(searchStringInLines("seqfile",i));
+ if ((sequenceFileName == "") && (i==1)) sequenceFileName="-";
+ else if (sequenceFileName == "") return;
+
+ istream* inPtr;
+ if (sequenceFileName == "-") {
+ LOG(5,<<"in this option, the sequences are inputed from cin\n...");
+ inPtr = &cin;
+ }else{
+ inPtr = new ifstream(sequenceFileName.c_str());
+ }
+ istream& in = *inPtr;
+ sequenceContainer original;
+
+ string sequenceFileFormat(searchStringInLines("format",i));
+ if ((sequenceFileFormat == "") && (i>1)) {// it is probably the format of number 1.
+ string sequenceFileFormatOf1(searchStringInLines("format",1));
+ sequenceFileFormat = sequenceFileFormatOf1;
+ }
+ alphabet* currentAlphabet = NULL;
+ if ((_alphabets.size() == 1) && (i > 1)) currentAlphabet = _alphabets[0];
+ else {
+ currentAlphabet = _alphabets[i-1];
+ }
+ if (sequenceFileFormat== "mase") original= maseFormat:: read(in,currentAlphabet);
+ else if (sequenceFileFormat=="molphy") original= molphyFormat:: read(in,currentAlphabet);
+ else if (sequenceFileFormat=="clustal") original= clustalFormat::read(in,currentAlphabet);
+ else if (sequenceFileFormat=="fasta") original= fastaFormat:: read(in,currentAlphabet);
+ else if (sequenceFileFormat=="phylip") original= phylipFormat:: read(in,currentAlphabet);
+ else errorMsg::reportError(" format not implemented yet in this version... ");
+
+ // BUG FIX: the ifstream allocated above was leaked on every iteration;
+ // release it now that the container owns the data (never delete &cin).
+ if (inPtr != &cin) delete inPtr;
+
+ if (doesWordExistInLines("removeGapPositions")) {
+ errorMsg::reportError("remove gap position is not implemented yet");
+ }
+ original.changeGaps2MissingData();
+ sdPtrVec.push_back(original);
+ }
+
+}
+
+// Allocate and return a tree read from "treefile"/"treefile1" (NULL when no
+// entry exists). The constraints read alongside the tree are allocated too.
+// NOTE(review): 'constraintsOfT0' is a pointer passed BY VALUE, so the
+// assignments below change only the local copy — the caller never receives
+// the new vector (and it leaks). The interface would need vector<char>** or
+// a reference to work as the header comment intends — TODO confirm callers.
+tree* fromInstructionFile::getOneStartingEvolTree(vector<char>* constraintsOfT0) {
+ tree* _tree = NULL;
+
+ string wordUse = "treefile";
+ bool usetreefile1 = doesWordExistInLines("treefile1");
+ if (usetreefile1 == true) wordUse="treefile1";
+
+ string treeFileName(searchStringInLines(wordUse)); // either treefile or treefile1 is OK.
+ if (treeFileName=="" ) {
+ _tree = NULL;
+ constraintsOfT0 = NULL;
+ return _tree;
+ }
+
+ vector<char> constraints;
+ _tree = new tree(treeFileName,constraints);
+ constraintsOfT0 = new vector<char>(constraints);
+ return _tree;
+}
+
+// Read a single sequenceContainer from "seqfile"/"seqfile1" ("-" or absent
+// means stdin). The format comes from "format"/"format1" (default "fasta");
+// gaps are converted to missing data before assigning to 'sd'.
+void fromInstructionFile::getOneStartingSequenceData(sequenceContainer& sd,
+ const alphabet* _alphabets) {
+ ifstream ins;
+ istream* inPtr = NULL;
+
+ string wordUse = "seqfile";
+ bool useseqfile1 = doesWordExistInLines("seqfile1");
+ if (useseqfile1 == true) wordUse="seqfile1";
+
+ string sequenceFileName(searchStringInLines(wordUse)); // so it can be used with both seqfile and seqfile1
+ if (sequenceFileName == "") sequenceFileName="-";
+ if (sequenceFileName == "-") {
+ inPtr = &cin;
+ }
+ else{
+ ins.open(sequenceFileName.c_str());
+ if (! ins.is_open())
+ errorMsg::reportError("can not open sequace file");
+ inPtr = &ins;
+ }
+
+ istream& in = *inPtr;
+ sequenceContainer original;
+
+ wordUse = "format";
+ bool useFormat1 = doesWordExistInLines("format1");
+ if (useFormat1 == true) wordUse="format1";
+
+ string sequenceFileFormat(searchStringInLines(wordUse));
+ if (sequenceFileFormat == "") {
+ sequenceFileFormat = "fasta"; // default
+ }
+
+ if (sequenceFileFormat == "mase") original= maseFormat::read(in,_alphabets);
+ else if (sequenceFileFormat == "molphy") original= molphyFormat::read(in,_alphabets);
+ else if (sequenceFileFormat == "clustal") original= clustalFormat::read(in,_alphabets);
+ else if (sequenceFileFormat == "fasta") original= fastaFormat::read(in,_alphabets);
+ else if (sequenceFileFormat == "phylip") original= phylipFormat::read(in,_alphabets);
+ else errorMsg::reportError(" format not implemented yet in this version... ");
+
+ if (doesWordExistInLines("removeGapPositions")) {
+ errorMsg::reportError("remove gap position is not implemented yet");
+ }
+ //LOG(5,<<"gaps are changed to missing data..."<<endl);
+ original.changeGaps2MissingData();
+ sd = original;
+}
+
+// Set per-gene gamma alpha parameters from "alpha1", "alpha2", ... entries.
+// When no "alpha1" exists, fall back to the single shared "alpha" entry.
+// Only call this when the processes were built with a gammaDistribution
+// (the static_cast below assumes it).
+void fromInstructionFile::getStartingGammaParameters(vector<stochasticProcess>& spPtrVec) {
+ for (size_t i=0; i < spPtrVec.size(); ++i) {
+ string alphaParam(searchStringInLines("alpha",i+1));
+ if ((alphaParam == "") && (i==0)) {
+ getStartingGammaParameter(spPtrVec);
+ return;
+ }
+ if (alphaParam != "") { // BUG FIX: was '==', which parsed the EMPTY string (alpha=0) and ignored real values
+ MDOUBLE alpha = atof(alphaParam.c_str());
+ (static_cast<gammaDistribution*>(spPtrVec[i].distr()))->setAlpha(alpha);
+ }
+ }
+}
+
+// Set the gamma alpha of a single process, trying the entries "alpha0",
+// then "alpha1", then plain "alpha"; leaves the process untouched when none
+// exists. Assumes the process's distribution is a gammaDistribution.
+void fromInstructionFile::getOneStartingGammaParameter(stochasticProcess& sp) {
+ MDOUBLE alpha = 0;
+ string alphaParam0(searchStringInLines("alpha",0));
+ if (alphaParam0 != "") {
+ alpha = atof(alphaParam0.c_str());
+ } else {
+ string alphaParam1(searchStringInLines("alpha",1));
+ if (alphaParam1 != "") {
+ alpha = atof(alphaParam1.c_str());
+ } else {
+ string alphaParam2(searchStringInLines("alpha"));
+ if (alphaParam2 != "") {
+ alpha = atof(alphaParam2.c_str());
+ } else { // no alpha parameter given,
+ return;
+ }
+ }
+ }
+ (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(alpha);
+}
+
+// Apply the single shared "alpha" entry (when present) to every process.
+// Assumes the processes' distributions are gammaDistribution objects.
+void fromInstructionFile::getStartingGammaParameter(vector<stochasticProcess>& spPtrVec) {
+ string alphaParam(searchStringInLines("alpha"));
+ if (alphaParam == "") return; // nothing configured — leave defaults
+ MDOUBLE alpha = atof(alphaParam.c_str()); // hoisted: parse once, not per process
+ for (size_t i=0; i < spPtrVec.size(); ++i) {
+ (static_cast<gammaDistribution*>(spPtrVec[i].distr()))->setAlpha(alpha);
+ }
+}
+
+// Set per-gene global rates from "rate1", "rate2", ... entries; processes
+// without a matching entry keep their current rate.
+void fromInstructionFile::getStartingGlobalRates(vector<stochasticProcess>& spPtrVec) {
+ for (size_t idx=0; idx < spPtrVec.size(); ++idx) {
+ const string rateStr(searchStringInLines("rate",idx+1));
+ if (rateStr.empty()) continue;
+ MDOUBLE parsedRate = atof(rateStr.c_str());
+ spPtrVec[idx].setGlobalRate(parsedRate);
+ }
+}
diff --git a/libs/phylogeny/fromInstructionFile.h b/libs/phylogeny/fromInstructionFile.h
new file mode 100644
index 0000000..63bec89
--- /dev/null
+++ b/libs/phylogeny/fromInstructionFile.h
@@ -0,0 +1,60 @@
+// $Id: fromInstructionFile.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ____FROM_INSTRUCTION__FILE
+#define ____FROM_INSTRUCTION__FILE
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "alphabet.h"
+#include "sequenceContainer.h"
+#include "someUtil.h"
+
+#include <string>
+#include <iostream>
+#include <vector>
+#include <map>
+using namespace std;
+
+
+
// Parses a key/value instruction file and hands out starting values — trees,
// sequence data, alphabets, stochastic processes, gamma parameters and
// global rates — to the rest of the program.
class fromInstructionFile {
public:
	explicit fromInstructionFile(const string& instructionFileName);
	void readInstructionFile(const string& str);
	// Returns the value stored under `key`; empty string when absent.
	const string&searchStringInLines(const string& key) const;
	bool doesWordExistInLines(const string& key) const;
	// Indexed lookup, e.g. key "alpha" with index 2 addresses entry "alpha2".
	const string& searchStringInLines(const string& key, const int index) const;
	bool getIntValueConnectedWithWord(const string& wordToSearch, int & res);



	void setLogFile();
	void getStartingStochasticProcess(vector<stochasticProcess>& spPtrVec,VVdouble* freqs=NULL);
	void getOneStartingStochasticProcess(stochasticProcess& sp, Vdouble * freqs = NULL);
	void getOneStartingGammaParameter(stochasticProcess& sp);
	bool getStartingEvolTrees(vector<tree>& vtree);// true if the list tree1 file1, tree2 file2 is found.
	bool getStartingEvolTrees(vector<tree>& vtree, vector<char>& constraintsOfT0);// true if the list tree1 file1, tree2 file2 is found.
	tree* getOneStartingEvolTree(vector<char>* constraintsOfT0);// ALLOCATES a new tree and a new constraint vector; caller owns both.
	void getStartingSequenceData(vector<sequenceContainer>& sdPtrVec,
						const vector<alphabet* >& _alphabets);
	void getOneStartingSequenceData(sequenceContainer& sdPtrVec,
						const alphabet* _alphabets);
	void getAlphabets(vector<alphabet* >& _alphabets);// allocated with new;
	// the alphabets must be deleted by the caller!
	alphabet* getOneAlphabet();
	// True when a "gamma" keyword appears anywhere in the instruction file.
	bool useGamma() {
		return doesWordExistInLines("gamma");
	}
	void getStartingGammaParameters(vector<stochasticProcess>& spPtrVec);
	void getStartingGlobalRates(vector<stochasticProcess>& spPtrVec);
	string getOutFile();
protected:

	map<string, string> _lines;  // parsed key -> value pairs
	const int _maxNumOfFiles;// = 1000;
	void getStartingGammaParameter(vector<stochasticProcess>& spPtrVec);
//	tree getStartingEvolTree();

};
+#endif
diff --git a/libs/phylogeny/fromQtoPt.cpp b/libs/phylogeny/fromQtoPt.cpp
new file mode 100644
index 0000000..28a9914
--- /dev/null
+++ b/libs/phylogeny/fromQtoPt.cpp
@@ -0,0 +1,303 @@
+// $Id: fromQtoPt.cpp 5788 2009-01-19 22:24:16Z rubi $
+
+#include "definitions.h"
+#include "fromQtoPt.h"
+#include "errorMsg.h"
+#include "numRec.h"
+#include "matrixUtils.h"
+#include <iostream>
+using namespace std;
+#include <cassert>
+
+//#define VERBOS
+
+
+
+
+void q2pt::fillFromRateMatrix(const vector<MDOUBLE>& freq,
+ const VVdouble & qMatrix) {
+ // we first decompose Q to (F^0.5) M (F^-0.5)
+ // F is a diagonal matrix of the frequencies
+ // M is the symetrical matrix representation of Q.
+
+ VVdouble q_sym;
+ const int matrix_size = qMatrix.size();
+ q_sym.resize(matrix_size);
+ int k=0;
+ for (k=0; k < q_sym.size(); ++k) q_sym[k].resize(matrix_size);
+ calc_symmetric_q(qMatrix,q_sym,freq);
+ // now we have to find the eigen-vector decomposition of the q_sym.
+ VVdouble v; // v is the eigen vectors of the symetrical matrix.
+ v.resize(matrix_size);
+ for (k=0; k < qMatrix.size(); ++k) v[k].resize(matrix_size);
+ Vdouble eigenValues(matrix_size);
+
+ // symmetric_1pam = [v] [eigenValues] [transpose(v)]
+ //MyJacobi(q_sym,v, eigenValues); // notice that inv([v]) = [v] transpose;
+
+
+ /////i changed
+ computeEigenSystem(q_sym,v,eigenValues);
+
+ ////
+//#ifdef VERBOS
+// LOG(5,<<"The eigen-vector matrix of the decomposition of the symetric matrix\n");
+// for (int k1=0; k1 < v.size(); ++k1) {
+// for (int k2=0; k2<v[k1].size(); ++k2) {
+// LOG(5,<<v[k1][k2]<<" ");
+// }
+// LOG(5,<<endl);
+// }
+//#endif
+
+
+ VVdouble left_eig_of_pam; // v is the eigen vectors of the symetrical matrix.
+ left_eig_of_pam.resize(matrix_size);
+ for (k=0; k < left_eig_of_pam.size(); ++k) left_eig_of_pam[k].resize(matrix_size);
+ VVdouble right_eig_of_pam; // v is the eigen vectors of the symetrical matrix.
+ right_eig_of_pam.resize(matrix_size);
+ for (k=0; k < right_eig_of_pam.size(); ++k) right_eig_of_pam[k].resize(matrix_size);
+
+ calc_left_and_right_eig_of_pam(left_eig_of_pam,right_eig_of_pam,v,freq);
+
+ _leftEigen=left_eig_of_pam;
+ _rightEigen=right_eig_of_pam;
+ _eigenVector=eigenValues;
+ Vdouble _freq=freq;
+ // printing a pij(1);
+ //MDOUBLE t = 1;
+ //string fileName = "D://My Documents//adid//nimrod//inputs//inputs//aligned tce//aligned tce//P.F//P.F. vs P.F//eigenValues1.txt";
+// ofstream out(fileName.c_str());
+// for (int i=0;i<eigenValues.size();i++)
+// out<<eigenValues[i] <<" ";
+// out<<endl;
+ //for (int aa1=0; aa1 < eigenValues.size(); ++aa1) {
+ // for (int aa2=0; aa2 < eigenValues.size(); ++aa2) {
+ /// MDOUBLE sum=0;
+ // for (int k=0 ; k<eigenValues.size() ; ++k) {
+ // sum+=( left_eig_of_pam[aa1][k]*right_eig_of_pam[k][aa2]*exp(eigenValues[k]*t) );
+ // }
+ // LOG(5,<<sum<<" ");
+// }
+// LOG(5,<<endl);
+// }
+}
+
+void q2pt::fillFrom1PAMMatrix(const vector<MDOUBLE>& freq,const VVdouble & onePam)
+{
+ fillFromRateMatrix(freq,onePam);
+ for (int i=0; i < _eigenVector.size(); ++i) {
+ assert(_eigenVector[i]>0);
+ _eigenVector[i] = log(_eigenVector[i])* 100;
+ }
+}
+
+bool q2pt::currectFloatingPointProblems(MDOUBLE& sum) const {
+ if ((sum * (sum+err_allow_for_pijt_function))<0) sum=0;
+ if (((sum-1) * (sum-1.0-err_allow_for_pijt_function))<0) sum=1;
+ if (!((sum<=1) && (sum>=0)))
+ return false;
+ return true;
+}
+
+// Pij(t) = Sigma[k]{ [V]ik * [V^-1]kj * e^(Lamda_k*t) }
+const MDOUBLE q2pt::Pij_t(const int i, const int j, const MDOUBLE t) const {
+ if (t<0) errorMsg::reportError("negative length in routine Pij_t");
+// if ((_freq[i] == 0.0) || (_freq[j] == 0.0)) return 0.0;
+ MDOUBLE sum=0;
+ for (int k=0 ; k<_eigenVector.size() ; ++k) {
+ sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t) );
+ }
+ if (currectFloatingPointProblems(sum)) return sum;
+// LOG(1,<<"err Pij_t i="<<i<<" j= "<<j<<" dis= "<<t<<" res= "<<sum<<endl);//sum is not in [0,1]
+ errorMsg::reportError("q2pt::Pij_t error in function pijt... ");return 0;
+}
+
+const MDOUBLE q2pt::dPij_dt(const int i,const int j, const MDOUBLE t) const {
+ MDOUBLE sum=0;
+ for (int k=0 ; k<_eigenVector.size() ; ++k) {
+ sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t)*_eigenVector[k]);
+ }
+ return sum;
+}
+
+
+const MDOUBLE q2pt::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
+ MDOUBLE sum=0;;
+ for (int k=0 ; k<_eigenVector.size() ; ++k) {
+ sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t)*_eigenVector[k]*_eigenVector[k]);
+ }
+ return sum;
+}
+
+void q2pt::calc_symmetric_q(const VVdouble &q_matrix,
+ VVdouble &symmetric_q,
+ const Vdouble & freq)
+//----------------------------------------------------------------------------------
+//input: symmetric_1pam matrix is the output, pam1 is the input
+//output: non
+//doing: procedures to find eigen values work on symetrical matrices.
+// dayhoff 1 pam in a new basis is symetrical
+// the transformation is
+//
+// (1) [symmetric_1pam] = [sqrt(pi)] * [pam1] * [1/sqrt(pi)]
+//
+// [] for matrix. [sqrt(pi)] is a diagonal matrix were a[i][i] is the root of freq[i]
+//reference: JME (1997) 45:696-703 Estimation of reversible substitution matrices from
+// multiple pairs of sequences. Lars Arvestad and William J. Bruno.
+//----------------------------------------------------------------------------------
+{
+ int i,j;
+ for (i=0; i<q_matrix.size(); ++i) {
+ for (j=0; j<q_matrix.size(); ++j) {
+ if (q_matrix[i][j] != 0.0) {
+ symmetric_q[i][j] = q_matrix[i][j]*sqrt(freq[i])/sqrt(freq[j]);
+ }
+ }
+ }
+ /*check OZ
+ LOG(5,<<"sim matrix"<<endl);
+ for (i=0;i<symmetric_q.size();++i) {
+ for (j=0; j<symmetric_q.size(); ++j) {
+ //LOG(5,<<symmetric_q[i][j]<<" ");
+ LOG(5,<< setprecision(3) << setw(5) << symmetric_q[i][j]<<'\t');
+
+ }
+ LOG(5,<<endl);
+ } */
+
+}
+
+void q2pt::calc_left_and_right_eig_of_pam(
+ VVdouble &left_eig_of_pam,
+ VVdouble &right_eig_of_pam,
+ const VVdouble &v,
+ const Vdouble& freq) {
+//----------------------------------------------------------------------------------
+//input: left_eig_of_pam, right_eig_of_pam they will be the eigenvectors of pam1;
+// freq is the vector of amino acid frequencies of the model.
+// v is the eigen vector matrix of the symetrical matrix
+//output: non
+//doing: now [SYM] = [SqrtFreq] * [pam1] * inv([SqrtFreq])
+// so [pam1] = inv([SqrtFreq]) * [SYM] * [SqrtFreq]
+// SYM = [V] * [D] * transp([V])
+// hence [pam1] = {inv([SqrtFreq]) * [V]} * [D] * {transp([V]) * [SqrtFreq]}
+// {inv([SqrtFreq]) * [V]} is left_eig_of_pam, and the above one ^ is right.
+//----------------------------------------------------------------------------------
+ int i,j;
+ for (i=0;i<v.size();++i) {
+ for (j=0;j<v.size();++j)
+ {
+ if ((freq[i] != 0.0) &&(freq[j] != 0.0)) {
+ left_eig_of_pam[i][j] = (1/sqrt(freq[i]))* v[i][j];
+ right_eig_of_pam[i][j]= sqrt(freq[j]) * v[j][i];
+ }
+ }
+ }
+
+// LOG(5,<<"left_eig_of_pam"<<endl);
+// for (i=0;i<4;++i) {
+// for (j=0; j<4; ++j) {
+// LOG(5,<<left_eig_of_pam[i][j]<<" ");
+// LOG(5,<<pam1[i][i]<<" ");
+// }
+// LOG(5,<<endl);
+// }
+//
+// LOG(5,<<"right eig_of_pam"<<endl);
+// for (i=0;i<4;++i) {
+// for (j=0; j<4; ++j) {
+// LOG(5,<<right_eig_of_pam[i][j]<<" ");
+// LOG(5,<<pam1[i][i]<<" ");
+// }
+// LOG(5,<<endl);
+// }
+//
+// LOG(5,<<"press anykey"<<endl);
+// char lll;
+// cin>>lll;
+
+
+}
+
+VVdouble get1PamFromCountMatrix(const vector<MDOUBLE>& freq,
+ const VVdouble & sub_matrix){
+//----------------------------------------------------------------------------------
+//input: pam1 : a pointer to the matrix where pam1 will be.
+// sub_matrix: the substitution matrix
+// freq vector: the amino acid's frequenceis.
+//output: non
+//doing: fill in 1 pam from sub matrix and freq vector
+//calculation: sub_matrix[a][b] is the substitution matrix, between a and b
+// (sub_matrix[a][b]=sub_matrix[b][a])
+// we use f[a][b] insted of sub_matrix[a][b] to be the same as the book
+//(reference) "introduction to computational molecular biology by setubal and meidanis pg 80;
+// let f[a] be sigma f[a][b] on all b (we made f[a][a] = 0;)
+// i.e. f[a] is the number of mutation from a observed
+// let f be sigma f[a] on all a; (=the total mutations*2)
+// now, the mutaibility of a is defined as
+//
+// (1) m[a] = f[a] / (100*f*freq[a])
+//
+// 100*f is a scaling factor for 1 pam.
+// then pam1[a][b] will be pr(a->b/a changed) * pr(a changed)
+//
+// (2) pam1[a][b] = (f[a][b]/f[a])*m[a]
+//
+// (3) f[a][a] = 1-m[a] (easy to show)
+//
+// notice that sigma 1pam[a][b] over all b is 1 and that
+// sigma freq[a]*1pam[a][a] over all a is 0.99
+//----------------------------------------------------------------------------------
+ const int _alphabetSize=sub_matrix.size();
+ VVdouble pam1;
+ pam1.resize(_alphabetSize);
+ for (int z=0; z < _alphabetSize; ++z) {
+ pam1[z].resize(_alphabetSize,0);
+ }
+
+ int i,j;//indices
+ MDOUBLE total=0; // i.e.f in the above explanation
+ for (i=0;i<_alphabetSize;++i) {
+ for (j=0; j<_alphabetSize; ++j){
+ total+=sub_matrix[i][j];
+ }
+ }
+
+ MDOUBLE tmsum;
+ for (i=0;i<_alphabetSize;++i) {
+ tmsum = 0.0;
+ for (j=i+1; j<_alphabetSize; ++j){
+ if ((freq[i] == 0.0) || (freq[j] == 0.0)) {
+ pam1[i][j] = 0.0;pam1[j][i] = 0.0;
+ } else {
+ pam1[i][j] = sub_matrix[i][j]/(100.0*total*freq[i]);
+ pam1[j][i] = sub_matrix[i][j]/(100.0*total*freq[j]);
+ }
+ }
+ }
+
+ for (i=0;i<_alphabetSize;++i) {
+ tmsum = 0.0;
+ for (j=0;j<_alphabetSize;++j) {
+ if (j!=i) tmsum += pam1[i][j];
+ }
+
+ if (freq[i] != 0.0) {
+ pam1[i][i]=1.0-tmsum;
+ }
+ }
+
+#ifdef VERBOS
+ LOG(5,<<" priting the 4*4 top-left corner of the 1pam matrix * 10^6 "<<endl);
+ for (int a=0; a < 4; ++a) {
+ for (int b=0; b < 4; ++b) {
+ LOG(5,<<pam1[a][b]*1000000.0<<" ");
+ }
+ LOG(5,<<endl);
+ }
+#endif
+ return pam1;
+
+}
+
diff --git a/libs/phylogeny/fromQtoPt.h b/libs/phylogeny/fromQtoPt.h
new file mode 100644
index 0000000..1d2f696
--- /dev/null
+++ b/libs/phylogeny/fromQtoPt.h
@@ -0,0 +1,67 @@
+// $Id: fromQtoPt.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___FROM_Q_TO_PT
+#define ___FROM_Q_TO_PT
+
+#include "replacementModel.h"
+#include <cmath>
+#include <iomanip>
+
+int MyJacobi(VVdouble &Insym, VVdouble &RightEigenV, Vdouble &EigenValues);// num rec
+
+VVdouble get1PamFromCountMatrix(const vector<MDOUBLE>& freq,
+ const VVdouble & sub_matrix);
+
// Converts a rate matrix Q (or a 1-PAM probability matrix) into transition
// probabilities via eigen-decomposition:
//   Pij(t) = sum_k left[i][k] * right[k][j] * exp(lambda_k * t)
class q2pt : public replacementModel {
public:
	// Eigen-decomposes qMatrix and caches the eigenvectors, eigenvalues
	// and stationary frequencies used by Pij_t() and its derivatives.
	void fillFromRateMatrix(const vector<MDOUBLE>& freq,
					const VVdouble & qMatrix);
	// Same for a 1-PAM probability matrix; eigenvalues are log-scaled.
	void fillFrom1PAMMatrix(const vector<MDOUBLE>& freq,
					const VVdouble & onePam);


	explicit q2pt(): err_allow_for_pijt_function(1e-4){}

	// Hand-written: the const member err_allow_for_pijt_function suppresses
	// the implicitly generated copy assignment operator.
	q2pt& operator=(const q2pt &other) {
		_freq = other._freq;
		_leftEigen = other._leftEigen;
		_rightEigen = other._rightEigen;
		_eigenVector = other._eigenVector;
		return (*this);
	}

	virtual replacementModel* clone() const { return new q2pt(*this); }

	const int alphabetSize() const {return _freq.size();}


	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
	const MDOUBLE freq(const int i) const {return _freq[i];};
	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
	const MDOUBLE err_allow_for_pijt_function; // tolerance for clamping Pij(t) into [0,1] (1e-4)

	VVdouble getLeftEigen() const {return _leftEigen;} ;
	VVdouble getRightEigen() const {return _rightEigen;};
	Vdouble getEigenVec() const {return _eigenVector;};

private:
	Vdouble _freq;        // stationary frequencies
	VVdouble _leftEigen;  // inv(sqrt(F)) * V
	VVdouble _rightEigen; // transp(V) * sqrt(F)
	Vdouble _eigenVector; // eigenvalues lambda_k
	bool currectFloatingPointProblems(MDOUBLE& sum) const;

public: // to become private:
	void calc_symmetric_q(const VVdouble &q_matrix,VVdouble &symmetric_q,const Vdouble & freq);
	void calc_left_and_right_eig_of_pam(
		VVdouble &left_eig_of_pam,
		VVdouble &right_eig_of_pam,
		const VVdouble &v,
		const Vdouble& freq);
};
+
+#endif
+
diff --git a/libs/phylogeny/gainLossAlphabet.cpp b/libs/phylogeny/gainLossAlphabet.cpp
new file mode 100644
index 0000000..d1b145d
--- /dev/null
+++ b/libs/phylogeny/gainLossAlphabet.cpp
@@ -0,0 +1,59 @@
+#include "gainLossAlphabet.h"
+
gainLossAlphabet::gainLossAlphabet() {} // binary (0/1 presence-absence) alphabet; no state to initialize
+
// Maps one character to its alphabet id: '0' -> 0, '1' -> 1, and the gap
// characters '-'/'_' -> -1. Any other character aborts via errorMsg.
// NOTE(review): gaps map to -1 here, while unknown() returns -2 and
// fromInt() only renders -2 as '-' — confirm the intended -1/-2 convention
// with callers before changing either side.
int gainLossAlphabet::fromChar(const char s) const{
	switch (s) {
	case '0': return 0; break;
	case '1': return 1; break;
	case '-' : case'_' : return -1; break;

	default:
		vector<string> err;
		err.push_back(" The gainLoss sequences contained the character: ");
		err[0]+=s;
		err.push_back(" gainLoss was not one of the following: ");
		err.push_back(" 0, 1");
		errorMsg::reportError(err);
	}// end of switch
	return -99; // unreachable: reportError does not return
}// end of function
+
+vector<int> gainLossAlphabet::fromString(const string &str) const {
+ vector<int> vec;
+ for (int i=0;i<str.size();i++)
+ vec.push_back(fromChar(str[i]));
+ return vec;
+}
+
+string gainLossAlphabet::fromInt(const int in_id) const{
+ char res = 0;
+ switch (in_id) {
+ case 0 : res = '0' ; break;
+ case 1 : res = '1' ; break;
+ case -2 : res = '-'; break;
+ default:
+ vector<string> err;
+ err.push_back("unable to print gainLoss_id. gainLossl_id was not one of the following: ");
+ err.push_back("0,1,2");
+ errorMsg::reportError(err);
+ }//end of switch
+ string vRes;
+ vRes.append(1,res);
+ return vRes;
+}// end of function
+
+// There are no relations here.
+int gainLossAlphabet::relations(const int charInSeq, const int charToCheck) const{
+ if (charInSeq == charToCheck)
+ return 1;
+ return 0;
+}
+
+int gainLossAlphabet::fromChar(const string& str, const int pos) const{
+ return fromChar(str[pos]);
+}
+
+
+
+
diff --git a/libs/phylogeny/gainLossAlphabet.h b/libs/phylogeny/gainLossAlphabet.h
new file mode 100644
index 0000000..3f10442
--- /dev/null
+++ b/libs/phylogeny/gainLossAlphabet.h
@@ -0,0 +1,25 @@
+#ifndef ___GAIN_LOSS_ALPH
+#define ___GAIN_LOSS_ALPH
+
+#include "alphabet.h"
+#include "errorMsg.h"
+
// Binary presence/absence alphabet: '0' (absent) and '1' (present), with
// -2 as the unknown id. Used for gain/loss character evolution.
class gainLossAlphabet : public alphabet {
public:
	explicit gainLossAlphabet();
	virtual ~gainLossAlphabet() {}
	virtual alphabet* clone() const { return new gainLossAlphabet(*this); }
	int unknown() const {return -2;}
	// This alphabet defines no gap id; calling gap() reports an error.
	int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;}
	int size() const {return 2;} // presence or absence only
	int stringSize() const {return 1;} // one letter code.
	int relations(const int charInSeq, const int charToCheck) const; // identity only
	int fromChar(const string& str, const int pos) const;
	int fromChar(const char s) const;
	string fromInt(const int in_id) const;
	vector<int> fromString(const string& str) const;
	bool isSpecific(const int id) const {return (id>=0 && id < size());}

};
+
+#endif
diff --git a/libs/phylogeny/gammaDistribution.cpp b/libs/phylogeny/gammaDistribution.cpp
new file mode 100644
index 0000000..a99a928
--- /dev/null
+++ b/libs/phylogeny/gammaDistribution.cpp
@@ -0,0 +1,36 @@
+// $Id: gammaDistribution.cpp 2862 2007-11-27 10:59:03Z itaymay $
+
+ #include "definitions.h"
+#include "gammaDistribution.h"
+#include "gammaUtilities.h"
+#include "logFile.h"
+#include <cmath>
+
+
// Gamma distribution with beta forced equal to alpha (so the mean rate is 1).
gammaDistribution::gammaDistribution(MDOUBLE alpha,int in_number_of_categories) :
	generalGammaDistribution(alpha,alpha,in_number_of_categories) {}
+
// Copy constructor: delegates wholesale to the base-class copy.
gammaDistribution::gammaDistribution(const gammaDistribution& other) :
	generalGammaDistribution(other) {}
+
+void gammaDistribution::setAlpha(MDOUBLE in_alpha) {
+ if (in_alpha == _alpha) return;
+ setGammaParameters( categories(), in_alpha);
+}
+
// Builds the gamma distribution: forwards to the base class with beta tied
// to alpha.
void gammaDistribution::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha) {
	generalGammaDistribution::setGammaParameters(in_number_of_categories,in_alpha,in_alpha);
}
+
+void gammaDistribution::change_number_of_categories(int in_number_of_categories) {
+ if (in_number_of_categories == categories())
+ return;
+ setGammaParameters( in_number_of_categories, _alpha, _alpha);
+}
+
// Guarded three-argument overload: this distribution requires alpha == beta;
// forwards to the base class once the guard passes.
void gammaDistribution::setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta) {
	if (alpha!=beta)
		errorMsg::reportError("gammaDistribution::setGammaParameters : can not set beta because alpha must be equal to beta");
	generalGammaDistribution::setGammaParameters(numOfCategories,alpha,beta);
}
diff --git a/libs/phylogeny/gammaDistribution.h b/libs/phylogeny/gammaDistribution.h
new file mode 100644
index 0000000..09f6503
--- /dev/null
+++ b/libs/phylogeny/gammaDistribution.h
@@ -0,0 +1,33 @@
+// $Id: gammaDistribution.h 2862 2007-11-27 10:59:03Z itaymay $
+
+#ifndef ___GAMMA_DIST
+#define ___GAMMA_DIST
+/************************************************************
+This distribution can take several forms depending on its free parameter alpha
+(beta is assumed to be equal to alpha). For an extensive exlpanation of this distribution
+see http://mathworld.wolfram.com/GammaDistribution.html.
+please note that the borders of the categories are defined according to calculation of
+the gamma integral, according to numerical recipes in gammaUtilities
+_globalRate represents the rate for two joint genes.
+************************************************************/
+#include "definitions.h"
+#include "generalGammaDistribution.h"
+#include "errorMsg.h"
+
// Gamma rate distribution with beta forced equal to alpha, so the mean rate
// is 1. All heavy lifting lives in generalGammaDistribution.
class gammaDistribution : public generalGammaDistribution {

public:
	explicit gammaDistribution() {}
	explicit gammaDistribution(MDOUBLE alpha,int in_number_of_categories);
	explicit gammaDistribution(const gammaDistribution& other);
	virtual ~gammaDistribution() {}
	virtual distribution* clone() const { return new gammaDistribution(*this); }

	virtual void setAlpha(MDOUBLE newAlpha); // rebuilds categories; beta follows alpha
	virtual void setGammaParameters(int numOfCategories=1 ,MDOUBLE alpha=1);
	virtual void change_number_of_categories(int in_number_of_categories);
	// to prevent the user from using alpha!=beta
	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	virtual void setBeta(MDOUBLE newBeta) {errorMsg::reportError("gammaDistribution::setBeta : can not set beta because alpha=beta");}
};
+#endif
diff --git a/libs/phylogeny/gammaDistributionFixedCategories.cpp b/libs/phylogeny/gammaDistributionFixedCategories.cpp
new file mode 100644
index 0000000..13898e8
--- /dev/null
+++ b/libs/phylogeny/gammaDistributionFixedCategories.cpp
@@ -0,0 +1,35 @@
+#include "gammaDistributionFixedCategories.h"
+#include "errorMsg.h"
+#include "gammaUtilities.h"
+#include "matrixUtils.h"
+
// Builds from user-supplied fixed category boundaries; beta is tied to alpha.
gammaDistributionFixedCategories::gammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha)
: generalGammaDistributionFixedCategories(fixedBoundaries,alpha,alpha)
{

}
+
// Copy constructor: delegates wholesale to the base-class copy.
gammaDistributionFixedCategories::gammaDistributionFixedCategories(const gammaDistributionFixedCategories& other)
: generalGammaDistributionFixedCategories(other) {
}
+
// Builds catNum fixed categories for the given alpha (beta == alpha).
gammaDistributionFixedCategories::gammaDistributionFixedCategories(MDOUBLE alpha, int catNum)
: generalGammaDistributionFixedCategories(alpha, alpha,catNum)
{
}
+
// Recomputes category probabilities for a new alpha (beta tied to alpha);
// the category rates themselves stay fixed.
void gammaDistributionFixedCategories::setGammaParameters(int in_number_of_categories, MDOUBLE alpha)
{
	generalGammaDistributionFixedCategories::setGammaParameters(in_number_of_categories,alpha,alpha);
}
+
+
+void gammaDistributionFixedCategories::setAlpha(MDOUBLE in_alpha) {
+ if (in_alpha == _alpha) return;
+ setGammaParameters( categories(), in_alpha);
+}
+
// Forwards directly to the base class (note: unlike
// gammaDistribution::change_number_of_categories, there is no early-out on
// an unchanged count).
void gammaDistributionFixedCategories::change_number_of_categories(int in_number_of_categories)
{
	generalGammaDistributionFixedCategories::change_number_of_categories(in_number_of_categories);
}
diff --git a/libs/phylogeny/gammaDistributionFixedCategories.h b/libs/phylogeny/gammaDistributionFixedCategories.h
new file mode 100644
index 0000000..1b21872
--- /dev/null
+++ b/libs/phylogeny/gammaDistributionFixedCategories.h
@@ -0,0 +1,38 @@
+#ifndef ___GAMMA_DISTR_FIXED_CATEGORIES
+#define ___GAMMA_DISTR_FIXED_CATEGORIES
+/************************************************************
+This class differ from the regular GammaDistribution in that
+the rateCategories are fixed according to the user's decision.
+Thus, only the probability of each category changes for each specific alpha value but
+the rate categories themselves are constant.
+************************************************************/
+#include "definitions.h"
+#include "generalGammaDistributionFixedCategories.h"
+#include "errorMsg.h"
+
// Gamma distribution (beta == alpha) whose rate categories are fixed by the
// user; only the per-category probabilities change with alpha.
class gammaDistributionFixedCategories : public generalGammaDistributionFixedCategories {

public:
	explicit gammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha);
	explicit gammaDistributionFixedCategories(const gammaDistributionFixedCategories& other);
	explicit gammaDistributionFixedCategories(MDOUBLE alpha, int catNum);
	virtual ~gammaDistributionFixedCategories() {}
	virtual distribution* clone() const { return new gammaDistributionFixedCategories(*this); }
	virtual void setGammaParameters(int in_number_of_categories, MDOUBLE alpha);
	virtual void setAlpha(MDOUBLE newAlpha);
	virtual void change_number_of_categories(int in_number_of_categories);
	// to prevent the user from using alpha!=beta
	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta) {
		if (alpha!=beta)
			errorMsg::reportError("gammaDistributionFixedCategories::setGammaParameters : can not set beta because alpha must be equal to beta");
		generalGammaDistributionFixedCategories::setGammaParameters(numOfCategories,alpha,beta);
	}
	virtual void setBeta(MDOUBLE newBeta) {
		errorMsg::reportError("generalGammaDistributionFixedCategories::setBeta : can not set beta because alpha=beta");
	}
};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/gammaDistributionLaguerre.cpp b/libs/phylogeny/gammaDistributionLaguerre.cpp
new file mode 100644
index 0000000..f454be1
--- /dev/null
+++ b/libs/phylogeny/gammaDistributionLaguerre.cpp
@@ -0,0 +1,42 @@
+#include "gammaDistributionLaguerre.h"
+#include "gammaUtilities.h"
+#include "logFile.h"
+#include <cmath>
+
+
// Gamma distribution (beta tied to alpha) with Gauss-Laguerre categories.
gammaDistributionLaguerre::gammaDistributionLaguerre(MDOUBLE alpha,int in_number_of_categories)
: generalGammaDistributionLaguerre(alpha,alpha,in_number_of_categories)
{
}
+
// Copy constructor: delegates wholesale to the base-class copy.
gammaDistributionLaguerre::gammaDistributionLaguerre(const gammaDistributionLaguerre& other)
: generalGammaDistributionLaguerre(other)
{
}
+
+void gammaDistributionLaguerre::setAlpha(MDOUBLE in_alpha)
+{
+ if (in_alpha == _alpha)
+ return;
+ setGammaParameters(categories(), in_alpha);
+}
+
// Builds the gamma distribution: forwards to the base class with beta tied
// to alpha.
void gammaDistributionLaguerre::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha)
{
	generalGammaDistributionLaguerre::setGammaParameters(in_number_of_categories, in_alpha, in_alpha);
}
+
+void gammaDistributionLaguerre::change_number_of_categories(int in_number_of_categories)
+{
+ if (in_number_of_categories == categories())
+ return;
+ setGammaParameters(in_number_of_categories, _alpha, _alpha);
+}
+
+void gammaDistributionLaguerre::setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta)
+{
+ if (alpha != beta)
+ errorMsg::reportError("gammaDistributionLaguerre::setGammaParameters : can not set beta because alpha must be equal to beta");
+ generalGammaDistributionLaguerre::setGammaParameters(numOfCategories, alpha, alpha);
+}
diff --git a/libs/phylogeny/gammaDistributionLaguerre.h b/libs/phylogeny/gammaDistributionLaguerre.h
new file mode 100644
index 0000000..904445a
--- /dev/null
+++ b/libs/phylogeny/gammaDistributionLaguerre.h
@@ -0,0 +1,34 @@
+// $Id: gammaDistribution.h 2768 2007-11-22 12:57:44Z osnatz $
+
+#ifndef ___GAMMA_DIST_LAGUERRE
+#define ___GAMMA_DIST_LAGUERRE
+/************************************************************
+This distribution can take several forms depending on its free parameter alpha
+(beta is assumed to be equal to alpha). For an extensive exlpanation of this distribution
+see http://mathworld.wolfram.com/GammaDistribution.html.
+please note that the borders of the categories are defined according to calculation of
+the gamma integral, according to numerical recipes in gammaUtilities
+_globalRate represents the rate for two joint genes.
+************************************************************/
+#include "definitions.h"
+#include "generalGammaDistributionLaguerre.h"
+#include "errorMsg.h"
+
// Gamma rate distribution (beta forced equal to alpha) whose category rates
// and probabilities come from Gauss-Laguerre quadrature; see
// generalGammaDistributionLaguerre for the machinery.
class gammaDistributionLaguerre : public generalGammaDistributionLaguerre {

public:
	explicit gammaDistributionLaguerre() {}
	explicit gammaDistributionLaguerre(MDOUBLE alpha,int in_number_of_categories);
	explicit gammaDistributionLaguerre(const gammaDistributionLaguerre& other);
	virtual ~gammaDistributionLaguerre() {}
	virtual distribution* clone() const { return new gammaDistributionLaguerre(*this); }

	virtual void setAlpha(MDOUBLE newAlpha); // rebuilds categories; beta follows alpha
	virtual void setGammaParameters(int numOfCategories=1 ,MDOUBLE alpha=1);
	virtual void change_number_of_categories(int in_number_of_categories);
	// to prevent the user from using alpha!=beta
	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	virtual void setBeta(MDOUBLE newBeta) {errorMsg::reportError("gammaDistributionLaguerre::setBeta : can not set beta because alpha=beta");
	}
};
+#endif
diff --git a/libs/phylogeny/gammaDistributionPlusInvariant.cpp b/libs/phylogeny/gammaDistributionPlusInvariant.cpp
new file mode 100644
index 0000000..522ec57
--- /dev/null
+++ b/libs/phylogeny/gammaDistributionPlusInvariant.cpp
@@ -0,0 +1,13 @@
+#include "gammaDistributionPlusInvariant.h"
+
+
+
+
+//#define RATE_INVARIANT 1e-10
+
+
+
+
+
+
+
diff --git a/libs/phylogeny/gammaDistributionPlusInvariant.h b/libs/phylogeny/gammaDistributionPlusInvariant.h
new file mode 100644
index 0000000..5f30f20
--- /dev/null
+++ b/libs/phylogeny/gammaDistributionPlusInvariant.h
@@ -0,0 +1,35 @@
+#ifndef ___GAMMA_DIST_PLUSINV
+#define ___GAMMA_DIST_PLUSINV
+/************************************************************
+This class describes a combination of a predefined dsitrubtion ,
+with an additional invariant category of probability _Pinv
+This category is always the last rate category (i.e., rate(categories()) == 0)
+************************************************************/
+#include "definitions.h"
+#include "distributionPlusInvariant.h"
+#include "distribution.h"
+#include "gammaDistribution.h"
+#include "errorMsg.h"
+#include "gammaUtilities.h"
+#include "logFile.h"
+#include <cmath>
+
+
+
// Wraps a gamma base distribution with an extra invariant (near-zero rate)
// category of probability pInv; the mixing logic lives in
// distributionPlusInvariant.
class gammaDistributionPlusInvariant : public distributionPlusInvariant {
public:
	explicit gammaDistributionPlusInvariant(distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10): distributionPlusInvariant(pDist,pInv,globalRate,rateInvariantVal){}
	// NOTE(review): declared but not defined in the accompanying .cpp —
	// using this constructor will fail at link time; confirm it is needed.
	explicit gammaDistributionPlusInvariant();
	gammaDistributionPlusInvariant(const gammaDistributionPlusInvariant& other) {(*this) = other;}
	//virtual gammaDistributionPlusInvariant& operator=(const gammaDistributionPlusInvariant& other);
	gammaDistributionPlusInvariant* clone() const {return new gammaDistributionPlusInvariant(*this);}
	virtual ~gammaDistributionPlusInvariant(){}



// get GammaDistribution params
	// Both accessors delegate to the wrapped base distribution, which is
	// assumed to be a gammaDistribution (unchecked static_cast).
	virtual void setAlpha(MDOUBLE newAlpha) {return static_cast<gammaDistribution*>(_pBaseDist)->setAlpha(newAlpha);};
	virtual MDOUBLE getAlpha() const {return static_cast<gammaDistribution*>(_pBaseDist)->getAlpha();}

};
+#endif
diff --git a/libs/phylogeny/gammaUtilities.cpp b/libs/phylogeny/gammaUtilities.cpp
new file mode 100644
index 0000000..dd47931
--- /dev/null
+++ b/libs/phylogeny/gammaUtilities.cpp
@@ -0,0 +1,170 @@
+// $Id: gammaUtilities.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "gammaUtilities.h"
+#include "logFile.h"
+#include "errorMsg.h"
+#include <cmath>
+
+
+//gser: returns the incomplete Gamma function evaluated by its series representation
+void gser(MDOUBLE *gamser, MDOUBLE a, MDOUBLE x, MDOUBLE *gln)
+{
+ //MDOUBLE gammln(MDOUBLE xx);
+
+ int n;
+ MDOUBLE sum,del,ap;
+
+ *gln=gammln(a);
+ if (x <= 0.0) {
+ if (x < 0.0) LOG(1,<<"x less than 0 in routine gser");
+ *gamser=0.0;
+ return;
+ } else {
+ ap=a;
+ del=sum=1.0/a;
+ for (n=1;n<=ITMAX;n++) {
+ ++ap;
+ del *= x/ap;
+ sum += del;
+ if (fabs(del) < fabs(sum)*EPS) {
+ *gamser=sum*exp(-x+a*log(x)-(*gln));
+ return;
+ }
+ }
+ LOG(1,<<"Too many interations in routine gser");
+ return;
+ }
+}
+
//gcf: returns the complement of the incomplete Gamma function evaluated by its continued fraction representation
//The result Q(a,x) is stored in *gammcf and ln(Gamma(a)) in *gln.
//Uses the modified Lentz method; converges quickly for x > a+1.
void gcf(MDOUBLE *gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE *gln)
{
	int i;
	MDOUBLE an,b,c,d,del,h;

	*gln=gammln(a);
	b=x+1.0-a;
	c=1.0/FPMIN;
	d=1.0/b;
	h=d;
	for (i=1;i<=ITMAX;i++) {
		an = -i*(i-a);
		b += 2.0;
		d=an*d+b;
		if (fabs(d) < FPMIN) d=FPMIN;	// guard against division by (near) zero
		c=b+an/c;
		if (fabs(c) < FPMIN) c=FPMIN;	// same guard for the other recurrence
		d=1.0/d;
		del=d*c;
		h *= del;
		if (fabs(del-1.0) < EPS) break;	// converged
	}
	if (i > ITMAX) LOG(1,<<"a too large, ITMAX too small in gcf");
	*gammcf=exp(-x+a*log(x)-(*gln))*h;	// scale by x^a * e^-x / Gamma(a)
}
+
//gammp(a, x): computes the incomplete Gamma function which is:
// 1/Gamma(a) * (the integral from 0 to x of (t^(a-1)*e^(-t)) dt)
//gammp can be computed in two different ways: by a series representation (gser(..))
//or by a continued fraction representation (gcf(..))
//gammp chooses which function will be used, according to the values of a and x
MDOUBLE gammp(MDOUBLE a, MDOUBLE x)
{
	MDOUBLE gamser,gammcf,gln;

	if (x < 0.0 || a <= 0.0) LOG(1,<<"Invalid arguments in routine gammp");
	if (x < (a+1.0)) {
		// series representation converges faster for x < a+1
		gser(&gamser,a,x,&gln);
		return gamser;
	} else {
		// continued fraction computes Q(a,x); P = 1 - Q
		gcf(&gammcf,a,x,&gln);
		return 1.0-gammcf;
	}
}
+
+
+
+//I add////////////
+
+
+MDOUBLE gammq(MDOUBLE a, MDOUBLE x)
+{
+ void gcf(MDOUBLE *gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE *gln);
+ void gser(MDOUBLE *gamser, MDOUBLE a, MDOUBLE x, MDOUBLE *gln);
+ MDOUBLE gamser,gammcf,gln;
+
+ if (x < 0.0 || a <= 0.0) LOG(1,<<"Invalid arguments in routine gammp");
+ if (x < (a+1.0)) {
+ gser(&gamser,a,x,&gln);
+ return 1.0 - gamser;
+ } else {
+ gcf(&gammcf,a,x,&gln);
+ return gammcf;
+ }
+}
/*************************************************************************
// this function computes the ln of the gamma function
// The Gamma function: Gamma(xx) = integral from 0 to infinity of (t^(xx-1)*e^(-t)) dt.
// Implemented with the Lanczos approximation (Numerical Recipes, sec. 6.1).
*************************************************************************/
MDOUBLE gammln(MDOUBLE xx)
{
	MDOUBLE x,y,tmp,ser;
	// Lanczos coefficients (fixed constants of the approximation)
	static MDOUBLE cof[6]={
		static_cast<MDOUBLE>(76.18009172947146),
		static_cast<MDOUBLE>(-86.50532032941677),
		static_cast<MDOUBLE>(24.01409824083091),
		static_cast<MDOUBLE>(-1.231739572450155),
		static_cast<MDOUBLE>(0.1208650973866179e-2),
		static_cast<MDOUBLE>(-0.5395239384953e-5)
	};
	int j;

	y=x=xx;
	tmp=x+5.5;
	tmp -= (x+0.5)*log(tmp);
	ser=1.000000000190015f;
	for (j=0;j<6;j++) ser += cof[j]/++y;	// accumulate the Lanczos series
	return -tmp+log(2.5066282746310005*ser/x);	// 2.506... == sqrt(2*pi)
}
+
+//
+MDOUBLE search_for_z_in_dis_with_any_beta(MDOUBLE alpha,MDOUBLE beta, MDOUBLE ahoson)
+{
+ return (search_for_z_in_dis_with_beta_1(alpha,ahoson)/beta);
+}
+
//Finds z such that gammp(alpha, z) == ahoson, i.e. the ahoson-quantile of
//Gamma(alpha, 1), by bisection over [0, 99999].
MDOUBLE search_for_z_in_dis_with_beta_1(MDOUBLE alpha, MDOUBLE ahoson)
{
	// ahoson is a cumulative probability, so it must lie in [0, 1]
	if ( ahoson>1 || ahoson<0 ) errorMsg::reportError("Error in function search_for_z_in_dis_with_beta_1");
	MDOUBLE left=0;
	MDOUBLE right=99999.0;
	MDOUBLE tmp=5000.0;	// initial probe; bisection converges regardless of this choice
	MDOUBLE results=0.0;

	for (int i=0;i<100000000 ; i++)
	{
		results=gammp(alpha,tmp);
		if (fabs(ahoson-results)<ERR_FOR_GAMMA_CALC) {
			return tmp;	// CDF at tmp is within tolerance of the target
		}
		if (results>ahoson) {
			right=tmp;	// quantile lies to the left of tmp
		}
		else left=tmp;	// quantile lies to the right of tmp
		tmp=(right+left)/2;
	}
	// Bisection failed to converge within the iteration budget.
	cout << "ERROR in search_for_z_in_dis_with_beta_1() Alpha is: "<< alpha <<endl;
	errorMsg::reportError("Error in function search_for_z_in_dis_with_beta_1 - first bonderi is 0");// also quit the program
	return 0;
}
+
+MDOUBLE the_avarage_r_in_category_between_a_and_b(MDOUBLE left, MDOUBLE right, MDOUBLE alpha, MDOUBLE beta, int k)
+{// and and b are the border of percentile k)
+ MDOUBLE tmp;
+ tmp= gammp(alpha+1,right*beta) - gammp(alpha+1,left*beta);
+ tmp= (tmp*alpha/beta)*k;
+ return tmp;
+}
diff --git a/libs/phylogeny/gammaUtilities.h b/libs/phylogeny/gammaUtilities.h
new file mode 100644
index 0000000..e82ccd3
--- /dev/null
+++ b/libs/phylogeny/gammaUtilities.h
@@ -0,0 +1,48 @@
+// $Id: gammaUtilities.h 4191 2008-06-12 19:03:36Z cohenofi $
+
+ #ifndef ___GAMMA_UTILITIES
+#define ___GAMMA_UTILITIES
+
+#include "definitions.h"
+#include "numRec.h" //fot the ITMAX
+
+/******************************************************************************
+gamma utilities include calculating ln gamma and integral of gamma.
+used mainly in building the gamma function and creating categories within it
+******************************************************************************/
+
+//gammln(xx): computes the ln of the Gamma function
+//the Gamma function is the integral from 0 to infinity of (t^(xx-1)*e^(-t)) dt.
+MDOUBLE gammln(MDOUBLE xx);
+
+//gammp(a, x): computes the incomplete Gamma function which is:
+// 1/Gamma(a) * (the integral from 0 to x of (t^(a-1)*e^(-t)) dt)
+//gammp can be computed in two different ways: by a series representation (gser(..))
+//or by a continued fraction representation (gcf(..))
+//gammp chooses to function will be used, according to the values of a and x
+MDOUBLE gammp(MDOUBLE a, MDOUBLE x);
+void gser(MDOUBLE *gamser, MDOUBLE a, MDOUBLE x, MDOUBLE *gln);
+void gcf(MDOUBLE *gammcf, MDOUBLE a, MDOUBLE x, MDOUBLE *gln);
+
+MDOUBLE search_for_z_in_dis_with_any_beta(MDOUBLE alpha,MDOUBLE beta, MDOUBLE ahoson);
+MDOUBLE search_for_z_in_dis_with_beta_1(MDOUBLE alpha, MDOUBLE ahoson);
+MDOUBLE the_avarage_r_in_category_between_a_and_b(MDOUBLE a, MDOUBLE b, MDOUBLE alpha, MDOUBLE beta, int k);
+
+//const int ITMAX = 100;
+const MDOUBLE EPS = static_cast<MDOUBLE>(0.0000003);
+const MDOUBLE FPMIN = static_cast<MDOUBLE>(1.0e-30);
+const MDOUBLE ERR_FOR_GAMMA_CALC = static_cast<MDOUBLE>(0.00001);
+const MDOUBLE MINIMUM_ALPHA_PARAM = static_cast<MDOUBLE>(0.05); //was 0.05
+const MDOUBLE MAXIMUM_ALPHA_PARAM = static_cast<MDOUBLE>(5.0);
+const MDOUBLE MINIMUM_BETA_PARAM = static_cast<MDOUBLE>(0.05); //was 0.05
+const MDOUBLE MAXIMUM_BETA_PARAM = static_cast<MDOUBLE>(5.0);
+
+
+
+//gammq(a, x) : computes 1 - the incomplete Gamma function (1-gammp(a,x)) which is:
+//1/Gamma(a) * (the integral from infinite to x of (t^(a-1)*e^(-t)) dt).
+//use for computing Chi-Square probability function (for the LRT):
+//chiSquareProb(df,chiSquare) = gammq(df/2.0,chiSquare/2.0)
+MDOUBLE gammq(MDOUBLE a, MDOUBLE x);
+
+#endif
diff --git a/libs/phylogeny/generalGammaDistribution.cpp b/libs/phylogeny/generalGammaDistribution.cpp
new file mode 100644
index 0000000..661d4de
--- /dev/null
+++ b/libs/phylogeny/generalGammaDistribution.cpp
@@ -0,0 +1,115 @@
+// $Id: generalGammaDistribution.cpp 2768 2007-11-22 12:57:44Z osnatz $
+
+#include "generalGammaDistribution.h"
+#include "gammaUtilities.h"
+#include "errorMsg.h"
+#include "logFile.h"
+#include <cmath>
+
+
+generalGammaDistribution::generalGammaDistribution() :
+_alpha(0.0),
+_beta(0.0),
+_globalRate(1.0)
+{
+ _bonderi.resize(0,0);
+ _rates.resize(0,0);
+ _ratesProb.resize(0,0);
+}
+
//Copy constructor. Note: actual member initialization order follows the
//declaration order in the class, not the order listed here.
generalGammaDistribution::generalGammaDistribution(const generalGammaDistribution& other) :

	_alpha(other._alpha),
	_beta(other._beta),
	_rates(other._rates),
	_ratesProb(other._ratesProb),
	_globalRate(other._globalRate),
	_bonderi(other._bonderi)
	{}
+
+
//Constructs a gamma distribution with shape alpha, rate beta and the given
//number of discrete categories; the global rate defaults to 1.
generalGammaDistribution::generalGammaDistribution(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories) :
	_globalRate(1.0)
{
	setGammaParameters(in_number_of_categories,alpha,beta);
}
+
+void generalGammaDistribution::setAlpha(MDOUBLE in_alpha) {
+ if (in_alpha == _alpha)
+ return;
+ setGammaParameters(categories(), in_alpha, _beta);
+}
+
+void generalGammaDistribution::setBeta(MDOUBLE in_beta) {
+ if (in_beta == _beta)
+ return;
+ setGammaParameters( categories(), _alpha, in_beta);
+}
+
+void generalGammaDistribution::change_number_of_categories(int in_number_of_categories) {
+ if (in_number_of_categories == categories())
+ return;
+ setGammaParameters( in_number_of_categories, _alpha, _beta);
+}
+
+void generalGammaDistribution::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
+ if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories()))
+ return;
+
+
+ if (in_alpha < MINIMUM_ALPHA_PARAM)
+ in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflaw problems
+ if (in_beta < MINIMUM_ALPHA_PARAM)
+ in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems
+
+ _alpha = in_alpha;
+ _beta = in_beta;
+ _rates.clear();
+ _rates.resize(in_number_of_categories);
+ _ratesProb.clear();
+ _ratesProb.resize(in_number_of_categories, 1.0/in_number_of_categories);
+ _bonderi.clear();
+ _bonderi.resize(in_number_of_categories+1);
+ if (in_number_of_categories==1) {
+ _rates[0] = 1.0;
+ return;
+ }
+ if (categories() > 1) {
+ fill_mean();
+ return ;
+ }
+
+}
//Fills _rates: each category's rate is the mean of the gamma distribution
//between the category's two borders (borders are computed by fill_bonderi).
void generalGammaDistribution::fill_mean() {
	fill_bonderi();
	int i;
	//for (i=0; i<=categories(); ++i) cout<<endl<<bonderi[i];
	//LOG(5,<<"\n====== the r categories are =====\n");
	for (i=0; i<categories(); ++i) {
		_rates[i]=the_avarage_r_in_category_between_a_and_b(_bonderi[i], _bonderi[i+1], _alpha, _beta, categories());
		//LOG(5,<<meanG[i]<<endl);
	}
	//LOG(5,<<endl<<alpha<<endl);
	//return 0;
}
+
//Fills _bonderi with the borders of equal-probability categories: border i is
//the point where the gamma CDF equals i/categories().
void generalGammaDistribution::fill_bonderi() {
	int i;
	for (i=1; i<categories(); ++i)
	{
		_bonderi[i]=search_for_z_in_dis_with_any_beta(_alpha, _beta,static_cast<MDOUBLE>(i)/categories());
	}
	_bonderi[0]=0;
	// i == categories() after the loop: the last border stands in for infinity.
	_bonderi[i]=VERYBIG/10000.0;// this is because we multiply _bonderi[i] by alpha or beta, and
	// by this manipulation we avoid overflows...;

	//return 0;
}
+
+
+const MDOUBLE generalGammaDistribution::getCumulativeProb(const MDOUBLE x) const
+{//
+ //since r~gamma(alpha, beta) then beta*r~ gamma(alpha,1)=gammp
+ //here we assume alpha=beta
+ return gammp(_alpha, x*_beta);
+}
diff --git a/libs/phylogeny/generalGammaDistribution.h b/libs/phylogeny/generalGammaDistribution.h
new file mode 100644
index 0000000..be9f80b
--- /dev/null
+++ b/libs/phylogeny/generalGammaDistribution.h
@@ -0,0 +1,61 @@
+// $Id: generalGammaDistribution.h 3044 2007-12-18 15:54:50Z itaymay $
+
+#ifndef ___GENERAL_GAMMA_DIST
+#define ___GENERAL_GAMMA_DIST
+/************************************************************
+This distribution can take several forms depending on its free parameters alpha,beta
+(unlike gammaDist, alpha is not necessarily equal to beta).
+For an extensive explanation of this distribution
+see http://mathworld.wolfram.com/GammaDistribution.html
+************************************************************/
+#include "definitions.h"
+#include "distribution.h"
+
+enum quadratureType {QUANTILE, LAGUERRE};
+
class generalGammaDistribution : public distribution {

public:
	explicit generalGammaDistribution();
	explicit generalGammaDistribution(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories);
	explicit generalGammaDistribution(const generalGammaDistribution& other);
	virtual ~generalGammaDistribution() {}
	virtual distribution* clone() const { return new generalGammaDistribution(*this); }

	// Sets alpha/beta and recomputes the discrete rate categories.
	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	virtual const int categories() const {return _rates.size();}
	virtual const MDOUBLE rates(const int i) const {return _rates[i]*_globalRate;}
	virtual const MDOUBLE ratesProb(const int i) const {return _ratesProb[i];}

	virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;}
	virtual MDOUBLE getGlobalRate()const {return _globalRate;}
	// CDF of the continuous gamma distribution at x.
	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	virtual void setAlpha(MDOUBLE newAlpha);
	virtual MDOUBLE getAlpha() const {return _alpha;}
	virtual void setBeta(MDOUBLE newBeta);
	virtual MDOUBLE getBeta() const {return _beta;}
	virtual void change_number_of_categories(int in_number_of_categories);
	virtual MDOUBLE getBorder(const int i) const {return _bonderi[i];} //return the ith border. Note: _bonderi[0] = 0, _bonderi[categories()] = infinite

	virtual Vdouble getBorders() const {return _bonderi;}
	virtual Vdouble getRates() const {return _rates;}

protected:
	virtual void fill_mean();	// category rate = mean rate between its borders
	virtual void fill_bonderi();	// equal-probability category borders


protected:
	MDOUBLE _alpha;	// gamma shape parameter
	MDOUBLE _beta;	// gamma rate parameter

	vector<MDOUBLE> _rates;	// representative rate of each category
	vector<MDOUBLE> _ratesProb;	// prior probability of each category
	MDOUBLE _globalRate;	// scaling factor applied to all rates
	vector<MDOUBLE> _bonderi; //Note: _bonderi[0] = 0, _bonderi[categories()] = infinite
};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/generalGammaDistributionFixedCategories.cpp b/libs/phylogeny/generalGammaDistributionFixedCategories.cpp
new file mode 100644
index 0000000..b292959
--- /dev/null
+++ b/libs/phylogeny/generalGammaDistributionFixedCategories.cpp
@@ -0,0 +1,360 @@
+#include "generalGammaDistributionFixedCategories.h"
+#include "errorMsg.h"
+#include "gammaUtilities.h"
+
+
//Builds the distribution from user-supplied category boundaries; the rates
//and their probabilities are derived from the boundaries and (alpha, beta).
generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta) :
generalGammaDistribution()
{
	_alpha = alpha;
	_beta = beta;
	setFixedCategories(fixedBoundaries);
}

//Builds the distribution from user-supplied rates AND boundaries; only the
//category probabilities are computed. Requires rates.size()+1 == boundaries.size().
generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta) :
generalGammaDistribution()
{
	if ((fixedRates.size() + 1) != boundaries.size())
		errorMsg::reportError("error in generalGammaDistributionFixedCategories constructor");
	_alpha = alpha;
	_beta = beta;
	_rates = fixedRates;
	_bonderi = boundaries;
	computeRatesProbs();
}


//Builds the distribution using built-in default boundaries for catNum
//categories (only certain counts are supported; see setDefaultBoundaries).
generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum)
: generalGammaDistribution()
{
	_alpha = alpha;
	_beta = beta;
	setDefaultBoundaries(catNum);
}
+
+
+
//Copy constructor: all state lives in the generalGammaDistribution base.
generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(const generalGammaDistributionFixedCategories& other)
: generalGammaDistribution(other)
{}

//Changing the category count re-installs the default boundaries for the new count.
void generalGammaDistributionFixedCategories::change_number_of_categories(int in_number_of_categories)
{
	setDefaultBoundaries(in_number_of_categories);
}
+
+
+void generalGammaDistributionFixedCategories::setFixedCategories(const Vdouble& fixedBoundaries){
+
+ if (fixedBoundaries.size()<2)
+ errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : at least two boundaries are required");
+ if (fixedBoundaries[0] > 0.0)
+ errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : first boundary should be zero");
+
+ _bonderi = fixedBoundaries;
+ if (_bonderi[_bonderi.size()] > VERYBIG/10000.0)
+ _bonderi[_bonderi.size()] = VERYBIG/10000.0; // to avoid overflow
+
+ setFixedCategories();
+}
+
//Given the boundaries already stored in _bonderi, computes the category rates
//(fill_mean) and their probabilities (computeRatesProbs).
void generalGammaDistributionFixedCategories::setFixedCategories() {
	fill_mean();
	computeRatesProbs();
}
+
//Computes the representative rate of each category from the fixed boundaries:
//the midpoint of the category's two borders, except the last (open-ended)
//category, which is extrapolated beyond its lower border.
void generalGammaDistributionFixedCategories::fill_mean()
{
	int numOfCategories = _bonderi.size()-1;
	if (numOfCategories == 0)
		errorMsg::reportError("Error in gammaDistributionFixedCategories::fill_mean, fixed boundaries must be first initialized");
	_rates.clear();
	_rates.resize(numOfCategories,0.0);
	int cat;
	for (cat=0; cat<numOfCategories-1; ++cat) {
		_rates[cat] = (_bonderi[cat]+_bonderi[cat+1])/2.0;
	}
	if (numOfCategories>1) {
		//the rate of the last category cannot be the middle of its boundaries, since the upper bound is infinite
		MDOUBLE increment = _bonderi[cat] - _rates[cat-1];
		_rates[cat] = _bonderi[cat] + 2*increment;
	} else {
		_rates[0] = 1;	// a single category gets the overall mean rate, 1
	}
}
+
+
// this function is here to override the inherited function
// note that the rates themselves and the boundaries do not change.
// the number of categories cannot be changed, since fixed categories must be given before
void generalGammaDistributionFixedCategories::setGammaParameters (int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
	if (in_number_of_categories==1) {
		_rates[0] = 1.0;	// single category: uniform rate of 1
		return;
	}
	if (in_number_of_categories != categories())
		errorMsg::reportError("generalGammaDistributionFixedCategories::setGammaParameters: the number of categories cannot be changed, first call setFixedCategories");
	if ((in_alpha == _alpha) && (in_beta == _beta))
		return;	// nothing changed

	if (in_alpha < MINIMUM_ALPHA_PARAM)
		in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflow problems
	if (in_beta < MINIMUM_ALPHA_PARAM)
		in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflow problems

	_alpha = in_alpha;
	_beta = in_beta;
	computeRatesProbs();	// only the category probabilities change
}
+
//Computes the probability of each category as the gamma probability mass
//between its two borders; the last category takes the remaining mass so that
//the probabilities sum exactly to 1.
void generalGammaDistributionFixedCategories::computeRatesProbs(){
	MDOUBLE totalProb = 0.0;
	MDOUBLE catProb = 0.0;
	MDOUBLE lowerBoundaryProb = 0.0;
	MDOUBLE upperBoundaryProb = 0.0;
	int cat;
	_ratesProb.clear();
	_ratesProb.resize(categories());
	for (cat = 0; cat < categories()-1; ++cat) {
		upperBoundaryProb = getCumulativeProb(_bonderi[cat+1]);
		catProb = upperBoundaryProb - lowerBoundaryProb;
		_ratesProb[cat] = catProb;
		totalProb += catProb;
		lowerBoundaryProb = upperBoundaryProb;	// reuse CDF value for the next bin
	}
	_ratesProb[cat] = 1.0 - totalProb;	// remainder goes to the last category
}
+
//Installs a hard-coded set of category boundaries for the supported category
//counts (1, 2, 3, 4, 5, 10, 16) and recomputes rates and probabilities from
//them; any other count is an error.
void generalGammaDistributionFixedCategories::setDefaultBoundaries(int catNum)
{
	_bonderi.clear();
	_bonderi.resize(catNum+1,0.0);
	_bonderi[0] = 0;
	_bonderi[catNum] = VERYBIG/10000.0; //to avoid overflow
	switch (catNum)
	{
	case 1:
		break;
	case 2:
		_bonderi[1] = 1.0;
		break;
	case 3:
		_bonderi[1] = 0.5;
		_bonderi[2] = 1.0;
		break;
	case 4:
		_bonderi[1] = 0.5;
		_bonderi[2] = 1.0;
		_bonderi[3] = 1.5;
		break;
	case 5:
		_bonderi[1] = 0.4;
		_bonderi[2] = 0.8;
		_bonderi[3] = 1.2;
		_bonderi[4] = 1.6;
		break;
	case 10:
		_bonderi[1] = 0.01;
		_bonderi[2] = 0.1;
		_bonderi[3] = 0.25;
		_bonderi[4] = 0.55;
		_bonderi[5] = 0.95;
		_bonderi[6] = 1.5;
		_bonderi[7] = 3.0;
		_bonderi[8] = 5.0;
		_bonderi[9] = 7.0;
		break;
	case 16:
		_bonderi[1] = 0.001;
		_bonderi[2] = 0.01;
		_bonderi[3] = 0.1;
		_bonderi[4] = 0.15;
		_bonderi[5] = 0.35;
		_bonderi[6] = 0.55;
		_bonderi[7] = 0.75;
		_bonderi[8] = 0.95;
		_bonderi[9] = 1.5;
		_bonderi[10] = 3.0;
		_bonderi[11] = 4.5;
		_bonderi[12] = 6.0;
		_bonderi[13] = 7.5;
		_bonderi[14] = 9.0;
		_bonderi[15] = 12.0;
		break;
	default:
		errorMsg::reportError("error in generalGammaDistributionFixedCategories::setDefaultBoundaries");
	}

	setFixedCategories();
}
+
+//void generalGammaDistributionFixedCategories::getDefaultRates(int catNum, Vdouble& fixedRates)
+//{
+// fixedRates.resize(catNum, 0.0);
+// switch (catNum)
+// {
+// case 1:
+// fixedRates[0] = 1.0;
+// break;
+// case 2:
+// fixedRates[0] = 0.5;
+// fixedRates[1] = 1.5;
+// break;
+// case 3:
+// fixedRates[0] = 0.05;
+// fixedRates[1] = 0.5;
+// fixedRates[2] = 1.5;
+// break;
+// case 5:
+// fixedRates[0] = 0.05;
+// fixedRates[1] = 0.3;
+// fixedRates[2] = 0.6;
+// fixedRates[3] = 1.5;
+// fixedRates[4] = 5.0;
+// break;
+// case 8:
+// fixedRates[0] = 0.05;
+// fixedRates[1] = 0.15;
+// fixedRates[2] = 0.35;
+// fixedRates[3] = 0.6;
+// fixedRates[4] = 0.85;
+// fixedRates[5] = 1.5;
+// fixedRates[6] = 3.0;
+// fixedRates[7] = 5.0;
+// break;
+// case 12:
+// fixedRates[0] = 0.05;
+// fixedRates[1] = 0.15;
+// fixedRates[2] = 0.35;
+// fixedRates[3] = 0.55;
+// fixedRates[4] = 0.75;
+// fixedRates[5] = 0.95;
+// fixedRates[6] = 1.5;
+// fixedRates[7] = 3.0;
+// fixedRates[8] = 4.5;
+// fixedRates[9] = 6.0;
+// fixedRates[10] = 7.5;
+// fixedRates[11] = 9.0;
+// break;
+// case 16:
+// fixedRates[0] = 0.00000001;
+// fixedRates[1] = 0.001;
+// fixedRates[2] = 0.01;
+// fixedRates[3] = 0.1;
+// fixedRates[4] = 0.15;
+// fixedRates[5] = 0.35;
+// fixedRates[6] = 0.55;
+// fixedRates[7] = 0.75;
+// fixedRates[8] = 0.95;
+// fixedRates[9] = 1.5;
+// fixedRates[10] = 3.0;
+// fixedRates[11] = 4.5;
+// fixedRates[12] = 6.0;
+// fixedRates[13] = 7.5;
+// fixedRates[14] = 9.0;
+// fixedRates[15] = 12.0;
+// break;
+// case 24:
+// fixedRates[0] = 0.000000000000001;
+// fixedRates[1] = 1;
+// fixedRates[2] = 2;
+// fixedRates[3] = 3;
+// fixedRates[4] = 4;
+// fixedRates[5] = 5;
+// fixedRates[6] = 6;
+// fixedRates[7] = 7;
+// fixedRates[8] = 8;
+// fixedRates[9] = 9;
+// fixedRates[10] = 10;
+// fixedRates[11] = 11;
+// fixedRates[12] = 12;
+// fixedRates[13] = 13;
+// fixedRates[14] = 14;
+// fixedRates[15] = 15;
+// fixedRates[16] = 16;
+// fixedRates[17] = 17;
+// fixedRates[18] = 18;
+// fixedRates[19] = 19;
+// fixedRates[20] = 20;
+// fixedRates[21] = 21;
+// fixedRates[22] = 22;
+// fixedRates[23] = 23;
+// break;
+// case 32:
+// fixedRates[0] = 0.00000001;
+// fixedRates[1] = 0.0000001;
+// fixedRates[2] = 0.000001;
+// fixedRates[3] = 0.00001;
+// fixedRates[4] = 0.0001;
+// fixedRates[5] = 0.001;
+// fixedRates[6] = 0.01;
+// fixedRates[7] = 0.1;
+// fixedRates[8] = 0.15;
+// fixedRates[9] = 0.2;
+// fixedRates[10] = 0.25;
+// fixedRates[11] = 0.3;
+// fixedRates[12] = 0.35;
+// fixedRates[13] = 0.4;
+// fixedRates[14] = 0.45;
+// fixedRates[15] = 0.5;
+// fixedRates[16] = 0.6;
+// fixedRates[17] = 0.7;
+// fixedRates[18] = 0.8;
+// fixedRates[19] = 0.9;
+// fixedRates[20] = 1.0;
+// fixedRates[21] = 1.2;
+// fixedRates[22] = 1.4;
+// fixedRates[23] = 1.6;
+// fixedRates[24] = 1.8;
+// fixedRates[25] = 2.0;
+// fixedRates[26] = 2.5;
+// fixedRates[27] = 3.0;
+// fixedRates[28] = 4.0;
+// fixedRates[29] = 5.0;
+// fixedRates[30] = 7.5;
+// fixedRates[31] = 15.0;
+// break;
+// case 36:
+// fixedRates[0] = 0.00000001;
+// fixedRates[1] = 0.0000001;
+// fixedRates[2] = 0.000001;
+// fixedRates[3] = 0.00001;
+// fixedRates[4] = 0.0001;
+// fixedRates[5] = 0.001;
+// fixedRates[6] = 0.01;
+// fixedRates[7] = 0.1;
+// fixedRates[8] = 0.15;
+// fixedRates[9] = 0.2;
+// fixedRates[10] = 0.25;
+// fixedRates[11] = 0.3;
+// fixedRates[12] = 0.35;
+// fixedRates[13] = 0.4;
+// fixedRates[14] = 0.45;
+// fixedRates[15] = 0.5;
+// fixedRates[16] = 0.6;
+// fixedRates[17] = 0.7;
+// fixedRates[18] = 0.8;
+// fixedRates[19] = 0.9;
+// fixedRates[20] = 1.0;
+// fixedRates[21] = 1.2;
+// fixedRates[22] = 1.4;
+// fixedRates[23] = 1.6;
+// fixedRates[24] = 1.8;
+// fixedRates[25] = 2.0;
+// fixedRates[26] = 2.5;
+// fixedRates[27] = 3.0;
+// fixedRates[28] = 4.0;
+// fixedRates[29] = 5.0;
+// fixedRates[30] = 7.5;
+// fixedRates[31] = 10.0;
+// fixedRates[32] = 12.5;
+// fixedRates[33] = 15.0;
+// fixedRates[34] = 20.0;
+// fixedRates[35] = 30.0;
+// break;
+//
+// default:
+// errorMsg::reportError("error in generalGammaDistributionFixedCategories::getFixedCategories");
+// }
+//
+//}
diff --git a/libs/phylogeny/generalGammaDistributionFixedCategories.h b/libs/phylogeny/generalGammaDistributionFixedCategories.h
new file mode 100644
index 0000000..71d90be
--- /dev/null
+++ b/libs/phylogeny/generalGammaDistributionFixedCategories.h
@@ -0,0 +1,36 @@
+#ifndef ___GENERAL_GAMMA_DIST_LAGUERRE_FIXED_CATEGORIES
+#define ___GENERAL_GAMMA_DIST_LAGUERRE_FIXED_CATEGORIES
+/************************************************************
+This class differs from the regular generalGammaDistribution in that
+the rate categories are fixed according to the user's decision.
+Thus, only the probability of each category changes for specific alpha and beta values;
+the rate categories themselves are constant.
+************************************************************/
+#include "definitions.h"
+#include "generalGammaDistribution.h"
+#include "errorMsg.h"
class generalGammaDistributionFixedCategories : public generalGammaDistribution {

public:
	// Construct from explicit category boundaries; rates are derived from them.
	explicit generalGammaDistributionFixedCategories(const Vdouble& fixedBoundaries, MDOUBLE alpha, MDOUBLE beta);
	// Construct from explicit rates and boundaries (rates.size()+1 == boundaries.size()).
	explicit generalGammaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta);
	// Construct with built-in default boundaries for the given category count.
	explicit generalGammaDistributionFixedCategories(MDOUBLE alpha, MDOUBLE beta, int catNum);
	explicit generalGammaDistributionFixedCategories(const generalGammaDistributionFixedCategories& other);
	virtual ~generalGammaDistributionFixedCategories() {}
	virtual distribution* clone() const { return new generalGammaDistributionFixedCategories(*this); }
	virtual void change_number_of_categories(int in_number_of_categories);
	// Only the category probabilities are recomputed; rates/boundaries stay fixed.
	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);
	virtual void setFixedCategories(const Vdouble& fixedBoundaries);

protected:
	virtual void setDefaultBoundaries(int catNum);
	virtual void setFixedCategories();
	virtual void fill_mean();
	virtual void computeRatesProbs();

};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/generalGammaDistributionLaguerre.cpp b/libs/phylogeny/generalGammaDistributionLaguerre.cpp
new file mode 100644
index 0000000..e804e86
--- /dev/null
+++ b/libs/phylogeny/generalGammaDistributionLaguerre.cpp
@@ -0,0 +1,113 @@
+// $Id: generalGammaDistributionLaguerre.cpp 2865 2007-11-27 11:00:26Z itaymay $
+#include "generalGammaDistributionLaguerre.h"
+#include "gammaUtilities.h"
+#include "errorMsg.h"
+#include "GLaguer.h"
+#include <cmath>
+
//Default constructor: an empty distribution; parameters are set later via
//setGammaParameters().
generalGammaDistributionLaguerre::generalGammaDistributionLaguerre()
: generalGammaDistribution()
{
}

//Copy constructor: all state lives in the generalGammaDistribution base.
generalGammaDistributionLaguerre::generalGammaDistributionLaguerre(const generalGammaDistributionLaguerre& other) :
	generalGammaDistribution(other)
{
}

//Constructs the distribution and fills the rate categories via Gauss-Laguerre
//quadrature (see setGammaParameters/fillRatesAndProbs).
generalGammaDistributionLaguerre::generalGammaDistributionLaguerre(MDOUBLE alpha,MDOUBLE beta,int in_number_of_categories)
: generalGammaDistribution()
{
	//The Laguerre function returns NULL values for very large number of categories (for example 700 categories with alpha = 1.5 and beta = 1.3)
//	if (in_number_of_categories > 200)
//		errorMsg::reportError("generalGammaDistributionLaguerre cannot work with more than 200 categories");
	_globalRate=1.0;
	setGammaParameters(in_number_of_categories,alpha,beta);
}

generalGammaDistributionLaguerre::~generalGammaDistributionLaguerre()
{
}
+
+
+void generalGammaDistributionLaguerre::setGammaParameters(int in_number_of_categories, MDOUBLE in_alpha, MDOUBLE in_beta) {
+ if ((in_alpha == _alpha) && (in_beta == _beta) && (in_number_of_categories == categories()))
+ return;
+
+
+ if (in_alpha < MINIMUM_ALPHA_PARAM)
+ in_alpha = MINIMUM_ALPHA_PARAM;// when alpha is very small there are underflaw problems
+ if (in_beta < MINIMUM_ALPHA_PARAM)
+ in_beta = MINIMUM_ALPHA_PARAM;// when beta is very small there are underflaw problems
+
+ _alpha = in_alpha;
+ _beta = in_beta;
+ _rates.clear();
+ //_rates.resize(in_number_of_categories);
+ _rates.resize(0);
+ _ratesProb.clear();
+ //_ratesProb.resize(in_number_of_categories);
+ _ratesProb.resize(0);
+ if (in_number_of_categories==1) {
+ _rates.push_back(1.0);
+ _ratesProb.push_back(1.0);
+ return;
+ }
+ if (in_number_of_categories > 1) {
+ fillRatesAndProbs(in_number_of_categories);
+ return ;
+ }
+
+}
+
+
//Laguerre abscissas are quadrature points, not quantile bins, so category
//borders are undefined for this distribution; always reports an error.
MDOUBLE generalGammaDistributionLaguerre::getBorder(const int i) const
{
	errorMsg::reportError("With the Laguerre method the categories do not have a well defined border");
	return -1;	// presumably unreachable if reportError terminates -- TODO confirm
}
+
+
+void generalGammaDistributionLaguerre::fillRatesAndProbs(int catNum)
+{
+ Vdouble weights, abscissas;
+ GLaguer lg(catNum, _alpha - 1, abscissas, weights);
+ MDOUBLE sumP = 0.0;
+
+ MDOUBLE gamAlpha = exp(gammln(_alpha));
+ for (int i = 0; i < catNum; ++i)
+ {
+ //if (sumP > 0.99)
+ //{
+ // _ratesProb.push_back(1-sumP);
+ // _rates.push_back(abscissas[i] / _beta);
+ // break;
+ //}
+
+ _ratesProb.push_back(weights[i] / gamAlpha);
+ _rates.push_back(abscissas[i] / _beta);
+ sumP += _ratesProb[i];
+ //cerr<<i<<" rate = "<<_rates[i]<<" Pr = "<<_ratesProb[i]<<" sum = "<<sumP<<endl;
+ }
+ for (int j = 0; j < _ratesProb.size(); ++j)
+ {
+ _ratesProb[j] /= sumP;
+ }
+}
+
+
+/*
+void generalGammaDistributionLaguerre::fillRatesAndProbs(int catNum)
+{
+ Vdouble weights, abscissas;
+ GLaguer lg(categories(), _alpha - 1, abscissas, weights);
+
+ MDOUBLE gamAlpha = exp(gammln(_alpha));
+ for (int i = 0; i < categories(); ++i)
+ {
+ _ratesProb[i] = weights[i] / gamAlpha;
+ _rates[i] = abscissas[i] / _beta;
+ }
+}
+*/
+
diff --git a/libs/phylogeny/generalGammaDistributionLaguerre.h b/libs/phylogeny/generalGammaDistributionLaguerre.h
new file mode 100644
index 0000000..f39307c
--- /dev/null
+++ b/libs/phylogeny/generalGammaDistributionLaguerre.h
@@ -0,0 +1,47 @@
+// $Id: generalGammaDistributionLaguerre.h 2865 2007-11-27 11:00:26Z itaymay $
+// version 1.00
+// last modified Sep 2004
+
+#ifndef ___GENERAL_GAMMA_DIST_LAGUERRE
+#define ___GENERAL_GAMMA_DIST_LAGUERRE
+/************************************************************
+This class differs from the regular generalGammaDistribution in that
+the rate categories and their probabilities are not constructed using Yang's quantile method.
+Instead the general Guass-Laguerre quadrature method is used.
+For example, if we want to compute the likelihood over the rate distribution,
+then we need to solve the integral
+
+I[0_to_infinity]{P(data|r)*P(r)}
+ = I[0_to_infinity]{P(data|r)*b^a / Gamma(a)* exp(-b*r) * r^(a-1)dr} //a = alpha, b = beta
+ = b^(a)/Gamma(a) * I[0_to_infinity]{P(data|m/b) * exp(-m) * (m/b)^(a')/bdm} ///substitute m=b*r, a'=a-1
+ = 1/Gamma(a) * I[0_to_infinity]{P(data|m/b) * exp(-m) * m^a' dm} //
+Now - we can use the Guass-Laguerre formula, to get an approximation for the Integral.
+The Xj and Wj are the absicassas and weights of the Laguerre polynoms
+ = 1/Gamma(a) * sum[j = 0_to_catNum]{P(data|Xj/b) * Wj}
+
+The rates are the Xj/b and their priors is Wj/Gamma(a)
+The quadrature method is explained in Numerical Recipes (Press et al.; chapter 4.5)
+and is also mentioned in Felsenstein 2001 (JME 53: 447-455).
+************************************************************/
+#include "definitions.h"
+#include "generalGammaDistribution.h"
class generalGammaDistributionLaguerre : public generalGammaDistribution {

public:
	explicit generalGammaDistributionLaguerre();
	explicit generalGammaDistributionLaguerre(MDOUBLE alpha, MDOUBLE beta, int in_number_of_categories);
	explicit generalGammaDistributionLaguerre(const generalGammaDistributionLaguerre& other);
	virtual ~generalGammaDistributionLaguerre();
	// Recomputes rates/probabilities via Gauss-Laguerre quadrature.
	virtual void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta);

	virtual distribution* clone() const { return new generalGammaDistributionLaguerre(*this); }
	// Quadrature categories have no borders; this always reports an error.
	virtual MDOUBLE getBorder(const int i) const;

protected:
	virtual void fillRatesAndProbs(int catNum);
};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/generalGammaDistributionPlusInvariant.cpp b/libs/phylogeny/generalGammaDistributionPlusInvariant.cpp
new file mode 100644
index 0000000..de3842a
--- /dev/null
+++ b/libs/phylogeny/generalGammaDistributionPlusInvariant.cpp
@@ -0,0 +1,13 @@
+#include "generalGammaDistributionPlusInvariant.h"
+
+
+
+
+//#define RATE_INVARIANT 1e-8 //1e-10
+
+
+
+
+
+
+
diff --git a/libs/phylogeny/generalGammaDistributionPlusInvariant.h b/libs/phylogeny/generalGammaDistributionPlusInvariant.h
new file mode 100644
index 0000000..ab0c3e7
--- /dev/null
+++ b/libs/phylogeny/generalGammaDistributionPlusInvariant.h
@@ -0,0 +1,51 @@
+#ifndef __GENERAL_GAMMA_DIST_PLUSINV
+#define __GENERAL_GAMMA_DIST_PLUSINV
+/************************************************************
+This class describes a combination of a predefined distribution,
+with an additional invariant category of probability _Pinv
+This category is always the last rate category (i.e., rate(categories()) == 0)
+************************************************************/
+#include "definitions.h"
+#include "distributionPlusInvariant.h"
+#include "distribution.h"
+#include "generalGammaDistribution.h"
+#include "errorMsg.h"
+#include "gammaUtilities.h"
+#include "logFile.h"
+#include <cmath>
+
+
+
+class generalGammaDistributionPlusInvariant : public distributionPlusInvariant {
+public:
+ explicit generalGammaDistributionPlusInvariant(distribution* pDist, const MDOUBLE pInv, const MDOUBLE globalRate=1, MDOUBLE rateInvariantVal=1e-10): distributionPlusInvariant(pDist,pInv,globalRate,rateInvariantVal){}
+ explicit generalGammaDistributionPlusInvariant();
+ generalGammaDistributionPlusInvariant(const generalGammaDistributionPlusInvariant& other) {(*this) = other;}
+ //virtual generalGammaDistributionPlusInvariant& operator=(const generalGammaDistributionPlusInvariant& other);
+ generalGammaDistributionPlusInvariant* clone() const {return new generalGammaDistributionPlusInvariant(*this);}
+ virtual ~generalGammaDistributionPlusInvariant(){}
+
+// distribution* getBaseDistribution(){return _pBaseDist;}
+////get/set the parameters of the mixture
+// const int categories() const;
+// void setGlobalRate(const MDOUBLE r) {_globalRate = r;}
+// MDOUBLE getGlobalRate() const {return _globalRate;}
+// virtual void setInvProb(const MDOUBLE p) {_Pinv = p;}
+// const MDOUBLE getInvProb() const {return _Pinv;}
+//
+////get distribution statistics
+// virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
+// virtual const MDOUBLE rates(const int category) const;
+// virtual const MDOUBLE ratesProb(const int i) const;
+
+// get generalGammaDistribution params
+ virtual void setAlpha(MDOUBLE newAlpha) {return static_cast<generalGammaDistribution*>(_pBaseDist)->setAlpha(newAlpha);};
+ virtual MDOUBLE getAlpha() const {return static_cast<generalGammaDistribution*>(_pBaseDist)->getAlpha();}
+ virtual void setBeta(MDOUBLE newBeta) {return static_cast<generalGammaDistribution*>(_pBaseDist)->setBeta(newBeta);};
+ virtual MDOUBLE getBeta() const {return static_cast<generalGammaDistribution*>(_pBaseDist)->getBeta();}
+//protected:
+ //MDOUBLE _globalRate;
+ //MDOUBLE _Pinv;
+ //distribution* _pBaseDist;
+};
+#endif
diff --git a/libs/phylogeny/geneticCodeHolder.cpp b/libs/phylogeny/geneticCodeHolder.cpp
new file mode 100644
index 0000000..ee6ca0e
--- /dev/null
+++ b/libs/phylogeny/geneticCodeHolder.cpp
@@ -0,0 +1,49 @@
+// $Id: geneticCodeHolder.cpp 962 2006-11-07 15:13:34Z privmane $
+
+
+#include "geneticCodeHolder.h"
+
+const geneticCodeString geneticCodeHolder::nuclearStandard(
+#include "replacementMatrixSource/nuclearStandard.code"
+);
+
+const geneticCodeString geneticCodeHolder::nuclearEuplotid(
+#include "replacementMatrixSource/nuclearEuplotid.code"
+);
+
+const geneticCodeString geneticCodeHolder::nuclearCiliate(
+#include "replacementMatrixSource/nuclearCiliate.code"
+);
+
+const geneticCodeString geneticCodeHolder::nuclearBlepharisma(
+#include "replacementMatrixSource/nuclearBlepharisma.code"
+);
+
+const geneticCodeString geneticCodeHolder::mitochondriaYeast(
+#include "replacementMatrixSource/mitochondriaYeast.code"
+);
+
+const geneticCodeString geneticCodeHolder::mitochondriaVertebrate(
+#include "replacementMatrixSource/mitochondriaVertebrate.code"
+);
+
+const geneticCodeString geneticCodeHolder::mitochondriaProtozoan(
+#include "replacementMatrixSource/mitochondriaProtozoan.code"
+);
+
+const geneticCodeString geneticCodeHolder::mitochondriaInvertebrate(
+#include "replacementMatrixSource/mitochondriaInvertebrate.code"
+);
+
+const geneticCodeString geneticCodeHolder::mitochondriaFlatworm(
+#include "replacementMatrixSource/mitochondriaFlatworm.code"
+);
+
+const geneticCodeString geneticCodeHolder::mitochondriaEchinoderm(
+#include "replacementMatrixSource/mitochondriaEchinoderm.code"
+);
+
+const geneticCodeString geneticCodeHolder::mitochondriaAscidian(
+#include "replacementMatrixSource/mitochondriaAscidian.code"
+);
+
diff --git a/libs/phylogeny/geneticCodeHolder.h b/libs/phylogeny/geneticCodeHolder.h
new file mode 100644
index 0000000..0de8a4d
--- /dev/null
+++ b/libs/phylogeny/geneticCodeHolder.h
@@ -0,0 +1,33 @@
+// $Id: geneticCodeHolder.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___GENMATRIXHOLDER
+#define ___GENMATRIXHOLDER
+
+#include <string>
+using namespace std;
+
+// THIS CONSTRUCT IS USED TO KEEP A STRING THAT IS A GENETIC CODE TABLE
+// THE geneticCodeString IS TO BE USED WHENEVER WE USE ONE OF THE BUILT-IN GENETIC CODES.
+
+class geneticCodeString {
+public:
+ const string Val;
+ explicit geneticCodeString(const char * str): Val(str){};
+};
+
+class geneticCodeHolder {
+public:
+ static const geneticCodeString nuclearStandard;
+ static const geneticCodeString nuclearEuplotid;
+ static const geneticCodeString nuclearCiliate;
+ static const geneticCodeString nuclearBlepharisma;
+ static const geneticCodeString mitochondriaYeast;
+ static const geneticCodeString mitochondriaVertebrate;
+ static const geneticCodeString mitochondriaProtozoan;
+ static const geneticCodeString mitochondriaInvertebrate;
+ static const geneticCodeString mitochondriaFlatworm;
+ static const geneticCodeString mitochondriaEchinoderm;
+ static const geneticCodeString mitochondriaAscidian;
+};
+
+#endif // ___GENMATRIXHOLDER
diff --git a/libs/phylogeny/getRandomWeights.cpp b/libs/phylogeny/getRandomWeights.cpp
new file mode 100644
index 0000000..93eaef3
--- /dev/null
+++ b/libs/phylogeny/getRandomWeights.cpp
@@ -0,0 +1,53 @@
+// $Id: getRandomWeights.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "getRandomWeights.h"
+#include "talRandom.h"
+
+
+
+void swapRand(Vdouble& weights) {
+ int j;
+ int i = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size());
+ do {
+ j = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size());
+ } while ( weights[j] <= 0 );
+
+ weights[i]++;
+ weights[j]--;
+}
+
+void getRandomWeights::randomWeights(Vdouble& weights,
+ const MDOUBLE expectedNumberOfSwapsPerPosition) {
+ // note that some positions will change more than once, and some won't.
+ // thus the second argument is an average of sites swapped
+ int i;
+ const double DefaultWeight = 1;
+ for (i=0; i< weights.size(); ++i) weights[i] = DefaultWeight;
+
+ for ( i = 0 ; i < expectedNumberOfSwapsPerPosition*weights.size() ; ++i ) {
+ swapRand(weights);
+ }
+}
+
+void getRandomWeights::standardBPWeights(Vdouble& weights) {
+ int i;
+ for (i=0; i< weights.size(); ++i) weights[i] = 0.0;
+ for (i=0; i< weights.size(); ++i) {
+ int k = talRandom::giveIntRandomNumberBetweenZeroAndEntry(weights.size());
+ weights[k]++;
+ }
+}
+
+#define MIN_WEIGHT (0.00001)
+void getRandomWeights::randomWeightsGamma(Vdouble& weights,
+ const MDOUBLE temperature) {
+ int i;
+ const double oneOverT = 1.0/temperature;
+ for (i=0; i< weights.size(); ++i) {
+ weights[i] = talRandom::SampleGamma(oneOverT,oneOverT);
+ if (weights[i]<MIN_WEIGHT) {
+ weights[i] = MIN_WEIGHT;
+ }
+ }
+}
+
diff --git a/libs/phylogeny/getRandomWeights.h b/libs/phylogeny/getRandomWeights.h
new file mode 100644
index 0000000..a9c420b
--- /dev/null
+++ b/libs/phylogeny/getRandomWeights.h
@@ -0,0 +1,31 @@
+// $Id: getRandomWeights.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef __GET_RANDOM_WEIGHTS
+#define __GET_RANDOM_WEIGHTS
+
+#include "definitions.h"
+
+
+class getRandomWeights {
+public:
+ // this function starts with a vector of weights like that (1,1,1,1,1,1,...1)
+ // it then takes two positions at random,
+ // adds 1 to the first, and subtracts 1 from the second.
+ // if it cannot subtract 1 from the second, it draws a new "second"
+ static void randomWeights(Vdouble& weights,
+ const MDOUBLE expectedNumberOfSwapsPerPosition);
+
+ // a position is chosen randomly and the weight of this position is
+ // sampled from a gamma distribution with parameters alpha = 1/temperature
+ // and beta = 1/temperature.
+ static void randomWeightsGamma(Vdouble& weights,
+ const MDOUBLE temperature);
+
+ // this function starts with a vector of weights like that (0,0,0,...,0)
+ // a position is chosen randomly and the weight of this position
+ // is increased by 1. This process is repeated weights.size() times.
+ static void standardBPWeights(Vdouble& weights);
+};
+
+#endif
+
diff --git a/libs/phylogeny/givenRatesMLDistance.cpp b/libs/phylogeny/givenRatesMLDistance.cpp
new file mode 100644
index 0000000..95b84c4
--- /dev/null
+++ b/libs/phylogeny/givenRatesMLDistance.cpp
@@ -0,0 +1,139 @@
+// $Id: givenRatesMLDistance.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "givenRatesMLDistance.h"
+#include "numRec.h"
+
+class C_eval_likelihoodOfDistanceGivenRates{
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const Vdouble& _rates;
+ const Vdouble* _weights;
+
+public:
+ C_eval_likelihoodOfDistanceGivenRates(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const Vdouble& rates,
+ const Vdouble * weights)
+ : _sp(sp),_s1(s1),_s2(s2),_rates(rates),_weights(weights)
+ {};
+
+ MDOUBLE operator() (MDOUBLE dist) const {
+ MDOUBLE sumL=0.0; // sum of log likelihoods
+ MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
+ for (int pos=0; pos < _s1.seqLen(); ++pos){
+ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
+ posLikelihood = 0.0;
+ if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
+ // this is the more complicated case, where _s1 = ?, _s2 = specific
+ posLikelihood = _sp.freq(_s2[pos]);
+ } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
+ posLikelihood = _sp.freq(_s1[pos]);
+ } else {
+ MDOUBLE rate = _rates[pos];
+ MDOUBLE pij= 0.0;
+ if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
+ // the simple case, where AA i is changing to AA j
+ pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
+ posLikelihood += pij * _sp.freq(_s1[pos]);
+ } else {// this is the most complicated case, when you have
+ // combinations of letters, for example B in one
+ // sequence and ? in the other.
+ for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
+ for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
+ if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
+ (_s2.getAlphabet()->relations(_s2[pos],iS2))) {
+ posLikelihood += _sp.freq(iS1)*_sp.Pij_t(iS1,iS2,dist*rate);
+ }
+ }
+ }
+ }
+ }
+ assert(posLikelihood>0.0);
+ sumL += log(posLikelihood)*(_weights ? (*_weights)[pos]:1.0);
+ }
+ return -sumL;
+ };
+};
+
+class C_eval_likelihoodOfDistanceGivenRates_d{ // derivative.
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const Vdouble& _rates;
+ const Vdouble* _weights;
+
+public:
+ C_eval_likelihoodOfDistanceGivenRates_d(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const Vdouble& rates,
+ const Vdouble * weights)
+ : _sp(sp),_s1(s1),_s2(s2),_rates(rates),_weights(weights)
+ {};
+
+ MDOUBLE operator() (MDOUBLE dist) const {
+ MDOUBLE sumL=0.0; // sum of log likelihoods
+ MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
+ MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position
+ for (int pos=0; pos < _s1.seqLen(); ++pos){
+ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
+ posLikelihood = 0.0;
+ posLikelihood_d = 0.0;
+ if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
+ // this is the more complicated case, where _s1 = ?, _s2 = specific
+ posLikelihood = _sp.freq(_s2[pos]);
+ posLikelihood_d =0.0;
+ } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
+ posLikelihood = _sp.freq(_s1[pos]);
+ posLikelihood_d =0.0;
+ } else {
+ MDOUBLE rate = _rates[pos];
+ MDOUBLE pij= 0.0;
+ MDOUBLE dpij=0.0;
+ if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
+ // the simple case, where AA i is changing to AA j
+ pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
+ dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate;
+ MDOUBLE tmp = _sp.freq(_s1[pos]);
+ posLikelihood += pij *tmp;
+ posLikelihood_d += dpij*tmp;
+ } else {// this is the most complicated case, when you have
+ // combinations of letters, for example B in one
+ // sequence and ? in the other.
+ for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
+ for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
+ if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
+ (_s2.getAlphabet()->relations(_s2[pos],iS2))) {
+ MDOUBLE exp = _sp.freq(iS1);
+ posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
+ posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,dist*rate)*rate;
+ }
+ }
+ }
+ }
+ }
+ assert(posLikelihood>0.0);
+ sumL += (posLikelihood_d/posLikelihood)*(_weights ? (*_weights)[pos]:1.0);
+ }
+ return -sumL;
+ };
+};
+
+const MDOUBLE givenRatesMLDistance::giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score) const
+{
+ const MDOUBLE ax=0,bx=1.0,cx=_maxPairwiseDistance;
+ MDOUBLE dist=-1.0;
+ MDOUBLE resL = -dbrent(ax,bx,cx,
+ C_eval_likelihoodOfDistanceGivenRates(_sp,s1,s2,_rates,weights),
+ C_eval_likelihoodOfDistanceGivenRates_d(_sp,s1,s2,_rates,weights),
+ _toll,
+ &dist);
+ if (score) *score = resL;
+ return dist;
+};
diff --git a/libs/phylogeny/givenRatesMLDistance.h b/libs/phylogeny/givenRatesMLDistance.h
new file mode 100644
index 0000000..23e59ef
--- /dev/null
+++ b/libs/phylogeny/givenRatesMLDistance.h
@@ -0,0 +1,61 @@
+// $Id: givenRatesMLDistance.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___GIVEN_RATES_ML_DISTANCE_H
+#define ___GIVEN_RATES_ML_DISTANCE_H
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "likeDist.h"
+#include "stochasticProcess.h"
+#include "logFile.h"
+#include <cmath>
+using namespace std;
+
+class givenRatesMLDistance : public likeDist {
+public:
+ explicit givenRatesMLDistance(const stochasticProcess& sp,
+ const Vdouble& rates,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0
+ )
+ : likeDist(sp, toll,maxPairwiseDistance),_rates(rates) {}
+
+ explicit givenRatesMLDistance(stochasticProcess& sp,
+ const Vdouble& rates,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0
+ )
+ : likeDist(sp, toll,maxPairwiseDistance),_rates(rates) {}
+
+ explicit givenRatesMLDistance(const stochasticProcess& sp,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0
+ )
+ : likeDist(sp, toll,maxPairwiseDistance),_rates(0) {}
+
+ explicit givenRatesMLDistance(stochasticProcess& sp,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0
+ )
+ : likeDist(sp, toll,maxPairwiseDistance),_rates(0) {}
+
+ givenRatesMLDistance(const givenRatesMLDistance& other):
+ likeDist(static_cast<likeDist>(other)), _rates(other._rates) {}
+
+ virtual givenRatesMLDistance* clone() const {return new givenRatesMLDistance(*this);}
+
+ void setRates(const Vdouble &rates) {_rates = rates;}
+
+ // Returns the estimated ML distance between the 2 sequences.
+ // if score is given, it will be assigned the log-likelihood.
+ const MDOUBLE giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL) const;
+
+private:
+ Vdouble _rates;
+};
+
+#endif
+
diff --git a/libs/phylogeny/goldmanYangModel.cpp b/libs/phylogeny/goldmanYangModel.cpp
new file mode 100644
index 0000000..411a898
--- /dev/null
+++ b/libs/phylogeny/goldmanYangModel.cpp
@@ -0,0 +1,144 @@
+// $Id: goldmanYangModel.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "goldmanYangModel.h"
+#include "codon.h"
+#include "readDatMatrix.h" // for the normalizeQ function.
+
+
+goldmanYangModel::goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK,codon & inCodonAlph, const bool globalV):
+ _v(inV),_k(inK),_globalV(globalV),_codonAlph(inCodonAlph){ // bugfix: was _globalV(_globalV) -- self-initialization left the flag indeterminate
+ homogenousFreq();
+ _Q.resize(_codonAlph.size());
+ for (int z=0; z < _Q.size();++z) _Q[z].resize(_codonAlph.size(),0);
+ updateQ();
+
+}
+
+
+goldmanYangModel::goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK, codon & inCodonAlph,const Vdouble& freq,const bool globalV):
+ _freq(freq),_v(inV),_k(inK),_globalV(globalV),_codonAlph(inCodonAlph){ // bugfix: was _globalV(_globalV), same self-initialization defect
+ _Q.resize(_codonAlph.size());
+ for (int z=0; z < _Q.size();++z) _Q[z].resize(_codonAlph.size(),0);
+ updateQ();
+}
+
+
+void goldmanYangModel::updateQ() {
+
+ // building q.
+ int i,j;
+ MDOUBLE sum=0.0;
+ MDOUBLE epsilon=0.00000001;//0.00000000001;
+ MDOUBLE factor = 1000.0;
+ for (i=0; i < _Q.size();++i) {
+ sum=0;
+ for (j=0; j < _Q.size();++j) {
+ if (j==i) continue; //same codon
+ if (codonUtility::codonDiff(i,j,_codonAlph) == codonUtility::tr) {
+ _Q[i][j] = _k*exp(-(1/factor)*_gcd.getGranthamDistance(codonUtility::aaOf(i,_codonAlph),codonUtility::aaOf(j,_codonAlph))*_v);
+ if (_Q[i][j]<epsilon) _Q[i][j] = epsilon;
+ }else if (codonUtility::codonDiff(i,j,_codonAlph) == codonUtility::tv) {
+ _Q[i][j] = exp(-(1/factor)*_gcd.getGranthamDistance(codonUtility::aaOf(i,_codonAlph),codonUtility::aaOf(j,_codonAlph))*_v);
+ if (_Q[i][j]<epsilon) _Q[i][j] = epsilon;
+ }
+ else _Q[i][j] = 0;//more than one substitution.
+
+ _Q[i][j]*=_freq[j];
+ sum += _Q[i][j];
+
+ }
+ _Q[i][i]=-sum;
+ }
+
+
+ // check:
+/* LOG(5,<<"\n\n\n ===================================== \n");
+ int a1,a2;
+ for (a1=0;a1<4;++a1){
+ for (a2=0;a2<4;++a2){
+ LOG(5,<<qMatrix[a1][a2]<<"\t");
+ }
+ LOG(5,<<endl);
+ }
+*/
+
+
+ if (_globalV == true)
+ normalizeQ(_Q,_freq);
+
+ // check:
+/* LOG(5,<<"\n\n\n ===================================== \n");
+ for (a1=0;a1<4;++a1){
+ for ( a2=0;a2<4;++a2){
+ LOG(5,<<qMatrix[a1][a2]<<"\t");
+ }
+ LOG(5,<<endl);
+ }
+*/
+
+
+ // updating _q2Pt;
+// _Q = qMatrix;
+ _q2pt.fillFromRateMatrix(_freq,_Q);
+
+
+
+}
+
+
+
+// original with V and not 1/V
+/*
+void goldmanYangModel::updateQ() {
+ // building q.
+ VVdouble qMatrix(_codonAlph.size());
+ int i,j,z;
+ MDOUBLE sum=0.0;
+ for (z=0; z < qMatrix.size();++z) qMatrix[z].resize(_codonAlph.size(),0);
+ for (i=0; i < qMatrix.size();++i) {
+ sum=0;
+ for (j=0; j < qMatrix.size();++j) {
+ if (j==i) continue;
+ if (codonUtility::codonDiff(i,j) == codonUtility::different) {
+ qMatrix[i][j] =0;
+ } else if (codonUtility::codonDiff(i,j) == codonUtility::transition) {
+ qMatrix[i][j] =_k*exp(-_gcd.getGranthamDistance(codonUtility::aaOf(i),codonUtility::aaOf(j))/_v);
+ } else if (codonUtility::codonDiff(i,j) == codonUtility::transversion) {
+ qMatrix[i][j] = exp(-_gcd.getGranthamDistance(codonUtility::aaOf(i),codonUtility::aaOf(j))/_v);
+ }
+ qMatrix[i][j]*=_freq[j];
+ sum += qMatrix[i][j];
+ }
+ qMatrix[i][i]=-sum;
+ }
+ // check:
+ //LOG(5,<<"\n\n\n ===================================== \n");
+ //int a1,a2;
+ //for (a1=0;a1<4;++a1){
+ // for (a2=0;a2<4;++a2){
+ // LOG(5,<<qMatrix[a1][a2]<<"\t");
+ // }
+ // LOG(5,<<endl);
+ //}
+
+ if (_globalV == true)
+ normalizeQ(qMatrix,_freq);
+
+ //LOG(5,<<"\n\n\n ===================================== \n");
+ //LOG(5,<<endl<<endl);
+ //for (a1=0;a1<4;++a1){
+ // for (a2=0;a2<4;++a2){
+ // LOG(5,<<qMatrix[a1][a2]<<"\t");
+ // }
+ // LOG(5,<<endl);
+ //}
+
+ // updating _q2Pt;
+ _Q = qMatrix;
+ _q2pt.fillFromRateMatrix(_freq,qMatrix);
+}
+
+
+*/
+
+
diff --git a/libs/phylogeny/goldmanYangModel.h b/libs/phylogeny/goldmanYangModel.h
new file mode 100644
index 0000000..96a5e92
--- /dev/null
+++ b/libs/phylogeny/goldmanYangModel.h
@@ -0,0 +1,56 @@
+// $Id: goldmanYangModel.h 1841 2007-03-11 15:19:14Z adist $
+
+#ifndef ___GOLDMAN_YANG_MODEL
+#define ___GOLDMAN_YANG_MODEL
+
+#include "definitions.h"
+#include "replacementModel.h"
+#include "fromQtoPt.h"
+#include "granthamChemicalDistances.h"
+#include "codon.h"
+
+class goldmanYangModel : public replacementModel {
+public:
+ explicit goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK,codon & inCodonAlph, const bool globalV=true);
+ explicit goldmanYangModel(const MDOUBLE inV, const MDOUBLE inK,codon & inCodonAlph, const Vdouble& freq,const bool globalV=true);
+ virtual replacementModel* clone() const { return new goldmanYangModel(*this); }
+ const int alphabetSize() const {return _codonAlph.size();}
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
+ return _q2pt.Pij_t(i,j,d);
+ }
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+ return _q2pt.dPij_dt(i,j,d);
+ }
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+ return _q2pt.d2Pij_dt2(i,j,d);
+ }
+ const MDOUBLE freq(const int i) const {return _freq[i];};
+ void setK(const MDOUBLE newK) { _k = newK;updateQ();}
+ void setV(const MDOUBLE newV) { _v = newV;updateQ();}
+ void homogenousFreq(){ _freq.erase(_freq.begin(),_freq.end()),_freq.resize(_codonAlph.size(),1.0/_codonAlph.size());}
+
+ MDOUBLE getK() {return _k;}
+ MDOUBLE getV() {return _v;}
+
+ void setGlobalV(const bool globalV){ _globalV=globalV;}
+ const granthamChemicalDistances& getGCD(){return _gcd;}
+ MDOUBLE getQij(const int i,const int j)const {return _Q[i][j];}
+
+ VVdouble getQ() const { return _Q;}
+ Vdouble getFreqs() const {return _freq;}
+
+private:
+ Vdouble _freq;
+ MDOUBLE _v; //selection factor.
+ MDOUBLE _k; // Tr/Tv ratio.
+ void updateQ();
+ q2pt _q2pt;
+ granthamChemicalDistances _gcd;
+ bool _globalV; //false when compute v per site
+ VVdouble _Q;
+ codon & _codonAlph;
+
+};
+
+
+#endif
diff --git a/libs/phylogeny/granthamChemicalDistances.cpp b/libs/phylogeny/granthamChemicalDistances.cpp
new file mode 100644
index 0000000..84891a3
--- /dev/null
+++ b/libs/phylogeny/granthamChemicalDistances.cpp
@@ -0,0 +1,187 @@
+// $Id: granthamChemicalDistances.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "granthamChemicalDistances.h"
+#include <cmath>
+
+granthamChemicalDistances::granthamChemicalDistances() {
+ for (int i=0; i<20;++i) GranChemDist[i][i]=0;
+ GranChemDist[0][1]=112; GranChemDist[0][2]=111; GranChemDist[0][3]=126; GranChemDist[0][4]=195; GranChemDist[0][5]=91; GranChemDist[0][6]=107;
+ GranChemDist[0][7]=60; GranChemDist[0][8]=86; GranChemDist[0][9]=94; GranChemDist[0][10]=96; GranChemDist[0][11]=106; GranChemDist[0][12]=84;
+ GranChemDist[0][13]=113; GranChemDist[0][14]=27; GranChemDist[0][15]=99; GranChemDist[0][16]=58; GranChemDist[0][17]=148; GranChemDist[0][18]=112;
+ GranChemDist[0][19]=64;
+
+ GranChemDist[1][2]=86; GranChemDist[1][3]=96; GranChemDist[1][4]=180; GranChemDist[1][5]=43; GranChemDist[1][6]=54; GranChemDist[1][7]=125;
+ GranChemDist[1][8]=29; GranChemDist[1][9]=97; GranChemDist[1][10]=102; GranChemDist[1][11]=26; GranChemDist[1][12]=91; GranChemDist[1][13]=97;
+ GranChemDist[1][14]=103; GranChemDist[1][15]=110; GranChemDist[1][16]=71; GranChemDist[1][17]=101; GranChemDist[1][18]=77; GranChemDist[1][19]=96;
+
+ GranChemDist[2][3]=23; GranChemDist[2][4]=139; GranChemDist[2][5]=46; GranChemDist[2][6]=42; GranChemDist[2][7]=80; GranChemDist[2][8]=68;
+ GranChemDist[2][9]=149; GranChemDist[2][10]=153; GranChemDist[2][11]=94; GranChemDist[2][12]=142; GranChemDist[2][13]=158; GranChemDist[2][14]=91;
+ GranChemDist[2][15]=46; GranChemDist[2][16]=65; GranChemDist[2][17]=174; GranChemDist[2][18]=143; GranChemDist[2][19]=133;
+
+ GranChemDist[3][4]=154; GranChemDist[3][5]=61; GranChemDist[3][6]=45; GranChemDist[3][7]=94; GranChemDist[3][8]=81;
+ GranChemDist[3][9]=168; GranChemDist[3][10]=172; GranChemDist[3][11]=101; GranChemDist[3][12]=160; GranChemDist[3][13]=177; GranChemDist[3][14]=108;
+ GranChemDist[3][15]=65; GranChemDist[3][16]=85; GranChemDist[3][17]=181; GranChemDist[3][18]=160; GranChemDist[3][19]=152;
+
+ GranChemDist[4][5]=154; GranChemDist[4][6]=170; GranChemDist[4][7]=159; GranChemDist[4][8]=174;
+ GranChemDist[4][9]=198; GranChemDist[4][10]=198; GranChemDist[4][11]=202; GranChemDist[4][12]=196; GranChemDist[4][13]=205; GranChemDist[4][14]=169;
+ GranChemDist[4][15]=112; GranChemDist[4][16]=149; GranChemDist[4][17]=215; GranChemDist[4][18]=194; GranChemDist[4][19]=192;
+
+ GranChemDist[5][6]=29; GranChemDist[5][7]=87; GranChemDist[5][8]=24;
+ GranChemDist[5][9]=109; GranChemDist[5][10]=113; GranChemDist[5][11]=53; GranChemDist[5][12]=101; GranChemDist[5][13]=116; GranChemDist[5][14]=76;
+ GranChemDist[5][15]=68; GranChemDist[5][16]=42; GranChemDist[5][17]=130; GranChemDist[5][18]=99; GranChemDist[5][19]=96;
+
+ GranChemDist[6][7]=98; GranChemDist[6][8]=40;
+ GranChemDist[6][9]=134; GranChemDist[6][10]=138; GranChemDist[6][11]=56; GranChemDist[6][12]=126; GranChemDist[6][13]=140; GranChemDist[6][14]=93;
+ GranChemDist[6][15]=80; GranChemDist[6][16]=65; GranChemDist[6][17]=152; GranChemDist[6][18]=122; GranChemDist[6][19]=121;
+
+ GranChemDist[7][8]=89;
+ GranChemDist[7][9]=135; GranChemDist[7][10]=138; GranChemDist[7][11]=127; GranChemDist[7][12]=127; GranChemDist[7][13]=153; GranChemDist[7][14]=42;
+ GranChemDist[7][15]=56; GranChemDist[7][16]=59; GranChemDist[7][17]=184; GranChemDist[7][18]=147; GranChemDist[7][19]=109;
+
+ GranChemDist[8][9]=94; GranChemDist[8][10]=99; GranChemDist[8][11]=32; GranChemDist[8][12]=87; GranChemDist[8][13]=100; GranChemDist[8][14]=77;
+ GranChemDist[8][15]=89; GranChemDist[8][16]=47; GranChemDist[8][17]=115; GranChemDist[8][18]=83; GranChemDist[8][19]=84;
+
+ GranChemDist[9][10]=5; GranChemDist[9][11]=102; GranChemDist[9][12]=10; GranChemDist[9][13]=21; GranChemDist[9][14]=95;
+ GranChemDist[9][15]=142; GranChemDist[9][16]=89; GranChemDist[9][17]=61; GranChemDist[9][18]=33; GranChemDist[9][19]=29;
+
+ GranChemDist[10][11]=107; GranChemDist[10][12]=15; GranChemDist[10][13]=22; GranChemDist[10][14]=98;
+ GranChemDist[10][15]=145; GranChemDist[10][16]=92; GranChemDist[10][17]=61; GranChemDist[10][18]=36; GranChemDist[10][19]=32;
+
+ GranChemDist[11][12]=95; GranChemDist[11][13]=102; GranChemDist[11][14]=103;
+ GranChemDist[11][15]=121; GranChemDist[11][16]=78; GranChemDist[11][17]=110; GranChemDist[11][18]=85; GranChemDist[11][19]=97;
+
+ GranChemDist[12][13]=28; GranChemDist[12][14]=87;
+ GranChemDist[12][15]=135; GranChemDist[12][16]=81; GranChemDist[12][17]=67; GranChemDist[12][18]=36; GranChemDist[12][19]=21;
+
+ GranChemDist[13][14]=114;
+ GranChemDist[13][15]=155; GranChemDist[13][16]=103; GranChemDist[13][17]=40; GranChemDist[13][18]=22; GranChemDist[13][19]=50;
+
+ GranChemDist[14][15]=74; GranChemDist[14][16]=38; GranChemDist[14][17]=147; GranChemDist[14][18]=110; GranChemDist[14][19]=68;
+
+ GranChemDist[15][16]=58; GranChemDist[15][17]=177; GranChemDist[15][18]=144; GranChemDist[15][19]=124;
+
+ GranChemDist[16][17]=128; GranChemDist[16][18]=92; GranChemDist[16][19]=69;
+
+ GranChemDist[17][18]=37; GranChemDist[17][19]=88;
+
+ GranChemDist[18][19]=55;
+
+
+ GranPolarityTable[0]=8.1 ; //A
+ GranPolarityTable[1]=10.5 ; //R
+ GranPolarityTable[2]=11.6 ; //N
+ GranPolarityTable[3]=13.0 ; //D
+ GranPolarityTable[4]=5.5 ; //C
+ GranPolarityTable[5]=10.5 ; //Q
+ GranPolarityTable[6]=12.3 ; //E
+ GranPolarityTable[7]=9.0 ; //G
+ GranPolarityTable[8]=10.4 ; //H
+ GranPolarityTable[9]=5.2 ; //I
+ GranPolarityTable[10]=4.9 ; //L
+ GranPolarityTable[11]=11.3; //K
+ GranPolarityTable[12]=5.7 ; //M
+ GranPolarityTable[13]=5.2 ; //F
+ GranPolarityTable[14]=8.0 ; //P
+ GranPolarityTable[15]=9.2 ; //S
+ GranPolarityTable[16]=8.6 ; //T
+ GranPolarityTable[17]=5.4 ; //W
+ GranPolarityTable[18]=6.2 ; //Y
+ GranPolarityTable[19]=5.9 ; //V
+
+/*
+ GranVolumeTable[0]=8.1 ; //A
+ GranVolumeTable[1]=10.5 ; //R
+ GranVolumeTable[2]=11.6 ; //N
+ GranVolumeTable[3]=13.0 ; //D
+ GranVolumeTable[4]=5.5 ; //C
+ GranVolumeTable[5]=10.5 ; //Q
+ GranVolumeTable[6]=12.3 ; //E
+ GranVolumeTable[7]=9.0 ; //G
+ GranVolumeTable[8]=10.4 ; //H
+ GranVolumeTable[9]=5.2 ; //I
+ GranVolumeTable[10]=4.9 ; //L
+ GranVolumeTable[11]=11.3; //K
+ GranVolumeTable[12]=5.7 ; //M
+ GranVolumeTable[13]=5.2 ; //F
+ GranVolumeTable[14]=8.0 ; //P
+ GranVolumeTable[15]=9.2 ; //S
+ GranVolumeTable[16]=8.6 ; //T
+ GranVolumeTable[17]=5.4 ; //W
+ GranVolumeTable[18]=6.2 ; //Y
+ GranVolumeTable[19]=5.9 ; //V
+*/
+}
+
+MDOUBLE granthamChemicalDistances::getHughesHydrophobicityDistance(
+ const int aa1,const int aa2) const {
+ int v1=0;
+ int v2=0;
+ if ((aa1==0) || (aa1==4) || (aa1==13) || //acf
+ (aa1==7) || (aa1==8) || (aa1==9) || //ghi
+ (aa1==11) || (aa1==10) || (aa1==12) || //klm
+ (aa1==16) || (aa1==19) || (aa1==17)
+ || (aa1==18)) //tvwy
+ v1=1;
+ if ((aa2==0) || (aa2==4) || (aa2==13) || //acf
+ (aa2==7) || (aa2==8) || (aa2==9) || //ghi
+ (aa2==11) || (aa2==10) || (aa2==12) || //klm
+ (aa2==16) || (aa2==19) || (aa2==17)
+ || (aa2==18)) //tvwy
+ v2=1;
+
+ if (v1!=v2) return 1;
+ return 0;
+}
+
+MDOUBLE granthamChemicalDistances::getHughesPolarityDistance(
+ const int aa1,const int aa2) const {
+ int v1=0;
+ int v2=0;
+ if ((aa1==4) || (aa1==3) || (aa1==6) || //cde
+ (aa1==8) || (aa1==11) || (aa1==2) || //hkn
+ (aa1==5) || (aa1==1) || (aa1==15) || //qrs
+ (aa1==16) || (aa1==17) || (aa1==18)) //tyw
+ v1=1;
+ if ((aa2==4) || (aa2==3) || (aa2==6) || //cde
+ (aa2==8) || (aa2==11) || (aa2==2) || //hkn
+ (aa2==5) || (aa2==1) || (aa2==15) || //qrs
+ (aa2==16) || (aa2==17) || (aa2==18)) //tyw
+ v2=1;
+
+ if (v1!=v2) return 1;
+ return 0;
+}
+MDOUBLE granthamChemicalDistances::getHughesChargeDistance(
+ const int aa1,const int aa2) const {
+ int v1=0; // charge class: 1 = positive (R=1,H=8,K=11), 2 = negative (D=3,E=6), 3 = neutral
+ int v2=0;
+ if ((aa1==8) || (aa1==11) || (aa1==1)) v1=1;
+ else if ( (aa1==3) || (aa1==6)) v1=2; // bugfix: missing "else" made the trailing else reset positive residues to neutral
+ else v1=3;
+
+ if ((aa2==8) || (aa2==11) || (aa2==1)) v2=1;
+ else if ( (aa2==3) || (aa2==6)) v2=2; // bugfix: same missing "else" as above
+ else v2=3;
+
+ if (v1!=v2) return 1;
+ return 0;
+}
+
+
+
+MDOUBLE granthamChemicalDistances::getGranthamDistance(const int aa1, const int aa2) const {
+ if (aa1>aa2) return GranChemDist[aa2][aa1] ;
+ else return GranChemDist[aa1][aa2];
+}
+
+MDOUBLE granthamChemicalDistances::getGranthamPolarityDistance(const int aa1,const int aa2) const{
+ return fabs(GranPolarityTable[aa1]-GranPolarityTable[aa2]);
+}
+
+MDOUBLE granthamChemicalDistances::getGranthamPolarity(const int aa1) const{
+ return GranPolarityTable[aa1];
+}
+
+
+
+
diff --git a/libs/phylogeny/granthamChemicalDistances.h b/libs/phylogeny/granthamChemicalDistances.h
new file mode 100644
index 0000000..8475942
--- /dev/null
+++ b/libs/phylogeny/granthamChemicalDistances.h
@@ -0,0 +1,32 @@
+// $Id: granthamChemicalDistances.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___GRANTHAM_CHEMICAL_DISTANCES
+#define ___GRANTHAM_CHEMICAL_DISTANCES
+
+#include "definitions.h"
+
+class granthamChemicalDistances {
+public:
+ explicit granthamChemicalDistances();
+ MDOUBLE getGranthamDistance(const int aa1,const int aa2) const ;
+ MDOUBLE getGranthamPolarityDistance(const int aa1,const int aa2) const;
+ MDOUBLE getGranthamPolarity(const int aa1) const;
+ virtual ~granthamChemicalDistances() {}
+
+ MDOUBLE getHughesChargeDistance(const int aa1,const int aa2) const;// page 520
+ MDOUBLE getHughesPolarityDistance(const int aa1,const int aa2) const;// page 520
+ MDOUBLE getHughesHydrophobicityDistance(const int aa1,const int aa2) const;// page 520
+
+
+private:
+
+ // private members:
+ MDOUBLE GranChemDist[20][20];
+ MDOUBLE GranPolarityTable[20];
+
+};
+
+
+#endif
+
+
diff --git a/libs/phylogeny/gtrModel.cpp b/libs/phylogeny/gtrModel.cpp
new file mode 100644
index 0000000..db5e455
--- /dev/null
+++ b/libs/phylogeny/gtrModel.cpp
@@ -0,0 +1,210 @@
+#include "gtrModel.h"
+#include "readDatMatrix.h" // for the normalizeQ function.
+#include "matrixUtils.h"
+
+// Builds a GTR model from equilibrium frequencies and the six
+// exchangeability parameters, then fills Q and its eigen-decomposition.
+gtrModel::gtrModel(const Vdouble& freq,
+	const MDOUBLE a2c,
+	const MDOUBLE a2g,
+	const MDOUBLE a2t,
+	const MDOUBLE c2g,
+	const MDOUBLE c2t,
+	const MDOUBLE g2t)
+	:_a2c(a2c),_a2g(a2g),_a2t(a2t),_c2g(c2g),_c2t(c2t),_g2t(g2t),_freq(freq)
+{
+	const int n = alphabetSize();
+	_Q.resize(n);
+	for (int row = 0; row < n; ++row)
+		_Q[row].resize(n, 0.0);
+	updateQ(a2c,a2g,a2t,c2g,c2t,g2t);
+}
+
+
+// Copies every model parameter plus the cached Q matrix and its
+// eigen-decomposition (members are independent, so order is irrelevant).
+gtrModel& gtrModel::operator=(const gtrModel &other)
+{
+	_a2c = other._a2c;
+	_a2g = other._a2g;
+	_a2t = other._a2t;
+	_c2g = other._c2g;
+	_c2t = other._c2t;
+	_g2t = other._g2t;
+	_freq = other._freq;
+	_Q = other._Q;
+	_q2pt = other._q2pt;
+	return *this;
+}
+
+// Copy constructor: member-wise copy via the initializer list.
+gtrModel::gtrModel(const gtrModel &other)
+	: _Q(other._Q),
+	  _freq(other._freq),
+	  _q2pt(other._q2pt),
+	  _a2c(other._a2c),
+	  _a2g(other._a2g),
+	  _a2t(other._a2t),
+	  _c2g(other._c2g),
+	  _c2t(other._c2t),
+	  _g2t(other._g2t)
+{
+}
+
+// Multiplies every entry of Q by `scale` (Q is square, so one size suffices).
+void gtrModel::norm(const MDOUBLE scale)
+{
+	const int n = _Q.size();
+	for (int row = 0; row < n; ++row)
+		for (int col = 0; col < n; ++col)
+			_Q[row][col] *= scale;
+}
+
+// Expected substitution rate of Q: -sum_i freq[i]*Q[i][i]
+// (diagonal entries are negative, so the result is positive).
+MDOUBLE gtrModel::sumPijQij(){
+	MDOUBLE total = 0.0;
+	for (int state = 0; state < _Q.size(); ++state)
+		total -= _Q[state][state] * _freq[state];
+	return total;
+}
+
+// Rebuilds the reversible rate matrix Q from the six exchangeabilities and
+// the stationary frequencies so that freq[i]*Q[i][j] == freq[j]*Q[j][i]
+// (detailed balance).  Rows are then forced to sum to zero, Q is scaled to
+// one expected substitution per unit time, and the eigen cache is refreshed.
+void gtrModel::updateQ(const MDOUBLE a2c,const MDOUBLE a2g,const MDOUBLE a2t,const MDOUBLE c2g,const MDOUBLE c2t,const MDOUBLE g2t)
+{
+	_a2c = a2c;
+	_Q[a][c] = (_a2c);
+	_Q[c][a] = (_freq[a]*_a2c/_freq[c]); // reverse rate keeps detailed balance
+	_a2g = a2g;
+	_Q[a][g] = (_a2g);
+	_Q[g][a] = (_freq[a]*_a2g/_freq[g]);
+	_a2t = a2t;
+	_Q[a][t] = (_a2t);
+	_Q[t][a] = (_freq[a]*_a2t/_freq[t]);
+	_c2g = c2g;
+	_Q[c][g] = (_c2g);
+	_Q[g][c] = (_freq[c]*_c2g/_freq[g]);
+	_c2t = c2t;
+	_Q[c][t] = (_c2t);
+	_Q[t][c] = (_freq[c]*_c2t/_freq[t]);
+	_g2t = g2t;
+	_Q[g][t] = (_g2t);
+	_Q[t][g] = (_freq[g]*_g2t/_freq[t]);
+	// diagonal entries make each row sum to zero
+	_Q[a][a] = -1.0*(_Q[a][c]+_Q[a][g]+_Q[a][t]);
+	_Q[c][c] = -1.0*(_Q[c][a]+_Q[c][g]+_Q[c][t]);
+	_Q[g][g] = -1.0*(_Q[g][a]+_Q[g][c]+_Q[g][t]);
+	_Q[t][t] = -1.0*(_Q[t][a]+_Q[t][c]+_Q[t][g]);
+	norm(1.0/sumPijQij()); // rescale to one expected substitution per unit time
+	_q2pt.fillFromRateMatrix(_freq,_Q);
+}
+
+// Sets the A<->C exchangeability; updateQ stores the value and rebuilds Q.
+void gtrModel::set_a2c(const MDOUBLE a2c)
+{
+	updateQ(a2c, _a2g, _a2t, _c2g, _c2t, _g2t);
+}
+
+// Sets the A<->G exchangeability; updateQ stores the value and rebuilds Q.
+void gtrModel::set_a2g(const MDOUBLE a2g)
+{
+	updateQ(_a2c, a2g, _a2t, _c2g, _c2t, _g2t);
+}
+
+// Sets the A<->T exchangeability; updateQ stores the value and rebuilds Q.
+void gtrModel::set_a2t(const MDOUBLE a2t)
+{
+	updateQ(_a2c, _a2g, a2t, _c2g, _c2t, _g2t);
+}
+
+// Sets the C<->G exchangeability; updateQ stores the value and rebuilds Q.
+void gtrModel::set_c2g(const MDOUBLE c2g)
+{
+	updateQ(_a2c, _a2g, _a2t, c2g, _c2t, _g2t);
+}
+
+// Sets the C<->T exchangeability; updateQ stores the value and rebuilds Q.
+void gtrModel::set_c2t(const MDOUBLE c2t)
+{
+	updateQ(_a2c, _a2g, _a2t, _c2g, c2t, _g2t);
+}
+
+// Sets the G<->T exchangeability; updateQ stores the value and rebuilds Q.
+void gtrModel::set_g2t(const MDOUBLE g2t)
+{
+	updateQ(_a2c, _a2g, _a2t, _c2g, _c2t, g2t);
+}
+
+// Returns the A<->C exchangeability after validating that Q was built.
+// Fix: `result` was uninitialized, so if errorMsg::reportError ever returned,
+// the function returned an indeterminate value (UB).  Now defaults to 0.0.
+MDOUBLE gtrModel::get_a2c() const
+{
+	MDOUBLE result = 0.0;
+	if(_Q.size() < alphabetSize())
+		errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2c");
+	else{
+		if((_Q[a].size() < alphabetSize())||(_Q[c].size() < alphabetSize()))
+			errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2c");
+		else
+			result = _a2c;
+	}
+	return result;
+}
+
+// Returns the A<->G exchangeability after validating that Q was built.
+// Fix: initialize `result` so the error path cannot return garbage.
+MDOUBLE gtrModel::get_a2g() const
+{
+	MDOUBLE result = 0.0;
+	if(_Q.size() < alphabetSize())
+		errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2g");
+	else{
+		if((_Q[a].size() < alphabetSize())||(_Q[g].size() < alphabetSize()))
+			errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2g");
+		else
+			result = _a2g;
+	}
+	return result;
+}
+
+// Returns the A<->T exchangeability after validating that Q was built.
+// Fix: initialize `result` so the error path cannot return garbage.
+MDOUBLE gtrModel::get_a2t() const
+{
+	MDOUBLE result = 0.0;
+	if(_Q.size() < alphabetSize())
+		errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_a2t");
+	else{
+		if((_Q[a].size() < alphabetSize())||(_Q[t].size() < alphabetSize()))
+			errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_a2t");
+		else
+			result = _a2t;
+	}
+	return result;
+}
+
+// Returns the C<->G exchangeability after validating that Q was built.
+// Fix: initialize `result` so the error path cannot return garbage.
+MDOUBLE gtrModel::get_c2g() const
+{
+	MDOUBLE result = 0.0;
+	if(_Q.size() < alphabetSize())
+		errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_c2g");
+	else{
+		if((_Q[c].size() < alphabetSize())||(_Q[g].size() < alphabetSize()))
+			errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_c2g");
+		else
+			result = _c2g;
+	}
+	return result;
+}
+
+// Returns the C<->T exchangeability after validating that Q was built.
+// Fix: initialize `result` so the error path cannot return garbage.
+MDOUBLE gtrModel::get_c2t() const
+{
+	MDOUBLE result = 0.0;
+	if(_Q.size() < alphabetSize())
+		errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_c2t");
+	else{
+		if((_Q[c].size() < alphabetSize())||(_Q[t].size() < alphabetSize()))
+			errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_c2t");
+		else
+			result = _c2t;
+	}
+	return result;
+}
+
+// Returns the G<->T exchangeability after validating that Q was built.
+// Fix: initialize `result` so the error path cannot return garbage.
+MDOUBLE gtrModel::get_g2t() const
+{
+	MDOUBLE result = 0.0;
+	if(_Q.size() < alphabetSize())
+		errorMsg::reportError("Attempting to reach an uninitiallized Q matrix in gtrModel::get_g2t");
+	else{
+		if((_Q[g].size() < alphabetSize())||(_Q[t].size() < alphabetSize()))
+			errorMsg::reportError("Attempting to reach an uninitiallzed Q matrix element in Model::get_g2t");
+		else
+			result = _g2t;
+	}
+	return result;
+}
diff --git a/libs/phylogeny/gtrModel.h b/libs/phylogeny/gtrModel.h
new file mode 100644
index 0000000..f62ed5e
--- /dev/null
+++ b/libs/phylogeny/gtrModel.h
@@ -0,0 +1,62 @@
+#ifndef _GTR_MODEL
+#define _GTR_MODEL
+
+#include "replacementModel.h"
+#include "fromQtoPt.h"
+
+// General Time-Reversible (GTR) nucleotide substitution model.  The six
+// exchangeability parameters name the unordered base pair they govern
+// (a2c = A<->C, ...); Q is rebuilt, re-normalized to one expected
+// substitution per unit time, and eigen-decomposed after every change.
+class gtrModel : public replacementModel {
+public:
+	enum modelElements {a = 0,c,g,t}; // row/column order of Q and _freq
+	explicit gtrModel(const Vdouble& freq,
+		const MDOUBLE a2c = 0.25,
+		const MDOUBLE a2g = 0.25,
+		const MDOUBLE a2t = 0.25,
+		const MDOUBLE c2g = 0.25,
+		const MDOUBLE c2t = 0.25,
+		const MDOUBLE g2t = 0.25);
+	virtual replacementModel* clone() const { return new gtrModel(*this); }
+	virtual gtrModel& operator=(const gtrModel &other);
+	explicit gtrModel(const gtrModel &other);
+	const int alphabetSize() const {return _freq.size();}
+	// Transition probabilities and their time derivatives are delegated to
+	// the cached eigen-decomposition of Q.
+	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _q2pt.Pij_t(i,j,d);}
+	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _q2pt.dPij_dt(i,j,d);}
+	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _q2pt.d2Pij_dt2(i,j,d);}
+	const MDOUBLE freq(const int i) const {return _freq[i];};
+	// Each setter stores the new parameter and rebuilds Q.
+	void set_a2c(const MDOUBLE a2c);
+	void set_a2g(const MDOUBLE a2g);
+	void set_a2t(const MDOUBLE a2t);
+	void set_c2g(const MDOUBLE c2g);
+	void set_c2t(const MDOUBLE c2t);
+	void set_g2t(const MDOUBLE g2t);
+	// Getters validate that Q was initialized before returning the parameter.
+	MDOUBLE get_a2c() const;
+	MDOUBLE get_a2g() const;
+	MDOUBLE get_a2t() const;
+	MDOUBLE get_c2g() const;
+	MDOUBLE get_c2t() const;
+	MDOUBLE get_g2t() const;
+	const VVdouble& getQ() const {return _Q;}
+
+
+private:
+	void updateQ(const MDOUBLE a2c,const MDOUBLE a2g,const MDOUBLE a2t,const MDOUBLE c2g,const MDOUBLE c2t,const MDOUBLE g2t);
+	void norm(const MDOUBLE scale); // multiply every Q entry by `scale`
+	MDOUBLE sumPijQij(); // expected rate: -sum_i freq[i]*Q[i][i]
+
+private:
+	VVdouble _Q; // rate matrix, indexed by modelElements
+	Vdouble _freq; // stationary frequencies (A,C,G,T)
+	q2pt _q2pt; // cached eigen-decomposition of Q
+	MDOUBLE _a2c;
+	MDOUBLE _a2g;
+	MDOUBLE _a2t;
+	MDOUBLE _c2g;
+	MDOUBLE _c2t;
+	MDOUBLE _g2t;
+};
+#endif
+
+
+
+
+
+
diff --git a/libs/phylogeny/hky.cpp b/libs/phylogeny/hky.cpp
new file mode 100644
index 0000000..36315af
--- /dev/null
+++ b/libs/phylogeny/hky.cpp
@@ -0,0 +1,593 @@
+// $Id: hky.cpp 4291 2008-06-23 10:23:10Z itaymay $
+
+#include "hky.h"
+#include "errorMsg.h"
+
+// Builds an HKY85 model from the four base frequencies (order A,C,G,T)
+// and the transition/transversion ratio.
+hky::hky(const MDOUBLE inProb_a,
+	const MDOUBLE inProb_c,
+	const MDOUBLE inProb_g,
+	const MDOUBLE inProb_t,
+	const MDOUBLE TrTv) {
+	_freq.clear();
+	_freq.push_back(inProb_a);
+	_freq.push_back(inProb_c);
+	_freq.push_back(inProb_g);
+	_freq.push_back(inProb_t);
+	initParams(TrTv);
+}
+
+
+// Builds an HKY85 model from a 4-entry frequency vector (A,C,G,T order);
+// any other size is reported as an error.
+hky::hky(vector<MDOUBLE> inProbs, const MDOUBLE TrTv) : _freq(inProbs)
+{
+	if (inProbs.size()!=4)
+		errorMsg::reportError("hky::hky(vector<MDOUBLE> inProbs, const MDOUBLE TrTv) : the size of inProbs is not 4");
+	initParams(TrTv);
+}
+
+// Derives the transition rate _a and transversion rate _b from the base
+// frequencies and the Tr/Tv ratio, normalized so the total rate is 1.
+void hky::initParams(MDOUBLE TrTv) // init _a, _b, _c, and _y by using _freq and TrTv
+{
+	// k = alpha/beta; in K2P terms Tr/Tv = alpha/(2*beta), hence the factor 2.
+	const MDOUBLE kappa = TrTv*2;
+
+	_c = 2*(_freq[0]*_freq[2]+_freq[3]*_freq[1]); // weight of transition pairs
+	_y = 2*(_freq[0]+_freq[2])*(_freq[1]+_freq[3]); // weight of transversion pairs
+	// solve _c*_a + _y*_b = 1 together with _a/_b = kappa:
+	_b = 1.0 / (_c*kappa+_y);
+	_a = _b*kappa;
+}
+
+// Re-derives _a and _b for a new Tr/Tv ratio; the frequency-dependent
+// weights _c and _y stay as computed by initParams.
+void hky::changeTrTv(const MDOUBLE TrTv){
+	const MDOUBLE kappa = TrTv*2; // k = alpha/beta (see initParams)
+	_b = 1.0 / (_c*kappa+_y);
+	_a = _b*kappa;
+}
+
+// Inverse of the mapping in initParams: Tr/Tv = alpha / (2*beta).
+MDOUBLE hky::getTrTv() const {
+	return (_a/(2.0*_b));
+}
+
+// P(i -> j | t) for the HKY85 model via its closed-form spectral solution.
+// States: 0=A, 1=C, 2=G, 3=T.  The two purines {A,G} share the eigenvalue
+// lamda3 = -(py*b + pr*a) and the two pyrimidines {C,T} share
+// lamda4 = -(py*a + pr*b); a transversion depends only on exp(-b*t).
+// This replaces a 16-case switch whose cases were instances of three
+// formulas (transversion / transition / diagonal).  Out-of-range indices
+// still return -1, as the original switch fall-through did.
+const MDOUBLE hky::Pij_t(const int i, const int j, const MDOUBLE t) const {
+	if (i < 0 || i > 3 || j < 0 || j > 3) return -1;
+	const MDOUBLE pa = _freq[0];
+	const MDOUBLE pc = _freq[1];
+	const MDOUBLE pg = _freq[2];
+	const MDOUBLE pt = _freq[3];
+	const MDOUBLE py = pc+pt; // total pyrimidine frequency
+	const MDOUBLE pr = pa+pg; // total purine frequency
+
+	const bool iPurine = (i == 0 || i == 2);
+	const bool jPurine = (j == 0 || j == 2);
+	const MDOUBLE pj = _freq[j];
+	const MDOUBLE expB = exp(-_b*t);
+	if (iPurine != jPurine) // transversion: single exponential term
+		return pj - pj*expB;
+
+	// i and j share a class; pick that class' frequency and eigenvalue.
+	const MDOUBLE classFreq = jPurine ? pr : py;
+	const MDOUBLE otherFreq = jPurine ? py : pr;
+	const MDOUBLE lamda = jPurine ? -(py*_b+pr*_a) : -(py*_a+pr*_b);
+	const MDOUBLE expL = exp(t*lamda);
+	if (i == j) // (classFreq - pj) is the frequency of j's class partner
+		return pj + pj*otherFreq*expB/classFreq + (classFreq-pj)*expL/classFreq;
+	// transition (A<->G or C<->T)
+	return pj + pj*otherFreq*expB/classFreq - pj*expL/classFreq;
+}
+
+// d/dt of Pij_t: each exponential term of the closed form is multiplied by
+// its own eigenvalue (-b for the transversion term, lamda for the class
+// term) and the constant pj term vanishes.  Same three-case structure as
+// Pij_t, replacing the duplicated 16-case switch.  Returns -1 for indices
+// outside [0,3].
+const MDOUBLE hky::dPij_dt(const int i,const int j, const MDOUBLE t) const {
+	if (i < 0 || i > 3 || j < 0 || j > 3) return -1;
+	const MDOUBLE pa = _freq[0];
+	const MDOUBLE pc = _freq[1];
+	const MDOUBLE pg = _freq[2];
+	const MDOUBLE pt = _freq[3];
+	const MDOUBLE py = pc+pt;
+	const MDOUBLE pr = pa+pg;
+
+	const bool iPurine = (i == 0 || i == 2);
+	const bool jPurine = (j == 0 || j == 2);
+	const MDOUBLE pj = _freq[j];
+	const MDOUBLE expB = exp(-_b*t);
+	if (iPurine != jPurine) // transversion
+		return _b*expB*pj;
+
+	const MDOUBLE classFreq = jPurine ? pr : py;
+	const MDOUBLE otherFreq = jPurine ? py : pr;
+	const MDOUBLE lamda = jPurine ? -(py*_b+pr*_a) : -(py*_a+pr*_b);
+	const MDOUBLE expL = exp(t*lamda);
+	if (i == j)
+		return -_b*pj*otherFreq*expB/classFreq
+			+ lamda*(classFreq-pj)*expL/classFreq;
+	// transition (A<->G or C<->T)
+	return -_b*pj*otherFreq*expB/classFreq
+		- lamda*pj*expL/classFreq;
+}
+
+// d2/dt2 of Pij_t: each exponential term of the closed form is multiplied
+// by the square of its eigenvalue (b*b, lamda*lamda).  Same three-case
+// structure as Pij_t, replacing the duplicated 16-case switch.  Returns -1
+// for indices outside [0,3].
+const MDOUBLE hky::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
+	if (i < 0 || i > 3 || j < 0 || j > 3) return -1;
+	const MDOUBLE pa = _freq[0];
+	const MDOUBLE pc = _freq[1];
+	const MDOUBLE pg = _freq[2];
+	const MDOUBLE pt = _freq[3];
+	const MDOUBLE py = pc+pt;
+	const MDOUBLE pr = pa+pg;
+
+	const bool iPurine = (i == 0 || i == 2);
+	const bool jPurine = (j == 0 || j == 2);
+	const MDOUBLE pj = _freq[j];
+	const MDOUBLE expB = exp(-_b*t);
+	if (iPurine != jPurine) // transversion
+		return -_b*_b*expB*pj;
+
+	const MDOUBLE classFreq = jPurine ? pr : py;
+	const MDOUBLE otherFreq = jPurine ? py : pr;
+	const MDOUBLE lamda = jPurine ? -(py*_b+pr*_a) : -(py*_a+pr*_b);
+	const MDOUBLE expL = exp(t*lamda);
+	if (i == j)
+		return _b*_b*pj*otherFreq*expB/classFreq
+			+ lamda*lamda*(classFreq-pj)*expL/classFreq;
+	// transition (A<->G or C<->T)
+	return _b*_b*pj*otherFreq*expB/classFreq
+		- lamda*lamda*pj*expL/classFreq;
+}
+
+// Derivative of Pij_t with respect to the transversion rate beta (_b),
+// holding the constraint _c*_a + _y*_b = 1 (so d(lamda)/d(beta) carries the
+// _y/_c correction terms).  Same three-case structure as Pij_t, replacing
+// the duplicated 16-case switch.  Returns -1 for indices outside [0,3].
+const MDOUBLE hky::dPij_tdBeta(const int i, const int j, const MDOUBLE t) const {
+	if (i < 0 || i > 3 || j < 0 || j > 3) return -1;
+	const MDOUBLE pa = _freq[0];
+	const MDOUBLE pc = _freq[1];
+	const MDOUBLE pg = _freq[2];
+	const MDOUBLE pt = _freq[3];
+	const MDOUBLE py = pc+pt;
+	const MDOUBLE pr = pa+pg;
+
+	const bool iPurine = (i == 0 || i == 2);
+	const bool jPurine = (j == 0 || j == 2);
+	const MDOUBLE pj = _freq[j];
+	const MDOUBLE expB = exp(-_b*t);
+	if (iPurine != jPurine) // transversion
+		return t*expB*pj;
+
+	const MDOUBLE classFreq = jPurine ? pr : py;
+	const MDOUBLE otherFreq = jPurine ? py : pr;
+	const MDOUBLE lamda = jPurine ? -(py*_b+pr*_a) : -(py*_a+pr*_b);
+	// d(lamda)/d(beta) under the normalization constraint
+	const MDOUBLE dlamda = jPurine ? (-py + _y*pr/_c) : (-pr + _y*py/_c);
+	const MDOUBLE expL = exp(t*lamda);
+	if (i == j)
+		return -t*expB*pj*otherFreq/classFreq
+			+ t*dlamda*(classFreq-pj)*expL/classFreq;
+	// transition (A<->G or C<->T)
+	return -t*expB*pj*otherFreq/classFreq
+		- t*dlamda*pj*expL/classFreq;
+}
+
+//Q[0][1] = freq[1]*_b ; Q[0][2] = freq[2]*_a ; Q[0][3] = freq[3]*_b;
+//Q[1][0] = freq[0]*_b; ; Q[1][2] = freq[2]*_b ; Q[1][3] = freq[3]*_a;
+//Q[2][0] = freq[0]*_a; Q[2][1] = freq[1]*_b ; ; Q[2][3] = freq[3]*_b;
+//Q[3][0] = freq[0]*_b; Q[3][1] = freq[1]*_a ; Q[3][2] = freq[2]*_b;
+
diff --git a/libs/phylogeny/hky.h b/libs/phylogeny/hky.h
new file mode 100644
index 0000000..8d20c14
--- /dev/null
+++ b/libs/phylogeny/hky.h
@@ -0,0 +1,46 @@
+// $Id: hky.h 4291 2008-06-23 10:23:10Z itaymay $
+
+#ifndef ___HKY
+#define ___HKY
+
+#include "replacementModel.h"
+#include <cmath>
+
+// HKY85 nucleotide substitution model (Hasegawa-Kishino-Yano 1985):
+// arbitrary base frequencies plus a transition/transversion ratio.
+// State codes: 0=A, 1=C, 2=G, 3=T.
+class hky : public replacementModel {
+public:
+	// Frequencies are given in A,C,G,T order; TrTv is the Tr/Tv ratio.
+	explicit hky(const MDOUBLE inProb_a,
+		const MDOUBLE inProb_c,
+		const MDOUBLE inProb_g,
+		const MDOUBLE inProb_t,
+		const MDOUBLE TrTv);
+
+	// Same, from a 4-entry vector (errors out on any other size).
+	explicit hky(vector<MDOUBLE> inProbs, const MDOUBLE TrTv);
+
+	virtual replacementModel* clone() const { return new hky(*this); }
+// virtual nucJC* clone() const { return new nucJC(*this); } // see note down:
+
+	const int alphabetSize() const {return 4;}
+
+
+	void changeTrTv(const MDOUBLE In_TrTv); // keeps frequencies, resets _a/_b
+	MDOUBLE getTrTv() const;
+	// Closed-form transition probability and its time derivatives.
+	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
+	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
+	const MDOUBLE freq(const int i) const {return _freq[i];};
+	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
+
+	// Derivative of Pij_t w.r.t. the transversion rate beta (_b).
+	const MDOUBLE dPij_tdBeta(const int i, const int j, const MDOUBLE t) const;
+
+private:
+	void initParams(MDOUBLE TrTv); // init _a, _b, _c, and _y by using _freq and TrTv
+
+private:
+	Vdouble _freq; // stationary frequencies (A,C,G,T)
+	MDOUBLE _a; // transition rate (alpha)
+	MDOUBLE _b; // transversion rate (beta)
+
+	MDOUBLE _c,_y; // relationship between probA, probC, prob G, prob T.
+};
+
+#endif
+
diff --git a/libs/phylogeny/indel.cpp b/libs/phylogeny/indel.cpp
new file mode 100644
index 0000000..6b01c43
--- /dev/null
+++ b/libs/phylogeny/indel.cpp
@@ -0,0 +1,58 @@
+// $Id: indel.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "indel.h"
+
+indel::indel() {}
+
+// Maps an indel character to its code: X/x -> 0 (residue present),
+// -/_ -> 1 (gap); anything else is reported as an error.
+int indel::fromChar(const char s) const{
+	if (s == 'x' || s == 'X') return 0;
+	if (s == '-' || s == '_') return 1;
+	vector<string> err;
+	err.push_back(" The indel sequences contained the character: ");
+	err[0]+=s;
+	err.push_back(" Indel was not one of the following: ");
+	err.push_back(" -, X");
+	err.push_back(" _, x");
+	errorMsg::reportError(err);
+	return -99; // never reached: reportError does not return normally
+}// end of function
+
+// Decodes a whole string of indel characters into their integer codes.
+vector<int> indel::fromString(const string &str) const {
+	vector<int> codes;
+	codes.reserve(str.size());
+	for (string::size_type pos = 0; pos < str.size(); ++pos)
+		codes.push_back(fromChar(str[pos]));
+	return codes;
+}
+
+string indel::fromInt(const int in_id) const{
+ char res = 0;
+ switch (in_id) {
+ case 0 : res = 'X' ; break;
+ case 1 : res = '-' ; break;
+ default:
+ vector<string> err;
+ err.push_back("unable to print indel_id. indel_id was not one of the following: ");
+ err.push_back("X, -");
+ err.push_back("x, _");
+ errorMsg::reportError(err);
+ }//end of switch
+ string vRes;
+ vRes.append(1,res);
+ return vRes;
+}// end of function
+
+// There are no relations here.
+// Identity relation: 1 iff the codes match (the indel alphabet has no
+// ambiguity characters).
+int indel::relations(const int charInSeq, const int charToCheck) const{
+	return (charInSeq == charToCheck) ? 1 : 0;
+}
+
+// Convenience overload: decode the single character at `pos` of `str`.
+int indel::fromChar(const string& str, const int pos) const{
+	return fromChar(str[pos]);
+}
+
+
diff --git a/libs/phylogeny/indel.h b/libs/phylogeny/indel.h
new file mode 100644
index 0000000..ee6f73a
--- /dev/null
+++ b/libs/phylogeny/indel.h
@@ -0,0 +1,28 @@
+// $Id: indel.h 1901 2007-03-15 13:21:06Z nimrodru $
+#ifndef ____INDEL
+#define ____INDEL
+
+#include "definitions.h"
+#include "errorMsg.h"
+#include "alphabet.h"
+
+
+// Two-symbol alphabet used for indel (presence/absence) coding:
+// X/x = residue present (code 0), -/_ = gap (code 1).
+class indel : public alphabet {
+public:
+	explicit indel();
+	virtual ~indel() {}
+	virtual alphabet* clone() const { return new indel(*this); }
+	int unknown() const {return -2;}
+	int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!!
+	int size() const {return 2;}
+	int stringSize() const {return 1;} // one letter code.
+	// Identity relation: no ambiguity codes in this alphabet.
+	int relations(const int charInSeq, const int charToCheck) const;
+	int fromChar(const string& str, const int pos) const;
+	int fromChar(const char s) const;
+	string fromInt(const int in_id) const;
+	vector<int> fromString(const string& str) const;
+	bool isSpecific(const int id) const {return (id>=0 && id < size());}
+
+};//end of class
+
+#endif
diff --git a/libs/phylogeny/indelModel.cpp b/libs/phylogeny/indelModel.cpp
new file mode 100644
index 0000000..bc38697
--- /dev/null
+++ b/libs/phylogeny/indelModel.cpp
@@ -0,0 +1,15 @@
+// $Id: indelModel.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "indelModel.h"
+
+
+// Sets the frequency of 'X' (slot 0) and refreshes the cached rate _alpha.
+// NOTE(review): _freq[1] is left unchanged, so the two frequencies may no
+// longer sum to 1 after this call — presumably the caller adjusts the gap
+// frequency separately; confirm at call sites.
+void indelModel::setFreqX(const MDOUBLE freq_x)
+{
+	_freq[0] =freq_x ;
+	_alpha = 1/(2*_freq[0]*_freq[1]) ;
+}
+
+// Sets the frequency of the gap character '-' and refreshes the cached rate.
+// Bug fix: this used to overwrite _freq[0] (the 'X' frequency); the gap
+// frequency lives in slot 1, per the "_freq: [0] X [1] -" layout declared
+// in indelModel.h.
+void indelModel::setFreqG(const MDOUBLE freq_g)
+{
+	_freq[1] = freq_g;
+	_alpha = 1/(2*_freq[0]*_freq[1]);
+}
diff --git a/libs/phylogeny/indelModel.h b/libs/phylogeny/indelModel.h
new file mode 100644
index 0000000..8ea19e8
--- /dev/null
+++ b/libs/phylogeny/indelModel.h
@@ -0,0 +1,61 @@
+// $Id: indelModel.h 962 2006-11-07 15:13:34Z privmane $
+#ifndef ___INDEL_MODEL
+#define ___INDEL_MODEL
+
+#include "replacementModel.h"
+#include <cmath>
+using namespace std;
+
+// Two-state (X = present, - = gap) replacement model with a single decay
+// rate _alpha = 1/(2*Px*Pg).  Pij_t depends only on whether i == j, so the
+// model is symmetric in its two states.
+class indelModel : public replacementModel
+{
+public:
+	explicit indelModel(const MDOUBLE freq_x, const MDOUBLE freq_g)
+	{
+		_alpha = 1/(2*freq_x*freq_g);
+		_freq.push_back(freq_x);
+		_freq.push_back(freq_g);
+	}
+
+	// P(stay) = e^{-t*alpha}; P(change) = 1 - e^{-t*alpha}.
+	virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const
+	{
+		if (i==j)
+			return exp(-t*_alpha);
+		return (1-exp(-t*_alpha));
+	}
+
+	virtual const MDOUBLE freq(const int i) const { return _freq[i];}
+
+	// NOTE(review): both derivatives below are written for the i==j branch
+	// only ([e^(-t/2PxPg)]/2PxPg); for i!=j the true derivative has the
+	// opposite sign — confirm whether callers only use the diagonal.
+	virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const
+	{
+		// [e^(-t/2PxPg)] / 2PxPg
+		return (exp(-t*_alpha)*_alpha);
+	}
+	virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const
+	{
+		// [-e^(-t/2PxPg)] / [(2PxPg)^2]
+		return ( -exp(-t*_alpha) * _alpha * _alpha);
+	}
+
+	virtual replacementModel* clone() const { return new indelModel(*this);}
+
+	virtual const int alphabetSize() const {return 2;};
+
+
+	// Setters refresh the cached _alpha after changing one frequency.
+	void setFreqX(const MDOUBLE freq_x);
+	void setFreqG(const MDOUBLE freq_g);
+
+
+private:
+	Vdouble _freq; // [0] X [1] -
+	// save _alpha to make things faster. _alpha depends on _freq
+	MDOUBLE _alpha;
+};
+
+
+#endif
+
+
+
+
+
+
diff --git a/libs/phylogeny/integerAlphabet.cpp b/libs/phylogeny/integerAlphabet.cpp
new file mode 100644
index 0000000..2b3ff9f
--- /dev/null
+++ b/libs/phylogeny/integerAlphabet.cpp
@@ -0,0 +1,61 @@
+#include "integerAlphabet.h"
+#include "logFile.h"
+#include "someUtil.h"
+#include <cctype>
+
+//return -99 if not succeeds.
+// Reads one fixed-width integer symbol starting at `pos`; returns -99 when
+// fewer than stringSize() characters remain.  Leading zeros are stripped
+// before conversion, so "0032" decodes to 32.
+int integerAlphabet::fromChar(const string& s, const int pos) const {
+	if (s.size() <= (pos + stringSize()-1)) {
+		// not enough characters left for a complete symbol
+		string textToPrint("integerAlphabet::fromChar: Trying to read a character past the end of the string. ");
+		LOG(1,<<textToPrint<<endl);
+		return -99;
+	}
+
+	string s_sub=s.substr(pos,stringSize());
+	int leftMostDigit(0);
+	// find the left most digit. (s_sub can contain for example "0032" and so the left most digit is '3' and the number that should be returned is 32.
+	for (leftMostDigit=0; leftMostDigit < s_sub.size(); ++leftMostDigit) {
+		if (s_sub[leftMostDigit]!='0')
+			break;
+	}
+	s_sub =s_sub.substr(leftMostDigit);
+
+	// an all-zero symbol leaves s_sub empty; atoi("") yields 0, which is correct
+	return (atoi(s_sub.c_str()));
+}
+
+// Splits `str` into fixed-width symbols and decodes each one; the length
+// must be an exact multiple of the symbol width.
+vector<int> integerAlphabet::fromString(const string &str) const {
+	const int width = stringSize();
+	if (str.size() % width != 0) {
+		errorMsg::reportError("error in integerAlphabet::fromString. String length should be a multiplication of stringSize");
+	}
+	vector<int> codes;
+	for (int offset = 0; offset < str.size(); offset += width)
+		codes.push_back(fromChar(str, offset));
+	return codes;
+}
+
+
+// Fixed symbol width: the number of decimal digits needed to print _size.
+int integerAlphabet::stringSize() const {
+	int digits = 1;
+	for (int remainder = _size / 10; remainder > 0; remainder /= 10)
+		++digits;
+	return digits;
+}
+
+
+// Prints a symbol left-padded with '0' to this alphabet's fixed width,
+// the inverse of fromChar's leading-zero stripping.
+// Bug fix: the padding loop had an empty body — an infinite loop whenever
+// padding was needed — and its `<=` bound would have padded one character
+// too many.  The intended zero-padding is restored.
+string integerAlphabet::fromInt(const int in_id) const{
+
+	string res = int2string(in_id);
+	while ((int)res.size() < stringSize()) {
+		res = "0" + res;
+	}
+	return res;
+}
+
+// There are no relations here.
+// Identity relation: integer symbols carry no ambiguity codes.
+int integerAlphabet::relations(const int charInSeq, const int charToCheck) const{
+	return (charInSeq == charToCheck) ? 1 : 0;
+}
diff --git a/libs/phylogeny/integerAlphabet.h b/libs/phylogeny/integerAlphabet.h
new file mode 100644
index 0000000..4a82822
--- /dev/null
+++ b/libs/phylogeny/integerAlphabet.h
@@ -0,0 +1,29 @@
+#ifndef ___INTEGER_ALPH
+#define ___INTEGER_ALPH
+
+#include "alphabet.h"
+#include "errorMsg.h"
+
+
+// Alphabet whose symbols are the integers 0.._size-1, each printed as a
+// fixed-width, zero-padded decimal string.
+class integerAlphabet : public alphabet {
+public:
+	explicit integerAlphabet(int size): _size(size){};
+	virtual ~integerAlphabet() {}
+	virtual alphabet* clone() const { return new integerAlphabet(*this); }
+	int unknown() const {return -2;}
+	int gap() const {errorMsg::reportError("The method integerAlphabet::gap() is used"); return -1;}
+	int size() const {return _size;}
+	int stringSize() const; // number of decimal digits per printed symbol
+	// Identity relation: no ambiguity codes.
+	int relations(const int charInSeq, const int charToCheck) const;
+	int fromChar(const string& str, const int pos) const;
+	int fromChar(const char s) const;
+	string fromInt(const int in_id) const;
+	vector<int> fromString(const string& str) const;
+	bool isSpecific(const int id) const {return true;}
+
+private:
+	int _size; // number of distinct symbols
+
+};
+
+#endif
diff --git a/libs/phylogeny/jcDistance.h b/libs/phylogeny/jcDistance.h
new file mode 100644
index 0000000..312c708
--- /dev/null
+++ b/libs/phylogeny/jcDistance.h
@@ -0,0 +1,141 @@
+// $Id: jcDistance.h 1928 2007-04-04 16:46:12Z privmane $
+
+#ifndef ___JC_DISTANCE
+#define ___JC_DISTANCE
+
+#include "definitions.h"
+#include "distanceMethod.h"
+#include <typeinfo>
+#include <cmath>
+/*********************************************************
+Jukes-Cantor distance method.
+Assumes no constraints on replacement from one state to another.
+Receives size of alphabet in constructor, and this enables
+to have one class for JC-distance for nucleotides, a.a., and codons
+Weights are an input vector for giving additional weight to positions in the sequences.
+*******************************************************/
+class jcDistance : public distanceMethod {
+
+public:
+	explicit jcDistance() {}
+	virtual jcDistance* clone() const{ return new jcDistance(*this);}
+
+	// Jukes-Cantor distance between two aligned sequences.
+	// p is the (optionally weighted) fraction of mismatching non-gap sites;
+	// the distance is -(1-1/K)*ln(1 - K*p/(K-1)) for alphabet size K, and
+	// MAXDISTANCE when the log argument is non-positive (too diverged).
+	const MDOUBLE giveDistance( const sequence& s1,
+		const sequence& s2,
+		const vector<MDOUBLE> * weights,
+		MDOUBLE* score=NULL) const {//score is not used here
+
+		if (typeid(s1.getAlphabet()) != typeid(s2.getAlphabet()))
+			errorMsg::reportError("Error in jcDistance::giveDistance, s1 and s2 contain different type of alphabet");
+
+		// pS1Base and pS2Base are references to s1 and s2 respectively.
+		// The method uses seq1 and seq2 and not s1 and s2, because when
+		// the sequences contain mulAlphabet we must first convert them to the base alphabet
+		const sequence* pS1Base(&s1);
+		const sequence* pS2Base(&s2);
+		const alphabet* alph = s1.getAlphabet();
+		// if s1 and contains mulAlphabet
+		const mulAlphabet* mulAlph = dynamic_cast<const mulAlphabet*>(alph);
+		if (mulAlph!=NULL) {
+			// temporary base-alphabet copies; deleted before returning
+			pS1Base = new sequence(s1,mulAlph->getBaseAlphabet());
+			pS2Base = new sequence(s2,mulAlph->getBaseAlphabet());
+		}
+
+		int alphabetSize = pS1Base->getAlphabet()->size();
+
+		// const MDOUBLE MAXDISTANCE=2.0;
+		const MDOUBLE MAXDISTANCE=15;
+
+		MDOUBLE p =0;
+		MDOUBLE len=0.0;
+		if (weights == NULL) {
+			// unweighted: count mismatches over comparable positions
+			for (int i = 0; i < pS1Base->seqLen() ; ++i) {
+				if ((*pS1Base)[i]<0 || (*pS2Base)[i]<0) continue; //gaps and missing data.
+				len+=1.0;
+				if ((*pS1Base)[i] != (*pS2Base)[i]) p++;
+			}
+			if (len==0) p=1; // no comparable sites: treat as maximally different
+			else p = p/len;
+		} else {
+			// weighted: accumulate per-position weights instead of counts
+			for (int i = 0; i < pS1Base->seqLen() ; ++i) {
+				if ((*pS1Base)[i]<0 || (*pS2Base)[i]<0) continue; //gaps and missing data.
+				len += (*weights)[i];
+				if ((*pS1Base)[i] != (*pS2Base)[i]) p+=((*weights)[i]);
+			}
+			if (len==0) p=1;
+			else {
+				p = p/len;
+			}
+		}
+		if (pS1Base != &s1) {
+			// release the temporary base-alphabet copies
+			delete pS1Base;
+			delete pS2Base;
+		}
+
+		const MDOUBLE inLog = 1 - (MDOUBLE)alphabetSize*p/(alphabetSize-1.0);
+		if (inLog<=0) {
+//			LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG");
+//			LOG(6,<<" p="<<p<<endl);
+			return MAXDISTANCE;
+		}
+		MDOUBLE dis = -1.0 * (1.0 - 1.0/alphabetSize) * log (inLog);
+		return dis;
+	}
+};
+
+// Legacy Jukes-Cantor variant kept for comparison: unlike jcDistance it does
+// NOT skip gaps/missing data, so a gap opposite a letter counts as a
+// difference and every position contributes to the length.
+class jcDistanceOLD : public distanceMethod {
+// in this version, if you have
+// a gap in front of a letter - it will be taken as a different
+// and also the length of the pairwise comparison will be increased.
+// in case of a gap-gap, it won't be a difference, but the length will
+// be increase.
+
+private:
+	const int _alphabetSize; // fixed at construction (no sequence conversion here)
+
+public:
+	explicit jcDistanceOLD(const int alphabetSize) : _alphabetSize(alphabetSize) {
+	}
+	explicit jcDistanceOLD(const jcDistanceOLD& other) : _alphabetSize(other._alphabetSize) {
+	}
+	virtual jcDistanceOLD* clone() const{ return new jcDistanceOLD(*this);}
+
+	// Same JC formula as jcDistance, but over ALL positions (gaps included).
+	const MDOUBLE giveDistance( const sequence& s1,
+		const sequence& s2,
+		const vector<MDOUBLE> * weights,
+		MDOUBLE* score=NULL) const {//score is not used here
+//		const MDOUBLE MAXDISTANCE=2.0;
+		const MDOUBLE MAXDISTANCE=15;
+
+		MDOUBLE p =0;
+		MDOUBLE len=0.0;
+		if (weights == NULL) {
+			for (int i = 0; i < s1.seqLen() ; ++i) {
+				//if (s1[i]<0 || s2[i]<0) continue; //gaps and missing data.
+				len+=1.0;
+				if (s1[i] != s2[i]) p++;
+			}
+			if (len==0) p=1; // empty sequences: treat as maximally different
+			else p = p/len;
+		} else {
+			for (int i = 0; i < s1.seqLen() ; ++i) {
+				//if (s1[i]<0 || s2[i]<0) continue; //gaps and missing data.
+				len += (*weights)[i];
+				if (s1[i] != s2[i]) p+=((*weights)[i]);
+			}
+			if (len==0) p=1;
+			else {
+				p = p/len;
+			}
+		}
+		const MDOUBLE inLog = 1 - (MDOUBLE)_alphabetSize*p/(_alphabetSize-1.0);
+		if (inLog<=0) {
+//			LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG");
+//			LOG(6,<<" p="<<p<<endl);
+			return MAXDISTANCE;
+		}
+		MDOUBLE dis = -1.0 * (1.0 - 1.0/_alphabetSize) * log (inLog);
+		return dis;
+	}
+};
+#endif
diff --git a/libs/phylogeny/jones.dat.q b/libs/phylogeny/jones.dat.q
new file mode 100644
index 0000000..a4d7349
--- /dev/null
+++ b/libs/phylogeny/jones.dat.q
@@ -0,0 +1,131 @@
+" 58 "
+" 54 45 "
+" 81 16 528 "
+" 56 113 34 10 "
+" 57 310 86 49 9 "
+" 105 29 58 767 5 323 "
+" 179 137 81 130 59 26 119 "
+" 27 328 391 112 69 597 26 23 "
+" 36 22 47 11 17 9 12 6 16 "
+" 30 38 12 7 23 72 9 6 56 229 "
+" 35 646 263 26 7 292 181 27 45 21 14 "
+" 54 44 30 15 31 43 18 14 33 479 388 65 "
+" 15 5 10 4 78 4 5 5 40 89 248 4 43 "
+" 194 74 15 15 14 164 18 24 115 10 102 21 16 17 "
+" 378 101 503 59 223 53 30 201 73 40 59 47 29 92 285 "
+" 475 64 232 38 42 51 32 33 46 245 25 103 226 12 118 477 "
+" 9 126 8 4 115 18 10 55 8 9 52 10 24 53 6 35 12 "
+" 11 20 70 46 209 24 7 8 573 32 24 8 18 536 10 63 21 71 "
+" 298 17 16 31 62 20 45 47 11 961 180 14 323 62 23 38 112 25 16 "
+" 0.076748 0.051691 0.042645 0.051544 0.019803 0.040752 0.061830 "
+" 0.073152 0.022944 0.053761 0.091904 0.058676 0.023826 0.040126 "
+" 0.050901 0.068765 0.058565 0.014261 0.032102 0.066005 "
+" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
+" S_ij = S_ji and PI_i for the Jones model based on the SWISSPROT "
+" Version 22 data. "
+" Rate Q_ij=S_ij*PI_j. "
+" The rest of the file is not used. "
+" Prepared by Z. Yang, March 1995. "
+" See the following reference for notation: "
+" Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and "
+" applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611. "
+" ----------------------------------------------------------------------- "
+" 426 "
+" 333 185 "
+" 596 80 2134 "
+" 159 214 54 20 "
+" 332 1203 277 192 14 "
+" 920 176 286 4497 11 1497 "
+" 1853 954 470 907 158 144 999 "
+" 88 716 704 244 58 1027 69 71 "
+" 286 114 198 59 34 37 72 44 37 "
+" 394 332 88 62 79 497 101 80 217 2086 "
+" 294 3606 1209 148 15 1289 1210 215 115 121 140 "
+" 185 100 56 34 27 78 50 47 33 1129 1567 167 "
+" 84 21 33 16 115 14 23 28 69 354 1690 17 76 "
+" 1395 360 64 74 27 629 106 171 249 54 882 117 36 66 "
+" 3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839 "
+" 3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527 "
+" 19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18 "
+" 49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60 "
+" 2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63 "
+" A R N D C Q E G H I L K M F P S T W Y V "
+" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
+" Accepted point mutations (x10), similar to Figure 80 of Dayhoff et "
+" al. (1978). SwissProt version 22 data. "
+" ------------------------------------------------------------------------------ "
+" 256458 426 333 596 159 332 920 1853 88 286 394 294 185 84 1395 3664 3920 19 49 2771 "
+" 426 182302 185 80 214 1203 176 954 716 114 332 3606 100 21 360 661 360 171 62 111 "
+" 333 185 150772 2134 54 277 286 470 704 198 88 1209 56 33 64 2706 1069 9 178 86 "
+" 596 80 2134 178390 20 192 4497 907 244 59 62 148 34 16 74 390 216 5 142 195 "
+" 159 214 54 20 68120 14 11 158 58 34 79 15 27 115 27 559 91 60 246 150 "
+" 332 1203 277 192 14 139546 1497 144 1027 37 497 1289 78 14 629 278 227 20 59 100 "
+" 920 176 286 4497 11 1497 218432 999 69 72 101 1210 50 23 106 236 217 17 26 336 "
+" 1853 954 470 907 158 144 999 255274 71 44 80 215 47 28 171 1861 266 106 34 420 "
+" 88 716 704 244 58 1027 69 71 77124 37 217 115 33 69 249 214 116 5 777 32 "
+" 286 114 198 59 34 37 72 44 37 191018 2086 121 1129 354 54 274 1420 13 102 6260 "
+" 394 332 88 62 79 497 101 80 217 2086 319504 140 1567 1690 882 691 256 127 131 2020 "
+" 294 3606 1209 148 15 1289 1210 215 115 121 140 206568 167 17 117 351 653 16 30 99 "
+" 185 100 56 34 27 78 50 47 33 1129 1567 167 84670 76 36 89 579 15 25 937 "
+" 84 21 33 16 115 14 23 28 69 354 1690 17 76 143088 66 468 54 56 1276 307 "
+" 1395 360 64 74 27 629 106 171 249 54 882 117 36 66 175488 1839 653 8 32 142 "
+" 3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839 234536 3527 64 259 320 "
+" 3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527 203636 18 73 805 "
+" 19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18 50486 60 44 "
+" 49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60 114728 63 "
+" 2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63 223724 "
+" Observed difference counts from pairwise comparisons, with ancestral sequences "
+" constructed by parsimony. F(t) = PI*P(t). "
+" Based on the SwissProt 22 data, kindly provided by D. Jones (Jones et al. 1992) "
+" ------------------------------------------------------------------------------- "
+" Ala 0.98754 0.00030 0.00023 0.00042 0.00011 0.00023 0.00065 0.00130 0.00006 0.00020 0.00028 0.00021 0.00013 0.00006 0.00098 0.00257 0.00275 0.00001 0.00003 0.00194 "
+" Arg 0.00044 0.98974 0.00019 0.00008 0.00022 0.00125 0.00018 0.00099 0.00075 0.00012 0.00035 0.00376 0.00010 0.00002 0.00037 0.00069 0.00037 0.00018 0.00006 0.00012 "
+" Asn 0.00042 0.00023 0.98720 0.00269 0.00007 0.00035 0.00036 0.00059 0.00089 0.00025 0.00011 0.00153 0.00007 0.00004 0.00008 0.00342 0.00135 0.00001 0.00022 0.00011 "
+" Asp 0.00062 0.00008 0.00223 0.98954 0.00002 0.00020 0.00470 0.00095 0.00025 0.00006 0.00006 0.00015 0.00004 0.00002 0.00008 0.00041 0.00023 0.00001 0.00015 0.00020 "
+" Cys 0.00043 0.00058 0.00015 0.00005 0.99432 0.00004 0.00003 0.00043 0.00016 0.00009 0.00021 0.00004 0.00007 0.00031 0.00007 0.00152 0.00025 0.00016 0.00067 0.00041 "
+" Gln 0.00044 0.00159 0.00037 0.00025 0.00002 0.98955 0.00198 0.00019 0.00136 0.00005 0.00066 0.00170 0.00010 0.00002 0.00083 0.00037 0.00030 0.00003 0.00008 0.00013 "
+" Glu 0.00080 0.00015 0.00025 0.00392 0.00001 0.00130 0.99055 0.00087 0.00006 0.00006 0.00009 0.00105 0.00004 0.00002 0.00009 0.00021 0.00019 0.00001 0.00002 0.00029 "
+" Gly 0.00136 0.00070 0.00035 0.00067 0.00012 0.00011 0.00074 0.99350 0.00005 0.00003 0.00006 0.00016 0.00003 0.00002 0.00013 0.00137 0.00020 0.00008 0.00003 0.00031 "
+" His 0.00021 0.00168 0.00165 0.00057 0.00014 0.00241 0.00016 0.00017 0.98864 0.00009 0.00051 0.00027 0.00008 0.00016 0.00058 0.00050 0.00027 0.00001 0.00182 0.00008 "
+" Ile 0.00029 0.00011 0.00020 0.00006 0.00003 0.00004 0.00007 0.00004 0.00004 0.98729 0.00209 0.00012 0.00113 0.00035 0.00005 0.00027 0.00142 0.00001 0.00010 0.00627 "
+" Leu 0.00023 0.00019 0.00005 0.00004 0.00005 0.00029 0.00006 0.00005 0.00013 0.00122 0.99330 0.00008 0.00092 0.00099 0.00052 0.00040 0.00015 0.00007 0.00008 0.00118 "
+" Lys 0.00027 0.00331 0.00111 0.00014 0.00001 0.00118 0.00111 0.00020 0.00011 0.00011 0.00013 0.99100 0.00015 0.00002 0.00011 0.00032 0.00060 0.00001 0.00003 0.00009 "
+" Met 0.00042 0.00023 0.00013 0.00008 0.00006 0.00018 0.00011 0.00011 0.00007 0.00255 0.00354 0.00038 0.98818 0.00017 0.00008 0.00020 0.00131 0.00003 0.00006 0.00212 "
+" Phe 0.00011 0.00003 0.00004 0.00002 0.00015 0.00002 0.00003 0.00004 0.00009 0.00047 0.00227 0.00002 0.00010 0.99360 0.00009 0.00063 0.00007 0.00008 0.00171 0.00041 "
+" Pro 0.00148 0.00038 0.00007 0.00008 0.00003 0.00067 0.00011 0.00018 0.00026 0.00006 0.00093 0.00012 0.00004 0.00007 0.99270 0.00194 0.00069 0.00001 0.00003 0.00015 "
+" Ser 0.00287 0.00052 0.00212 0.00031 0.00044 0.00022 0.00018 0.00146 0.00017 0.00021 0.00054 0.00027 0.00007 0.00037 0.00144 0.98556 0.00276 0.00005 0.00020 0.00025 "
+" Thr 0.00360 0.00033 0.00098 0.00020 0.00008 0.00021 0.00020 0.00024 0.00011 0.00131 0.00024 0.00060 0.00053 0.00005 0.00060 0.00324 0.98665 0.00002 0.00007 0.00074 "
+" Trp 0.00007 0.00065 0.00003 0.00002 0.00023 0.00008 0.00006 0.00040 0.00002 0.00005 0.00048 0.00006 0.00006 0.00021 0.00003 0.00024 0.00007 0.99686 0.00023 0.00017 "
+" Tyr 0.00008 0.00010 0.00030 0.00024 0.00041 0.00010 0.00004 0.00006 0.00130 0.00017 0.00022 0.00005 0.00004 0.00214 0.00005 0.00043 0.00012 0.00010 0.99392 0.00011 "
+" Val 0.00226 0.00009 0.00007 0.00016 0.00012 0.00008 0.00027 0.00034 0.00003 0.00511 0.00165 0.00008 0.00076 0.00025 0.00012 0.00026 0.00066 0.00004 0.00005 0.98761 "
+" P(0.01), amino acid exchange data generated from SWISSPROT Release 22.0 "
+" Ref. Jones D.T., Taylor W.R. and Thornton J.M. (1992) CABIOS 8:275-282 "
+" Usable sequences: 23824 "
+" Final alignments: 5437 "
+" Accepted point mutations: 92883 "
+" A R N D C Q E G H I L K M F P S T W Y V "
+" 0.0767477 100 "
+" 0.0516907 82.3263 "
+" 0.0426448 102.697 "
+" 0.0515445 83.8924 "
+" 0.0198027 45.6097 "
+" 0.0407523 83.8825 "
+" 0.0618296 75.7914 "
+" 0.0731516 52.1273 "
+" 0.0229438 91.1374 "
+" 0.0537609 101.99 "
+" 0.0919042 53.7672 "
+" 0.0586762 72.2308 "
+" 0.0238262 94.8144 "
+" 0.0401265 51.3146 "
+" 0.0509007 58.5874 "
+" 0.0687652 115.899 "
+" 0.0585647 107.092 "
+" 0.0142613 25.2297 "
+" 0.0321015 48.7629 "
+" 0.0660051 99.4571 "
+" "
+" Normalized Relative "
+" frequency mutabilities "
+" (SUM m*f) = 80.240436 "
+" ------------------------------------------- "
diff --git a/libs/phylogeny/khTest.cpp b/libs/phylogeny/khTest.cpp
new file mode 100644
index 0000000..a266498
--- /dev/null
+++ b/libs/phylogeny/khTest.cpp
@@ -0,0 +1,56 @@
+// $Id: khTest.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "logFile.h"
+#include "errorMsg.h"
+#include <cmath>
+
+// Kishino-Hasegawa (KH) test comparing the per-site likelihoods of two
+// competing trees/models.
+// likelihoodVal: exactly 2 rows (one per tree); each row holds the
+//                per-site likelihoods (log values when logValue==true).
+// diffNumOfFreeParam: difference in the number of free parameters,
+//                used only for the "delta AIC" log line.
+// Results (delta L, delta AIC, variance, std, z statistic) are written
+// to the log at level 1; nothing is returned.
+void makekhTest(const VVdouble & likelihoodVal, MDOUBLE diffNumOfFreeParam) {
+	// assume that 2 trees are here.
+	bool logValue = true; // input values are already log-likelihoods
+
+	// BUG FIX: the original error message was garbled ("errir un ").
+	if (likelihoodVal.size() !=2) {errorMsg::reportError("makekhTest: expected per-site likelihoods for exactly 2 trees");}
+
+	const int n = likelihoodVal[0].size();
+	// BUG FIX: guard n>=2 — the variance estimate below divides by n-1.
+	if (n < 2) {errorMsg::reportError("makekhTest: need at least 2 positions");}
+
+	MDOUBLE tmp1a = 0.0; // total log-likelihood of tree 1
+	MDOUBLE tmp1b = 0.0; // total log-likelihood of tree 0
+	MDOUBLE tmp1 = 0.0;  // difference of the totals
+	MDOUBLE tmp2 = 0.0;  // per-site centered difference
+	MDOUBLE sum_k = 0.0; // variance accumulator
+
+	int k;
+	for (k=0; k<n; ++k) {
+		if (logValue==false) {
+			tmp1a += log(likelihoodVal[1][k]);
+			tmp1b += log(likelihoodVal[0][k]);
+		}
+		else {
+			tmp1a += likelihoodVal[1][k];
+			tmp1b += likelihoodVal[0][k];
+		}
+	}
+	tmp1 = tmp1a-tmp1b;
+	MDOUBLE difL = tmp1;
+	// variance of the per-site log-likelihood differences around their mean
+	for (k=0; k<n; ++k) {
+		if (logValue==false) tmp2 = log(likelihoodVal[1][k])-log(likelihoodVal[0][k])-tmp1/static_cast<MDOUBLE>(n);
+		else tmp2 = likelihoodVal[1][k]-likelihoodVal[0][k]-tmp1/static_cast<MDOUBLE>(n);
+		sum_k += (tmp2*tmp2);
+	}
+	sum_k = sum_k * static_cast<MDOUBLE>(n) / static_cast<MDOUBLE>(n-1); // unbiased scaling
+	LOG(1,<<" L1= "<<tmp1a<<" L2= "<<tmp1b<<endl);
+	LOG(1,<<" delta L is "<<difL<<endl);
+	LOG(1,<<" delta AIC is "<<difL-diffNumOfFreeParam<<endl);
+	LOG(1,<<" var is "<<sum_k<<endl);
+	LOG(1,<<" std is "<<sqrt(sum_k)<<endl);
+	LOG(1,<<" z is "<<(difL -diffNumOfFreeParam )/sqrt(sum_k)<<endl);
+
+}
+
diff --git a/libs/phylogeny/khTest.h b/libs/phylogeny/khTest.h
new file mode 100644
index 0000000..4b21fc9
--- /dev/null
+++ b/libs/phylogeny/khTest.h
@@ -0,0 +1,10 @@
+// $Id: khTest.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___KH_TEST
+#define ___KH_TEST
+
+void makekhTest(const VVdouble & likelihoodVal, MDOUBLE diffNumOfFreeParam=0);
+
+
+#endif
+
diff --git a/libs/phylogeny/likeDist.cpp b/libs/phylogeny/likeDist.cpp
new file mode 100644
index 0000000..648105e
--- /dev/null
+++ b/libs/phylogeny/likeDist.cpp
@@ -0,0 +1,379 @@
+// $Id: likeDist.cpp 5956 2009-03-15 10:00:36Z adist $
+
+#include "likeDist.h"
+#include "numRec.h"
+#include "someUtil.h"
+
+// Returns a modifiable reference to the internal stochasticProcess.
+// Only legal when this likeDist was built with the non-const
+// constructor (which sets _nonConstSpPtr); otherwise we abort with an
+// error rather than hand out a non-const view of a const object.
+stochasticProcess& likeDist::getNonConstStochasticProcess() {
+ if (!_nonConstSpPtr) {
+ errorMsg::reportError("likeDist::getNonConstStochasticProcess: Can't give non-const stochasticProcess because the stochasticProcess that was given to the constructor of this likeDist object was const");
+ }
+ return *_nonConstSpPtr;
+}
+
+// ======================= functors needed for the computations =============
+
+// Functor returning MINUS the log-likelihood of the sequence pair
+// (_s1,_s2) at a given pairwise distance, so it can be fed directly to
+// the dbrent minimizer. All members are held by reference/pointer and
+// must outlive the functor.
+class C_evalLikeDistDirect{
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const vector<MDOUBLE> * _weights; // optional per-position weights (may be NULL)
+public:
+ C_evalLikeDistDirect(const stochasticProcess& inS1,
+ const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights): _sp(inS1),_s1(s1),_s2(s2),_weights(weights) {};
+
+ // Negated because dbrent minimizes while we want to maximize.
+ MDOUBLE operator() (MDOUBLE dist) const {
+ return -likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,dist,_weights);
+ }
+};
+
+// Computes the log-likelihood of observing the aligned pair (s1,s2)
+// separated by branch length `dist` under stochasticProcess sp,
+// summing over rate categories at each position.
+// weights: optional per-position weights (NULL means weight 1.0).
+// Positions where both sequences are unknown are skipped.
+MDOUBLE likeDist::evalLikelihoodForDistance(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const MDOUBLE dist,
+ const vector<MDOUBLE> * weights) {
+ MDOUBLE sumL=0.0; // sum of log likelihoods
+ MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
+ for (int pos=0; pos < s1.seqLen(); ++pos){
+ if (s1.isUnknown(pos) && s2.isUnknown(pos)) continue; // the case of two unknowns
+ posLikelihood = 0.0;
+ if (s1.isUnknown(pos) && s2.isSpecific(pos)) {
+ // this is the more complicated case, where s1 = ?, s2 = specific
+ // the '?' integrates out, leaving just the stationary frequency
+ posLikelihood = sp.freq(s2[pos]);
+ } else if (s2.isUnknown(pos) && s1.isSpecific(pos)) {
+ posLikelihood = sp.freq(s1[pos]);
+ } else {
+ for (int rateCategor = 0; rateCategor<sp.categories(); ++rateCategor) {
+ MDOUBLE rate = sp.rates(rateCategor);
+ MDOUBLE pij= 0.0;
+ if (s1.isSpecific(pos) && s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j
+ pij= sp.Pij_t(s1[pos],s2[pos],dist*rate);
+ posLikelihood += pij * sp.freq(s1[pos])*sp.ratesProb(rateCategor);
+ } else {// this is the most complicated case, when you have
+ // combinations of letters, for example B in one
+ // sequence and ? in the other.
+ for (int iS1 =0; iS1< sp.alphabetSize(); ++iS1) {
+ for (int iS2 =0; iS2< sp.alphabetSize(); ++iS2) {
+ if ((s1.getAlphabet()->relations(s1[pos],iS1)) &&
+ (s2.getAlphabet()->relations(s2[pos],iS2))) {
+ posLikelihood += sp.freq(iS1)*sp.Pij_t(iS1,iS2,dist*rate)*sp.ratesProb(rateCategor);
+ }
+ }
+ }
+ }
+ } // end of for on the rates
+ }
+ // NOTE(review): this assert disappears under NDEBUG, after which a
+ // zero posLikelihood would yield log(0) = -inf — consider a hard check.
+ assert(posLikelihood!=0.0);
+ sumL += log(posLikelihood)*(weights ? (*weights)[pos]:1.0);
+ }
+ return sumL;
+};
+
+// Functor returning MINUS the derivative (w.r.t. distance) of the
+// log-likelihood of the sequence pair (_s1,_s2); companion of
+// C_evalLikeDistDirect for dbrent. Members are held by reference and
+// must outlive the functor.
+class C_evalLikeDistDirect_d{ // derivative.
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const vector<MDOUBLE> * _weights; // optional per-position weights (may be NULL)
+public:
+ C_evalLikeDistDirect_d(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights): _sp(sp),_s1(s1),_s2(s2),_weights(weights) {};
+
+ MDOUBLE operator() (MDOUBLE dist) const {
+ MDOUBLE sumL=0.0; // sum of log likelihoods
+ MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
+ MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position
+ for (int pos=0; pos < _s1.seqLen(); ++pos){
+ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
+ posLikelihood = 0.0;
+ posLikelihood_d = 0.0;
+ if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
+ // this is the more complicated case, where s1 = ?, s2 = specific
+ // frequency term is constant in dist, so its derivative is 0
+ posLikelihood = _sp.freq(_s2[pos]);
+ posLikelihood_d =0.0;
+ } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
+ posLikelihood = _sp.freq(_s1[pos]);
+ posLikelihood_d =0.0;
+ } else {
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+ MDOUBLE pij= 0.0;
+ MDOUBLE dpij=0.0;
+ if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
+ //simple case, where AA i is changing to AA j
+ pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
+ dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate;
+ MDOUBLE tmp = _sp.freq(_s1[pos])*_sp.ratesProb(rateCategor);
+ posLikelihood += pij *tmp;
+ posLikelihood_d += dpij*tmp;
+ } else {// this is the most complicated case, when you have combinations of letters,
+ // for example B in one sequence and ? in the other.
+ for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
+ for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
+ if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
+ (_s2.getAlphabet()->relations(_s2[pos],iS2))) {
+ // FIX: renamed from `exp`, which shadowed std::exp from <cmath>
+ MDOUBLE freqRateProb = _sp.freq(iS1)*_sp.ratesProb(rateCategor);
+ posLikelihood += freqRateProb* _sp.Pij_t(iS1,iS2,dist*rate);
+ posLikelihood_d += freqRateProb * _sp.dPij_dt(iS1,iS2,dist*rate)*rate;
+ }
+ }
+ }
+ }
+ }// end of for rate categories
+ }
+ assert(posLikelihood>0.0);
+ // d/dt log(L) = L' / L, accumulated with the per-position weight
+ sumL += (posLikelihood_d/posLikelihood)*(_weights ? (*_weights)[pos]:1.0);
+ }
+ return -sumL;
+ };
+};
+
+
+// THIS FUNCTION EVALUATES THE LIKELIHOOD GIVEN THE DISTANCE
+// THIS FUNCTION EVALUATES THE LIKELIHOOD GIVEN THE DISTANCE
+// Convenience wrapper: evaluates the (positive) log-likelihood of the
+// pair at a fixed distance, with no weights, by negating the functor
+// that is otherwise used for minimization.
+MDOUBLE likeDist::evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2,
+ const MDOUBLE dis2evaluate) {
+ C_evalLikeDistDirect Cev(_sp,s1,s2,NULL);
+ return -Cev.operator ()(dis2evaluate);
+}
+
+// Builds a pairwise count table (ctc) from the two sequences and then
+// estimates the distance from it (pairwise-EM style). Only valid for a
+// homogeneous model (a single rate category); otherwise reports an error.
+// NOTE(review): the counts loop does not skip gap/unknown codes —
+// presumably callers pass gap-free sequences; verify against callers.
+MDOUBLE likeDist::giveDistanceThroughCTC( const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score) const {
+ // only in the case of homogenous model - work through pairwise EM like
+ countTableComponentGam ctc;
+ if (_sp.categories() != 1) {
+ errorMsg::reportError("this function only work for homogenous model.");
+ }
+ ctc.countTableComponentAllocatePlace(s1.getAlphabet()->size(),1);
+ for (int i=0; i<s1.seqLen(); ++i) {
+ ctc.addToCounts(s1[i],s2[i],0,weights?(*weights)[i]:1.0);
+ }
+ MDOUBLE resL =0;
+ return giveDistance(ctc,resL);
+}
+
+// Estimates the branch length from expected transition counts.
+// resQ receives "Q", the edge's contribution to the expected
+// log-likelihood (not the log-likelihood itself — see header comment).
+// Currently delegates to the Brent-based optimizer.
+const MDOUBLE likeDist::giveDistance(const countTableComponentGam& ctc,
+ MDOUBLE& resQ,
+ const MDOUBLE initialGuess) const {
+ //return giveDistanceNR(ctc,resL,initialGuess);
+ return giveDistanceBrent(ctc,resQ,initialGuess);
+}
+
+// Maximizes Q over the branch length using derivative-aware Brent
+// minimization (dbrent) on [-Q], bracketed by [0, _maxPairwiseDistance]
+// with the caller's initial guess in between. resL receives the
+// maximized Q value; the optimal distance is returned.
+const MDOUBLE likeDist::giveDistanceBrent(const countTableComponentGam& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess) const {
+ const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
+ MDOUBLE dist=-1.0; // filled in by dbrent
+ resL = -dbrent(ax,bx,cx,
+ C_evalLikeDist(ctc,_sp,_unObservableData_p),
+ C_evalLikeDist_d(ctc,_sp),
+ tol,
+ &dist);
+ return dist;
+}
+
+// Finds a zero of f in [low,high] by Newton-Raphson, falling back to
+// bisection whenever the NR step would leave the current bracket.
+// current: starting point; tol: convergence tolerance on |f| / bracket
+// size; max_it: iteration cap. zeroFound is set to 0 (and 0 returned)
+// when f has the same sign at both ends, i.e. no bracketed root.
+template <typename regF, typename dF>
+MDOUBLE myNRmethod(MDOUBLE low, MDOUBLE current, MDOUBLE high, regF f,
+ dF df, const MDOUBLE tol, const int max_it, int & zeroFound) { // finding zero of a function.
+ zeroFound = 1;
+ MDOUBLE currentF = f(current);
+ if (fabs(currentF)<tol) return current;
+ MDOUBLE lowF = f(low);
+ MDOUBLE highF = f(high);
+ if (((lowF>0) && (highF>0)) || ((lowF<0) && (highF<0))) {// unable to find a zero
+ zeroFound = 0;
+ return 0;
+ }
+ if (lowF>0) {// fixing things to be in the right order.
+ MDOUBLE tmp = low;
+ low = high;
+ high = tmp;
+ tmp = lowF;
+ lowF = highF;
+ highF = tmp;
+ }
+ // shrink the bracket so the zero lies between current and low or high
+ if (currentF>0) {
+ high = current;
+ highF = currentF;
+ } else {
+ low = current;
+ lowF = currentF;
+ } // now the zero is between current and either low or high.
+
+ MDOUBLE currentIntervalSize = fabs(low-high);
+
+ // we have to decide if we do NR or devide the interval by two:
+ // we want to check if the next NR step is within our interval
+ // recall the the next NR guess is Xn+1 = Xn - f(Xn) / f(Xn+1)
+ // So we want (current - currentF/currentDF) to be between low and high
+ for (int i=0 ; i < max_it; ++i) {
+ MDOUBLE currentDF = df(current);
+ MDOUBLE newGuess = current - currentF/currentDF;
+ if ((newGuess<low && newGuess> high) || (newGuess>low && newGuess< high)) {
+ // in this case we should do a NR step.
+ current = newGuess;
+ currentF = f(current);
+ if (currentF > 0){
+ high = current;
+ highF = currentF;
+ } else {
+ low = current;
+ lowF = currentF;
+ }
+
+ currentIntervalSize =fabs (high-low);
+ if (currentIntervalSize < tol) {
+ return current;
+ }
+ //LOG(5,<<"NR: low= "<<low<<" high= "<<high<<endl);
+ }
+ else { // bisection
+ currentIntervalSize /= 2.0;
+ current = (low+high)/2.0;
+ currentF = f(current);
+ if (currentF > 0){
+ high = current;
+ highF = currentF;
+ } else {
+ low = current;
+ lowF = currentF;
+ }
+ //LOG(5,<<"BIS: low= "<<low<<" high= "<<high<<endl);
+ if (currentIntervalSize < tol) {
+ return current;
+ }
+
+ }
+ }
+ // BUG FIX: message typo corrected ("to many" -> "too many");
+ // the dead oldIntervalSize variable (written, never read) was removed.
+ errorMsg::reportError("too many iterations in myNR function");
+ return 0;
+}
+
+// Alternative distance estimator: finds the zero of dQ/dt with the
+// Newton-Raphson/bisection helper above (first and second derivative
+// functors). Falls back to the initial guess when no zero is bracketed.
+// resL is currently unused by this path.
+const MDOUBLE likeDist::giveDistanceNR( const countTableComponentGam& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess) const {
+ //change bx so that it will be the current branch length!
+ const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
+ // LOG(5,<<"===================================================\n");
+ MDOUBLE dist=-1.0;
+ int zeroFound = 0;
+ dist = myNRmethod(ax,bx,cx,
+ C_evalLikeDist_d(ctc,_sp),
+ C_evalLikeDist_d2(ctc,_sp),
+ tol,
+ 100,
+ zeroFound);// max it for NR;
+ if (zeroFound == 0) {// there was an error finding a zero
+ dist = bx; // fall back to the initial guess
+ }
+
+ return dist;
+}
+
+
+
+
+
+
+
+
+
+
+
+/*
+
+
+
+
+const MDOUBLE likeDist::giveDistance( // the NR version.
+ const countTableComponentGam& ctc,
+ MDOUBLE& resL) const {
+ LOG(5,<<"=============="<<endl);
+ MDOUBLE oldGuess=0.05; // move to parameters.
+ if (oldGuess<0) oldGuess=0.05; // move up.
+ int max_it = 100;
+ MDOUBLE oldDist =0;
+ MDOUBLE currentDist =oldGuess;
+ MDOUBLE newDer =VERYBIG;
+ MDOUBLE oldDer =VERYBIG;
+ //const MDOUBLE ax=0,bx=1.0,cx=_maxPairwiseDistance,tol=_toll;
+ for (int i=0; i < max_it; ++i){
+ MDOUBLE sumDL=0.0;
+ MDOUBLE sumDL2=0.0;
+ for (int alph1=0; alph1 < ctc.alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < ctc.alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_s1.categories(); ++rateCategor) {
+ MDOUBLE rate = _s1.rates(rateCategor);
+
+ MDOUBLE pij= _s1.Pij_t(alph1,alph2,currentDist*rate);
+ MDOUBLE dpij = _s1.dPij_dt(alph1,alph2,currentDist*rate);
+ MDOUBLE dpij2 = _s1.d2Pij_dt2(alph1,alph2,currentDist*rate);
+ if (pij==0) {
+ pij = 0.000000001;
+ dpij = 0.000000001;
+ }
+ sumDL+= ctc.getCounts(alph1,alph2,rateCategor)*dpij
+ *rate/pij;
+ sumDL2+= ctc.getCounts(alph1,alph2,rateCategor)*rate*(pij*dpij2-dpij *dpij)
+ /(pij*pij);
+ }
+ }
+ }
+ oldDer = newDer;
+ newDer = sumDL;
+ LOG(5,<<"\ndistance = "<<currentDist<<endl);
+ LOG(5,<<"derivation = "<<sumDL<<endl);
+ LOG(5,<<"sec derivation = "<<sumDL2<<endl);
+ oldDist = currentDist;
+ if ((fabs(newDer) < fabs(oldDer)) && (sumDL2 < 0)) {
+ currentDist = currentDist - newDer/sumDL2;
+ }
+ else {
+ currentDist = currentDist / 2;
+ }
+ MDOUBLE epsilonForDeriv = 0.001;// move up
+ if (fabs(newDer) < epsilonForDeriv) break;
+
+ }
+
+ return currentDist;
+}*/
+
+// Returns the ML distance between the two sequences, maximizing the
+// direct pairwise likelihood with dbrent. The Jukes-Cantor distance
+// seeds the bracket's interior point; if score is non-NULL it receives
+// the maximized log-likelihood.
+const MDOUBLE likeDist::giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score) const {
+
+ const MDOUBLE ax=0, cx=_maxPairwiseDistance,tol=_toll;
+ MDOUBLE bx=_jcDist.giveDistance(s1,s2,weights,score)/*=1.0*/;
+ // (x==x) is false only for NaN, so this detects a failed JC estimate
+ if (!(bx==bx)) bx = 1.0; // safety check that the JC distance did not return nan (not a number)
+ MDOUBLE dist=-1.0;
+ MDOUBLE resL = -dbrent(ax,bx,cx,
+ C_evalLikeDistDirect(_sp,s1,s2,weights),
+ C_evalLikeDistDirect_d(_sp,s1,s2,weights),
+ tol,
+ &dist);
+ if (score) *score = resL;
+ return dist;
+}
+
+// Returns the (positive) log-likelihood of the pair at a fixed,
+// caller-supplied distance — no optimization is performed.
+const MDOUBLE likeDist::giveLikelihood(const sequence& s1,
+ const sequence& s2,
+ MDOUBLE distance,
+ const vector<MDOUBLE> * weights) const
+{
+
+
+ C_evalLikeDistDirect evalDis(_sp,s1,s2,weights);
+ return -evalDis(distance); // negate: the functor returns -logL
+
+}
diff --git a/libs/phylogeny/likeDist.h b/libs/phylogeny/likeDist.h
new file mode 100644
index 0000000..43c5b0f
--- /dev/null
+++ b/libs/phylogeny/likeDist.h
@@ -0,0 +1,203 @@
+// $Id: likeDist.h 6107 2009-04-26 12:22:58Z cohenofi $
+
+#ifndef ___LIKE_DIST_H
+#define ___LIKE_DIST_H
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "distanceMethod.h"
+#include "stochasticProcess.h"
+#include "logFile.h"
+#include "jcDistance.h"
+#include "unObservableData.h"
+#include <cmath>
+using namespace std;
+
+// likeDist: maximum-likelihood pairwise distance estimation under a
+// given stochasticProcess (substitution model + rate categories).
+// The stochasticProcess is held by reference and must outlive this
+// object.
+class likeDist : public distanceMethod {
+public:
+ // WARNING: the stochasticProcess is NOT copied. The same object is used
+ explicit likeDist(const stochasticProcess& sp,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0,
+ unObservableData* unObservableData_p=NULL)
+ : _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_unObservableData_p(unObservableData_p) {}
+
+ // BUG FIX: the copy constructor previously left _unObservableData_p
+ // uninitialized, so copies carried a wild pointer that the distance
+ // functors later test and dereference.
+ likeDist(const likeDist& other)
+ : _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_jcDist(other._jcDist),_unObservableData_p(other._unObservableData_p) {}
+
+ virtual likeDist* clone() const {return new likeDist(*this);}
+ // This constructor allows non-const stochasticProcess so that likeDist will be able to change alpha, etc.
+ // BUG FIX: _unObservableData_p is now explicitly NULL-initialized
+ // (it was previously left uninitialized by this constructor).
+ explicit likeDist(stochasticProcess& sp,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0)
+ : _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_unObservableData_p(NULL) {}
+
+ // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
+ // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
+ // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
+ const MDOUBLE giveDistance(const countTableComponentGam& ctc,
+ MDOUBLE& resQ,
+ const MDOUBLE initialGuess= 0.03) const; // initial guess
+
+ // given two sequences, it evaluates the log likelihood.
+ MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1,
+ const sequence& s2,
+ const MDOUBLE dis2evaluate);
+
+ // returns the estimated ML distance between the 2 sequences.
+ // if score is given, it will be the log-likelihood.
+ const MDOUBLE giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL) const;
+
+ // this function creates a countTableComponent (ctc) from the two sequences.
+ // it then computes the distance from this ctc.
+ // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge
+ // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
+ // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
+ MDOUBLE giveDistanceThroughCTC(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL) const;
+
+ // evaluates the log-likelihood at a fixed, caller-supplied distance.
+ const MDOUBLE giveLikelihood(const sequence& s1,
+ const sequence& s2,
+ MDOUBLE distance,
+ const vector<MDOUBLE> * weights=NULL) const;
+
+ // return the stochasticProcess
+ const stochasticProcess& getStochasticProcess() const {return _sp;}
+ stochasticProcess& getNonConstStochasticProcess();
+ bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;}
+ MDOUBLE getToll() const {return _toll;}
+ MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;}
+
+protected:
+ const stochasticProcess &_sp; // the substitution model (not owned)
+ stochasticProcess *_nonConstSpPtr; // non-NULL only for the non-const ctor
+ const MDOUBLE _toll; // optimization tolerance
+ const MDOUBLE _maxPairwiseDistance; // upper bracket for the optimizer
+ jcDistance _jcDist; // used to seed the ML search
+ unObservableData* _unObservableData_p; // optional missing-data correction (not owned, may be NULL)
+
+private:
+ const MDOUBLE giveDistanceBrent( const countTableComponentGam& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess= 0.03) const; // initial guess
+ const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess= 0.03) const; // initial guess
+
+
+
+public:
+ // evaluates the pairwise log-likelihood at a given distance (static,
+ // so it can be used without constructing a likeDist).
+ static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const MDOUBLE dist,
+ const vector<MDOUBLE> * weights=NULL);
+
+};
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+// Functor returning MINUS "Q" — the expected log-likelihood
+// contribution of a branch of length dist, given the expected
+// transition counts in _ctc. Fed to dbrent for minimization.
+class C_evalLikeDist{
+private:
+ const countTableComponentGam& _ctc;
+ const stochasticProcess& _sp;
+ unObservableData* _unObservableData_p; // optional missing-data correction (may be NULL)
+
+public:
+ C_evalLikeDist(const countTableComponentGam& ctc,
+ const stochasticProcess& inS1,unObservableData* unObservableData_p=NULL)
+ :_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
+
+ MDOUBLE operator() (MDOUBLE dist) {
+ const MDOUBLE epsilonPIJ = 1e-10;
+ MDOUBLE sumL=0.0;
+ for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+
+ MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
+ if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
+ // BUG FIX: the per-cell term used to be accumulated into a
+ // running variable (sumLtemp) that was never reset, so sumL
+ // summed prefix sums of the terms instead of the terms.
+ MDOUBLE cellL = _ctc.getCounts(alph1,alph2,rateCategor)*(log(pij)-log(_sp.freq(alph2)));//*_sp.ratesProb(rateCategor);// removed.
+ //if(_unObservableData_p)
+ //	cellL = cellL/(1- exp(_unObservableData_p->getlogLforMissingData()));
+ sumL += cellL;
+ }
+ }
+ }
+ if(_unObservableData_p)
+ sumL = sumL/(1- exp(_unObservableData_p->getlogLforMissingData())); //???
+ LOG(12,<<"check bl="<<dist<<" gives "<<sumL<<endl);
+
+ return -sumL;
+ };
+};
+
+// REMARK 1: THE LINE if if (pij<epsilonPIJ) pij = epsilonPIJ
+// There are cases when i != j, and t!=0, and yet pij =0, because of numerical problems
+// For these cases, it is easier to assume pij is very small, so that log-pij don't fly...
+
+// Functor returning MINUS the first derivative of Q with respect to
+// the branch length, given expected transition counts in _ctc.
+// Companion of C_evalLikeDist for dbrent / Newton-Raphson.
+class C_evalLikeDist_d{ // derivative.
+public:
+ C_evalLikeDist_d(const countTableComponentGam& ctc,
+ const stochasticProcess& inS1,unObservableData* unObservableData_p=NULL): _ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
+private:
+ const countTableComponentGam& _ctc;
+ const stochasticProcess& _sp;
+ unObservableData* _unObservableData_p; // accepted but currently unused (see commented code below)
+
+public:
+ MDOUBLE operator() (MDOUBLE dist) {
+ MDOUBLE sumDL=0.0;
+ for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+
+ MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
+ MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
+ // NOTE(review): unlike C_evalLikeDist, pij has no epsilon
+ // guard here, so a numerically-zero pij divides by zero.
+ sumDL+= _ctc.getCounts(alph1,alph2,rateCategor)*dpij //*_sp.ratesProb(rateCategor) : removed CODE_RED
+ *rate/pij;
+ }
+ }
+ }//cerr<<"derivation = "<<-sumDL<<endl;
+ //if(_unObservableData_p)
+ // sumDL = sumDL/(1- exp(_unObservableData_p->getlogLforMissingData())); //???
+ return -sumDL;
+ };
+};
+
+// Functor returning MINUS the second derivative of Q with respect to
+// the branch length; used by the Newton-Raphson distance estimator.
+class C_evalLikeDist_d2{ // second derivative.
+public:
+ C_evalLikeDist_d2(const countTableComponentGam& ctc,
+ const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
+private:
+ const countTableComponentGam& _ctc;
+ const stochasticProcess& _sp;
+public:
+ MDOUBLE operator() (MDOUBLE dist) {
+ MDOUBLE sumDL=0.0;
+ for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+
+ MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
+ MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
+ MDOUBLE d2pij = _sp.d2Pij_dt2(alph1,alph2,dist*rate);
+ // second derivative of log: (p*p'' - p'*p') / p^2
+ // NOTE(review): pij*pij has no epsilon guard — a
+ // numerically-zero pij divides by zero here.
+ sumDL+= rate*_ctc.getCounts(alph1,alph2,rateCategor)*
+ (pij*d2pij - dpij *dpij )/(pij*pij);
+ }
+ }
+ }
+ return -sumDL;
+ };
+};
+
+#endif
+
diff --git a/libs/phylogeny/likeDist2Codon.cpp b/libs/phylogeny/likeDist2Codon.cpp
new file mode 100644
index 0000000..202b191
--- /dev/null
+++ b/libs/phylogeny/likeDist2Codon.cpp
@@ -0,0 +1,25 @@
+// $RCSfile$ $Revision: 4699 $ $Date: 2008-08-14 17:19:46 +0300 (ה, 14 אוגוסט 2008) $
+
+#include "likeDist2Codon.h"
+#include "numRec.h"
+
+
+// Estimates the branch length from expected transition counts under
+// the codon (M-series) model vector. resQ receives "Q", the edge's
+// expected log-likelihood contribution. Delegates to Brent.
+const MDOUBLE likeDist2Codon::giveDistance( const countTableComponentGam& ctc,
+ MDOUBLE& resQ,
+ const MDOUBLE initialGuess) const {
+ //return giveDistanceNR(ctc,resL,initialGuess);
+ return giveDistanceBrent(ctc,resQ,initialGuess);
+}
+
+// Maximizes Q over the branch length with derivative-aware Brent
+// minimization on [-Q], bracketed by [0, _maxPairwiseDistance].
+// resL receives the maximized Q; the optimal distance is returned.
+const MDOUBLE likeDist2Codon::giveDistanceBrent( const countTableComponentGam& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess) const {
+ const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
+ MDOUBLE dist=-1.0; // filled in by dbrent
+ resL = -dbrent(ax,bx,cx,
+ C_evalLikeDist2Codon(ctc,_spVec),
+ C_evalLikeDist_d_2Codon(ctc,_spVec),
+ tol,
+ &dist);
+ return dist;
+}
diff --git a/libs/phylogeny/likeDist2Codon.h b/libs/phylogeny/likeDist2Codon.h
new file mode 100644
index 0000000..54a7f70
--- /dev/null
+++ b/libs/phylogeny/likeDist2Codon.h
@@ -0,0 +1,110 @@
+// $Id: likeDist2Codon.h 4699 2008-08-14 14:19:46Z privmane $
+
+#ifndef ___LIKE_DIST_2_CODON_H
+#define ___LIKE_DIST_2_CODON_H
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "distanceMethod.h"
+#include "stochasticProcess.h"
+#include "logFile.h"
+#include "wYangModel.h"
+#include <cmath>
+using namespace std;
+
+// ML pairwise-distance estimator for codon models, where each selection
+// category has its own stochasticProcess (one entry per category in spVec).
+// NOTE: the spVec reference is stored, not copied — it must outlive this object.
+class likeDist2Codon : public distanceMethod {
+public:
+ explicit likeDist2Codon(const vector<stochasticProcess>& spVec,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 2.0) : _spVec(spVec) ,_toll(toll),_maxPairwiseDistance(maxPairwiseDistance) {
+ }
+
+ likeDist2Codon (const likeDist2Codon& other): _spVec(other._spVec) ,_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance) {};
+ virtual likeDist2Codon* clone() const {return new likeDist2Codon(*this);}
+
+ // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
+ // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
+ // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
+ const MDOUBLE giveDistance( const countTableComponentGam& ctc,
+ MDOUBLE& resQ,
+ const MDOUBLE initialGuess= 0.03) const; // initial guess
+
+
+ // returns the estimated ML distance between the 2 sequences.
+ // if score is given, it will be the log-likelihood.
+ //!!!!!!!!!!!!!!TO DO
+ // NOTE(review): sequence-pair overload is an unimplemented stub that
+ // always returns 1 — callers must use the count-table overload.
+ const MDOUBLE giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL) const { return 1;}
+
+ // Brent-based optimizer used by giveDistance (see .cpp).
+ const MDOUBLE giveDistanceBrent( const countTableComponentGam& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess) const;
+
+private:
+ const vector<stochasticProcess>& _spVec; // one process per selection category
+ const MDOUBLE _toll; // optimization tolerance
+ const MDOUBLE _maxPairwiseDistance; // upper bound of search bracket
+
+};
+
+
+// Functor: -Q(dist) for the codon model — sums count-weighted
+// log(Pij/freq_j) over letter pairs and selection categories.
+class C_evalLikeDist2Codon{
+private:
+ const countTableComponentGam& _ctc;
+ const vector<stochasticProcess>& _spVec;
+public:
+ C_evalLikeDist2Codon(const countTableComponentGam& ctc,
+ const vector<stochasticProcess>& inS1):_ctc(ctc), _spVec(inS1) {};
+
+ MDOUBLE operator() (MDOUBLE dist) {
+ const MDOUBLE epsilonPIJ = 1e-10;
+ MDOUBLE sumL=0.0;
+ for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
+ for (int categor = 0; categor<_spVec.size(); ++categor) {
+ MDOUBLE pij= _spVec[categor].Pij_t(alph1,alph2,dist);
+ if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
+ sumL += _ctc.getCounts(alph1,alph2,categor)*(log(pij)-log(_spVec[categor].freq(alph2)));//*_sp.ratesProb(rateCategor);// removed.
+ }
+ }
+ }
+ // LOG(5,<<"check bl="<<dist<<" gives "<<sumL<<endl);
+
+ return -sumL; // negated so dbrent (a minimizer) maximizes Q
+ };
+};
+
+// REMARK 1: THE LINE if if (pij<epsilonPIJ) pij = epsilonPIJ
+// There are cases when i != j, and t!=0, and yet pij =0, because of numerical problems
+// For these cases, it is easier to assume pij is very small, so that log-pij don't fly...
+
+// Functor: -dQ/dt for the codon model; companion derivative to
+// C_evalLikeDist2Codon, used by dbrent.
+class C_evalLikeDist_d_2Codon{ // derivative.
+public:
+ C_evalLikeDist_d_2Codon(const countTableComponentGam& ctc,
+ const vector<stochasticProcess>& inS1) : _ctc(ctc), _spVec(inS1) {};
+private:
+ const countTableComponentGam& _ctc;
+ const vector<stochasticProcess>& _spVec;
+public:
+ MDOUBLE operator() (MDOUBLE dist) {
+ MDOUBLE sumDL=0.0;
+ for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
+ for (int categor = 0; categor<_spVec.size(); ++categor) {
+ // per-category omega (dN/dS) pulled from the wYangModel
+ MDOUBLE selection = static_cast<wYangModel*>(_spVec[categor].getPijAccelerator()->getReplacementModel())->getW();
+ MDOUBLE pij= _spVec[categor].Pij_t(alph1,alph2,dist);
+ MDOUBLE dpij = _spVec[categor].dPij_dt(alph1,alph2,dist);
+ // NOTE(review): the chain-rule factor here is `selection`, while the
+ // gamma-rate functors use `rate`; presumably omega plays the rate role
+ // in this parameterization — confirm against wYangModel.
+ sumDL+= _ctc.getCounts(alph1,alph2,categor)*dpij //*_sp.ratesProb(rateCategor) : removed CODE_RED
+ *selection/pij;
+ }
+ }
+ }
+ //LOG(5,<<"derivation = "<<-sumDL<<endl);
+ return -sumDL;
+ };
+};
+
+#endif
+
diff --git a/libs/phylogeny/likeDist2USSRV.cpp b/libs/phylogeny/likeDist2USSRV.cpp
new file mode 100644
index 0000000..7f7e946
--- /dev/null
+++ b/libs/phylogeny/likeDist2USSRV.cpp
@@ -0,0 +1,65 @@
+// $Id: likeDist2USSRV.cpp 962 2006-11-07 15:13:34Z privmane $
+
+
+#include "likeDist2USSRV.h"
+#include "numRec.h"
+
+
+// Estimate the ML branch length for the USSRV model from the two count
+// tables (base model + SSRV model). resQ receives "Q", not the raw
+// log-likelihood; see the header comment.
+const MDOUBLE likeDist2USSRV::giveDistance( const countTableComponentGam& ctcBase,
+ const countTableComponentHom& ctcSSRV,
+ MDOUBLE& resQ,
+ const MDOUBLE initialGuess) const {
+ return giveDistanceBrent(ctcBase,ctcSSRV,resQ,initialGuess); // Brent only; dbrent variant disabled below
+}
+
+
+// Maximize Q over the branch length in [0, _maxPairwiseDistance] with
+// derivative-free Brent (the dbrent version is known-broken; see the
+// commented-out variant below). resL gets the maximized Q.
+const MDOUBLE likeDist2USSRV::giveDistanceBrent(const countTableComponentGam& ctcBase,
+ const countTableComponentHom& ctcSSRV,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess) const {
+ const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
+ LOG(12,<<"ax: " << ax << " bx: " << bx << " cx: " << cx << endl);
+ MDOUBLE dist=-1.0;
+ resL = -brent(ax,bx,cx,
+ C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model),
+ tol,
+ &dist);
+
+
+ LOG(9, <<"brent: resL = " << resL << " dist = " << dist << endl);
+
+ return dist;
+}
+
+// @@@@dbrent doesn't work. I should try fix this
+//const MDOUBLE likeDist2USSRV::giveDistanceBrent(const countTableComponentGam& ctcBase,
+// const countTableComponentHom& ctcSSRV,
+// MDOUBLE& resL,
+// const MDOUBLE initialGuess) const {
+// const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
+// const MDOUBLE ax_debug=0,bx_debug=initialGuess,cx_debug=_maxPairwiseDistance,tol_debug=_toll;
+// MDOUBLE dist=-1.0;
+// // @@@@ debug OZ
+// MDOUBLE dist_debug=-1.0;
+// MDOUBLE resL_debug = -brent(ax_debug,bx_debug,cx_debug,
+// C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model),
+// tol_debug,
+// &dist_debug);
+//
+// resL = -dbrent(ax,bx,cx,
+// C_evalLikeDist2USSRV(ctcBase,ctcSSRV,_model),
+// C_evalLikeDist_d_2USSRV(ctcBase,ctcSSRV,_model),
+// tol,
+// &dist);
+//
+// MDOUBLE small = 0.001;
+// if ((resL < resL_debug - small) || (resL_debug < resL-small) ||
+// (dist < dist_debug - small) || (dist_debug < dist-small))
+// {
+// LOG(8,<<"likeDist2USSRV::giveDistanceBrent, different results when using brent and dbrent" << endl);
+// LOG(8,<<"dbrent resL = " << resL << " , brent resL = " << resL_debug << endl);
+// LOG(8,<<"dbrent dist = " << dist << " , brent dist = " << dist_debug << endl);
+// }
+// // end of debug OZ
+// return dist;
+//}
diff --git a/libs/phylogeny/likeDist2USSRV.h b/libs/phylogeny/likeDist2USSRV.h
new file mode 100644
index 0000000..7ba14e4
--- /dev/null
+++ b/libs/phylogeny/likeDist2USSRV.h
@@ -0,0 +1,152 @@
+// $Id: likeDist2USSRV.h 962 2006-11-07 15:13:34Z privmane $
+#ifndef ___LIKE_DIST_2_USSRV_H
+#define ___LIKE_DIST_2_USSRV_H
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "distanceMethod.h"
+#include "stochasticProcess.h"
+#include "logFile.h"
+#include "ussrvModel.h"
+#include <cmath>
+using namespace std;
+
+// ML pairwise-distance estimator under the USSRV model (a base model with
+// gamma categories plus an SSRV component). The ussrvModel reference is
+// stored, not copied — it must outlive this object.
+class likeDist2USSRV : public distanceMethod {
+public:
+ explicit likeDist2USSRV(const ussrvModel& model,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0) : _model(model) ,_toll(toll),_maxPairwiseDistance(maxPairwiseDistance)
+ {}
+
+ likeDist2USSRV (const likeDist2USSRV& other): _model(other._model) ,_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance) {};
+ virtual likeDist2USSRV* clone() const {return new likeDist2USSRV(*this);}
+
+ // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
+ // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
+ // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
+ const MDOUBLE giveDistance( const countTableComponentGam& ctcBase,
+ const countTableComponentHom& ctcSSRV,
+ MDOUBLE& resQ,
+ const MDOUBLE initialGuess= 0.03) const; // initial guess
+
+
+ // returns the estimated ML distance between the 2 sequences.
+ // if score is given, it will be the log-likelihood.
+ //!!!!!!!!!!!!!!TO DO @@@@
+ // NOTE(review): sequence-pair overload is an unimplemented stub (logs and
+ // returns 1) — callers must use the count-table overload.
+ const MDOUBLE giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL) const {
+ LOG(4,<<"likeDist2USSRV:giveDistance : This method should never be used" << endl);
+ return 1;}
+
+ // Brent-based optimizer used by giveDistance (see .cpp).
+ const MDOUBLE giveDistanceBrent(const countTableComponentGam& ctcBase,
+ const countTableComponentHom& ctcSSRV,
+ MDOUBLE& resL,
+ MDOUBLE initialGuess) const;
+
+private:
+ const ussrvModel& _model; // supplies both the base and the SSRV processes
+ const MDOUBLE _toll; // optimization tolerance
+ const MDOUBLE _maxPairwiseDistance; // upper bound of search bracket
+
+};
+
+
+// Functor: -Q(dist) for the USSRV model — the sum of the gamma-category
+// base-model term (evaluated at dist*rate) and the homogeneous SSRV term
+// (evaluated at dist).
+class C_evalLikeDist2USSRV{
+private:
+ const countTableComponentGam& _ctcBase;
+ const countTableComponentHom& _ctcSSRV;
+ const ussrvModel& _model;
+public:
+ C_evalLikeDist2USSRV(const countTableComponentGam& ctcBase,
+ const countTableComponentHom& ctcSSRV,
+ const ussrvModel& model):_ctcBase(ctcBase),_ctcSSRV(ctcSSRV), _model(model) {};
+
+ MDOUBLE operator() (MDOUBLE dist) {
+ const MDOUBLE epsilonPIJ = 1e-10;
+ MDOUBLE sumL=0.0;
+ MDOUBLE pij;
+ int categor, alph1,alph2;
+ // base model
+ const stochasticProcess& baseSp = _model.getBaseModel();
+
+ for (alph1=0; alph1 < _ctcBase.alphabetSize(); ++alph1){
+ for (alph2=0; alph2 < _ctcBase.alphabetSize(); ++alph2){
+ for (categor = 0; categor < baseSp.categories(); ++categor) {
+ MDOUBLE rate = baseSp.rates(categor);
+ pij= baseSp.Pij_t(alph1,alph2,dist*rate);
+ if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
+ sumL += _ctcBase.getCounts(alph1,alph2,categor)*(log(pij)-log(baseSp.freq(alph2)));//*_sp.ratesProb(rateCategor);// removed.
+
+ }
+ }
+ }
+
+ // ssrv model
+ const stochasticProcessSSRV& ssrvSp = _model.getSSRVmodel();
+ for (alph1=0; alph1 < _ctcSSRV.alphabetSize(); ++alph1){
+ for (alph2=0; alph2 < _ctcSSRV.alphabetSize(); ++alph2){
+ pij = ssrvSp.Pij_t(alph1,alph2,dist);
+ if (pij<epsilonPIJ) pij = epsilonPIJ; // same numerical clamp as above
+ sumL+=_ctcSSRV.getCounts(alph1,alph2)*(log(pij)-log(ssrvSp.freq(alph2)));//*_sp.ratesProb(rateCategor);// removed.
+ }
+ }
+ LOG(12,<<"check bl="<<dist<<" gives "<<sumL<<endl);
+
+ return -sumL; // negated so brent (a minimizer) maximizes Q
+ }
+};
+
+// REMARK 1: THE LINE if if (pij<epsilonPIJ) pij = epsilonPIJ
+// There are cases when i != j, and t!=0, and yet pij =0, because of numerical problems
+// For these cases, it is easier to assume pij is very small, so that log-pij don't fly...
+
+// @@@@ doesn't work
+// Functor: -dQ/dt for the USSRV model. Known-broken (see "@@@@ doesn't
+// work" above and the disabled dbrent path in the .cpp); kept for reference.
+class C_evalLikeDist_d_2USSRV{ // derivative.
+public:
+ C_evalLikeDist_d_2USSRV(const countTableComponentGam& ctcBase,
+ const countTableComponentHom& ctcSSRV,
+ const ussrvModel& model) : _ctcBase(ctcBase), _ctcSSRV(ctcSSRV),_model(model) {};
+
+private:
+ const countTableComponentGam& _ctcBase;
+ const countTableComponentHom& _ctcSSRV;
+ const ussrvModel& _model;
+
+public:
+ MDOUBLE operator() (MDOUBLE dist) {
+ MDOUBLE sumDL=0.0;
+ MDOUBLE pij, dpij;
+ int categor, alph1,alph2;
+ // Base model
+ const stochasticProcess& spBase = _model.getBaseModel();
+ for (alph1=0; alph1 < _ctcBase.alphabetSize(); ++alph1){
+ for (alph2=0; alph2 < _ctcBase.alphabetSize(); ++alph2){
+ for (categor = 0; categor<_model.noOfCategor(); ++categor) {
+ MDOUBLE rate = spBase.rates(categor);
+ // fix: assign the outer pij/dpij instead of declaring shadowing
+ // locals (the outer declarations were otherwise dead in this loop).
+ // TODO(review): this evaluates at `dist`, not `dist*rate` as the
+ // matching likelihood functor C_evalLikeDist2USSRV does — likely the
+ // reason this derivative "doesn't work"; confirm before enabling.
+ pij = spBase.Pij_t(alph1,alph2,dist);
+ dpij = spBase.dPij_dt(alph1,alph2,dist);
+
+ sumDL+= _ctcBase.getCounts(alph1,alph2,categor)*dpij
+ *rate/pij;
+ }
+ }
+ }
+ // SSRV model
+ const stochasticProcessSSRV& spSSRV = _model.getSSRVmodel();
+ for (alph1=0; alph1 < _ctcSSRV.alphabetSize(); ++alph1){
+ for (alph2=0; alph2 < _ctcSSRV.alphabetSize(); ++alph2){
+ pij= spSSRV.Pij_t(alph1,alph2,dist);
+ dpij= spSSRV.dPij_dt(alph1,alph2,dist);
+ sumDL+= _ctcSSRV.getCounts(alph1,alph2)*dpij/pij; //rate=1;
+ }
+ }
+
+ LOG(8,<<"derivation = "<<-sumDL<<endl);
+ return -sumDL;
+ };
+};
+
+#endif // ___LIKE_DIST_2_USSRV_H
+
diff --git a/libs/phylogeny/likeDistProp.cpp b/libs/phylogeny/likeDistProp.cpp
new file mode 100644
index 0000000..bbea47f
--- /dev/null
+++ b/libs/phylogeny/likeDistProp.cpp
@@ -0,0 +1,21 @@
+// $Id: likeDistProp.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "likeDistProp.h"
+#include "numRec.h"
+
+// ML branch length over several genes sharing a proportional distance:
+// maximizes the summed per-gene Q via derivative-aware Brent (dbrent).
+// resL gets the maximized value (sign flipped back); returns the distance.
+const MDOUBLE likeDistProp::giveDistance( const vector<countTableComponentGam>& ctc,
+ MDOUBLE& resL) const {
+ const MDOUBLE MAXDISTANCE=2.0; // fixed search upper bound for this method
+// const MDOUBLE PRECISION_TOLL=0.001;
+ const MDOUBLE ax=0,bx=1.0,cx=MAXDISTANCE,tol=_toll;
+ MDOUBLE dist=-1.0;
+ resL = -dbrent(ax,bx,cx,
+ C_evallikeDistProp(ctc,_s1),
+ C_evallikeDistProp_d(ctc,_s1),
+ tol,
+ &dist);
+ return dist;
+}
+
+// the minus resL = -dbrent because C_evalDist return - value, because it is computing the min not the max...
+
diff --git a/libs/phylogeny/likeDistProp.h b/libs/phylogeny/likeDistProp.h
new file mode 100644
index 0000000..a1f077f
--- /dev/null
+++ b/libs/phylogeny/likeDistProp.h
@@ -0,0 +1,91 @@
+// $Id: likeDistProp.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___LIKE_DIST_PROP
+#define ___LIKE_DIST_PROP
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "stochasticProcess.h"
+#include <cmath>
+
+// Distance estimator for the proportional model: one count table and one
+// stochasticProcess per gene, all genes constrained to the same distance.
+// The s1 reference is stored, not copied — it must outlive this object.
+class likeDistProp {
+private:
+ const int _alphabetSize;
+ const vector<stochasticProcess>& _s1; // one process per gene
+ const MDOUBLE _toll; // optimization tolerance
+public:
+ // resL receives the optimized objective value; returns the ML distance.
+ const MDOUBLE giveDistance( const vector<countTableComponentGam>& ctc,
+ MDOUBLE& resL) const;
+ explicit likeDistProp(const int alphabetSize,
+ const vector<stochasticProcess>& s1,
+ const MDOUBLE toll) : _alphabetSize(alphabetSize), _s1(s1) ,_toll(toll){
+ }
+};
+
+
+
+// Functor: -dQ/dt for the proportional model, summed over genes, letter
+// pairs and gamma rate categories.
+class C_evallikeDistProp_d{ // derivative.
+public:
+ C_evallikeDistProp_d(const vector<countTableComponentGam>& ctc,
+ const vector<stochasticProcess>& inS1) : _ctc(ctc), _sp(inS1) {};
+private:
+ const vector<countTableComponentGam>& _ctc; // per-gene count tables
+ const vector<stochasticProcess>& _sp; // per-gene processes
+public:
+ MDOUBLE operator() (MDOUBLE dist) {
+ MDOUBLE sumDL=0.0;
+ const MDOUBLE epsilonPIJ = 1e-10;
+ for (int gene=0; gene < _ctc.size(); ++ gene) {
+ for (int alph1=0; alph1 < _ctc[gene].alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc[gene].alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_sp[gene].categories(); ++rateCategor) {
+ MDOUBLE rate = _sp[gene].rates(rateCategor);
+ MDOUBLE pij= _sp[gene].Pij_t(alph1,alph2,dist*rate);
+ MDOUBLE dpij = _sp[gene].dPij_dt(alph1,alph2,dist*rate);
+ // numerical clamp: avoid division by a vanishing pij
+ if (pij<epsilonPIJ) {
+ pij = epsilonPIJ;
+ dpij = epsilonPIJ;
+ }
+ sumDL+= _ctc[gene].getCounts(alph1,alph2,rateCategor)*dpij*_sp[gene].ratesProb(rateCategor)
+ *rate/pij;
+ }
+ }
+ }
+ }
+ return -sumDL;
+ }
+};
+
+
+
+// Functor: -Q(dist) for the proportional model, summed over genes, letter
+// pairs and gamma rate categories (each weighted by its rate probability).
+class C_evallikeDistProp{
+private:
+ const vector<countTableComponentGam>& _ctc; // per-gene count tables
+ const vector<stochasticProcess>& _sp; // per-gene processes
+public:
+ C_evallikeDistProp(const vector<countTableComponentGam>& ctc,
+ const vector<stochasticProcess>& inS1):_ctc(ctc), _sp(inS1) {};
+
+ MDOUBLE operator() (MDOUBLE dist) {
+ const MDOUBLE epsilonPIJ = 1e-10;
+ MDOUBLE sumL=0.0;
+ for (int gene=0; gene < _ctc.size(); ++ gene) {
+ for (int alph1=0; alph1 < _ctc[gene].alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc[gene].alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_sp[gene].categories(); ++rateCategor) {
+ MDOUBLE rate = _sp[gene].rates(rateCategor);
+ MDOUBLE pij= _sp[gene].Pij_t(alph1,alph2,dist*rate);
+ // fix: was `pij<0`, which let pij==0 reach log(0) = -inf; clamp
+ // tiny/zero pij like every sibling functor in this library does.
+ if (pij<epsilonPIJ) {
+ pij = epsilonPIJ;
+ }
+ sumL += _ctc[gene].getCounts(alph1,alph2,rateCategor)*(log(pij)-log(_sp[gene].freq(alph2)))*_sp[gene].ratesProb(rateCategor);
+ }
+ }
+ }
+ }
+ return -sumL; // negated so dbrent (a minimizer) maximizes Q
+ }
+};
+
+#endif
+
diff --git a/libs/phylogeny/likeDistfixRoot.cpp b/libs/phylogeny/likeDistfixRoot.cpp
new file mode 100644
index 0000000..1466c38
--- /dev/null
+++ b/libs/phylogeny/likeDistfixRoot.cpp
@@ -0,0 +1,378 @@
+// $Id: likeDistfixRoot.cpp 4470 2008-07-17 15:37:40Z cohenofi $
+
+#include "likeDistfixRoot.h"
+#include "numRec.h"
+#include "someUtil.h"
+
+// Return the mutable stochasticProcess, available only when this object
+// was built with the non-const constructor; otherwise reports a fatal error.
+stochasticProcess& likeDistfixRoot::getNonConstStochasticProcess() {
+ if (!_nonConstSpPtr) {
+ errorMsg::reportError("likeDistfixRoot::getNonConstStochasticProcess: Can't give non-const stochasticProcess because the stochasticProcess that was given to the constructor of this likeDistfixRoot object was const");
+ }
+ return *_nonConstSpPtr;
+}
+
+// ======================= functors needed for the computations =============
+
+// Functor: minus the direct (sequence-pair) log-likelihood at a given
+// branch length; delegates to likeDistfixRoot::evalLikelihoodForDistance.
+class C_evalLikeDistDirect{
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const vector<MDOUBLE> * _weights; // optional per-position weights (may be NULL)
+public:
+ C_evalLikeDistDirect(const stochasticProcess& inS1,
+ const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights): _sp(inS1),_s1(s1),_s2(s2),_weights(weights) {};
+
+ MDOUBLE operator() (MDOUBLE dist) const {
+ return -likeDistfixRoot::evalLikelihoodForDistance(_sp,_s1,_s2,dist,_weights);
+ }
+};
+
+// Sum of per-position log-likelihoods of observing s1 vs s2 at branch
+// length `dist`, averaging over gamma rate categories. Unknown characters
+// ('?') and ambiguity codes are handled via the alphabet's relations().
+MDOUBLE likeDistfixRoot::evalLikelihoodForDistance(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const MDOUBLE dist,
+ const vector<MDOUBLE> * weights) {
+ MDOUBLE sumL=0.0; // sum of log likelihoods
+ MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
+ for (int pos=0; pos < s1.seqLen(); ++pos){
+ if (s1.isUnknown(pos) && s2.isUnknown(pos)) continue; // the case of two unknowns
+ posLikelihood = 0.0;
+ if (s1.isUnknown(pos) && s2.isSpecific(pos)) {
+ // this is the more complicated case, where s1 = ?, s2 = specific
+ posLikelihood = sp.freq(s2[pos]);
+ } else if (s2.isUnknown(pos) && s1.isSpecific(pos)) {
+ posLikelihood = sp.freq(s1[pos]); // symmetric case: s2 = ?, s1 specific
+ } else {
+ for (int rateCategor = 0; rateCategor<sp.categories(); ++rateCategor) {
+ MDOUBLE rate = sp.rates(rateCategor);
+ MDOUBLE pij= 0.0;
+ if (s1.isSpecific(pos) && s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j
+ pij= sp.Pij_t(s1[pos],s2[pos],dist*rate);
+ posLikelihood += pij * sp.freq(s1[pos])*sp.ratesProb(rateCategor);
+ } else {// this is the most complicated case, when you have
+ // combinations of letters, for example B in one
+ // sequence and ? in the other.
+ for (int iS1 =0; iS1< sp.alphabetSize(); ++iS1) {
+ for (int iS2 =0; iS2< sp.alphabetSize(); ++iS2) {
+ if ((s1.getAlphabet()->relations(s1[pos],iS1)) &&
+ (s2.getAlphabet()->relations(s2[pos],iS2))) {
+ posLikelihood += sp.freq(iS1)*sp.Pij_t(iS1,iS2,dist*rate)*sp.ratesProb(rateCategor);
+ }
+ }
+ }
+ }
+ } // end of for on the rates
+ }
+ assert(posLikelihood!=0.0); // a zero here would make log() below blow up
+ sumL += log(posLikelihood)*(weights ? (*weights)[pos]:1.0);
+ }
+ return sumL;
+};
+
+// Functor: minus the derivative of the direct (sequence-pair)
+// log-likelihood w.r.t. branch length; companion to C_evalLikeDistDirect
+// for dbrent. Mirrors evalLikelihoodForDistance's case analysis.
+class C_evalLikeDistDirect_d{ // derivative.
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const vector<MDOUBLE> * _weights; // optional per-position weights (may be NULL)
+public:
+ C_evalLikeDistDirect_d(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights): _sp(sp),_s1(s1),_s2(s2),_weights(weights) {};
+
+ MDOUBLE operator() (MDOUBLE dist) const {
+ MDOUBLE sumL=0.0; // sum of log likelihoods
+ MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
+ MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position
+ for (int pos=0; pos < _s1.seqLen(); ++pos){
+ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
+ posLikelihood = 0.0;
+ posLikelihood_d = 0.0;
+ if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
+ // this is the more complicated case, where s1 = ?, s2 = specific
+ posLikelihood = _sp.freq(_s2[pos]);
+ posLikelihood_d =0.0; // constant in dist, so derivative is zero
+ } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
+ posLikelihood = _sp.freq(_s1[pos]);
+ posLikelihood_d =0.0;
+ } else {
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+ MDOUBLE pij= 0.0;
+ MDOUBLE dpij=0.0;
+ if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
+ //simple case, where AA i is changing to AA j
+ pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
+ dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate; // chain rule
+ MDOUBLE tmp = _sp.freq(_s1[pos])*_sp.ratesProb(rateCategor);
+ posLikelihood += pij *tmp;
+ posLikelihood_d += dpij*tmp;
+ } else {// this is the most complicated case, when you have combinations of letters,
+ // for example B in one sequence and ? in the other.
+ for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
+ for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
+ if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
+ (_s2.getAlphabet()->relations(_s2[pos],iS2))) {
+ MDOUBLE exp = _sp.freq(iS1)*_sp.ratesProb(rateCategor);
+ posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
+ posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,dist*rate)*rate;
+ }
+ }
+ }
+ }
+ }// end of for rate categories
+ }
+ assert(posLikelihood>0.0);
+ // d/dt log(L_pos) = L_pos' / L_pos
+ sumL += (posLikelihood_d/posLikelihood)*(_weights ? (*_weights)[pos]:1.0);
+ }
+ return -sumL;
+ };
+};
+
+
+// THIS FUNCTION EVALUATES THE LIKELIHOOD GIVEN THE DISTANCE
+// THIS FUNCTION EVALUATES THE LIKELIHOOD GIVEN THE DISTANCE
+// Convenience wrapper: unweighted log-likelihood of the pair at a fixed
+// distance (undoes the functor's sign flip).
+MDOUBLE likeDistfixRoot::evalLogLikelihoodGivenDistance(const sequence& s1, const sequence& s2,
+ const MDOUBLE dis2evaluate) {
+ C_evalLikeDistDirect Cev(_sp,s1,s2,NULL);
+ return -Cev.operator ()(dis2evaluate);
+}
+
+//MDOUBLE likeDistfixRoot::giveDistanceThroughCTC( const sequence& s1,
+// const sequence& s2,
+// const vector<MDOUBLE> * weights,
+// MDOUBLE* score) const {
+// // only in the case of homogenous model - work through pairwise EM like
+// countTableComponentGam ctc;
+// if (_sp.categories() != 1) {
+// errorMsg::reportError("this function only work for homogenous model.");
+// }
+// ctc.countTableComponentAllocatePlace(s1.getAlphabet()->size(),1);
+// for (int i=0; i<s1.seqLen(); ++i) {
+// ctc.addToCounts(s1[i],s2[i],0,weights?(*weights)[i]:1.0);
+// }
+// MDOUBLE resL =0;
+// return giveDistance(ctc,resL);
+//}
+
+// Estimate the ML branch length from per-root-letter expected-count
+// tables. resQ receives "Q" (the edge's contribution to the expected
+// log-likelihood), not the log-likelihood itself; see the header comment.
+const MDOUBLE likeDistfixRoot::giveDistance(const vector<countTableComponentGam>& ctc,
+ MDOUBLE& resQ,
+ const MDOUBLE initialGuess) const {
+ //return giveDistanceNR(ctc,resL,initialGuess);
+ return giveDistanceBrent(ctc,resQ,initialGuess); // delegate to Brent's method
+}
+
+// Maximize Q over the branch length in [0, _maxPairwiseDistance] using
+// derivative-aware Brent (dbrent) on -Q. resL gets the maximized Q value.
+const MDOUBLE likeDistfixRoot::giveDistanceBrent(const vector<countTableComponentGam>& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess) const {
+ // bracket: [0, initialGuess, maxPairwiseDistance]
+ const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
+ MDOUBLE dist=-1.0;
+ resL = -dbrent(ax,bx,cx,
+ C_evallikeDistfixRoot(ctc,_sp,_unObservableData_p),
+ C_evalLikeDist_dGLfixRoot(ctc,_sp),
+ tol,
+ &dist);
+ return dist;
+}
+
+// Root finder combining Newton-Raphson with bisection fallback: keeps the
+// root bracketed in [low, high] and takes an NR step only when the next NR
+// guess stays inside the bracket; otherwise bisects. On success returns the
+// root and sets zeroFound=1; if f(low) and f(high) have the same sign no
+// bracket exists and it returns 0 with zeroFound=0.
+// f: the function whose zero is sought; df: its derivative.
+template <typename regF, typename dF>
+MDOUBLE myNRmethod(MDOUBLE low, MDOUBLE current, MDOUBLE high, regF f,
+ dF df, const MDOUBLE tol, const int max_it, int & zeroFound) { // finding zero of a function.
+ zeroFound = 1;
+ MDOUBLE currentF = f(current);
+ if (fabs(currentF)<tol) return current; // already close enough
+ MDOUBLE lowF = f(low);
+ MDOUBLE highF = f(high);
+ if (((lowF>0) && (highF>0)) || ((lowF<0) && (highF<0))) {// unable to find a zero
+ zeroFound = 0;
+ return 0;
+ }
+ if (lowF>0) {// fixing things to be in the right order.
+ MDOUBLE tmp = low;
+ low = high;
+ high = tmp;
+ tmp = lowF;
+ lowF = highF;
+ highF = tmp;
+ }
+ // invariant from here on: f(low) < 0 < f(high)
+ if (currentF>0) {
+ high = current;
+ highF = currentF;
+ } else {
+ low = current;
+ lowF = currentF;
+ } // now the zero is between current and either low or high.
+
+ MDOUBLE currentIntervalSize = fabs(low-high);
+ MDOUBLE oldIntervalSize = currentIntervalSize;
+
+ // we have to decide if we do NR or devide the interval by two:
+ // we want to check if the next NR step is within our interval
+ // recall the the next NR guess is Xn+1 = Xn - f(Xn) / f(Xn+1)
+ // So we want (current - currentF/currentDF) to be between low and high
+ for (int i=0 ; i < max_it; ++i) {
+ MDOUBLE currentDF = df(current);
+ MDOUBLE newGuess = current - currentF/currentDF;
+ // NR guess inside the bracket (low/high may be in either order here)?
+ if ((newGuess<low && newGuess> high) || (newGuess>low && newGuess< high)) {
+ // in this case we should do a NR step.
+ current = newGuess;
+ currentF = f(current);
+ if (currentF > 0){
+ high = current;
+ highF = currentF;
+ } else {
+ low = current;
+ lowF = currentF;
+ }
+
+ oldIntervalSize = currentIntervalSize;
+ currentIntervalSize =fabs (high-low);
+ if (currentIntervalSize < tol) {
+ return current;
+ }
+ //LOG(5,<<"NR: low= "<<low<<" high= "<<high<<endl);
+ }
+ else { // bisection
+ oldIntervalSize = currentIntervalSize;
+ currentIntervalSize /= 2.0;
+ current = (low+high)/2.0;
+ currentF = f(current);
+ if (currentF > 0){
+ high = current;
+ highF = currentF;
+ } else {
+ low = current;
+ lowF = currentF;
+ }
+ //LOG(5,<<"BIS: low= "<<low<<" high= "<<high<<endl);
+ if (currentIntervalSize < tol) {
+ return current;
+ }
+
+ }
+ }
+ errorMsg::reportError("to many iterations in myNR function");
+ return 0;
+}
+
+//const MDOUBLE likeDistfixRoot::giveDistanceNR( const countTableComponentGam& ctc,
+// MDOUBLE& resL,
+// const MDOUBLE initialGuess) const {
+// //change bx so that it will be the current branch length!
+// const MDOUBLE ax=0,bx=initialGuess,cx=_maxPairwiseDistance,tol=_toll;
+// // LOG(5,<<"===================================================\n");
+// MDOUBLE dist=-1.0;
+// int zeroFound = 0;
+// dist = myNRmethod(ax,bx,cx,
+// C_evalLikeDist_dGL(ctc,_sp),
+// C_evalLikeDist_d2GL(ctc,_sp),
+// tol,
+// 100,
+// zeroFound);// max it for NR;
+// if (zeroFound == 0) {// there was an error finding a zero
+// dist = bx;
+// }
+//
+// return dist;
+//}
+
+
+
+
+
+
+
+
+
+
+
+/*
+
+
+
+
+const MDOUBLE likeDistfixRoot::giveDistance( // the NR version.
+ const countTableComponentGam& ctc,
+ MDOUBLE& resL) const {
+ LOG(5,<<"=============="<<endl);
+ MDOUBLE oldGuess=0.05; // move to parameters.
+ if (oldGuess<0) oldGuess=0.05; // move up.
+ int max_it = 100;
+ MDOUBLE oldDist =0;
+ MDOUBLE currentDist =oldGuess;
+ MDOUBLE newDer =VERYBIG;
+ MDOUBLE oldDer =VERYBIG;
+ //const MDOUBLE ax=0,bx=1.0,cx=_maxPairwiseDistance,tol=_toll;
+ for (int i=0; i < max_it; ++i){
+ MDOUBLE sumDL=0.0;
+ MDOUBLE sumDL2=0.0;
+ for (int alph1=0; alph1 < ctc.alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < ctc.alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_s1.categories(); ++rateCategor) {
+ MDOUBLE rate = _s1.rates(rateCategor);
+
+ MDOUBLE pij= _s1.Pij_t(alph1,alph2,currentDist*rate);
+ MDOUBLE dpij = _s1.dPij_dt(alph1,alph2,currentDist*rate);
+ MDOUBLE dpij2 = _s1.d2Pij_dt2(alph1,alph2,currentDist*rate);
+ if (pij==0) {
+ pij = 0.000000001;
+ dpij = 0.000000001;
+ }
+ sumDL+= ctc.getCounts(alph1,alph2,rateCategor)*dpij
+ *rate/pij;
+ sumDL2+= ctc.getCounts(alph1,alph2,rateCategor)*rate*(pij*dpij2-dpij *dpij)
+ /(pij*pij);
+ }
+ }
+ }
+ oldDer = newDer;
+ newDer = sumDL;
+ LOG(5,<<"\ndistance = "<<currentDist<<endl);
+ LOG(5,<<"derivation = "<<sumDL<<endl);
+ LOG(5,<<"sec derivation = "<<sumDL2<<endl);
+ oldDist = currentDist;
+ if ((fabs(newDer) < fabs(oldDer)) && (sumDL2 < 0)) {
+ currentDist = currentDist - newDer/sumDL2;
+ }
+ else {
+ currentDist = currentDist / 2;
+ }
+ MDOUBLE epsilonForDeriv = 0.001;// move up
+ if (fabs(newDer) < epsilonForDeriv) break;
+
+ }
+
+ return currentDist;
+}*/
+
+// ML distance between two sequences via direct likelihood optimization
+// (dbrent). The Jukes-Cantor distance seeds the bracket midpoint; if score
+// is non-NULL it receives the maximized log-likelihood.
+const MDOUBLE likeDistfixRoot::giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score) const {
+ const MDOUBLE ax=0, cx=_maxPairwiseDistance,tol=_toll;
+ MDOUBLE bx=_jcDist.giveDistance(s1,s2,weights,score)/*=1.0*/;
+ if (!(bx==bx)) bx = 1.0; // safety check that the JC distance did not return nan (not a number)
+ MDOUBLE dist=-1.0;
+ MDOUBLE resL = -dbrent(ax,bx,cx,
+ C_evalLikeDistDirect(_sp,s1,s2,weights),
+ C_evalLikeDistDirect_d(_sp,s1,s2,weights),
+ tol,
+ &dist);
+ if (score) *score = resL;
+ return dist;
+}
+
+// Log-likelihood of the sequence pair at a fixed distance (no
+// optimization); undoes the functor's sign flip.
+const MDOUBLE likeDistfixRoot::giveLikelihood(const sequence& s1,
+ const sequence& s2,
+ MDOUBLE distance,
+ const vector<MDOUBLE> * weights) const
+{
+
+
+ C_evalLikeDistDirect evalDis(_sp,s1,s2,weights);
+ return -evalDis(distance);
+
+}
diff --git a/libs/phylogeny/likeDistfixRoot.h b/libs/phylogeny/likeDistfixRoot.h
new file mode 100644
index 0000000..bf63959
--- /dev/null
+++ b/libs/phylogeny/likeDistfixRoot.h
@@ -0,0 +1,211 @@
+// $Id: likeDistfixRoot.h 4470 2008-07-17 15:37:40Z cohenofi $
+
+#ifndef ___LIKE_DIST_H_GL_FIX_ROOT
+#define ___LIKE_DIST_H_GL_FIX_ROOT
+
+#include "definitions.h"
+#include "countTableComponent.h"
+#include "distanceMethod.h"
+#include "stochasticProcess.h"
+#include "logFile.h"
+#include "jcDistance.h"
+#include "sequenceContainer.h"
+#include "unObservableData.h"
+#include <cmath>
+using namespace std;
+
+// ML pairwise-distance estimator with a fixed root, optionally correcting
+// for unobservable data (conditioning on observability). The
+// stochasticProcess reference is stored, not copied.
+class likeDistfixRoot : public distanceMethod {
+public:
+ // WARNING: the stochasticProcess is NOT copied. The same object is used
+ explicit likeDistfixRoot(const stochasticProcess& sp,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0,
+ unObservableData* unObservableData_p=NULL)
+ : _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_unObservableData_p(unObservableData_p) {}
+
+ // fix: copy _unObservableData_p too — it was missing from the init list,
+ // leaving the copy's pointer uninitialized (UB when giveDistanceBrent
+ // hands it to C_evallikeDistfixRoot, which dereferences it if non-null).
+ likeDistfixRoot(const likeDistfixRoot& other)
+ : _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_jcDist(other._jcDist),_unObservableData_p(other._unObservableData_p) {}
+
+ virtual likeDistfixRoot* clone() const {return new likeDistfixRoot(*this);}
+ // This constructor allows non-const stochasticProcess so that likeDistfixRoot will be able to change alpha, etc.
+ // fix: initialize _unObservableData_p to NULL — it was left uninitialized.
+ explicit likeDistfixRoot(stochasticProcess& sp,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0)
+ : _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_unObservableData_p(NULL) {}
+
+ // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
+ // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
+ // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
+ const MDOUBLE giveDistance(const vector<countTableComponentGam>& ctc,
+ MDOUBLE& resQ,
+ const MDOUBLE initialGuess= 0.03) const; // initial guess
+
+ // given two sequences, it evaluates the log likelihood.
+ MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1,
+ const sequence& s2,
+ const MDOUBLE dis2evaluate);
+
+ // returns the estimated ML distance between the 2 sequences.
+ // if score is given, it will be the log-likelihood.
+ const MDOUBLE giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL) const;
+
+ // this function creates a countTableComponent (ctc) from the two sequences.
+ // it then computes the distance from this ctc.
+ // THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge
+ // TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
+ // NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
+ //MDOUBLE giveDistanceThroughCTC(const sequence& s1,
+ // const sequence& s2,
+ // const vector<MDOUBLE> * weights,
+ // MDOUBLE* score=NULL) const;
+
+ const MDOUBLE giveLikelihood(const sequence& s1,
+ const sequence& s2,
+ MDOUBLE distance,
+ const vector<MDOUBLE> * weights=NULL) const;
+
+ // return the stochasticProcess
+ const stochasticProcess& getStochasticProcess() const {return _sp;}
+ stochasticProcess& getNonConstStochasticProcess();
+ bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;}
+ MDOUBLE getToll() const {return _toll;}
+ MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;}
+
+protected:
+ const stochasticProcess &_sp;
+ stochasticProcess *_nonConstSpPtr; // non-NULL only for the non-const ctor
+ const MDOUBLE _toll;
+ const MDOUBLE _maxPairwiseDistance;
+ jcDistance _jcDist; // seeds the Brent bracket in giveDistance(seq)
+ unObservableData* _unObservableData_p; // not owned; may be NULL
+
+private:
+ const MDOUBLE giveDistanceBrent( const vector<countTableComponentGam>& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess= 0.03) const; // initial guess
+ const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc,
+ MDOUBLE& resL,
+ const MDOUBLE initialGuess= 0.03) const; // initial guess
+
+
+
+public:
+ static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const MDOUBLE dist,
+ const vector<MDOUBLE> * weights=NULL);
+
+};
+
+
+// Functor: -Q(dist) for the fixed-root model — sums count-weighted
+// log(Pij/freq_j) over root letters, letter pairs and rate categories,
+// optionally rescaling by the probability of observability.
+class C_evallikeDistfixRoot{
+private:
+ const vector<countTableComponentGam>& _ctc;
+ const stochasticProcess& _sp;
+ unObservableData* _unObservableData_p; // may be NULL; not owned
+public:
+ C_evallikeDistfixRoot(const vector<countTableComponentGam>& ctc, // ctc[letterAtRoot][rate][alph][alph]
+ const stochasticProcess& inS1, unObservableData* unObservableData_p=NULL)
+ :_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
+
+ MDOUBLE operator() (MDOUBLE dist)
+ {
+ //if(_plogLforMissingData){
+ // sequenceContainer scZero;
+ // gainLossAlphabet alph;
+ // scZero.startZeroSequenceContainerGL(_sc, alph);
+ // *_plogLforMissingData = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,scZero,*_sp);
+ //}
+ const MDOUBLE epsilonPIJ = 1e-10;
+ MDOUBLE sumL=0.0;
+ MDOUBLE sumLtemp=0.0;
+ for (int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); ++letterAtRoot){
+ for (int alph1=0; alph1 < _ctc[0].alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc[0].alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+
+ MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
+ if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
+ sumLtemp = _ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)*(log(pij)-log(_sp.freq(alph2)));//*_sp.ratesProb(rateCategor);// removed.
+ //sumLtemp = _ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)*(log(pij)-log(_sp.freq(letterAtRoot)));//*_sp.ratesProb(rateCategor);// removed.
+ //if(_unObservableData_p)
+ // sumL = sumL/(1- exp(_unObservableData_p->getlogLforMissingData())); //???
+ sumL += sumLtemp;
+ }
+ }
+ }
+ }
+ // condition on observability once, after summation (author marked "???")
+ if(_unObservableData_p)
+ sumL = sumL/(1- exp(_unObservableData_p->getlogLforMissingData())); //???
+ LOG(12,<<"check bl="<<dist<<" gives "<<sumL<<endl);
+
+ return -sumL; // negated so dbrent (a minimizer) maximizes Q
+ };
+};
+
+// REMARK 1: THE LINE if if (pij<epsilonPIJ) pij = epsilonPIJ
+// There are cases when i != j, and t!=0, and yet pij =0, because of numerical problems
+// For these cases, it is easier to assume pij is very small, so that log-pij don't fly...
+
+// Functor: -dQ/dt for the fixed-root model; companion derivative to
+// C_evallikeDistfixRoot for dbrent. Note: no observability rescaling here.
+class C_evalLikeDist_dGLfixRoot{ // derivative.
+public:
+ C_evalLikeDist_dGLfixRoot(const vector<countTableComponentGam>& ctc,
+ const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
+private:
+ const vector<countTableComponentGam>& _ctc; // indexed [letterAtRoot]
+ const stochasticProcess& _sp;
+public:
+ MDOUBLE operator() (MDOUBLE dist) {
+ MDOUBLE sumDL=0.0;
+ for (int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); ++letterAtRoot){
+ for (int alph1=0; alph1 < _ctc[0].alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc[0].alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+
+ MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
+ MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
+ // d/dt log Pij(t*rate) = rate*dpij/pij, weighted by the counts
+ sumDL+= _ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)*dpij //*_sp.ratesProb(rateCategor) : removed CODE_RED
+ *rate/pij;
+ }
+ }
+ }//cerr<<"derivation = "<<-sumDL<<endl;
+ }
+ return -sumDL;
+ };
+};
+
+// Functor: second derivative of Q w.r.t. branch length (negated), used by
+// the disabled Newton-Raphson path. Takes a single count table (no
+// per-root-letter dimension, unlike the first-derivative functor above).
+class C_evalLikeDist_d2GLfixRoot{ // second derivative.
+public:
+ C_evalLikeDist_d2GLfixRoot(const countTableComponentGam& ctc,
+ const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
+private:
+ const countTableComponentGam& _ctc;
+ const stochasticProcess& _sp;
+public:
+ MDOUBLE operator() (MDOUBLE dist) {
+ MDOUBLE sumDL=0.0;
+ for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
+ for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+
+ MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
+ MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
+ MDOUBLE d2pij = _sp.d2Pij_dt2(alph1,alph2,dist*rate);
+ // NOTE(review): chain rule for d2/dt2 log Pij(t*rate) suggests a
+ // rate*rate factor; only a single `rate` is applied here — confirm.
+ sumDL+= rate*_ctc.getCounts(alph1,alph2,rateCategor)*
+ (pij*d2pij - dpij *dpij )/(pij*pij);
+ }
+ }
+ }
+ return -sumDL;
+ };
+};
+
+#endif
+
diff --git a/libs/phylogeny/likelihoodComputation.cpp b/libs/phylogeny/likelihoodComputation.cpp
new file mode 100644
index 0000000..2715947
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputation.cpp
@@ -0,0 +1,440 @@
+// $Id: likelihoodComputation.cpp 5058 2008-10-19 15:55:24Z cohenofi $
+
+#include "definitions.h"
+#include "tree.h"
+#include "computeUpAlg.h"
+#include "likelihoodComputation.h"
+#include "gammaUtilities.h"
+#include <cmath>
+#include <cassert>
+
+
+using namespace likelihoodComputation;
+
+/********************************************************************************************
+likelihood computation - full data (1)
+*********************************************************************************************/
+// Total log-likelihood of the whole alignment when all positions share the same
+// branch lengths: Pij tables are filled once for all rate categories, then each
+// position's likelihood is accumulated in log space, weighted by `weights` when
+// given. If unObservableData_p is set, the total is conditioned on observability
+// (corrects for patterns that can never be observed) across all positions.
+MDOUBLE likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const Vdouble * const weights,
+ unObservableData *unObservableData_p)
+{
+ computePijGam pi;
+ pi.fillPij(et,sp); // one Pij table per rate category, reused for every position
+ MDOUBLE res =0;
+ doubleRep LofPos;
+ int k;
+ for (k=0; k < sc.seqLen(); ++k) {
+ LofPos = likelihoodComputation::getLofPos(k,//pos,
+ et, //const tree&
+ sc, // sequenceContainer& sc,
+ pi, //const computePijGam& ,
+ sp
+ /*unObservableData_p*/); // conditioning is applied once below, not per position
+ res += log(LofPos) * (weights?(*weights)[k]:1);//const stochasticProcess& );
+ }
+ if(unObservableData_p){ // conditioning on observability for allPos & allRateCat
+ res = res - sc.seqLen()*log(1- exp(unObservableData_p->getlogLforMissingData()));
+ }
+ return res;
+}
+
+/********************************************************************************************
+likelihood computation - per pos (1.1)
+*********************************************************************************************/
+// Likelihood of a single position under the gamma model, averaging the
+// per-rate-category likelihoods by their prior probabilities. `pi` must already
+// hold the per-category Pij tables. If unObservableData_p is set, the result is
+// conditioned on observability over all rate categories (ver2 below).
+doubleRep likelihoodComputation::getLofPos(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const computePijGam& pi,
+ const stochasticProcess& sp,
+ unObservableData *unObservableData_p)
+{
+ // with the pi already computed.
+ doubleRep tmp=0;
+ int numOfCat = sp.categories();
+ VdoubleRep tmpPerCat; // per-category likelihoods, kept separate before mixing
+ tmpPerCat.resize(numOfCat);
+
+ for (int i=0; i < sp.categories();++i) {
+ tmpPerCat[i] = getLofPos(pos,et,sc,pi[i],sp);
+// ver1 - fix likelihoodForEachCat by LforMissingDataPerCat - Wrong version...
+ //if(pLforMissingDataPerCat){
+ // tmpPerCat[i] = tmpPerCat[i]/(1- (*pLforMissingDataPerCat)[i]);
+ //}
+ tmp += tmpPerCat[i]*sp.ratesProb(i);
+ }
+// ver2 - fix likelihoodForEachCat by LforMissingDataAll
+ if(unObservableData_p){ // conditioning on observability for all rateCat.
+ tmp = tmp / (1- exp(unObservableData_p->getlogLforMissingData()));
+ }
+ return tmp;
+}
+
+/********************************************************************************************
+likelihood computation - per pos, per cat (1.1.1)
+*********************************************************************************************/
+// Likelihood of a single position for one homogeneous Pij table (one rate
+// category): runs the up (pruning) algorithm for this position, then sums the
+// root partial likelihoods weighted by the root letter frequencies. A zero
+// likelihood is clamped to EPSILON (logged at level 5) rather than aborting.
+doubleRep likelihoodComputation::getLofPos(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const computePijHom& pi,
+ const stochasticProcess& sp,
+ unObservableData *unObservableData_p)
+{
+ computeUpAlg cup;
+ suffStatGlobalHomPos ssc;
+ cup.fillComputeUp(et,sc,pos,pi,ssc); // Felsenstein pruning for this position
+
+ doubleRep tmp = 0.0;
+ for (int let = 0; let < sp.alphabetSize(); ++let) {
+ doubleRep tmpLcat=
+ ssc.get(et.getRoot()->id(),let)*
+ sp.freq(let);
+ // sanity check: a per-letter contribution must not be negative
+ // (NOTE(review): DBIG_EQUAL presumably tests ">= 0 within tolerance" — confirm)
+ if (!DBIG_EQUAL(convert(tmpLcat), 0.0))
+ {
+ cerr<<"tmpLcat = "<<tmpLcat<<endl;
+ errorMsg::reportError("error in likelihoodComputation::getLofPos. likelihood is smaller than zero");
+ }
+ //assert(tmpLcat>=0.0);
+ tmp+=tmpLcat;
+ }
+// cout<<"likelihoodComputation::getLofPos: tmp = "; tmp.outputn(cout); // DEBUG EP
+ if (!(tmp>0.0)){
+ LOG(5,<<"likelihoodComputation::getLofPos: "<< tmp<<endl;);
+ LOG(5,<<"pos = "<< pos <<endl;);
+ tmp = EPSILON; // clamp so the caller's log() stays finite
+ //errorMsg::reportError("likelihoodComputation::getLofPos: likelihood of pos was zero!",1);
+ }
+
+ if(unObservableData_p){ // conditioning on observability
+ tmp = tmp / (1- exp(unObservableData_p->getlogLforMissingData()));
+ }
+ return tmp;
+}
+
+/********************************************************************************************
+*********************************************************************************************/
+// Position likelihood when the up-computation `ssc` is already filled
+// (homogeneous model): sums root partials weighted by root frequencies.
+// Reports an error if `ssc` is empty.
+doubleRep likelihoodComputation::getProbOfPosWhenUpIsFilledHom(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalHomPos& ssc){
+// using the pij of stochastic process rather than pre computed pij's...
+ if (ssc.size()==0) {errorMsg::reportError("error in function likelihoodComputation::getLofPosWhenUpIsFilled");}
+ doubleRep tmp = 0.0;
+ for (int let = 0; let < sp.alphabetSize(); ++let) {
+ doubleRep tmpLcat=
+ ssc.get(et.getRoot()->id(),let)*
+ sp.freq(let);
+ tmp+=tmpLcat;
+ }
+ return tmp;
+}
+
+/********************************************************************************************
+*********************************************************************************************/
+// Position likelihood for a homogeneous (single-category) model when branch
+// lengths may differ between positions: Pij is recomputed here for each call,
+// so use only when a shared computePijHom cannot be precomputed (see header).
+doubleRep likelihoodComputation::getLofPosHomModelEachSiteDifferentRate(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp){
+// using the pij of stochastic process rather than pre computed pij's...
+ if (sp.categories()!=1) {
+ errorMsg::reportError("num of categories in function getLofPosHomModel must be one");
+ }
+ computeUpAlg cup;
+ suffStatGlobalHomPos ssc;
+ computePijHom cpij;
+ cpij.fillPij(et,sp); // recomputed per call — see note above
+ cup.fillComputeUp(et,sc,pos,cpij,ssc);
+ return getProbOfPosWhenUpIsFilledHom(pos,et,sc,sp,ssc);
+}
+/********************************************************************************************
+*********************************************************************************************/
+// Gamma-model analogue of the function above: recomputes the per-category Pij
+// tables for every call, then delegates to getLofPos. Intended only for the
+// case where branch lengths differ between positions (see header comment).
+doubleRep likelihoodComputation::getLofPosGamModelEachSiteDifferentRate(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp){
+ computePijGam pi;
+ pi.fillPij(et,sp); // recomputed per call — expensive if reused across positions
+ return getLofPos(pos,et,sc,pi,sp);
+}
+/********************************************************************************************
+*********************************************************************************************/
+// Likelihood of a single position under a site-specific global rate `gRate`:
+// the up algorithm is run for this position with all branch lengths scaled by
+// gRate, then the root partials are summed weighted by the root letter
+// frequencies. Returns the (unlogged) position likelihood.
+doubleRep likelihoodComputation::getLofPos(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const MDOUBLE gRate){ // when there is a global rate for this position
+// using the pij of stochastic process rather than pre computed pij's...
+ computeUpAlg cup;
+ suffStatGlobalHomPos ssc;
+ cup.fillComputeUpSpecificGlobalRate(et,sc,pos,sp,ssc,gRate);
+
+ doubleRep tmp = 0.0;
+ for (int let = 0; let < sp.alphabetSize(); ++let) {
+ doubleRep tmpLcat=
+ ssc.get(et.getRoot()->id(),let)*
+ sp.freq(let); // stray duplicate ';' removed
+ assert(tmpLcat>=0.0); // partial likelihoods must be non-negative
+ tmp+=tmpLcat;
+ }
+ return tmp;
+}
+
+/********************************************************************************************
+*********************************************************************************************/
+// Position likelihood plus the posterior distribution over rate categories:
+// postrior[i] is filled with P(cat i | data at pos) (normalized on exit) and
+// the (unlogged) position likelihood is returned. `postrior` must already be
+// sized to sp.categories().
+doubleRep likelihoodComputation::getLofPosAndPosteriorOfRates(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const computePijGam& pi,
+ const stochasticProcess& sp,
+ VdoubleRep& postrior){
+// with the pi already computed.
+ doubleRep tmp=0;
+ for (int i=0; i < sp.categories();++i) {
+ postrior[i]=getLofPos(pos,et,sc,pi[i],sp)*sp.ratesProb(i); // joint P(data, cat i)
+ tmp += postrior[i];
+ }
+ for (int i=0; i < sp.categories();++i)
+ postrior[i] /= tmp; // normalize joint into posterior
+ return tmp;
+}
+/********************************************************************************************
+*********************************************************************************************/
+// Total log-likelihood computed from an already-filled up table `cup`
+// (all positions, all rate categories): per position, root partials are
+// averaged over letters and rate categories, then log-summed with optional
+// per-position weights.
+MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ const Vdouble * weights) {
+ MDOUBLE like = 0;
+ //computing the likelihood from up:
+ for (int pos = 0; pos < sc.seqLen(); ++pos) {
+ doubleRep tmp=0;
+ for (int categor = 0; categor < sp.categories(); ++categor) {
+ doubleRep veryTmp =0;
+ for (int let =0; let < sc.getAlphabet()->size(); ++let) {
+ veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
+ }
+ tmp += veryTmp*sp.ratesProb(categor);
+ }
+ like += log(tmp) * (weights?(*weights)[pos]:1);
+ }
+ return like;
+}
+/********************************************************************************************
+*********************************************************************************************/
+// As getTreeLikelihoodFromUp, but additionally fills posLike with each
+// position's (unlogged, unweighted) likelihood, and optionally conditions each
+// position on observability via unObservableData_p. Weights affect only the
+// returned total log-likelihood.
+MDOUBLE likelihoodComputation::getTreeLikelihoodFromUp2(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
+ const Vdouble * weights,
+ unObservableData* unObservableData_p) {
+ posLike.clear();
+ MDOUBLE like = 0;
+ //computing the likelihood from up:
+ for (int pos = 0; pos < sc.seqLen(); ++pos) {
+ doubleRep tmp=0;
+ for (int categor = 0; categor < sp.categories(); ++categor) {
+ doubleRep veryTmp =0;
+ for (int let =0; let < sc.alphabetSize(); ++let) {
+ veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
+ }
+ tmp += veryTmp*sp.ratesProb(categor);
+ }
+ assert(tmp>0.0);
+ if(unObservableData_p){ // condition this position on observability
+ tmp = tmp/(1- exp(unObservableData_p->getlogLforMissingData()));
+ }
+ like += log(tmp) * (weights?(*weights)[pos]:1);
+ posLike.push_back(tmp); // conditioned value, without the weight
+ }
+ return like;
+}
+/********************************************************************************************
+ fill the posteriorLike matrix with each position posterior rate (p(r|D))
+ but without the weights.
+*********************************************************************************************/
+// Convenience overload: fills Pij tables and the up-computation for all
+// positions, then delegates to the getPosteriorOfRates overload below.
+MDOUBLE likelihoodComputation::getPosteriorOfRates(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ VVdoubleRep& posteriorLike,
+ const Vdouble * weights) {
+ suffStatGlobalGam cup;
+ computeUpAlg cupAlg;
+ computePijGam cpGam;
+ cpGam.fillPij(et,sp);
+ cupAlg.fillComputeUp(et,sc,cpGam,cup);
+ return getPosteriorOfRates(et,sc,sp,cup,posteriorLike,weights);
+}
+
+// fill the posteriorLike matrix with each position posterior rate (p(r|D))
+// but without the weights.
+// fill the posteriorLike matrix with each position posterior rate (p(r|D))
+// but without the weights.
+// posteriorLike is resized here to [seqLen][categories]; weights affect only
+// the returned total log-likelihood.
+MDOUBLE likelihoodComputation::getPosteriorOfRates(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ VVdoubleRep& posteriorLike,
+ const Vdouble * weights) {
+ posteriorLike.clear();
+ posteriorLike.resize(sc.seqLen());
+ for (int z=0; z < posteriorLike.size(); ++z) posteriorLike[z].resize(sp.categories());
+ MDOUBLE like = 0;
+ //computing the likelihood from up:
+ for (int pos = 0; pos < sc.seqLen(); ++pos) {
+ doubleRep posProb=0;
+ for (int categor = 0; categor < sp.categories(); ++categor) {
+ doubleRep veryTmp =0;
+ for (int let =0; let < sc.getAlphabet()->size(); ++let) {
+ veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
+ }
+ posProb += veryTmp*sp.ratesProb(categor);
+ posteriorLike[pos][categor] += veryTmp*sp.ratesProb(categor); // joint; entries start at 0 after resize
+ }
+ like += log(posProb) * (weights?(*weights)[pos]:1);
+ for (int categor1 = 0; categor1 < sp.categories(); ++categor1) {
+ posteriorLike[pos][categor1] /= posProb; // normalize joint into posterior p(r|D)
+ }
+ }
+
+ return like;
+}
+
+
+// fill the posteriorLike matrix with each position posterior rate (p(r|D))
+// and the LLPP, but without the weights.
+// fill the posteriorLike matrix with each position posterior rate (p(r|D))
+// and the LLPP, but without the weights.
+// posteriorLike is resized to [seqLen][categories]; LLPerPos is resized to
+// seqLen and filled with each position's (unlogged) likelihood. Weights affect
+// only the returned total log-likelihood.
+MDOUBLE likelihoodComputation::getPosteriorOfRatesAndLLPP(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ VVdoubleRep& posteriorLike,
+ VdoubleRep& LLPerPos,
+ const Vdouble * weights) {
+ posteriorLike.clear();
+ posteriorLike.resize(sc.seqLen());
+ LLPerPos.resize(sc.seqLen()); // robustness: size the output before indexed writes below
+ for (int z=0; z < posteriorLike.size(); ++z) posteriorLike[z].resize(sp.categories());
+ MDOUBLE like = 0;
+ //computing the likelihood from up:
+ for (int pos = 0; pos < sc.seqLen(); ++pos) {
+ LLPerPos[pos] = 0.0;
+ for (int categor = 0; categor < sp.categories(); ++categor) {
+ doubleRep veryTmp =0;
+ for (int let =0; let < sc.getAlphabet()->size(); ++let) {
+ veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
+ }
+ LLPerPos[pos] += veryTmp*sp.ratesProb(categor);
+ posteriorLike[pos][categor] += veryTmp*sp.ratesProb(categor); // joint; entries start at 0 after resize
+ }
+ like += log(LLPerPos[pos]) * (weights?(*weights)[pos]:1);
+ for (int categor1 = 0; categor1 < sp.categories(); ++categor1) {
+ posteriorLike[pos][categor1] /= LLPerPos[pos]; // normalize joint into posterior p(r|D)
+ }
+ }
+
+ return like;
+}
+
+// this function forces non gamma computation of likelihoods from up.
+// i.e., even if the stochastic process is really gamma - the likelihood is computed as if there's no gamma.
+// this function forces non gamma computation of likelihoods from up.
+// i.e., even if the stochastic process is really gamma - the likelihood is computed as if there's no gamma.
+// Fills posLike with each position's (unlogged, unweighted) likelihood; the
+// returned total log-likelihood applies the optional per-position weights.
+MDOUBLE likelihoodComputation::getTreeLikelihoodFromUpSpecifcRates(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalHom& cup,
+ VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
+ const Vdouble * weights)
+{
+ posLike.clear();
+ MDOUBLE like = 0;
+ //computing the likelihood from up:
+ for (int pos = 0; pos < sc.seqLen(); ++pos)
+ {
+ doubleRep tmp=0;
+ for (int let =0; let < sc.getAlphabet()->size(); ++let) {
+ tmp += cup.get(pos, et.getRoot()->id(), let) * sp.freq(let);
+ }
+
+ assert(tmp > 0);
+ like += log(tmp) * (weights?(*weights)[pos]:1);
+ posLike.push_back(tmp);
+ }
+ return like;
+}
+/********************************************************************************************
+*********************************************************************************************/
+// Position likelihood (gamma model) when the per-position up table `cup` is
+// already filled: averages root partials over letters and rate categories.
+doubleRep likelihoodComputation::getProbOfPosWhenUpIsFilledGam(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGamPos& cup) {
+ doubleRep tmp=0;
+ for (int categor = 0; categor < sp.categories(); ++categor) {
+ doubleRep veryTmp =0;
+ for (int let =0; let < sc.alphabetSize(); ++let) {
+ veryTmp+=cup.get(categor,et.getRoot()->id(),let) * sp.freq(let);
+ }
+ tmp += veryTmp*sp.ratesProb(categor);
+ }
+ assert(tmp>0.0);
+ return tmp;
+}
+/********************************************************************************************
+*********************************************************************************************/
+// Returns the total tree log-likelihood and fills LLPerPos with each
+// position's log-likelihood (unweighted). Pij tables are computed once here.
+MDOUBLE likelihoodComputation::computeLikelihoodAndLikelihoodPerPosition(const sequenceContainer &sc, const tree &et,
+ const stochasticProcess &sp, Vdouble &LLPerPos) {
+ MDOUBLE treeLogLikelihood = 0.0;
+ computePijGam cpij;
+ cpij.fillPij(et, sp);
+ LLPerPos.resize(sc.seqLen());
+ doubleRep LofPos;
+ for (int pos=0; pos < sc.seqLen() ;++pos) {
+ LofPos = likelihoodComputation::getLofPos(pos, et, sc, cpij, sp);
+ MDOUBLE tmpLL = log(LofPos);
+ treeLogLikelihood += tmpLL;
+ LLPerPos[pos] = tmpLL;
+ }
+ return treeLogLikelihood;
+}
+/********************************************************************************************
+likelihood for each category - used for unObservableData
+*********************************************************************************************/
+// Per-category joint likelihoods of one position: tmp[i] = P(data at pos | cat i)
+// * P(cat i). Values are converted from doubleRep to plain doubles; used by the
+// unObservableData machinery (see header).
+Vdouble likelihoodComputation::getLofPosPerCat(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const computePijGam& pi,
+ const stochasticProcess& sp)
+{
+// with the pi already computed.
+ int numOfCat = sp.categories();
+ Vdouble tmp;
+ tmp.resize(numOfCat);
+ for (int i=0; i < numOfCat;++i) {
+ tmp[i] = convert(getLofPos(pos,et,sc,pi[i],sp))*sp.ratesProb(i);
+ }
+ return tmp;
+}
+
+//doubleRep likelihoodComputation::getLofPos(const int pos,
+// const tree& et,
+// const sequenceContainer& sc,
+// const computePijGam& pi,
+// const stochasticProcess& sp){
+//// with the pi already computed.
+// doubleRep tmp=0;
+// for (int i=0; i < sp.categories();++i) {
+// tmp += getLofPos(pos,et,sc,pi[i],sp)*sp.ratesProb(i);
+// }
+// return tmp;
+//}
+
+// MDOUBLE likelihoodComputation::getTreeLikelihoodFromPosteriorAndAlpha(const MDOUBLE alpha,
+// const Vdouble originalBounderi,
+// const VVdouble& posteriorLike,
+// const VdoubleRep& LLPP,
+// const Vdouble* weights)
+// {
+// int nCategories = originalBounderi.size()-1;
+// Vdouble rateWeights; rateWeights.resize(nCategories);
+// for (int i=0; i<n; ++i)
+// rateWeights[i]=(gammp(alpha, originalBounderi[i+1]*alpha)-gammp(alpha, originalBounderi[i]*alpha))*nCategories;
+
+// }
diff --git a/libs/phylogeny/likelihoodComputation.h b/libs/phylogeny/likelihoodComputation.h
new file mode 100644
index 0000000..451c737
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputation.h
@@ -0,0 +1,166 @@
+// $Id: likelihoodComputation.h 5058 2008-10-19 15:55:24Z cohenofi $
+
+#ifndef ___LIKELIHOOD_COMPUTATION
+#define ___LIKELIHOOD_COMPUTATION
+
+#include "definitions.h"
+#include "computePijComponent.h"
+#include "sequenceContainer.h"
+#include "suffStatComponent.h"
+#include "unObservableData.h"
+
+
+// Free-function namespace grouping all tree-likelihood computations: total and
+// per-position likelihoods, per-rate-category posteriors, and variants for
+// precomputed up-tables. Numbering comments like "(1.1.1)" refer to the
+// call hierarchy used in likelihoodComputation.cpp.
+namespace likelihoodComputation {
+// likelihood computation - full data (1)
+ MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const Vdouble * const weights = NULL,
+ unObservableData *unObservableData_p=NULL);
+// likelihood computation - per pos (1.1)
+ doubleRep getLofPos(const int pos, // this function is used
+ const tree& et, // when gamma, and the br-len
+ const sequenceContainer& sc, // are the same for all pos.
+ const computePijGam& pi,
+ const stochasticProcess& sp,
+ unObservableData *unObservableData_p=NULL);
+// likelihood computation - per pos, per cat (1.1.1)
+ doubleRep getLofPos(const int pos, // this function is used
+ const tree& et, // when the br-len
+ const sequenceContainer& sc, // are the same for all
+ const computePijHom& pi, // positions.
+ const stochasticProcess& sp,
+ unObservableData *unObservableData_p=NULL);
+
+
+
+ // used when the likelihood given each category is needed, not only the sum
+ Vdouble getLofPosPerCat(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const computePijGam& pi,
+ const stochasticProcess& sp);
+ // used to fill the likelihood for the unobservable for each category
+ // NOTE(review): no definition for this overload is visible in
+ // likelihoodComputation.cpp — confirm it is implemented elsewhere.
+ doubleRep getLofPos(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const computePijGam& pi,
+ const stochasticProcess& sp,
+ Vdouble& likePerCat); // all the likdelhoodsPerCat and rateProb are filled
+
+
+
+
+// --------------------------------------------------------------------------------
+// this function should be used only when the branch lengths are not the same for
+// all positions. Otherwise, computePijHom should be calculated once,
+// and be used for all calls. In this function, computePijHom is being computed for
+// each position.
+doubleRep getLofPosHomModelEachSiteDifferentRate(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp);
+// ---------------------------------------------------------------------------------
+
+
+// --------------------------------------------------------------------------------
+// this function should be used only when the branch lengths are not the same for
+// all positions. Otherwise, computePijHom should be calculated once,
+// and be used for all calls. In this function, computePijHom is being computed for
+// each position.
+doubleRep getLofPosGamModelEachSiteDifferentRate(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp);
+// --------------------------------------------------------------------------------
+
+
+ doubleRep getLofPos(const int pos, // with a site specific rate.
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const MDOUBLE gRate);
+ doubleRep getProbOfPosWhenUpIsFilledHom(const int pos, // to be used for homogenous model
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalHomPos& ssc);
+ doubleRep getProbOfPosWhenUpIsFilledGam(const int pos, // to be used for Gamma model.
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGamPos& cup);
+
+ doubleRep getLofPosAndPosteriorOfRates(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const computePijGam& pi,
+ const stochasticProcess& sp,
+ VdoubleRep& postrior);
+
+ MDOUBLE getTreeLikelihoodFromUp(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ const Vdouble * weights =0 );
+
+ MDOUBLE getTreeLikelihoodFromUp2(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
+ const Vdouble * weights=0,
+ unObservableData* unObservableData_p=NULL);
+ // fill this vector with each position posterior rate (p(r|D))
+ // but without the weights.
+ // the weights are used only because we return the likelihood
+ // (this takes these weights into account).
+ MDOUBLE getPosteriorOfRates(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ VVdoubleRep& posteriorLike,
+ const Vdouble * weights = NULL);
+
+ MDOUBLE getPosteriorOfRates(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ VVdoubleRep& posteriorLike,
+ const Vdouble * weights = NULL);
+
+ // fill the posteriorLike matrix with each position posterior rate (p(r|D))
+ // and the LLPP, but without the weights.
+ MDOUBLE getPosteriorOfRatesAndLLPP(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ VVdoubleRep& posteriorLike,
+ VdoubleRep& LLPerPos,
+ const Vdouble * weights=NULL);
+ // From Itay M.
+ // this function forces non gamma computation of likelihoods from up.
+ // i.e., even if the stochastic process is really gamma - the likelihood is computed as if there's no gamma.
+ MDOUBLE getTreeLikelihoodFromUpSpecifcRates(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalHom& cup,
+ VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
+ const Vdouble * weights = NULL);
+
+ // added from main semphy on 23.5.2005 (eyal privman + matan ninio).
+ MDOUBLE computeLikelihoodAndLikelihoodPerPosition(const sequenceContainer &sc, const tree &et,
+ const stochasticProcess &sp, Vdouble &LLPerPos);
+ // NOTE(review): the definition of this function is commented out in the .cpp —
+ // confirm it is implemented before calling.
+ MDOUBLE getTreeLikelihoodFromPosteriorAndAlpha(const MDOUBLE alpha,
+ const Vdouble originalBounderi,
+ const VVdouble& posteriorLike,
+ const VdoubleRep& LLPP,
+ const Vdouble* weights);
+
+
+
+};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/likelihoodComputation2Codon.cpp b/libs/phylogeny/likelihoodComputation2Codon.cpp
new file mode 100644
index 0000000..06af11a
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputation2Codon.cpp
@@ -0,0 +1,94 @@
+#include "likelihoodComputation2Codon.h"
+
+#include "wYangModel.h"
+#include "definitions.h"
+#include "tree.h"
+#include "computeUpAlg.h"
+#include "likelihoodComputation.h"
+
+#include <cmath>
+#include <cassert>
+
+using namespace likelihoodComputation2Codon;
+
+
+
+// Total log-likelihood for the 2-codon selection model: one Pij table is
+// filled per stochastic process in spVec, the up algorithm is run once for all
+// positions, and each position's likelihood is mixed over the selection (W)
+// distribution `distr`.
+MDOUBLE likelihoodComputation2Codon::getTreeLikelihoodAllPosAlphTheSame(const tree& et,
+ const sequenceContainer& sc,
+ const vector<stochasticProcess>& spVec,const distribution * distr){
+ computePijGam pi;
+ // NOTE(review): _V is sized by distr->categories() but filled by spVec.size();
+ // these are presumably equal — confirm with the callers.
+ pi._V.resize(distr->categories());
+ for (int i=0; i < spVec.size(); ++i) {
+ pi._V[i].fillPij(et,spVec[i]);
+ }
+
+ suffStatGlobalGam ssc;
+ computeUpAlg cup;
+ cup.fillComputeUp(et,sc,pi,ssc);
+
+ MDOUBLE res = 0.0;
+ int k;
+ for (k=0; k < sc.seqLen(); ++k) {
+ MDOUBLE lnL = log(likelihoodComputation2Codon::getProbOfPosUpIsFilledSelectionGam(k,//pos,
+ et,//const tree&
+ sc,// sequenceContainer& sc,
+ spVec[0],
+ ssc[k],//const computePijGam& ,
+ distr)); //W distribution ,
+ LOG(20,<<"pos= "<<k<<" lnL= "<<lnL<<endl);
+ res += lnL;
+ //if (k==5) exit(0);
+
+ }
+ return res;
+
+
+
+}
+
+
+// Position likelihood for the selection-gamma (2-codon) model when the
+// per-position up table `cup` is already filled: root partials are averaged
+// over letters (using sp's frequencies) and over the categories of `distr`.
+// Returns the (unlogged) likelihood converted to a plain double.
+MDOUBLE likelihoodComputation2Codon::getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGamPos& cup,const distribution * distr){
+
+ doubleRep tmp=0.0;
+ for (int categor = 0; categor < distr->categories(); ++categor) {
+ doubleRep veryTmp =0;
+ for (int let =0; let < sc.alphabetSize(); ++let) {
+ veryTmp+=cup.get(categor,et.getRoot()->id(),let) * sp.freq(let);
+
+ }
+ //cout<<"category= "<<categor<<" fh= "<<veryTmp<<" freqCategor= "<<distr->ratesProb(categor)<<endl;
+ tmp += veryTmp*distr->ratesProb(categor);
+ }
+ assert(tmp>0.0);
+ return convert(tmp);
+}
+
+// Total log-likelihood from an already-filled up table for the 2-codon model,
+// mixing over the categories of `distr`. Also fills posLike with each
+// position's (unlogged, unweighted) likelihood; weights affect only the
+// returned total.
+MDOUBLE likelihoodComputation2Codon::getTreeLikelihoodFromUp2(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ Vdouble& posLike, // fill this vector with each position likelihood but without the weights.
+ const distribution * distr,
+ const Vdouble * weights) {
+ posLike.clear();
+ MDOUBLE like = 0;
+ //computing the likelihood from up:
+ for (int pos = 0; pos < sc.seqLen(); ++pos) {
+ doubleRep tmp=0;
+ for (int categor = 0; categor < distr->categories(); ++categor) {
+ doubleRep veryTmp =0;
+ for (int let =0; let < sc.alphabetSize(); ++let) {
+ veryTmp+=cup.get(pos,categor,et.getRoot()->id(),let) * sp.freq(let);
+ }
+ tmp += veryTmp*distr->ratesProb(categor);
+ }
+ assert(tmp>0.0);
+ like += log(tmp) * (weights?(*weights)[pos]:1);
+ posLike.push_back(convert(tmp));
+ }
+ return like;
+
+}
diff --git a/libs/phylogeny/likelihoodComputation2Codon.h b/libs/phylogeny/likelihoodComputation2Codon.h
new file mode 100644
index 0000000..b01a226
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputation2Codon.h
@@ -0,0 +1,35 @@
+// $Id: likelihoodComputation2Codon.h 4699 2008-08-14 14:19:46Z privmane $
+
+#ifndef ___LIKELIHOOD_COMPUTATION_2_CODON
+#define ___LIKELIHOOD_COMPUTATION_2_CODON
+
+#include "definitions.h"
+#include "computePijComponent.h"
+#include "sequenceContainer.h"
+#include "suffStatComponent.h"
+
+// Likelihood computations for the 2-codon selection model: one stochastic
+// process per selection category, mixed over the W (selection) distribution.
+// Definitions are in likelihoodComputation2Codon.cpp.
+namespace likelihoodComputation2Codon {
+
+ MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& et,
+ const sequenceContainer& sc,
+ const vector<stochasticProcess>& spVec,
+ const distribution * distr);
+
+ MDOUBLE getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& et, //used for gamma model
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGamPos& cup,
+ const distribution * distr);
+
+ MDOUBLE getTreeLikelihoodFromUp2(const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const suffStatGlobalGam& cup,
+ Vdouble& posLike, // fill this vector with each position likelihood but without the weights.
+ const distribution * distr,
+ const Vdouble * weights=0);
+};
+
+
+
+#endif
diff --git a/libs/phylogeny/likelihoodComputation2USSRV.cpp b/libs/phylogeny/likelihoodComputation2USSRV.cpp
new file mode 100644
index 0000000..d9079d1
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputation2USSRV.cpp
@@ -0,0 +1,82 @@
+// $Id: likelihoodComputation2USSRV.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "likelihoodComputation2USSRV.h"
+
+
+using namespace likelihoodComputation2USSRV;
+
+//compute likelihood for the ssrv model and the base model.
+
+//compute likelihood for the ssrv model and the base model.
+// Each position's likelihood is the mixture f*L(SSRV) + (1-f)*L(base), where
+// f = model.getF(). The f==0 and f==1 edge cases skip the unused component so
+// its (possibly uninitialized) likelihood never reaches log().
+MDOUBLE likelihoodComputation2USSRV::getTreeLikelihoodAllPosAlphTheSame(const tree& et,
+ const sequenceContainer& sc, const sequenceContainer& baseSc,
+ const ussrvModel& model,const Vdouble * const weights){
+
+
+ computePijHom piSSRV;
+ piSSRV.fillPij(et,model.getSSRVmodel());
+
+ computePijGam piBase;
+ piBase.fillPij(et,model.getBaseModel());
+
+ MDOUBLE res =0.0;
+ MDOUBLE f = model.getF();
+ doubleRep LofPosSSRV(0.0),LofPosBase(0.0);
+ MDOUBLE lnL(0.);
+ int k;
+ for (k=0; k < sc.seqLen(); ++k) {
+ if (f<1.0)
+ LofPosBase = likelihoodComputation::getLofPos(k,et,baseSc,piBase,model.getBaseModel());
+ if (f>0.0) {
+ LofPosSSRV = likelihoodComputation::getLofPos(k,et,sc,piSSRV,model.getSSRVmodel());
+ if (f<1.0)
+ lnL = log(LofPosSSRV*f+(1-f)*LofPosBase);
+ else // f == 1.0
+ lnL = log(LofPosSSRV);
+ }
+ else // f == 0.0
+ lnL = log(LofPosBase);
+
+ LOG(9,<<"pos= "<<k<<" lnL= "<<lnL<<endl);
+ // NOTE(review): at f==0 this level-10 trace evaluates log(LofPosSSRV) on the
+ // never-computed (zero-initialized) value — debug output only, but confirm.
+ LOG(10,<<"logLofPosBase= "<< log(LofPosBase) << " logLofPosSSRV= " << log(LofPosSSRV) << " f= " << f <<endl);
+ res += lnL * (weights?(*weights)[k]:1);
+ }
+ return res;
+}
+
+
+
+// Total log-likelihood for the USSRV model from already-filled up tables:
+// cupBase holds the gamma base-model up-computation and cupSSRV the
+// homogeneous SSRV one. Per position, the mixture (1-f)*L(base) + f*L(SSRV) is
+// logged (weighted) into the total and pushed unweighted into posLike.
+MDOUBLE likelihoodComputation2USSRV::getTreeLikelihoodFromUp2(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ const ussrvModel & model,
+ const suffStatGlobalGam& cupBase,
+ const suffStatGlobalHom& cupSSRV,
+ VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
+ const Vdouble * weights) {
+ posLike.clear();
+ MDOUBLE like = 0;
+ MDOUBLE f = model.getF();
+ //computing the likelihood from up:
+ for (int pos = 0; pos < sc.seqLen(); ++pos) {
+ doubleRep tmp=0;
+
+ doubleRep tmp2 = 0; //like for the SSRV part
+ // SSRV
+ for (int let =0; let < model.getSSRVmodel().alphabetSize(); ++let) {
+ tmp2+=cupSSRV.get(pos,et.getRoot()->id(),let) * model.getSSRVmodel().freq(let);
+ }
+ // Base model
+ for (int categor = 0; categor < model.noOfCategor(); ++categor) {
+ doubleRep veryTmp =0;
+ for (int let =0; let < model.getBaseModel().alphabetSize(); ++let) {
+ veryTmp+=cupBase.get(pos,categor,et.getRoot()->id(),let) * model.getBaseModel().freq(let);
+ }
+ tmp += veryTmp*model.getCategorProb(categor);
+ }
+
+ // sanity: only the base-model part is checked here (the SSRV part is not)
+ if(tmp<0.0) errorMsg::reportError("like< 0 in likelihoodComputation2USSRV::getTreeLikelihoodFromUp2");
+
+ like += log((1-f)*tmp+f*tmp2) * (weights?(*weights)[pos]:1);
+ posLike.push_back((1-f)*tmp+f*tmp2);
+ }
+ return like;
+}
diff --git a/libs/phylogeny/likelihoodComputation2USSRV.h b/libs/phylogeny/likelihoodComputation2USSRV.h
new file mode 100644
index 0000000..03ab51a
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputation2USSRV.h
@@ -0,0 +1,36 @@
+// $Id: likelihoodComputation2USSRV.h 962 2006-11-07 15:13:34Z privmane $
+#ifndef ___LIKELIHOOD_COMPUTATION_2_USSRV
+#define ___LIKELIHOOD_COMPUTATION_2_USSRV
+
+#include "definitions.h"
+#include "computePijComponent.h"
+#include "sequenceContainer.h"
+#include "suffStatComponent.h"
+#include "ussrvModel.h"
+#include "tree.h"
+#include "computeUpAlg.h"
+#include "likelihoodComputation.h"
+#include <cmath>
+#include <cassert>
+
+
+// Likelihood computations for the USSRV model: a mixture of an SSRV component
+// and a base (gamma) component, weighted by the model's F parameter.
+// Definitions are in likelihoodComputation2USSRV.cpp.
+namespace likelihoodComputation2USSRV {
+
+ MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& et,
+ const sequenceContainer& sc,const sequenceContainer& baseSc,
+ const ussrvModel& model,const Vdouble * const weights=0);
+
+ MDOUBLE getTreeLikelihoodFromUp2(const tree& et,
+ const sequenceContainer& sc,
+ const sequenceContainer& baseSc,
+ const ussrvModel & model,
+ const suffStatGlobalGam& cupBase,
+ const suffStatGlobalHom& cupSSRV,
+ VdoubleRep& posLike, // fill this vector with each position likelihood but without the weights.
+ const Vdouble * weights=0);
+
+};
+
+
+
+#endif // ___LIKELIHOOD_COMPUTATION_2_USSRV
diff --git a/libs/phylogeny/likelihoodComputationFactors.cpp b/libs/phylogeny/likelihoodComputationFactors.cpp
new file mode 100644
index 0000000..85e37fb
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputationFactors.cpp
@@ -0,0 +1,33 @@
+// $Id: likelihoodComputationFactors.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "tree.h"
+#include "computeUpAlg.h"
+#include "likelihoodComputationFactors.h"
+#include <cmath>
+#include <cassert>
+
+using namespace likelihoodComputation;
+
+// Log-likelihood of a single position with a site-specific global rate, using
+// the factor-tracking variant of the up algorithm: partials are rescaled during
+// the climb and the accumulated power-of-10 factors at the root are folded back
+// in here, so very small likelihoods do not underflow.
+MDOUBLE likelihoodComputation::getLOG_LofPos(const int pos,
+ const tree& et,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const MDOUBLE gRate){ // when there is a global rate for this position
+// using the pij of stochastic process rather than pre computed pij's...
+ vector<MDOUBLE> factors;
+ computeUpAlg cup;
+ suffStatGlobalHomPos ssc;
+ cup.fillComputeUpSpecificGlobalRateFactors(et,sc,pos,sp,ssc,gRate,factors);
+
+ doubleRep tmp = 0.0;
+ for (int let = 0; let < sp.alphabetSize(); ++let) {
+ doubleRep tmpLcat=
+ ssc.get(et.getRoot()->id(),let)*
+ sp.freq(let); // stray duplicate ';' removed
+ assert(tmpLcat>=0); // partial likelihoods must be non-negative
+ tmp+=tmpLcat;
+ }
+ // undo the base-10 rescaling accumulated at the root
+ return log(tmp)-factors[et.getRoot()->id()]*log(10.0);
+}
+
diff --git a/libs/phylogeny/likelihoodComputationFactors.h b/libs/phylogeny/likelihoodComputationFactors.h
new file mode 100644
index 0000000..895a5df
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputationFactors.h
@@ -0,0 +1,28 @@
+// $Id: likelihoodComputationFactors.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___LIKELIHOOD_COMPUTATION_FACTORS
+#define ___LIKELIHOOD_COMPUTATION_FACTORS
+
+#include "definitions.h"
+#include "tree.h"
+#include "computePijComponent.h"
+#include "sequenceContainer.h"
+#include "suffStatComponent.h"
+
+namespace likelihoodComputation {
+
+	// Log-likelihood of a single position given a global (site-specific) rate gRate.
+	// Implemented in likelihoodComputationFactors.cpp using per-node scaling factors
+	// to avoid underflow.
+	MDOUBLE getLOG_LofPos(const int pos,	// with a site specific rate.
+				  const tree& et,
+				  const sequenceContainer& sc,
+				  const stochasticProcess& sp,
+				  const MDOUBLE gRate);
+
+	// add all the other functions to use factors...
+
+
+};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/likelihoodComputationGL.cpp b/libs/phylogeny/likelihoodComputationGL.cpp
new file mode 100644
index 0000000..8bfa8f8
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputationGL.cpp
@@ -0,0 +1,326 @@
+#include "likelihoodComputationGL.h"
+
+#include "definitions.h"
+#include "tree.h"
+#include "likelihoodComputation.h"
+#include <cmath>
+#include <cassert>
+
+using namespace likelihoodComputationGL;
+
+// account for RateCat, GainCat,LossCat
+// - For each RateCat an "external" multiplication is conducted - copy_et.multipleAllBranchesByFactor
+// - the GainCat*LossCat SPs are covered by the "internal" mechanism of PijGam
+
+/********************************************************************************************
+*********************************************************************************************/
+/********************************************************************************************
+ Full-alignment log-likelihood, summed over rate, gain and loss categories.
+ - Each rate category is handled by an "external" branch-length multiplication
+   (copy_et.multipleAllBranchesByFactor, performed inside fillPijAndUp).
+ - The gain*loss category pairs are covered by the "internal" mechanism of computePijGam.
+ If unObservableData_p is supplied, the result is corrected for unobservable patterns.
+ Fix: the per-position LOG line now reports the rate-averaged log-likelihood
+ (resGivenRate) instead of the last rate category's lnL only.
+*********************************************************************************************/
+MDOUBLE likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(const tree& tr,
+					const sequenceContainer& sc,
+					const vector<vector<stochasticProcess*> >& spVVec,
+					const distribution * distGain, const distribution * distLoss,
+					unObservableData *unObservableData_p)
+{
+	int numOfRateCategories = spVVec[0][0]->categories();
+	vector<computePijGam> pi_vec(numOfRateCategories);
+	vector<suffStatGlobalGam> ssc_vec(numOfRateCategories);
+	vector<computeUpAlg> cup_vec(numOfRateCategories);
+
+	likelihoodComputationGL::fillPijAndUp(tr,sc,spVVec,distGain,distLoss,pi_vec,ssc_vec,cup_vec);
+	MDOUBLE res = 0.0;
+	for (int k=0; k < sc.seqLen(); ++k) {
+		MDOUBLE resGivenRate = 0.0;
+		for(int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
+			MDOUBLE lnL = log(likelihoodComputationGL::getProbOfPosUpIsFilledSelectionGam(k,//pos,
+				tr,//const tree&
+				sc,// sequenceContainer& sc,
+				spVVec,	// only needed for sp.freq(let)
+				ssc_vec[rateIndex][k],//const computePijGam& ,
+				distGain, distLoss)); // distributions ,
+			resGivenRate += lnL * spVVec[0][0]->ratesProb(rateIndex);
+		}
+		LOG(20,<<"pos= "<<k+1<<" lnL= "<<resGivenRate<<endl);
+		res += resGivenRate;
+	}
+	if(unObservableData_p){
+		// subtract seqLen * log(1 - P(unobservable)) to condition on observability
+		res = res - sc.seqLen()*log(1- exp(unObservableData_p->getlogLforMissingData()));
+	}
+	return res;
+}
+/********************************************************************************************
+*********************************************************************************************/
+/********************************************************************************************
+ For every rate category: scale a private copy of the tree by that category's rate,
+ fill the transition probabilities for every gain*loss stochastic process, and run the
+ up algorithm, leaving the sufficient statistics in ssc_vec.
+*********************************************************************************************/
+void likelihoodComputationGL::fillPijAndUp(const tree& tr,
+					 const sequenceContainer& sc,
+					 const vector<vector<stochasticProcess*> >& spVVec,
+					 const distribution * distGain, const distribution * distLoss,
+					 vector<computePijGam>& pi_vec,
+					 vector<suffStatGlobalGam>& ssc_vec, // info filled into suffStat
+					 vector<computeUpAlg>& cup_vec)
+{
+	const int spCount = distGain->categories()*distLoss->categories();
+	const int rateCatCount = spVVec[0][0]->categories();
+	for (int rc=0 ; rc<rateCatCount; ++rc){
+		// "external" rate handling: stretch all branches by this category's rate
+		tree scaledTree = tr;
+		scaledTree.multipleAllBranchesByFactor(spVVec[0][0]->rates(rc));
+		pi_vec[rc]._V.resize(spCount);
+		// transition probabilities, one entry per gain*loss stochastic process
+		for (int spIdx=0; spIdx < spCount; ++spIdx) {
+			const int gainIndex = fromIndex2gainIndex(spIdx,distGain->categories(),distLoss->categories());
+			const int lossIndex = fromIndex2lossIndex(spIdx,distGain->categories(),distLoss->categories());
+			pi_vec[rc]._V[spIdx].fillPij(scaledTree,*spVVec[gainIndex][lossIndex]);
+		}
+		// up-algorithm sufficient statistics for this rate category
+		cup_vec[rc].fillComputeUp(scaledTree,sc,pi_vec[rc],ssc_vec[rc]);
+	}
+}
+
+/********************************************************************************************
+*********************************************************************************************/
+/********************************************************************************************
+ Probability of one position given an already-filled up-computation: sum over all
+ gain*loss stochastic-process categories of (root likelihood * category probability).
+ Errors out on a (numerically) negative result.
+*********************************************************************************************/
+MDOUBLE likelihoodComputationGL::getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& tr,
+					 const sequenceContainer& sc,
+					 const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
+					 const suffStatGlobalGamPos& cup,
+					 const distribution * distGain, const distribution * distLoss)
+{
+	const int rootId = tr.getRoot()->id();
+	const int gainCats = distGain->categories();
+	const int lossCats = distLoss->categories();
+	const int spCount = gainCats*lossCats;
+	doubleRep posProb = 0;
+	for (int cat = 0; cat < spCount; ++cat) {
+		const int gainCat = fromIndex2gainIndex(cat,gainCats,lossCats);
+		const int lossCat = fromIndex2lossIndex(cat,gainCats,lossCats);
+		// likelihood at the root for this category: sum over letters of up-stat * freq
+		doubleRep rootLike = 0.0;
+		for (int let = 0; let < sc.alphabetSize(); ++let) {
+			rootLike += cup.get(cat,rootId,let) * spVVec[gainCat][lossCat]->freq(let);
+		}
+		posProb += rootLike*(distGain->ratesProb(gainCat)*distLoss->ratesProb(lossCat));
+	}
+	if (posProb < -EPSILON) {
+		string err = "Error in likelihoodComputationGL::getProbOfPosUpIsFilledSelectionGam, non probability value (<0) Res=";
+		err+=double2string(convert(posProb));
+		errorMsg::reportError(err);
+	}
+	return convert(posProb);
+}
+/********************************************************************************************
+*********************************************************************************************/
+/********************************************************************************************
+ Alignment log-likelihood from a filled up-computation. Optionally corrects each
+ position for unobservable data, records per-position log-likelihoods in posLike
+ (without weights), and applies per-position weights to the returned sum.
+*********************************************************************************************/
+MDOUBLE likelihoodComputationGL::getTreeLikelihoodFromUp2(const tree& tr,
+					 const sequenceContainer& sc,
+					 const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
+					 const suffStatGlobalGam& cup, //computing the likelihood from up:
+					 const distribution * distGain, const distribution * distLoss,
+					 unObservableData *unObservableData_p,
+					 Vdouble* posLike,
+					 const Vdouble * weights)
+{
+	if(posLike)
+		posLike->clear();
+	const int rootId = tr.getRoot()->id();
+	const int gainCats = distGain->categories();
+	const int lossCats = distLoss->categories();
+	const int spCount = gainCats*lossCats;
+	MDOUBLE sumLogL = 0;
+	for (int pos = 0; pos < sc.seqLen(); ++pos) {
+		doubleRep posProb = 0;
+		for (int cat = 0; cat < spCount; ++cat) {
+			const int gainCat = fromIndex2gainIndex(cat,gainCats,lossCats);
+			const int lossCat = fromIndex2lossIndex(cat,gainCats,lossCats);
+			doubleRep rootLike = 0;
+			for (int let = 0; let < sc.alphabetSize(); ++let) {
+				rootLike += cup.get(pos,cat,rootId,let) * spVVec[gainCat][lossCat]->freq(let);
+			}
+			posProb += rootLike*(distGain->ratesProb(gainCat)*distLoss->ratesProb(lossCat));
+		}
+		// condition the position likelihood on the pattern being observable
+		if(unObservableData_p)
+			posProb = posProb/(1- exp(unObservableData_p->getlogLforMissingData()));
+		if(posLike)
+			posLike->push_back(log(posProb));
+		sumLogL += log(posProb) * (weights?(*weights)[pos]:1);
+	}
+	return sumLogL;
+}
+
+/********************************************************************************************
+*********************************************************************************************/
+/********************************************************************************************
+ Rate-category wrapper around getTreeLikelihoodFromUp2: averages the per-rate results
+ (and per-position values) with the rate-category probabilities.
+ Fix: posLike is zero-filled with assign() before accumulation; a bare resize() keeps
+ stale values when the caller reuses a non-empty vector, corrupting the += sums below.
+ NOTE(review): like the per-position loop in getTreeLikelihoodAllPosAlphTheSame, this
+ averages LOG-likelihoods over rate categories — confirm that is the intended model.
+*********************************************************************************************/
+MDOUBLE likelihoodComputationGL::getTreeLikelihoodFromUp2(const tree& tr,
+					 const sequenceContainer& sc,
+					 const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
+					 const vector<suffStatGlobalGam>& cup_vec, //computing the likelihood from up:
+					 const distribution * distGain, const distribution * distLoss,
+					 unObservableData *unObservableData_p,
+					 Vdouble* posLike,
+					 const Vdouble * weights)
+{
+	if(posLike)
+		posLike->assign(sc.seqLen(),0.0); // zero-init: accumulated into below
+	MDOUBLE like = 0;
+	int numOfRateCategories = spVVec[0][0]->categories();
+	for(int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
+		Vdouble posLikePerCat;
+		// likelihood given this rate category, weighted by its probability
+		like += likelihoodComputationGL::getTreeLikelihoodFromUp2(tr,sc,spVVec,cup_vec[rateIndex], distGain,distLoss,unObservableData_p,&posLikePerCat,weights)
+			* spVVec[0][0]->ratesProb(rateIndex);
+		if(posLike){
+			for (int k=0; k < sc.seqLen(); ++k) {
+				(*posLike)[k]+= (posLikePerCat[k]* spVVec[0][0]->ratesProb(rateIndex));
+			}
+		}
+	}
+	return like;
+}
+
+/********************************************************************************************
+*********************************************************************************************/
+//MDOUBLE likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSameNoComputeUp(const tree& tr,
+// const sequenceContainer& sc,
+// const vector<vector<stochasticProcess*> >& spVVec,
+// const distribution * distGain, const distribution * distLoss,
+// unObservableData *unObservableData_p)
+//{
+// MDOUBLE res = 0.0;
+// int numOfSPs = distGain->categories()*distLoss->categories();
+// for (int i=0; i < numOfSPs; ++i) {
+// int gainIndex =fromIndex2gainIndex(i,distGain->categories(),distLoss->categories());
+// int lossIndex =fromIndex2lossIndex(i,distGain->categories(),distLoss->categories());
+// res += likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(tr,sc,*spVVec[gainIndex][lossIndex])* distGain->ratesProb(gainIndex)*distLoss->ratesProb(lossIndex);
+// }
+// if(unObservableData_p){
+// res = res - sc.seqLen()*log(1- exp(unObservableData_p->getlogLforMissingData()));
+// }
+// return res;
+//}
+
+
+
+
+/********************************************************************************************
+un-observable data
+*********************************************************************************************/
+
+/********************************************************************************************
+ used to fill the likelihood for the unobservable data, for each category
+*********************************************************************************************/
+//doubleRep likelihoodComputationGL::getLofPos(const int pos,
+// const tree& tr,
+// const sequenceContainer& sc,
+// const computePijGam& pi,
+// const stochasticProcess& sp,
+// Vdouble& likePerCat) // all the likdelhoodsPerCat and rateProb are filled
+//{
+// // with the pi already computed.
+// int numOfCat = sp.categories();
+// doubleRep tmp=0;
+// for (int i=0; i < numOfCat;++i) {
+// likePerCat[i] = getLofPos(pos,tr,sc,pi[i],sp)*sp.ratesProb(i);
+// likePerCat[i+numOfCat] = sp.ratesProb(i);
+// tmp += likePerCat[i];
+// }
+// return tmp;
+//}
+///********************************************************************************************
+//likelihood computation - full data (1)
+//*********************************************************************************************/
+//MDOUBLE likelihoodComputationGL::getTreeLikelihoodAllPosAlphTheSame(const tree& tr,
+// const sequenceContainer& sc,
+// const stochasticProcess& sp,
+// const Vdouble * const weights,
+// Vdouble *pLforMissingDataPerCat)
+//{
+// computePijGam pi;
+// pi.fillPij(tr,sp);
+// MDOUBLE res =0;
+// doubleRep LofPos;
+// int k;
+// for (k=0; k < sc.seqLen(); ++k) {
+// LofPos = likelihoodComputationGL::getLofPos(k,//pos,
+// tr,//const tree&
+// sc,// sequenceContainer& sc,
+// pi,//const computePijGam& ,
+// sp,
+// pLforMissingDataPerCat);
+// res += log(LofPos) * (weights?(*weights)[k]:1);//const stochasticProcess& );
+// }
+// return res;
+//}
+//
+///********************************************************************************************
+//likelihood computation - per pos (1.1)
+//*********************************************************************************************/
+//doubleRep likelihoodComputationGL::getLofPos(const int pos,
+// const tree& tr,
+// const sequenceContainer& sc,
+// const computePijGam& pi,
+// const stochasticProcess& sp,
+// Vdouble *pLforMissingDataPerCat)
+//{
+//// with the pi already computed.
+// doubleRep tmp=0;
+// int numOfCat = sp.categories();
+// Vdouble tmpPerCat;
+// tmpPerCat.resize(numOfCat);
+//
+// for (int i=0; i < sp.categories();++i) {
+// tmpPerCat[i] = getLofPos(pos,tr,sc,pi[i],sp);
+// if(pLforMissingDataPerCat){
+// LOG(11,<<"res before MissingData correction= "<<tmpPerCat[i]);
+// tmpPerCat[i] = tmpPerCat[i]/(1- (*pLforMissingDataPerCat)[i]);
+// LOG(11,<<" after= "<<tmpPerCat[i]<<endl);
+// }
+// tmp += tmpPerCat[i]*sp.ratesProb(i);
+// }
+// return tmp;
+//}
+//
+///********************************************************************************************
+//likelihood computation - per pos, per cat (1.1.1)
+//*********************************************************************************************/
+//doubleRep likelihoodComputationGL::getLofPos(const int pos,
+// const tree& tr,
+// const sequenceContainer& sc,
+// const computePijHom& pi,
+// const stochasticProcess& sp)
+//{
+// computeUpAlg cup;
+// suffStatGlobalHomPos ssc;
+// cup.fillComputeUp(tr,sc,pos,pi,ssc);
+//
+// doubleRep tmp = 0.0;
+// for (int let = 0; let < sp.alphabetSize(); ++let) {
+// doubleRep tmpLcat=
+// ssc.get(tr.getRoot()->id(),let)*
+// sp.freq(let);
+// if (!DBIG_EQUAL(convert(tmpLcat), 0.0))
+// {
+// cerr<<"tmpLcat = "<<tmpLcat<<endl;
+// errorMsg::reportError("error in likelihoodComputation::getLofPos. likelihood is smaller than zero");
+// }
+//
+// //assert(tmpLcat>=0.0);
+// tmp+=tmpLcat;
+// }
+//// cout<<"likelihoodComputation::getLofPos: tmp = "; tmp.outputn(cout); // DEBUG EP
+// if (!(tmp>0.0)){
+// LOG(5,<<"likelihoodComputation::getLofPos: "<< tmp<<endl;);
+// LOG(5,<<"pos = "<< pos <<endl;);
+// tmp = EPSILON;
+// //errorMsg::reportError("likelihoodComputation::getLofPos: likelihood of pos was zero!",1);
+//
+// }
+// return tmp;
+//}
+//
+//Vdouble likelihoodComputationGL::getLofPosPerCat(const int pos,
+// const tree& tr,
+// const sequenceContainer& sc,
+// const computePijGam& pi,
+// const stochasticProcess& sp)
+//{
+//// with the pi already computed.
+// int numOfCat = sp.categories();
+// Vdouble tmp;
+// tmp.resize(numOfCat*2);
+// for (int i=0; i < numOfCat;++i) {
+// tmp[i] = getLofPos(pos,tr,sc,pi[i],sp)*sp.ratesProb(i);
+// tmp[i+numOfCat] = sp.ratesProb(i);
+// }
+// return tmp;
+//}
+
diff --git a/libs/phylogeny/likelihoodComputationGL.h b/libs/phylogeny/likelihoodComputationGL.h
new file mode 100644
index 0000000..4209943
--- /dev/null
+++ b/libs/phylogeny/likelihoodComputationGL.h
@@ -0,0 +1,97 @@
+#ifndef ___LIKELIHOOD_COMPUTATION_GL
+#define ___LIKELIHOOD_COMPUTATION_GL
+
+#include "definitions.h"
+#include "computePijComponent.h"
+#include "sequenceContainer.h"
+#include "suffStatComponent.h"
+#include "unObservableData.h"
+#include "computeUpAlg.h"
+
+
+namespace likelihoodComputationGL {
+
+
+	// Full-alignment log-likelihood summed over rate and gain*loss categories
+	// (see likelihoodComputationGL.cpp for the category handling).
+	MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& tr,
+		const sequenceContainer& sc,
+		const vector<vector<stochasticProcess*> >& spVVec,
+		const distribution * distGain, const distribution * distLoss,
+		unObservableData *unObservableData_p);
+	// Helper: per rate category, scale a copy of the tree, fill transition
+	// probabilities for each gain*loss process, and run the up algorithm.
+	void fillPijAndUp(const tree& tr,
+		const sequenceContainer& sc,
+		const vector<vector<stochasticProcess*> >& spVVec,
+		const distribution * distGain, const distribution * distLoss,
+		vector<computePijGam>& pi_vec,
+		vector<suffStatGlobalGam>& ssc_vec,
+		vector<computeUpAlg>& cup_vec);
+	// Probability of one position from a filled up-computation (one rate category).
+	MDOUBLE getProbOfPosUpIsFilledSelectionGam(const int pos,const tree& tr,
+		const sequenceContainer& sc,
+		const vector<vector<stochasticProcess*> >& spVVec, // only needed for sp.freq(let)
+		const suffStatGlobalGamPos& cup,
+		const distribution * distGain, const distribution * distLoss);
+
+
+	// Alignment log-likelihood from a filled up-computation; optional per-position
+	// output (posLike) and per-position weights.
+	MDOUBLE getTreeLikelihoodFromUp2(const tree& tr,
+		const sequenceContainer& sc,
+		const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
+		const suffStatGlobalGam& cup,
+		const distribution * distGain, const distribution * distLoss,unObservableData *unObservableData_p,
+		Vdouble* posLike =NULL,
+		const Vdouble * weights =NULL);
+	// As above, but averaging over rate categories (one suffStat per category).
+	MDOUBLE getTreeLikelihoodFromUp2(const tree& tr,
+		const sequenceContainer& sc,
+		const vector<vector<stochasticProcess*> >& spVVec,// only needed for sp.freq(let)
+		const vector<suffStatGlobalGam>& cup_vec,
+		const distribution * distGain, const distribution * distLoss,unObservableData *unObservableData_p,
+		Vdouble* posLike =NULL,
+		const Vdouble * weights =NULL);
+
+// Error
+	//MDOUBLE getTreeLikelihoodAllPosAlphTheSameNoComputeUp(const tree& tr,
+	//	const sequenceContainer& sc,
+	//	const vector<vector<stochasticProcess*> >& spVVec,
+	//	const distribution * distGain, const distribution * distLoss,
+	//	unObservableData *unObservableData_p);
+
+
+///********************************************************************************************
+//un-obervable data
+//*********************************************************************************************/
+//// used to fill the likelihood for the unobservable for each category
+//	doubleRep getLofPos(const int pos,
+//		const tree& tr,
+//		const sequenceContainer& sc,
+//		const computePijGam& pi,
+//		const stochasticProcess& sp,
+//		Vdouble& likePerCat);	// all the likdelhoodsPerCat and rateProb are filled
+//// likelihood computation - full data (1)
+//	MDOUBLE getTreeLikelihoodAllPosAlphTheSame(const tree& tr,
+//		const sequenceContainer& sc,
+//		const stochasticProcess& sp,
+//		const Vdouble * const weights,
+//		Vdouble *pLforMissingDataPerCat=NULL);
+//// likelihood computation - per pos (1.1)
+//	doubleRep getLofPos(const int pos,	// this function is used
+//		const tree& tr,	// when gamma, and the br-len
+//		const sequenceContainer& sc,	// are the same for all pos.
+//		const computePijGam& pi,
+//		const stochasticProcess& sp,
+//		Vdouble *pLforMissingDataPerCat=NULL);
+//// likelihood computation - per pos, per cat (1.1.1)
+//	doubleRep getLofPos(const int pos,	// this function is used
+//		const tree& tr,	// when the br-len
+//		const sequenceContainer& sc,	// are the same for all
+//		const computePijHom& pi,	// positions.
+//		const stochasticProcess& sp);
+//
+//	Vdouble getLofPosPerCat(const int pos,	// used when the likelihood given each category is needed, not only the sum
+//		const tree& tr,
+//		const sequenceContainer& sc,
+//		const computePijGam& pi,
+//		const stochasticProcess& sp);
+
+
+
+};
+
+#endif
diff --git a/libs/phylogeny/logFile.cpp b/libs/phylogeny/logFile.cpp
new file mode 100644
index 0000000..cbaccab
--- /dev/null
+++ b/libs/phylogeny/logFile.cpp
@@ -0,0 +1,48 @@
+// $Id: logFile.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "logFile.h"
+#include "errorMsg.h"
+
+int myLog::_loglvl = 3;
+ostream *myLog::_out= NULL;
+bool myLog::_firstTime = true;
+
+/********************************************************************************************
+ Open (or reopen) the log. "-" or "" logs to cout; otherwise a file is opened — the
+ first open truncates, later opens append (so phases of a run share one file).
+ Fix: on open failure the heap-allocated stream is deleted and NOT installed as the
+ log sink (the original leaked it and would have logged into a broken stream if
+ errorMsg::reportError returned).
+*********************************************************************************************/
+void myLog::setLog(const string logfilename, const int loglvl) {
+	if (_out != NULL) myLog::endLog();
+	if ((logfilename == "-")|| (logfilename == "")) {
+		myLog::setLogOstream(&cout);
+	} else {
+		ofstream* outLF = new ofstream;
+		if (_firstTime) {
+			outLF->open(logfilename.c_str());
+			_firstTime = false;
+		}
+		else
+			outLF->open(logfilename.c_str(), ofstream::out | ofstream::app); // append
+		if (!outLF->is_open()) {
+			delete outLF; // don't leak (or install) a broken stream
+			errorMsg::reportError(string("Can't open for writing the log file ")+logfilename);
+			return;
+		}
+		myLog::setLogOstream(outLF);
+	}
+	myLog::setLogLvl(loglvl);
+	LOG(3,<<"START OF LOG FILE"<<endl);
+}
+
+// Close and release the current log stream. A no-op when logging to cout or when
+// no stream is installed (we never own those).
+void myLog::endLog(void){
+	LOG(3,<<"END OF LOG FILE"<<endl);
+	const bool ownsStream = (_out != NULL) && (_out != &cout);
+	if (ownsStream) {
+		((ofstream*)_out)->close();
+		delete _out;
+		_out = NULL;
+		_firstTime=false; // a subsequent setLog() reopens the same file in append mode
+	}
+}
+
+// Echo the full command line to the log at the given level, each argument quoted.
+void myLog::printArgv(int loglvl, int argc, char *argv[]) {
+	LOG(loglvl,<<"argv =");
+	for (int argIndex=0; argIndex<argc; ++argIndex)
+		LOG(loglvl,<<" \""<<argv[argIndex]<<"\"");
+	LOG(loglvl,<<endl);
+}
diff --git a/libs/phylogeny/logFile.h b/libs/phylogeny/logFile.h
new file mode 100644
index 0000000..f2d4076
--- /dev/null
+++ b/libs/phylogeny/logFile.h
@@ -0,0 +1,50 @@
+// $Id: logFile.h 6067 2009-04-14 19:12:28Z itaymay $
+
+#ifndef ___LOG
+#define ___LOG
+
+
+#include <string>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+// Minimal static logger: one global log level and one output stream, used via the
+// LOG / LOGnOUT / LOGDO macros defined below rather than by calling LogFile() directly.
+class myLog {
+public:
+	static int LogLevel() { return _loglvl;}
+	// Current log sink; falls back to cerr when no stream has been installed.
+	static ostream& LogFile(void) {
+		if (_out == NULL) return cerr;
+		return *_out;
+	}
+
+	static void setLogLvl(const int newLogLvl) {_loglvl = newLogLvl;}
+	static void setLogOstream(ostream* out) {_out = out;}
+
+	// NOTE: setLog allocates the ofstream with new; since everything here is static,
+	// ownership is global. endLog() deletes it, so at worst a single stream is
+	// released only at program exit — harmless in practice.
+	static void setLog(const string logfilename, const int loglvl);
+	static void endLog(void);
+	static void printArgv(int loglvl, int argc, char *argv[]) ;
+private:
+	static ostream* _out;      // may be &cout (not owned) or a heap ofstream (owned)
+	static int _loglvl;        // messages with level <= _loglvl are emitted
+	static bool _firstTime;    // first open truncates the file; later opens append
+};
+
+#ifdef LOG
+#undef LOG
+#endif
+
+
+#define LOG(Lev, ex) { if( Lev <= myLog::LogLevel() ) myLog::LogFile() ex; }
+#define LOGnOUT(Lev, ex) { if( Lev <= myLog::LogLevel() ) {myLog::LogFile() ex; cerr ex; }}
+#define LOGDO(Lev, ex) { if( Lev <= myLog::LogLevel() ) ex; }
+
+
+#endif
+
+
+
diff --git a/libs/phylogeny/logRep.cpp b/libs/phylogeny/logRep.cpp
new file mode 100644
index 0000000..0e0c07b
--- /dev/null
+++ b/libs/phylogeny/logRep.cpp
@@ -0,0 +1,30 @@
+#ifdef LOGREP
+#include "logRep.h"
+#include <cmath>
+
+//logRep::logRep()
+//{
+// _log = VERYSMALL2;
+//}
+
+//logRep::logRep(MDOUBLE a){
+// _log = ((a==0.0) ? VERYSMALL2 : log(a));
+//}
+
+
+//logRep::logRep(const logRep& other): _log(other._log) {}
+
+
+
+// Convert a logRep back to a plain MDOUBLE by exponentiating the stored log.
+MDOUBLE convert(const logRep& a){
+	const MDOUBLE storedLog = a.getLog();
+	return exp(storedLog);
+}
+
+
+
+
+// Stream a logRep in ordinary (exponentiated) representation via logRep::output.
+ostream& operator<<(ostream &out, const logRep& a){
+	a.output(out);
+	return out;
+}
+#endif
diff --git a/libs/phylogeny/logRep.h b/libs/phylogeny/logRep.h
new file mode 100644
index 0000000..b1bd830
--- /dev/null
+++ b/libs/phylogeny/logRep.h
@@ -0,0 +1,162 @@
+#ifndef __LOG_REP_H
+#define __LOG_REP_H
+
+#ifdef LOGREP
+
+#include "definitions.h"
+#include "AddLog.h"
+
+
+
+#include <iostream>
+#include <cmath>
+using namespace std;
+
+/* logRep: enables working with much larger or smaller numbers than normally possible
+by the regular double representation
+ * Representation of a number x by the log of x
+ Note: the log is stored in natural base (the constructor uses log(), convert() uses exp()).
+ WARNING: Note that logRep can only be used for positive values
+ (such as probabilities) - you can't have the log of a negative!
+ For a general real number use class doubleRep.
+ */
+
+// A positive real number stored as its (natural) logarithm; VERYSMALL is the
+// sentinel log value representing zero. Arithmetic is done on the logs, so very
+// large/small magnitudes never overflow or underflow a plain double.
+class logRep{
+public:
+
+	logRep() : _log(VERYSMALL){}                                 // value 0
+	logRep(MDOUBLE a) {_log = ((a==0.0) ? VERYSMALL : log(a));}  // natural log; 0 maps to sentinel
+	logRep(const logRep& other) : _log(other._log) {}
+	logRep* clone() {return new logRep(*this);}
+
+	// Print the represented (exponentiated) value, not the stored log.
+	void output(ostream &out) const{ out<<exp(_log);}
+
+	friend MDOUBLE convert(const logRep& a);
+	//inline MDOUBLE convert();
+	inline logRep& operator=(const logRep& a);
+	inline logRep& operator+=(logRep a);
+	friend inline logRep operator+(const logRep& a, const logRep& b);
+	// NOTE(review): operator-= / operator- are declared but no definitions appear
+	// in this header — using them would fail at link time; confirm before relying on them.
+	inline logRep& operator-=(const logRep& a);
+	friend inline logRep operator-(const logRep& a, const logRep& b);
+	inline logRep& operator*=(const logRep& a);
+	friend inline logRep operator*(const logRep& a, const logRep& b);
+	inline logRep& operator/=(const logRep& a);
+	friend inline logRep operator/(const logRep& a, const logRep& b);
+
+	friend inline bool operator==(const logRep& a, const logRep& b);
+	friend inline bool operator!=(const logRep& a, const logRep& b);
+	friend inline bool operator<(const logRep& a, const logRep& b);
+	friend inline bool operator<=(const logRep& a, const logRep& b);
+	friend inline bool operator>(const logRep& a, const logRep& b);
+	friend inline bool operator>=(const logRep& a, const logRep& b);
+	friend inline MDOUBLE log(const logRep& d);
+
+private:
+	const MDOUBLE getLog() const {return _log;}
+
+private:
+	MDOUBLE _log;   // natural log of the represented value; VERYSMALL means zero
+	//static tAddLog_Precompute _add;
+
+};
+
+// Copy-assignment: copying the stored log copies the represented value.
+inline logRep& logRep::operator=(const logRep& a){
+	_log=a.getLog();
+	return *this;
+}
+
+//inline MDOUBLE convert(){
+// return exp(_log);
+//}
+
+// Original version by Adi Stern
+// Addition in log space. The VERYSMALL sentinel (value 0) short-circuits both
+// 0+x and x+0; otherwise AddLog combines the two logs (presumably a stable
+// log-sum-exp — see AddLog.h to confirm).
+inline logRep& logRep::operator+=(logRep a){
+	if (_log == VERYSMALL)
+		_log = a._log;
+	else if (a._log == VERYSMALL ) return *this;
+	else _log = AddLog(_log, a._log);
+	return *this;
+}
+
+// Binary + implemented on top of +=.
+inline logRep operator+(const logRep& a, const logRep& b){
+	logRep sum(a);
+	sum += b;
+	return sum;
+}
+
+// Multiplication in log space: logs add. Multiplying by zero (the VERYSMALL
+// sentinel on either side) yields zero.
+inline logRep& logRep::operator*=(const logRep& a){
+	if ((_log == VERYSMALL) || (a._log== VERYSMALL )){
+		_log = VERYSMALL;
+		return *this;
+	}
+	_log+=a._log;
+	return *this;
+}
+
+// Binary * implemented on top of *=.
+inline logRep operator*(const logRep& a, const logRep& b){
+	logRep product(a);
+	product *= b;
+	return product;
+}
+
+// Division in log space: logs subtract.
+// NOTE(review): unlike *=, the zero sentinel is not special-cased here, so
+// dividing a zero logRep shifts the sentinel value — confirm callers never
+// rely on 0/x staying exactly zero.
+inline logRep& logRep::operator/=(const logRep& a){
+	_log-=a._log;
+	return *this;
+}
+
+// Binary / implemented on top of /=.
+inline logRep operator/(const logRep& a, const logRep& b){
+	logRep quotient(a);
+	quotient /= b;
+	return quotient;
+}
+
+/************************
+ * Comparison operators *
+ ************************/
+// Comparisons on logRep values: log is strictly increasing, so comparing the
+// stored logs is equivalent to comparing the represented (positive) values.
+inline bool operator==(const logRep& a, const logRep& b){
+	return (a.getLog()==b.getLog());
+}
+inline bool operator!=(const logRep& a, const logRep& b){
+	return !(a==b);
+}
+
+inline bool operator<(const logRep& a, const logRep& b){
+	return a.getLog() < b.getLog();
+}
+
+inline bool operator>(const logRep& a, const logRep& b){
+	return b < a;
+}
+
+inline bool operator<=(const logRep& a, const logRep& b){
+	return !(a>b);
+}
+
+inline bool operator>=(const logRep& a, const logRep& b){
+	return !(a<b);
+}
+
+ostream& operator<<(ostream &out, const logRep& a);
+
+// log of a logRep is simply its stored log value — no exp/log round-trip.
+inline MDOUBLE log(const logRep& d) {return d.getLog();}
+
+// Print a vector of logRep values, space-separated, newline-terminated.
+// Fix: loop indices are size_t to match vector::size() (was signed int,
+// a signed/unsigned comparison).
+inline ostream &operator<<(ostream &out, const VlogRep &v){
+	for (size_t j=0;j<v.size();++j)
+		out<< v[j]<<" ";
+	out <<endl;
+	return(out);
+}
+
+// Print a matrix of logRep values, one row per line (via the vector printer above).
+inline ostream &operator<<(ostream &out, const VVlogRep &m){
+	for (size_t i=0;i<m.size();++i)
+		out<<m[i];
+	out <<endl;
+	return(out);
+}
+#endif
+#endif
diff --git a/libs/phylogeny/make.dep b/libs/phylogeny/make.dep
new file mode 100644
index 0000000..d737b16
--- /dev/null
+++ b/libs/phylogeny/make.dep
@@ -0,0 +1,715 @@
+AddLog.o AddLog.debug.o: AddLog.cpp AddLog.h
+NNiProp.o NNiProp.debug.o: NNiProp.cpp definitions.h treeIt.h errorMsg.h tree.h \
+ readTree.h logFile.h treeUtil.h NNiProp.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h \
+ bblEM.h countTableComponent.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h bblEMProportional.h
+NNiSep.o NNiSep.debug.o: NNiSep.cpp definitions.h treeIt.h errorMsg.h tree.h readTree.h \
+ logFile.h treeUtil.h NNiSep.h sequenceContainer.h sequence.h alphabet.h \
+ mulAlphabet.h someUtil.h gainLossAlphabet.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h bblEM.h \
+ countTableComponent.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h bblEMSeperate.h
+Nni.o Nni.debug.o: Nni.cpp definitions.h treeUtil.h tree.h readTree.h errorMsg.h \
+ logFile.h treeIt.h Nni.h sequenceContainer.h sequence.h alphabet.h \
+ mulAlphabet.h someUtil.h gainLossAlphabet.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h bblEM.h \
+ countTableComponent.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h
+aaJC.o aaJC.debug.o: aaJC.cpp aaJC.h replacementModel.h definitions.h errorMsg.h
+allTrees.o allTrees.debug.o: allTrees.cpp definitions.h allTrees.h tree.h readTree.h \
+ errorMsg.h logFile.h sequenceContainer.h sequence.h alphabet.h \
+ mulAlphabet.h someUtil.h gainLossAlphabet.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h treeUtil.h treeIt.h \
+ bblEM.h countTableComponent.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h
+allTreesSeparateModel.o allTreesSeparateModel.debug.o: allTreesSeparateModel.cpp definitions.h treeIt.h \
+ errorMsg.h tree.h readTree.h logFile.h allTreesSeparateModel.h \
+ sequenceContainer.h sequence.h alphabet.h mulAlphabet.h someUtil.h \
+ gainLossAlphabet.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h bblEMSeperate.h
+alphabet.o alphabet.debug.o: alphabet.cpp alphabet.h
+amino.o amino.debug.o: amino.cpp amino.h definitions.h errorMsg.h alphabet.h \
+ geneticCodeHolder.h codon.h someUtil.h logFile.h
+bestAlpha.o bestAlpha.debug.o: bestAlpha.cpp bestAlpha.h definitions.h \
+ likelihoodComputation.h computePijComponent.h tree.h readTree.h \
+ errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h unObservableData.h gammaDistribution.h \
+ generalGammaDistribution.h bblEM.h countTableComponent.h numRec.h \
+ uniformDistribution.h
+bestAlphaManyTrees.o bestAlphaManyTrees.debug.o: bestAlphaManyTrees.cpp bestAlphaManyTrees.h \
+ definitions.h computePijComponent.h tree.h readTree.h errorMsg.h \
+ logFile.h stochasticProcess.h pijAccelerator.h replacementModel.h \
+ distribution.h sequenceContainer.h sequence.h alphabet.h mulAlphabet.h \
+ someUtil.h gainLossAlphabet.h bblEM.h countTableComponent.h \
+ suffStatComponent.h unObservableData.h gammaDistribution.h \
+ generalGammaDistribution.h likelihoodComputation.h bestAlpha.h numRec.h \
+ uniformDistribution.h bblEMProportional.h bblEMSeperate.h
+bestHKYparam.o bestHKYparam.debug.o: bestHKYparam.cpp bestHKYparam.h definitions.h \
+ likelihoodComputation.h computePijComponent.h tree.h readTree.h \
+ errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h unObservableData.h gammaDistribution.h \
+ generalGammaDistribution.h hky.h bblEM.h countTableComponent.h numRec.h \
+ uniformDistribution.h bestAlpha.h
+bootstrap.o bootstrap.debug.o: bootstrap.cpp definitions.h someUtil.h logFile.h alphabet.h \
+ bootstrap.h split.h splitMap.h tree.h readTree.h errorMsg.h treeUtil.h \
+ splitTreeUtil.h
+bblEM.o bblEM.debug.o: bblEM.cpp bblEM.h definitions.h tree.h readTree.h errorMsg.h \
+ logFile.h stochasticProcess.h pijAccelerator.h replacementModel.h \
+ distribution.h sequenceContainer.h sequence.h alphabet.h mulAlphabet.h \
+ someUtil.h gainLossAlphabet.h countTableComponent.h \
+ computePijComponent.h suffStatComponent.h unObservableData.h \
+ likelihoodComputation.h computeUpAlg.h computeDownAlg.h computeCounts.h \
+ treeIt.h fromCountTableComponentToDistance.h
+bblEMfixRoot.o bblEMfixRoot.debug.o: bblEMfixRoot.cpp bblEMfixRoot.h definitions.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ countTableComponent.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h likelihoodComputation.h computeUpAlg.h \
+ computeDownAlg.h computeCounts.h treeIt.h \
+ fromCountTableComponentToDistancefixRoot.h
+bblEMProprtional.o bblEMProprtional.debug.o: bblEMProprtional.cpp bblEM.h definitions.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ countTableComponent.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h bblEMProportional.h likelihoodComputation.h \
+ computeUpAlg.h computeDownAlg.h computeCounts.h treeIt.h \
+ fromCountTableComponentToDistance.h \
+ fromCountTableComponentToDistanceProp.h
+bblEMSeperate.o bblEMSeperate.debug.o: bblEMSeperate.cpp bblEM.h definitions.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ countTableComponent.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h bblEMSeperate.h
+chebyshevAccelerator.o chebyshevAccelerator.debug.o: chebyshevAccelerator.cpp chebyshevAccelerator.h \
+ pijAccelerator.h definitions.h replacementModel.h
+clustalFormat.o clustalFormat.debug.o: clustalFormat.cpp clustalFormat.h sequenceContainer.h \
+ definitions.h sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h \
+ logFile.h gainLossAlphabet.h codon.h geneticCodeHolder.h
+codon.o codon.debug.o: codon.cpp codon.h definitions.h errorMsg.h someUtil.h logFile.h \
+ alphabet.h geneticCodeHolder.h nucleotide.h amino.h matrixUtils.h \
+ sequenceContainer.h sequence.h mulAlphabet.h gainLossAlphabet.h
+codonJC.o codonJC.debug.o: codonJC.cpp codonJC.h replacementModel.h definitions.h
+computeCounts.o computeCounts.debug.o: computeCounts.cpp computeCounts.h definitions.h \
+ countTableComponent.h sequenceContainer.h sequence.h errorMsg.h \
+ alphabet.h mulAlphabet.h someUtil.h logFile.h gainLossAlphabet.h \
+ computePijComponent.h tree.h readTree.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h suffStatComponent.h
+computeDownAlg.o computeDownAlg.debug.o: computeDownAlg.cpp definitions.h computeDownAlg.h \
+ tree.h readTree.h errorMsg.h logFile.h suffStatComponent.h \
+ sequenceContainer.h sequence.h alphabet.h mulAlphabet.h someUtil.h \
+ gainLossAlphabet.h computePijComponent.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h treeIt.h
+computeMarginalAlg.o computeMarginalAlg.debug.o: computeMarginalAlg.cpp definitions.h treeIt.h \
+ errorMsg.h tree.h readTree.h logFile.h computeMarginalAlg.h \
+ suffStatComponent.h sequenceContainer.h sequence.h alphabet.h \
+ mulAlphabet.h someUtil.h gainLossAlphabet.h computePijComponent.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h
+computePijComponent.o computePijComponent.debug.o: computePijComponent.cpp definitions.h treeIt.h \
+ errorMsg.h tree.h readTree.h logFile.h computePijComponent.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h
+computeUpAlg.o computeUpAlg.debug.o: computeUpAlg.cpp definitions.h computeUpAlg.h tree.h \
+ readTree.h errorMsg.h logFile.h suffStatComponent.h sequenceContainer.h \
+ sequence.h alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ computePijComponent.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h treeIt.h seqContainerTreeMap.h
+computeUpAlgFactors.o computeUpAlgFactors.debug.o: computeUpAlgFactors.cpp definitions.h \
+ computeUpAlg.h tree.h readTree.h errorMsg.h logFile.h \
+ suffStatComponent.h sequenceContainer.h sequence.h alphabet.h \
+ mulAlphabet.h someUtil.h gainLossAlphabet.h computePijComponent.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h \
+ seqContainerTreeMap.h treeIt.h
+computeSubstitutionCounts.o computeSubstitutionCounts.debug.o: computeSubstitutionCounts.cpp \
+ computeSubstitutionCounts.h definitions.h replacementModel.h \
+ sequenceContainer.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h gainLossAlphabet.h tree.h readTree.h \
+ computePosteriorExpectationOfSubstitutions.h simulateJumps.h \
+ simulateJumpsAbstract.h stochasticProcess.h pijAccelerator.h \
+ distribution.h suffStatComponent.h computePijComponent.h \
+ computePosteriorExpectationOfSubstitutions_nonReversibleSp.h \
+ multipleStochasticProcess.h matrixUtils.h simulateCodonsJumps.h treeIt.h \
+ treeUtil.h
+computePosteriorExpectationOfSubstitutions.o computePosteriorExpectationOfSubstitutions.debug.o: \
+ computePosteriorExpectationOfSubstitutions.cpp \
+ computePosteriorExpectationOfSubstitutions.h definitions.h \
+ simulateJumps.h simulateJumpsAbstract.h tree.h readTree.h errorMsg.h \
+ logFile.h stochasticProcess.h pijAccelerator.h replacementModel.h \
+ distribution.h alphabet.h sequenceContainer.h sequence.h mulAlphabet.h \
+ someUtil.h gainLossAlphabet.h suffStatComponent.h computePijComponent.h \
+ computeDownAlg.h computeUpAlg.h matrixUtils.h treeIt.h \
+ likelihoodComputation.h unObservableData.h
+computePosteriorExpectationOfSubstitutions_nonReversibleSp.o computePosteriorExpectationOfSubstitutions_nonReversibleSp.debug.o: \
+ computePosteriorExpectationOfSubstitutions_nonReversibleSp.cpp \
+ definitions.h computeDownAlg.h tree.h readTree.h errorMsg.h logFile.h \
+ suffStatComponent.h sequenceContainer.h sequence.h alphabet.h \
+ mulAlphabet.h someUtil.h gainLossAlphabet.h computePijComponent.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h \
+ computeUpAlg.h matrixUtils.h treeIt.h likelihoodComputation.h \
+ unObservableData.h \
+ computePosteriorExpectationOfSubstitutions_nonReversibleSp.h \
+ computePosteriorExpectationOfSubstitutions.h simulateJumps.h \
+ simulateJumpsAbstract.h
+ConversionUtils.o ConversionUtils.debug.o: ConversionUtils.cpp ConversionUtils.h definitions.h \
+ someUtil.h logFile.h alphabet.h errorMsg.h
+countTableComponent.o countTableComponent.debug.o: countTableComponent.cpp countTableComponent.h \
+ definitions.h logFile.h
+datMatrixHolder.o datMatrixHolder.debug.o: datMatrixHolder.cpp datMatrixHolder.h cpREV45.dat.q \
+ dayhoff.dat.q jones.dat.q mtREV24.dat.q wag.dat.q HIVb.dat.q HIVw.dat.q \
+ adrianCodon.dat.q LG.dat.q
+distanceTable.o distanceTable.debug.o: distanceTable.cpp definitions.h distanceTable.h \
+ distanceMethod.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h sequenceContainer.h gainLossAlphabet.h
+distribution.o distribution.debug.o: distribution.cpp distribution.h definitions.h errorMsg.h
+errorMsg.o errorMsg.debug.o: errorMsg.cpp definitions.h errorMsg.h logFile.h
+evaluateCharacterFreq.o evaluateCharacterFreq.debug.o: evaluateCharacterFreq.cpp \
+ evaluateCharacterFreq.h sequenceContainer.h definitions.h sequence.h \
+ errorMsg.h alphabet.h mulAlphabet.h someUtil.h logFile.h \
+ gainLossAlphabet.h
+fastStartTree.o fastStartTree.debug.o: fastStartTree.cpp definitions.h tree.h readTree.h \
+ errorMsg.h logFile.h treeUtil.h fastStartTree.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h sequenceContainer.h \
+ sequence.h alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ bblEM.h countTableComponent.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h likeDist.h distanceMethod.h jcDistance.h \
+ likelihoodComputation.h getRandomWeights.h distanceTable.h nj.h \
+ njConstrain.h distances2Tree.h
+fastaFormat.o fastaFormat.debug.o: fastaFormat.cpp fastaFormat.h sequenceContainer.h \
+ definitions.h sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h \
+ logFile.h gainLossAlphabet.h
+findRateOfGene.o findRateOfGene.debug.o: findRateOfGene.cpp definitions.h findRateOfGene.h \
+ numRec.h errorMsg.h uniformDistribution.h distribution.h logFile.h \
+ likelihoodComputation.h computePijComponent.h tree.h readTree.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h \
+ sequenceContainer.h sequence.h alphabet.h mulAlphabet.h someUtil.h \
+ gainLossAlphabet.h suffStatComponent.h unObservableData.h computeUpAlg.h
+fromCountTableComponentToDistance.o fromCountTableComponentToDistance.debug.o: \
+ fromCountTableComponentToDistance.cpp \
+ fromCountTableComponentToDistance.h definitions.h countTableComponent.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h \
+ unObservableData.h tree.h readTree.h errorMsg.h logFile.h \
+ sequenceContainer.h sequence.h alphabet.h mulAlphabet.h someUtil.h \
+ gainLossAlphabet.h computePijComponent.h likeDist.h distanceMethod.h \
+ jcDistance.h
+fromCountTableComponentToDistancefixRoot.o fromCountTableComponentToDistancefixRoot.debug.o: \
+ fromCountTableComponentToDistancefixRoot.cpp \
+ fromCountTableComponentToDistancefixRoot.h definitions.h \
+ countTableComponent.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h unObservableData.h tree.h readTree.h \
+ errorMsg.h logFile.h sequenceContainer.h sequence.h alphabet.h \
+ mulAlphabet.h someUtil.h gainLossAlphabet.h computePijComponent.h \
+ likeDistfixRoot.h distanceMethod.h jcDistance.h
+fromCountTableComponentToDistanceProp.o fromCountTableComponentToDistanceProp.debug.o: \
+ fromCountTableComponentToDistanceProp.cpp \
+ fromCountTableComponentToDistanceProp.h definitions.h \
+ countTableComponent.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h likeDistProp.h
+fromQtoPt.o fromQtoPt.debug.o: fromQtoPt.cpp definitions.h fromQtoPt.h replacementModel.h \
+ errorMsg.h numRec.h uniformDistribution.h distribution.h logFile.h \
+ matrixUtils.h
+generalGammaDistributionFixedCategories.o generalGammaDistributionFixedCategories.debug.o: \
+ generalGammaDistributionFixedCategories.cpp \
+ generalGammaDistributionFixedCategories.h definitions.h \
+ generalGammaDistribution.h distribution.h errorMsg.h gammaUtilities.h \
+ numRec.h uniformDistribution.h logFile.h
+gammaDistribution.o gammaDistribution.debug.o: gammaDistribution.cpp definitions.h \
+ gammaDistribution.h generalGammaDistribution.h distribution.h errorMsg.h \
+ gammaUtilities.h numRec.h uniformDistribution.h logFile.h
+gammaUtilities.o gammaUtilities.debug.o: gammaUtilities.cpp gammaUtilities.h definitions.h \
+ numRec.h errorMsg.h uniformDistribution.h distribution.h logFile.h
+generalGammaDistribution.o generalGammaDistribution.debug.o: generalGammaDistribution.cpp \
+ generalGammaDistribution.h definitions.h distribution.h gammaUtilities.h \
+ numRec.h errorMsg.h uniformDistribution.h logFile.h
+getRandomWeights.o getRandomWeights.debug.o: getRandomWeights.cpp getRandomWeights.h definitions.h \
+ talRandom.h logFile.h
+goldmanYangModel.o goldmanYangModel.debug.o: goldmanYangModel.cpp goldmanYangModel.h definitions.h \
+ replacementModel.h fromQtoPt.h granthamChemicalDistances.h codon.h \
+ errorMsg.h someUtil.h logFile.h alphabet.h geneticCodeHolder.h \
+ readDatMatrix.h datMatrixHolder.h
+granthamChemicalDistances.o granthamChemicalDistances.debug.o: granthamChemicalDistances.cpp \
+ granthamChemicalDistances.h definitions.h
+hky.o hky.debug.o: hky.cpp hky.h replacementModel.h definitions.h errorMsg.h
+khTest.o khTest.debug.o: khTest.cpp definitions.h logFile.h errorMsg.h
+likeDist.o likeDist.debug.o: likeDist.cpp likeDist.h definitions.h countTableComponent.h \
+ distanceMethod.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h jcDistance.h unObservableData.h tree.h \
+ readTree.h sequenceContainer.h gainLossAlphabet.h computePijComponent.h \
+ numRec.h uniformDistribution.h
+likeDistfixRoot.o likeDistfixRoot.debug.o: likeDistfixRoot.cpp likeDistfixRoot.h definitions.h \
+ countTableComponent.h distanceMethod.h sequence.h errorMsg.h alphabet.h \
+ mulAlphabet.h someUtil.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h jcDistance.h sequenceContainer.h \
+ gainLossAlphabet.h unObservableData.h tree.h readTree.h \
+ computePijComponent.h numRec.h uniformDistribution.h
+likeDistProp.o likeDistProp.debug.o: likeDistProp.cpp likeDistProp.h definitions.h \
+ countTableComponent.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h numRec.h errorMsg.h \
+ uniformDistribution.h logFile.h
+likelihoodComputation.o likelihoodComputation.debug.o: likelihoodComputation.cpp definitions.h tree.h \
+ readTree.h errorMsg.h logFile.h computeUpAlg.h suffStatComponent.h \
+ sequenceContainer.h sequence.h alphabet.h mulAlphabet.h someUtil.h \
+ gainLossAlphabet.h computePijComponent.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h \
+ likelihoodComputation.h unObservableData.h gammaUtilities.h numRec.h \
+ uniformDistribution.h
+likelihoodComputationFactors.o likelihoodComputationFactors.debug.o: likelihoodComputationFactors.cpp \
+ definitions.h tree.h readTree.h errorMsg.h logFile.h computeUpAlg.h \
+ suffStatComponent.h sequenceContainer.h sequence.h alphabet.h \
+ mulAlphabet.h someUtil.h gainLossAlphabet.h computePijComponent.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h \
+ likelihoodComputationFactors.h
+logFile.o logFile.debug.o: logFile.cpp logFile.h errorMsg.h
+maseFormat.o maseFormat.debug.o: maseFormat.cpp maseFormat.h sequenceContainer.h \
+ definitions.h sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h \
+ logFile.h gainLossAlphabet.h
+molphyFormat.o molphyFormat.debug.o: molphyFormat.cpp molphyFormat.h sequenceContainer.h \
+ definitions.h sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h \
+ logFile.h gainLossAlphabet.h
+nexusFormat.o nexusFormat.debug.o: nexusFormat.cpp nexusFormat.h sequenceContainer.h \
+ definitions.h sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h \
+ logFile.h gainLossAlphabet.h
+nj.o nj.debug.o: nj.cpp nj.h definitions.h tree.h readTree.h errorMsg.h logFile.h \
+ sequenceContainer.h sequence.h alphabet.h mulAlphabet.h someUtil.h \
+ gainLossAlphabet.h njConstrain.h distances2Tree.h treeUtil.h
+njConstrain.o njConstrain.debug.o: njConstrain.cpp definitions.h njConstrain.h \
+ sequenceContainer.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h gainLossAlphabet.h tree.h readTree.h
+nucJC.o nucJC.debug.o: nucJC.cpp nucJC.h replacementModel.h definitions.h
+nucleotide.o nucleotide.debug.o: nucleotide.cpp nucleotide.h definitions.h alphabet.h \
+ errorMsg.h
+numRec.o numRec.debug.o: numRec.cpp numRec.h definitions.h errorMsg.h \
+ uniformDistribution.h distribution.h logFile.h matrixUtils.h
+Parameters.o Parameters.debug.o: Parameters.cpp Parameters.h ConversionUtils.h definitions.h
+phylipFormat.o phylipFormat.debug.o: phylipFormat.cpp phylipFormat.h definitions.h \
+ sequenceContainer.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h gainLossAlphabet.h
+pijAccelerator.o pijAccelerator.debug.o: pijAccelerator.cpp pijAccelerator.h definitions.h \
+ replacementModel.h
+readDatMatrix.o readDatMatrix.debug.o: readDatMatrix.cpp readDatMatrix.h definitions.h \
+ datMatrixHolder.h replacementModel.h errorMsg.h logFile.h fromQtoPt.h
+readTree.o readTree.debug.o: readTree.cpp definitions.h errorMsg.h someUtil.h logFile.h \
+ alphabet.h readTree.h
+recognizeFormat.o recognizeFormat.debug.o: recognizeFormat.cpp recognizeFormat.h \
+ sequenceContainer.h definitions.h sequence.h errorMsg.h alphabet.h \
+ mulAlphabet.h someUtil.h logFile.h gainLossAlphabet.h maseFormat.h \
+ molphyFormat.h phylipFormat.h nexusFormat.h fastaFormat.h \
+ clustalFormat.h phylipSequentialFormat.h
+replacementModel.o replacementModel.debug.o: replacementModel.cpp replacementModel.h definitions.h
+searchStatus.o searchStatus.debug.o: searchStatus.cpp searchStatus.h definitions.h
+seqContainerTreeMap.o seqContainerTreeMap.debug.o: seqContainerTreeMap.cpp seqContainerTreeMap.h \
+ definitions.h tree.h readTree.h errorMsg.h logFile.h treeIt.h \
+ sequenceContainer.h sequence.h alphabet.h mulAlphabet.h someUtil.h \
+ gainLossAlphabet.h
+sequence.o sequence.debug.o: sequence.cpp sequence.h definitions.h errorMsg.h alphabet.h \
+ mulAlphabet.h someUtil.h logFile.h
+sequenceContainer.o sequenceContainer.debug.o: sequenceContainer.cpp sequenceContainer.h \
+ definitions.h sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h \
+ logFile.h gainLossAlphabet.h
+simulateTree.o simulateTree.debug.o: simulateTree.cpp definitions.h treeUtil.h tree.h \
+ readTree.h errorMsg.h logFile.h simulateTree.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h sequenceContainer.h \
+ sequence.h alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ talRandom.h gammaDistribution.h generalGammaDistribution.h codon.h \
+ geneticCodeHolder.h
+siteSpecificRate.o siteSpecificRate.debug.o: siteSpecificRate.cpp siteSpecificRate.h definitions.h \
+ tree.h readTree.h errorMsg.h logFile.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h \
+ computePijComponent.h unObservableData.h numRec.h uniformDistribution.h \
+ checkcovFanctors.h likelihoodComputation.h suffStatComponent.h
+someUtil.o someUtil.debug.o: someUtil.cpp someUtil.h logFile.h definitions.h alphabet.h \
+ errorMsg.h talRandom.h
+split.o split.debug.o: split.cpp split.h definitions.h
+splitMap.o splitMap.debug.o: splitMap.cpp splitMap.h definitions.h split.h
+splitTreeUtil.o splitTreeUtil.debug.o: splitTreeUtil.cpp splitTreeUtil.h tree.h definitions.h \
+ readTree.h errorMsg.h logFile.h split.h someUtil.h alphabet.h
+stochasticProcess.o stochasticProcess.debug.o: stochasticProcess.cpp stochasticProcess.h \
+ pijAccelerator.h definitions.h replacementModel.h distribution.h \
+ errorMsg.h
+suffStatComponent.o suffStatComponent.debug.o: suffStatComponent.cpp suffStatComponent.h \
+ definitions.h
+talRandom.o talRandom.debug.o: talRandom.cpp talRandom.h definitions.h logFile.h
+tree.o tree.debug.o: tree.cpp definitions.h tree.h readTree.h errorMsg.h logFile.h \
+ treeUtil.h someUtil.h alphabet.h
+treeIt.o treeIt.debug.o: treeIt.cpp definitions.h treeIt.h errorMsg.h tree.h readTree.h \
+ logFile.h
+treeUtil.o treeUtil.debug.o: treeUtil.cpp definitions.h treeUtil.h tree.h readTree.h \
+ errorMsg.h logFile.h treeIt.h
+uniDistribution.o uniDistribution.debug.o: uniDistribution.cpp uniDistribution.h distribution.h \
+ definitions.h errorMsg.h
+uniformDistribution.o uniformDistribution.debug.o: uniformDistribution.cpp uniformDistribution.h \
+ definitions.h distribution.h
+cmdline2EvolObjs.o cmdline2EvolObjs.debug.o: cmdline2EvolObjs.cpp cmdline2EvolObjs.h amino.h \
+ definitions.h errorMsg.h alphabet.h geneticCodeHolder.h codon.h \
+ someUtil.h logFile.h nucleotide.h sequenceContainer.h sequence.h \
+ mulAlphabet.h gainLossAlphabet.h tree.h readTree.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h uniDistribution.h \
+ trivialAccelerator.h alphaTrivialAccelerator.h readDatMatrix.h \
+ datMatrixHolder.h chebyshevAccelerator.h talRandom.h nucJC.h aaJC.h \
+ hky.h tamura92.h gtrModel.h fromQtoPt.h gammaDistribution.h \
+ generalGammaDistribution.h recognizeFormat.h replacementModelSSRV.h \
+ stochasticProcessSSRV.h
+generalGammaDistributionLaguerre.o generalGammaDistributionLaguerre.debug.o: generalGammaDistributionLaguerre.cpp \
+ generalGammaDistributionLaguerre.h definitions.h \
+ generalGammaDistribution.h distribution.h gammaUtilities.h numRec.h \
+ errorMsg.h uniformDistribution.h logFile.h GLaguer.h
+gammaDistributionLaguerre.o gammaDistributionLaguerre.debug.o: gammaDistributionLaguerre.cpp \
+ gammaDistributionLaguerre.h definitions.h \
+ generalGammaDistributionLaguerre.h generalGammaDistribution.h \
+ distribution.h errorMsg.h gammaUtilities.h numRec.h \
+ uniformDistribution.h logFile.h
+GLaguer.o GLaguer.debug.o: GLaguer.cpp definitions.h GLaguer.h errorMsg.h \
+ gammaUtilities.h numRec.h uniformDistribution.h distribution.h logFile.h
+givenRatesMLDistance.o givenRatesMLDistance.debug.o: givenRatesMLDistance.cpp givenRatesMLDistance.h \
+ definitions.h countTableComponent.h likeDist.h distanceMethod.h \
+ sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h logFile.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h \
+ jcDistance.h unObservableData.h tree.h readTree.h sequenceContainer.h \
+ gainLossAlphabet.h computePijComponent.h numRec.h uniformDistribution.h
+distanceBasedSeqs2Tree.o distanceBasedSeqs2Tree.debug.o: distanceBasedSeqs2Tree.cpp \
+ distanceBasedSeqs2Tree.h distanceMethod.h definitions.h sequence.h \
+ errorMsg.h alphabet.h mulAlphabet.h someUtil.h logFile.h \
+ sequenceContainer.h gainLossAlphabet.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h likeDist.h \
+ countTableComponent.h jcDistance.h unObservableData.h tree.h readTree.h \
+ computePijComponent.h distances2Tree.h givenRatesMLDistance.h \
+ posteriorDistance.h gammaDistribution.h generalGammaDistribution.h \
+ uniDistribution.h distanceTable.h bestAlpha.h likelihoodComputation.h \
+ suffStatComponent.h siteSpecificRate.h bblEM.h tamura92.h \
+ bestTamura92param.h bestGtrModelParams.h gtrModel.h fromQtoPt.h \
+ replacementModelSSRV.h trivialAccelerator.h
+posteriorDistance.o posteriorDistance.debug.o: posteriorDistance.cpp posteriorDistance.h likeDist.h \
+ definitions.h countTableComponent.h distanceMethod.h sequence.h \
+ errorMsg.h alphabet.h mulAlphabet.h someUtil.h logFile.h \
+ stochasticProcess.h pijAccelerator.h replacementModel.h distribution.h \
+ jcDistance.h unObservableData.h tree.h readTree.h sequenceContainer.h \
+ gainLossAlphabet.h computePijComponent.h gammaDistribution.h \
+ generalGammaDistribution.h numRec.h uniformDistribution.h \
+ uniDistribution.h
+pairwiseGammaDistance.o pairwiseGammaDistance.debug.o: pairwiseGammaDistance.cpp \
+ pairwiseGammaDistance.h likeDist.h definitions.h countTableComponent.h \
+ distanceMethod.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h jcDistance.h unObservableData.h tree.h \
+ readTree.h sequenceContainer.h gainLossAlphabet.h computePijComponent.h \
+ gammaDistribution.h generalGammaDistribution.h numRec.h \
+ uniformDistribution.h uniDistribution.h
+doubleRep.o doubleRep.debug.o: doubleRep.cpp
+logRep.o logRep.debug.o: logRep.cpp
+indel.o indel.debug.o: indel.cpp indel.h definitions.h errorMsg.h alphabet.h
+indelModel.o indelModel.debug.o: indelModel.cpp indelModel.h replacementModel.h \
+ definitions.h
+mulAlphabet.o mulAlphabet.debug.o: mulAlphabet.cpp mulAlphabet.h definitions.h alphabet.h \
+ someUtil.h logFile.h distribution.h errorMsg.h
+replacementModelSSRV.o replacementModelSSRV.debug.o: replacementModelSSRV.cpp replacementModelSSRV.h \
+ replacementModel.h definitions.h distribution.h fromQtoPt.h errorMsg.h \
+ logFile.h
+stochasticProcessSSRV.o stochasticProcessSSRV.debug.o: stochasticProcessSSRV.cpp \
+ stochasticProcessSSRV.h stochasticProcess.h pijAccelerator.h \
+ definitions.h replacementModel.h distribution.h replacementModelSSRV.h \
+ fromQtoPt.h errorMsg.h
+bestAlphaAndNu.o bestAlphaAndNu.debug.o: bestAlphaAndNu.cpp bestAlphaAndNu.h definitions.h \
+ sequenceContainer.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h gainLossAlphabet.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h gammaDistribution.h \
+ generalGammaDistribution.h tree.h readTree.h replacementModelSSRV.h \
+ fromQtoPt.h tamura92.h stochasticProcessSSRV.h C_evalParamUSSRV.h \
+ likelihoodComputation.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h likelihoodComputation2USSRV.h ussrvModel.h \
+ computeUpAlg.h bestAlpha.h numRec.h uniformDistribution.h bblEM.h \
+ countTableComponent.h
+C_evalParamUSSRV.o C_evalParamUSSRV.debug.o: C_evalParamUSSRV.cpp C_evalParamUSSRV.h definitions.h \
+ likelihoodComputation.h computePijComponent.h tree.h readTree.h \
+ errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h unObservableData.h likelihoodComputation2USSRV.h \
+ ussrvModel.h stochasticProcessSSRV.h replacementModelSSRV.h fromQtoPt.h \
+ gammaDistribution.h generalGammaDistribution.h computeUpAlg.h tamura92.h
+matrixUtils.o matrixUtils.debug.o: matrixUtils.cpp matrixUtils.h definitions.h logFile.h \
+ errorMsg.h
+betaOmegaDistribution.o betaOmegaDistribution.debug.o: betaOmegaDistribution.cpp \
+ betaOmegaDistribution.h definitions.h distribution.h betaDistribution.h \
+ logFile.h gammaUtilities.h numRec.h errorMsg.h uniformDistribution.h \
+ betaUtilities.h
+betaUtilities.o betaUtilities.debug.o: betaUtilities.cpp definitions.h betaUtilities.h numRec.h \
+ errorMsg.h uniformDistribution.h distribution.h logFile.h \
+ gammaUtilities.h
+betaDistribution.o betaDistribution.debug.o: betaDistribution.cpp betaDistribution.h definitions.h \
+ distribution.h gammaUtilities.h numRec.h errorMsg.h \
+ uniformDistribution.h logFile.h betaUtilities.h
+geneticCodeHolder.o geneticCodeHolder.debug.o: geneticCodeHolder.cpp geneticCodeHolder.h \
+ replacementMatrixSource/nuclearStandard.code \
+ replacementMatrixSource/nuclearEuplotid.code \
+ replacementMatrixSource/nuclearCiliate.code \
+ replacementMatrixSource/nuclearBlepharisma.code \
+ replacementMatrixSource/mitochondriaYeast.code \
+ replacementMatrixSource/mitochondriaVertebrate.code \
+ replacementMatrixSource/mitochondriaProtozoan.code \
+ replacementMatrixSource/mitochondriaInvertebrate.code \
+ replacementMatrixSource/mitochondriaFlatworm.code \
+ replacementMatrixSource/mitochondriaEchinoderm.code \
+ replacementMatrixSource/mitochondriaAscidian.code
+samplingSequences.o samplingSequences.debug.o: samplingSequences.cpp samplingSequences.h \
+ definitions.h distanceMethod.h sequence.h errorMsg.h alphabet.h \
+ mulAlphabet.h someUtil.h logFile.h sequenceContainer.h \
+ gainLossAlphabet.h pDistance.h talRandom.h
+bblEM2USSRV.o bblEM2USSRV.debug.o: bblEM2USSRV.cpp bblEM2USSRV.h definitions.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ countTableComponent.h computePijComponent.h suffStatComponent.h \
+ ussrvModel.h stochasticProcessSSRV.h replacementModelSSRV.h fromQtoPt.h \
+ gammaDistribution.h generalGammaDistribution.h computeUpAlg.h \
+ computeDownAlg.h computeCounts.h treeIt.h \
+ fromCountTableComponentToDistance2USSRV.h likeDist2USSRV.h \
+ distanceMethod.h likelihoodComputation2USSRV.h likelihoodComputation.h \
+ unObservableData.h
+bestParamUSSRV.o bestParamUSSRV.debug.o: bestParamUSSRV.cpp bestParamUSSRV.h definitions.h \
+ sequenceContainer.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h gainLossAlphabet.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h gammaDistribution.h \
+ generalGammaDistribution.h tree.h readTree.h replacementModelSSRV.h \
+ fromQtoPt.h stochasticProcessSSRV.h C_evalParamUSSRV.h \
+ likelihoodComputation.h computePijComponent.h suffStatComponent.h \
+ unObservableData.h likelihoodComputation2USSRV.h ussrvModel.h \
+ computeUpAlg.h tamura92.h bestAlpha.h numRec.h uniformDistribution.h \
+ bblEM.h countTableComponent.h bestAlphaAndNu.h bblEM2USSRV.h \
+ computeDownAlg.h computeCounts.h treeIt.h \
+ fromCountTableComponentToDistance2USSRV.h likeDist2USSRV.h \
+ distanceMethod.h
+likeDist2USSRV.o likeDist2USSRV.debug.o: likeDist2USSRV.cpp likeDist2USSRV.h definitions.h \
+ countTableComponent.h distanceMethod.h sequence.h errorMsg.h alphabet.h \
+ mulAlphabet.h someUtil.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h ussrvModel.h stochasticProcessSSRV.h \
+ replacementModelSSRV.h fromQtoPt.h gammaDistribution.h \
+ generalGammaDistribution.h numRec.h uniformDistribution.h
+ussrvModel.o ussrvModel.debug.o: ussrvModel.cpp ussrvModel.h stochasticProcessSSRV.h \
+ stochasticProcess.h pijAccelerator.h definitions.h replacementModel.h \
+ distribution.h replacementModelSSRV.h fromQtoPt.h errorMsg.h \
+ gammaDistribution.h generalGammaDistribution.h logFile.h
+likelihoodComputation2USSRV.o likelihoodComputation2USSRV.debug.o: likelihoodComputation2USSRV.cpp \
+ likelihoodComputation2USSRV.h definitions.h computePijComponent.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h ussrvModel.h stochasticProcessSSRV.h \
+ replacementModelSSRV.h fromQtoPt.h gammaDistribution.h \
+ generalGammaDistribution.h computeUpAlg.h likelihoodComputation.h \
+ unObservableData.h
+fromCountTableComponentToDistance2USSRV.o fromCountTableComponentToDistance2USSRV.debug.o: \
+ fromCountTableComponentToDistance2USSRV.cpp \
+ fromCountTableComponentToDistance2USSRV.h definitions.h \
+ countTableComponent.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h ussrvModel.h stochasticProcessSSRV.h \
+ replacementModelSSRV.h fromQtoPt.h errorMsg.h gammaDistribution.h \
+ generalGammaDistribution.h logFile.h likeDist2USSRV.h distanceMethod.h \
+ sequence.h alphabet.h mulAlphabet.h someUtil.h likeDist.h jcDistance.h \
+ unObservableData.h tree.h readTree.h sequenceContainer.h \
+ gainLossAlphabet.h computePijComponent.h
+normalDist.o normalDist.debug.o: normalDist.cpp normalDist.h definitions.h
+tamura92.o tamura92.debug.o: tamura92.cpp tamura92.h replacementModel.h definitions.h \
+ errorMsg.h
+bestTamura92param.o bestTamura92param.debug.o: bestTamura92param.cpp bestTamura92param.h \
+ definitions.h likelihoodComputation.h computePijComponent.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h unObservableData.h gammaDistribution.h \
+ generalGammaDistribution.h tamura92.h bblEM.h countTableComponent.h \
+ numRec.h uniformDistribution.h bestAlpha.h
+phylipSequentialFormat.o phylipSequentialFormat.debug.o: phylipSequentialFormat.cpp \
+ phylipSequentialFormat.h definitions.h sequenceContainer.h sequence.h \
+ errorMsg.h alphabet.h mulAlphabet.h someUtil.h logFile.h \
+ gainLossAlphabet.h
+simulateCodonsJumps.o simulateCodonsJumps.debug.o: simulateCodonsJumps.cpp simulateCodonsJumps.h \
+ simulateJumpsAbstract.h definitions.h tree.h readTree.h errorMsg.h \
+ logFile.h stochasticProcess.h pijAccelerator.h replacementModel.h \
+ distribution.h alphabet.h talRandom.h someUtil.h codon.h \
+ geneticCodeHolder.h
+simulateJumpsAbstract.o simulateJumpsAbstract.debug.o: simulateJumpsAbstract.cpp \
+ simulateJumpsAbstract.h definitions.h tree.h readTree.h errorMsg.h \
+ logFile.h stochasticProcess.h pijAccelerator.h replacementModel.h \
+ distribution.h alphabet.h
+ssrvDistanceSeqs2Tree.o ssrvDistanceSeqs2Tree.debug.o: ssrvDistanceSeqs2Tree.cpp \
+ ssrvDistanceSeqs2Tree.h distanceBasedSeqs2Tree.h distanceMethod.h \
+ definitions.h sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h \
+ logFile.h sequenceContainer.h gainLossAlphabet.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h likeDist.h \
+ countTableComponent.h jcDistance.h unObservableData.h tree.h readTree.h \
+ computePijComponent.h distances2Tree.h givenRatesMLDistance.h \
+ posteriorDistance.h gammaDistribution.h generalGammaDistribution.h \
+ bestParamUSSRV.h replacementModelSSRV.h fromQtoPt.h \
+ stochasticProcessSSRV.h C_evalParamUSSRV.h likelihoodComputation.h \
+ suffStatComponent.h likelihoodComputation2USSRV.h ussrvModel.h \
+ computeUpAlg.h tamura92.h bestAlpha.h numRec.h uniformDistribution.h \
+ bblEM.h bestAlphaAndNu.h bblEM2USSRV.h computeDownAlg.h computeCounts.h \
+ treeIt.h fromCountTableComponentToDistance2USSRV.h likeDist2USSRV.h
+multipleStochasticProcess.o multipleStochasticProcess.debug.o: multipleStochasticProcess.cpp \
+ multipleStochasticProcess.h stochasticProcess.h pijAccelerator.h \
+ definitions.h replacementModel.h distribution.h errorMsg.h
+distributionPlusInvariant.o distributionPlusInvariant.debug.o: distributionPlusInvariant.cpp definitions.h \
+ distributionPlusInvariant.h distribution.h errorMsg.h logFile.h
+gammaDistributionFixedCategories.o gammaDistributionFixedCategories.debug.o: gammaDistributionFixedCategories.cpp \
+ gammaDistributionFixedCategories.h definitions.h \
+ generalGammaDistributionFixedCategories.h generalGammaDistribution.h \
+ distribution.h errorMsg.h gammaUtilities.h numRec.h \
+ uniformDistribution.h logFile.h matrixUtils.h
+generalGammaDistributionPlusInvariant.o generalGammaDistributionPlusInvariant.debug.o: \
+ generalGammaDistributionPlusInvariant.cpp \
+ generalGammaDistributionPlusInvariant.h definitions.h \
+ distributionPlusInvariant.h distribution.h generalGammaDistribution.h \
+ errorMsg.h gammaUtilities.h numRec.h uniformDistribution.h logFile.h
+gammaDistributionPlusInvariant.o gammaDistributionPlusInvariant.debug.o: gammaDistributionPlusInvariant.cpp \
+ gammaDistributionPlusInvariant.h definitions.h \
+ distributionPlusInvariant.h distribution.h gammaDistribution.h \
+ generalGammaDistribution.h errorMsg.h gammaUtilities.h numRec.h \
+ uniformDistribution.h logFile.h
+distributionPlusCategory.o distributionPlusCategory.debug.o: distributionPlusCategory.cpp \
+ distributionPlusCategory.h definitions.h distribution.h logFile.h \
+ errorMsg.h
+simulateJumps.o simulateJumps.debug.o: simulateJumps.cpp simulateJumps.h \
+ simulateJumpsAbstract.h definitions.h tree.h readTree.h errorMsg.h \
+ logFile.h stochasticProcess.h pijAccelerator.h replacementModel.h \
+ distribution.h alphabet.h talRandom.h someUtil.h
+computeJumps.o computeJumps.debug.o: computeJumps.cpp computeJumps.h definitions.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h alphabet.h talRandom.h someUtil.h \
+ matrixUtils.h
+seqeuncesFilter.o seqeuncesFilter.debug.o: seqeuncesFilter.cpp seqeuncesFilter.h definitions.h \
+ sequenceContainer.h sequence.h errorMsg.h alphabet.h mulAlphabet.h \
+ someUtil.h logFile.h gainLossAlphabet.h codon.h geneticCodeHolder.h \
+ amino.h fastaFormat.h nucleotide.h
+optGammaMixtureLS.o optGammaMixtureLS.debug.o: optGammaMixtureLS.cpp optGammaMixtureLS.h \
+ definitions.h suffStatGammaMixture.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h sequenceContainer.h \
+ sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h logFile.h \
+ gainLossAlphabet.h tree.h readTree.h mixtureDistribution.h \
+ generalGammaDistribution.h computePijComponent.h gammaUtilities.h \
+ numRec.h uniformDistribution.h likelihoodComputation.h \
+ suffStatComponent.h unObservableData.h
+mixtureDistribution.o mixtureDistribution.debug.o: mixtureDistribution.cpp mixtureDistribution.h \
+ definitions.h generalGammaDistribution.h distribution.h \
+ generalGammaDistributionLaguerre.h talRandom.h logFile.h someUtil.h \
+ alphabet.h errorMsg.h
+suffStatGammaMixture.o suffStatGammaMixture.debug.o: suffStatGammaMixture.cpp suffStatGammaMixture.h \
+ definitions.h stochasticProcess.h pijAccelerator.h replacementModel.h \
+ distribution.h sequenceContainer.h sequence.h errorMsg.h alphabet.h \
+ mulAlphabet.h someUtil.h logFile.h gainLossAlphabet.h tree.h readTree.h \
+ mixtureDistribution.h generalGammaDistribution.h computePijComponent.h \
+ likelihoodComputation.h suffStatComponent.h unObservableData.h \
+ gammaUtilities.h numRec.h uniformDistribution.h uniDistribution.h
+GamMixtureOptimizer.o GamMixtureOptimizer.debug.o: GamMixtureOptimizer.cpp GamMixtureOptimizer.h \
+ definitions.h stochasticProcess.h pijAccelerator.h replacementModel.h \
+ distribution.h sequenceContainer.h sequence.h errorMsg.h alphabet.h \
+ mulAlphabet.h someUtil.h logFile.h gainLossAlphabet.h tree.h readTree.h \
+ mixtureDistribution.h generalGammaDistribution.h unObservableData.h \
+ computePijComponent.h optGammaMixtureEM.h suffStatGammaMixture.h \
+ gammaUtilities.h numRec.h uniformDistribution.h optGammaMixtureLS.h \
+ likelihoodComputation.h suffStatComponent.h
+optGammaMixtureEM.o optGammaMixtureEM.debug.o: optGammaMixtureEM.cpp optGammaMixtureEM.h \
+ definitions.h suffStatGammaMixture.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h sequenceContainer.h \
+ sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h logFile.h \
+ gainLossAlphabet.h tree.h readTree.h mixtureDistribution.h \
+ generalGammaDistribution.h computePijComponent.h gammaUtilities.h \
+ numRec.h uniformDistribution.h likelihoodComputation.h \
+ suffStatComponent.h unObservableData.h uniDistribution.h
+gainLossAlphabet.o gainLossAlphabet.debug.o: gainLossAlphabet.cpp gainLossAlphabet.h alphabet.h \
+ errorMsg.h
+wYangModel.o wYangModel.debug.o: wYangModel.cpp wYangModel.h replacementModel.h \
+ definitions.h fromQtoPt.h codon.h errorMsg.h someUtil.h logFile.h \
+ alphabet.h geneticCodeHolder.h readDatMatrix.h datMatrixHolder.h
+codonUtils.o codonUtils.debug.o: codonUtils.cpp codonUtils.h nucleotide.h definitions.h \
+ alphabet.h codon.h errorMsg.h someUtil.h logFile.h geneticCodeHolder.h \
+ amino.h fastaFormat.h sequenceContainer.h sequence.h mulAlphabet.h \
+ gainLossAlphabet.h clustalFormat.h recognizeFormat.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h wYangModel.h \
+ fromQtoPt.h evaluateCharacterFreq.h numRec.h uniformDistribution.h
+likelihoodComputation2Codon.o likelihoodComputation2Codon.debug.o: likelihoodComputation2Codon.cpp \
+ likelihoodComputation2Codon.h definitions.h computePijComponent.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h wYangModel.h fromQtoPt.h codon.h geneticCodeHolder.h \
+ computeUpAlg.h likelihoodComputation.h unObservableData.h
+likeDist2Codon.o likeDist2Codon.debug.o: likeDist2Codon.cpp likeDist2Codon.h definitions.h \
+ countTableComponent.h distanceMethod.h sequence.h errorMsg.h alphabet.h \
+ mulAlphabet.h someUtil.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h wYangModel.h fromQtoPt.h codon.h \
+ geneticCodeHolder.h numRec.h uniformDistribution.h
+unObservableData.o unObservableData.debug.o: unObservableData.cpp unObservableData.h definitions.h \
+ tree.h readTree.h errorMsg.h logFile.h stochasticProcess.h \
+ pijAccelerator.h replacementModel.h distribution.h sequenceContainer.h \
+ sequence.h alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ computePijComponent.h likelihoodComputation.h suffStatComponent.h \
+ likelihoodComputationGL.h computeUpAlg.h
+likelihoodComputationGL.o likelihoodComputationGL.debug.o: likelihoodComputationGL.cpp \
+ likelihoodComputationGL.h definitions.h computePijComponent.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h unObservableData.h computeUpAlg.h \
+ likelihoodComputation.h
+threeStateModel.o threeStateModel.debug.o: threeStateModel.cpp threeStateModel.h definitions.h \
+ replacementModel.h fromQtoPt.h errorMsg.h matrixUtils.h logFile.h \
+ someUtil.h alphabet.h
+threeStateAlphabet.o threeStateAlphabet.debug.o: threeStateAlphabet.cpp threeStateAlphabet.h \
+ alphabet.h errorMsg.h
+betaDistributionFixedCategories.o betaDistributionFixedCategories.debug.o: betaDistributionFixedCategories.cpp \
+ betaDistributionFixedCategories.h definitions.h betaDistribution.h \
+ distribution.h errorMsg.h gammaUtilities.h numRec.h \
+ uniformDistribution.h logFile.h
+betaDistributionFixedCategoriesWithOmegaUniform.o betaDistributionFixedCategoriesWithOmegaUniform.debug.o: \
+ betaDistributionFixedCategoriesWithOmegaUniform.cpp \
+ betaDistributionFixedCategoriesWithOmegaUniform.h definitions.h \
+ betaDistributionFixedCategories.h betaDistribution.h distribution.h \
+ errorMsg.h uniformDistribution.h gammaUtilities.h numRec.h logFile.h \
+ matrixUtils.h
+bblEM2codon.o bblEM2codon.debug.o: bblEM2codon.cpp bblEM2codon.h definitions.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ countTableComponent.h computePijComponent.h suffStatComponent.h \
+ likelihoodComputation.h unObservableData.h likelihoodComputation2Codon.h \
+ fromCountTableComponentToDistance2Codon.h computeUpAlg.h \
+ computeDownAlg.h computeCounts.h treeIt.h
+bestAlphaAndK.o bestAlphaAndK.debug.o: bestAlphaAndK.cpp bestAlphaAndK.h definitions.h tree.h \
+ readTree.h errorMsg.h logFile.h likelihoodComputation.h \
+ computePijComponent.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h unObservableData.h likelihoodComputation2Codon.h \
+ generalGammaDistribution.h wYangModel.h fromQtoPt.h codon.h \
+ geneticCodeHolder.h bblEM2codon.h countTableComponent.h computeUpAlg.h \
+ numRec.h uniformDistribution.h betaOmegaDistribution.h \
+ betaDistribution.h codonUtils.h nucleotide.h amino.h fastaFormat.h \
+ clustalFormat.h recognizeFormat.h evaluateCharacterFreq.h
+fromCountTableComponentToDistance2Codon.o fromCountTableComponentToDistance2Codon.debug.o: \
+ fromCountTableComponentToDistance2Codon.cpp \
+ fromCountTableComponentToDistance2Codon.h definitions.h \
+ countTableComponent.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h likeDist2Codon.h distanceMethod.h \
+ sequence.h errorMsg.h alphabet.h mulAlphabet.h someUtil.h logFile.h \
+ wYangModel.h fromQtoPt.h codon.h geneticCodeHolder.h likeDist.h \
+ jcDistance.h unObservableData.h tree.h readTree.h sequenceContainer.h \
+ gainLossAlphabet.h computePijComponent.h
+gtrModel.o gtrModel.debug.o: gtrModel.cpp gtrModel.h replacementModel.h definitions.h \
+ fromQtoPt.h readDatMatrix.h datMatrixHolder.h errorMsg.h matrixUtils.h \
+ logFile.h
+bestGtrModelParams.o bestGtrModelParams.debug.o: bestGtrModelParams.cpp bestGtrModelParams.h \
+ definitions.h likelihoodComputation.h computePijComponent.h tree.h \
+ readTree.h errorMsg.h logFile.h stochasticProcess.h pijAccelerator.h \
+ replacementModel.h distribution.h sequenceContainer.h sequence.h \
+ alphabet.h mulAlphabet.h someUtil.h gainLossAlphabet.h \
+ suffStatComponent.h unObservableData.h gammaDistribution.h \
+ generalGammaDistribution.h gtrModel.h fromQtoPt.h bblEM.h \
+ countTableComponent.h numRec.h uniformDistribution.h bestAlpha.h
diff --git a/libs/phylogeny/maseFormat.cpp b/libs/phylogeny/maseFormat.cpp
new file mode 100644
index 0000000..e97883f
--- /dev/null
+++ b/libs/phylogeny/maseFormat.cpp
@@ -0,0 +1,86 @@
+// $Id: maseFormat.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "maseFormat.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+
+// Read an aligned mase file: parse every sequence, then verify that all
+// sequences share the same length (the alignment invariant).
+sequenceContainer maseFormat::read(istream &infile, const alphabet* alph) {
+	sequenceContainer container = readUnAligned(infile, alph);
+	container.makeSureAllSeqAreSameLengthAndGetLen();
+	return container;
+}
+
+// Parse a mase-format stream into a sequenceContainer without requiring the
+// sequences to be aligned (lengths may differ).
+// Layout (see maseFormat.h): lines starting with ";;" are file-wide remarks;
+// a run of ";" lines is a per-sequence remark, the next line is the sequence
+// name, and the following lines (until the next ";") hold the characters,
+// possibly prefixed by position numbers ("10 aact").
+// Fixes: iterator is now checked against seqFileData.end() before every
+// dereference (remark loop, empty-line skip, name line), so a truncated
+// file reports an error instead of walking past the end of the vector.
+sequenceContainer maseFormat::readUnAligned(istream &infile, const alphabet* alph) {
+	if (!infile) {
+		errorMsg::reportError("unable to read mase format, could not open file");
+	}
+	sequenceContainer mySeqData;
+
+	vector<string> seqFileData;
+	putFileIntoVectorStringArray(infile,seqFileData);
+
+	// First pass: collect the general (";;") file remarks.
+	vector<string>::const_iterator it1;
+	for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ++it1) {
+		if (it1->empty()) continue; // empty line continue
+		if (it1->size()>1) {
+			if ( ((*it1)[0] == ';') && ((*it1)[1] == ';')) {// general file remarks
+				mySeqData.addGeneralRemark(*it1);
+			}
+		}
+	}
+	// Second pass: parse remark / name / sequence records.
+	int localid=0;
+	for (it1 = seqFileData.begin(); it1!= seqFileData.end(); ) {
+		if (it1->empty()) {++it1;continue; }// empty line continue
+		if (it1->size()>1) {
+			if ( ((*it1)[0] == ';') && ((*it1)[1] == ';')) {// handled in first pass
+				++it1;continue;
+			}
+		}
+
+		string remark;
+		string name;
+		string seqStr;
+		if ((*it1)[0] != ';') {
+			LOG(5,<<"problem in line: "<<*it1<<endl);
+			errorMsg::reportError("Error reading mase file, error finding sequence remark",1);
+		}
+		// Accumulate the per-sequence remark lines; the first is appended
+		// bare, subsequent ones are newline-separated (original behavior).
+		if ((*it1)[0] == ';') {remark += *it1;++it1;}
+		while ((it1 != seqFileData.end()) && (!it1->empty()) && ((*it1)[0] == ';')) {
+			remark += "\n";
+			remark += *it1;
+			++it1;
+		}
+		while ((it1 != seqFileData.end()) && it1->empty()) ++it1; // skip empty lines
+		if (it1 == seqFileData.end())
+			errorMsg::reportError("Error reading mase file, sequence name is missing",1);
+		name = *it1;
+		++it1;
+
+		while (it1!= seqFileData.end()) {
+			if ((!it1->empty()) && ((*it1)[0] == ';')) break; // next record begins
+			// the following lines are taking care of a format which is like "10 aact"
+			// in mase format: strip leading position numbers and spaces
+			string withoutNumberAndSpaces =
+				takeCharOutOfString("0123456789 ",*it1);
+			seqStr+=withoutNumberAndSpaces;
+			++it1;
+		}
+		mySeqData.add(sequence(seqStr,name,remark,localid,alph));
+		localid++;
+	}
+
+	return mySeqData;
+}
+
+// Write the container in mase format: general remarks first (";;" lines; a
+// default pair is emitted when there are none), then for each sequence its
+// remark prefixed by ";", its name, and its characters.
+void maseFormat::write(ostream &out, const sequenceContainer& sd) {
+	vector<string> generalRemarks = sd.getGeneralRemarks();
+	if (generalRemarks.empty())
+		out<<";;\n;;\n";
+	for (vector<string>::const_iterator remarkIt=generalRemarks.begin(); remarkIt != generalRemarks.end(); ++remarkIt)
+		out<<(*remarkIt)<<endl;
+	for (sequenceContainer::constTaxaIterator taxaIt=sd.constTaxaBegin(); taxaIt!=sd.constTaxaEnd(); ++taxaIt) {
+		if (taxaIt->remark().size() > 0)
+			out<<";"<<taxaIt->remark()<<endl;
+		else
+			out<<";\n";
+		out<<taxaIt->name()<<endl;
+		out<<taxaIt->toString()<<endl;
+	}
+}
+
diff --git a/libs/phylogeny/maseFormat.h b/libs/phylogeny/maseFormat.h
new file mode 100644
index 0000000..806ddf9
--- /dev/null
+++ b/libs/phylogeny/maseFormat.h
@@ -0,0 +1,42 @@
+// $Id: maseFormat.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___MASE_FORMAT
+#define ___MASE_FORMAT
+
+#include "sequenceContainer.h"
+
+// Reader/writer for the mase multiple-sequence file format (an example of
+// the format appears in the comment at the bottom of this header).
+class maseFormat{
+public:
+	// read: parses the stream and verifies all sequences have equal length.
+	static sequenceContainer read(istream &infile, const alphabet* alph);
+	// write: emits the container in mase format to out.
+	static void write(ostream &out, const sequenceContainer& sd);
+	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
+	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
+};
+
+#endif
+
+/* EXAMPLE OF THE FORMAT:
+
+;;this is the place for general remarks.
+;here we put sequence specific remark.
+Langur
+KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV
+;
+Baboon
+KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV
+;
+Human
+KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV
+;
+Rat
+KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV
+;
+Cow
+KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL
+;
+Horse
+KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL
+
+*/
+
+
diff --git a/libs/phylogeny/matrixUtils.cpp b/libs/phylogeny/matrixUtils.cpp
new file mode 100644
index 0000000..1f08a2f
--- /dev/null
+++ b/libs/phylogeny/matrixUtils.cpp
@@ -0,0 +1,331 @@
+#include "matrixUtils.h"
+#include "errorMsg.h"
+#include <cmath>
+#include <string>
+#include <ctype.h>
+#include <cctype>
+#include <cstdlib>
+
+// Return the main diagonal (mat[i][i]) of a square matrix as a vector.
+Vdouble getDiagonalFromMatrix(VVdouble &mat){
+	Vdouble diagonal(mat.size());
+	for (size_t i=0; i<mat.size(); ++i)
+		diagonal[i] = mat[i][i];
+	return diagonal;
+}
+
+// Return the sub-diagonal (mat[i+1][i]) of a square matrix as a vector.
+// The loop condition "i+1 < mat.size()" is safe for an empty matrix: the
+// unsigned expression "mat.size()-1" would wrap around to a huge value.
+Vdouble getSubDiagonalFromMatrix(VVdouble &mat){
+	Vdouble diagonal;
+	for (size_t i=0; i+1<mat.size(); i++)
+		diagonal.push_back(mat[i+1][i]);
+	return diagonal;
+}
+
+
+
+
+
+
+// Read a whitespace-separated numeric matrix from fileName into mat.
+// Each input line becomes one row; values on a line are separated by single
+// spaces. Reports an error when the file cannot be opened.
+void readMatrixFromFile(VVdouble &mat,string fileName){
+	ifstream in(fileName.c_str());
+	if (!in){
+		string err="in function readMatrixFromFile, empty file or non-existant:";
+		err+=fileName;
+		errorMsg::reportError(err);
+	}
+	int i=0;
+	mat.resize(1);
+	while (!in.eof()) {
+		string row;
+		string::size_type k=0;
+		getline(in,row,'\n');
+		while (k<row.size()){
+			string value;
+			// the bounds check must precede the character test, otherwise
+			// row[k] is read one position past the end of the line
+			while (k<row.size() && row[k]!=' '){
+				value+=row[k];
+				k++;
+			}
+			k++; // skip the separating space
+			mat[i].push_back(atof(value.c_str()));
+		}
+		if (!in.eof())
+			mat.resize(++i+1);
+	}
+	in.close();
+}
+
+
+// Print mat to out, tab-separated, one row per line, followed by one blank line.
+void printMatrix(const VVdouble &mat, ostream &out) {
+	for (size_t row=0; row<mat.size(); ++row) {
+		for (size_t col=0; col<mat[row].size(); ++col)
+			out << mat[row][col] << '\t';
+		out << endl;
+	}
+	out << endl;
+}
+
+// Print an integer matrix to out (tab-separated rows), then a blank line and
+// a dashed separator line.
+void printMatrix(const VVint &mat, ostream &out) {
+	for (size_t row=0; row<mat.size(); ++row) {
+		for (size_t col=0; col<mat[row].size(); ++col)
+			out << mat[row][col] << '\t';
+		out << endl;
+	}
+	out << endl;
+	out<<"---------------------------------------------"<<endl;
+}
+
+
+
+// Return the transpose of mat. Handles any rectangular matrix (rows are
+// assumed to be of equal length; the row count and column count may differ);
+// an empty matrix transposes to an empty matrix.
+VVdouble transpose(const VVdouble &mat){
+	VVdouble matT;
+	size_t rows=mat.size();
+	size_t cols= rows ? mat[0].size() : 0;
+	resizeMatrix(matT,cols,rows);
+	for (size_t i=0; i<cols; i++){
+		for (size_t j=0; j<rows; j++) {
+			matT[i][j]=mat[j][i];
+		}
+	}
+	return matT;
+}
+
+
+
+
+// Element-wise mat1 - mat2, implemented as mat1 + (-mat2).
+VVdouble subtract(const VVdouble &mat1,const VVdouble &mat2){
+	return add(mat1, reverseSign(mat2));
+}
+
+// Return a copy of mat1 with every element negated.
+// The inner loop runs over the row length mat1[i].size() rather than the
+// row count mat1.size(), so non-square matrices are negated correctly.
+VVdouble reverseSign(const VVdouble &mat1){
+	VVdouble newMat(mat1.size());
+	for (size_t i=0;i<mat1.size();i++){
+		newMat[i].resize(mat1[i].size());
+		for (size_t j=0;j<mat1[i].size();j++){
+			newMat[i][j]=-mat1[i][j];
+		}
+	}
+	return newMat;
+}
+
+
+// Find the largest element of vec: the value is written to maxValue and its
+// index to argmax. For an empty vec, maxValue is VERYSMALL and argmax is 0.
+void findMaxInVector(const Vdouble &vec, MDOUBLE &maxValue, int &argmax){
+	maxValue=VERYSMALL;
+	argmax=0;
+	for (size_t i=0; i<vec.size(); ++i){
+		if (vec[i]>maxValue){
+			maxValue=vec[i];
+			argmax=(int)i;
+		}
+	}
+}
+
+// Find the smallest element of vec by negating a copy and reusing
+// findMaxInVector; minValue and argmin are the output parameters.
+void findMinInVector(const Vdouble &vec, MDOUBLE &minValue, int &argmin) {
+	Vdouble negated(vec.size());
+	for (size_t i=0; i<vec.size(); ++i)
+		negated[i] = -vec[i];
+	findMaxInVector(negated, minValue, argmin);
+	minValue = -minValue;
+}
+
+// Return the arithmetic mean of vec; 0.0 for an empty vector (avoids the
+// 0/0 division the unguarded computation performs).
+MDOUBLE averageElementInVector(const Vdouble &vec) {
+	if (vec.empty())
+		return 0.0;
+	MDOUBLE sum=0.0;
+	for (size_t i=0; i<vec.size(); ++i)
+		sum+=vec[i];
+	return sum/vec.size();
+}
+
+// OR vec2 into vec1: every position where vec2 holds 1 is set to 1 in vec1.
+// Only positions present in both vectors are touched, so a vec2 longer than
+// vec1 cannot cause an out-of-bounds write.
+void appendBinaryVectors(Vint &vec1, const Vint &vec2){
+	size_t len = (vec1.size() < vec2.size()) ? vec1.size() : vec2.size();
+	for (size_t i=0; i < len; i++)
+		if (vec2[i]==1)
+			vec1[i]=1;
+}
+
+// Append all elements of vec2 to the end of vec1.
+void appendVectors(Vint &vec1, const Vint &vec2) {
+	vec1.insert(vec1.end(), vec2.begin(), vec2.end());
+}
+
+// Flip a 0/1 vector in place (0 -> 1, 1 -> 0) and return it as well.
+Vint complementBinaryVec(Vint&bufferVec) {
+	for (size_t i=0; i<bufferVec.size(); ++i)
+		bufferVec[i] = abs(bufferVec[i]-1);
+	return bufferVec;
+}
+
+
+// Read a vertical vector of numbers (one value per line, '\n'-separated)
+// from fileName into vec. Blank lines and lines beginning with whitespace
+// are skipped. Reports an error when the file cannot be opened.
+void readDoubleVecFromFile(Vdouble &vec,string fileName){
+	ifstream in(fileName.c_str());
+	if (!in){
+		string err="in function readDoubleVecFromFile, empty file or non-existant:";
+		err+=fileName;
+		errorMsg::reportError(err);
+	}
+	string row;
+	while (!in.eof()){
+		getline(in,row,'\n');
+		// skip blank lines and lines whose first character is whitespace
+		if (row=="" || isspace(*(row.c_str()))) continue;
+		vec.push_back(atof(row.c_str()));
+	}
+	in.close();
+}
+
+// Standardize vec in place: subtract the mean, divide by the population
+// standard deviation. NOTE(review): a constant vector yields stdDev == 0
+// and therefore a division by zero -- callers must ensure vec has spread.
+void normalize(Vdouble &vec){
+	const int N=vec.size();
+	MDOUBLE sum=0.0, squareSum=0.0;
+	for (int i=0;i<N;i++) {
+		sum+=vec[i];
+		squareSum+=vec[i]*vec[i];
+	}
+	const MDOUBLE avg=sum/N;
+	const MDOUBLE stdDev=sqrt(squareSum/N - avg*avg);
+	for (int i=0;i<N;i++)
+		vec[i]=(vec[i]-avg)/stdDev;
+}
+
+// Scale vec in place by its mean, so the rescaled vector averages to 1.
+// (A dead squareSum accumulation was removed: it was computed but unused.)
+void scaleByAverage(Vdouble &vec){
+	const int N=vec.size();
+	MDOUBLE sum=0.0;
+	for (int i=0;i<N;i++)
+		sum+=vec[i];
+	const MDOUBLE avg=sum/N;
+	for (int i=0;i<N;i++)
+		vec[i]/=avg;
+}
+
+// Solve the n x n linear system A*x = b and return x. A and b are taken by
+// value because ludcmp/lubksb overwrite their inputs.
+Vdouble solveLinearEquations(VVdouble A,Vdouble b){
+	MDOUBLE d;    // row-interchange parity from ludcmp; unused here
+	Vdouble indx; // permutation record filled in by ludcmp
+	ludcmp(A,indx,d);  // LU-decompose A in place
+	lubksb(A,indx,b);  // back-substitute; b becomes the solution x
+	return b;
+}
+
+
+// LU decomposition with partial (row) pivoting, after Numerical Recipes'
+// ludcmp: decomposes a in place into its L and U factors, records the row
+// permutation in indx (resized here to a.size()), and sets d to +/-1
+// according to the parity of row interchanges. Reports an error when a
+// row of the matrix is all zeros (singular matrix).
+void ludcmp(VVdouble &a, Vdouble &indx, MDOUBLE &d)
+{
+	const MDOUBLE TINY=1.0e-20; // substituted for an exactly-zero pivot
+	int i,imax=0,j,k;
+	MDOUBLE big,dum,sum,temp;
+
+	int n=a.size();
+	Vdouble vv(n); // implicit scaling factor of each row (1/row max)
+	indx.resize(n);//my addition
+	d=1.0;
+	// find the largest element of each row for the implicit scaling
+	for (i=0;i<n;i++) {
+		big=0.0;
+		for (j=0;j<n;j++)
+			if ((temp=fabs(a[i][j])) > big) big=temp;
+		if (big == 0.0) errorMsg::reportError("Singular matrix in routine ludcmp");
+		vv[i]=1.0/big;
+	}
+	// Crout's method: loop over columns
+	for (j=0;j<n;j++) {
+		for (i=0;i<j;i++) {
+			sum=a[i][j];
+			for (k=0;k<i;k++) sum -= a[i][k]*a[k][j];
+			a[i][j]=sum;
+		}
+		big=0.0;
+		// search for the largest (scaled) pivot in this column
+		for (i=j;i<n;i++) {
+			sum=a[i][j];
+			for (k=0;k<j;k++) sum -= a[i][k]*a[k][j];
+			a[i][j]=sum;
+			if ((dum=vv[i]*fabs(sum)) >= big) {
+				big=dum;
+				imax=i;
+			}
+		}
+		// interchange rows if needed and flip the parity d
+		if (j != imax) {
+			for (k=0;k<n;k++) {
+				dum=a[imax][k];
+				a[imax][k]=a[j][k];
+				a[j][k]=dum;
+			}
+			d = -d;
+			vv[imax]=vv[j];
+		}
+		indx[j]=imax;
+		if (a[j][j] == 0.0) a[j][j]=TINY; // avoid dividing by a zero pivot
+		if (j != n-1) {
+			dum=1.0/(a[j][j]);
+			for (i=j+1;i<n;i++) a[i][j] *= dum;
+		}
+	}
+}
+
+
+
+// Forward and back substitution companion to ludcmp (Numerical Recipes'
+// lubksb): solves a*x = b using the LU factors stored in a and the row
+// permutation in indx; the solution is returned in b. ii tracks the index
+// of the first nonzero element of b so leading zeros can be skipped in the
+// forward pass.
+void lubksb(VVdouble &a, Vdouble &indx, Vdouble &b)
+{
+	int i,ii=0,ip,j;
+	MDOUBLE sum;
+
+	int n=a.size();
+	// forward substitution, unscrambling the permutation as we go
+	for (i=0;i<n;i++) {
+		ip=(int)(indx[i]);
+		sum=b[ip];
+		b[ip]=b[i];
+		if (ii != 0)
+			for (j=ii-1;j<i;j++) sum -= a[i][j]*b[j];
+		else if (sum != 0.0)
+			ii=i+1; // first nonzero element of b found
+		b[i]=sum;
+	}
+	// back substitution
+	for (i=n-1;i>=0;i--) {
+		sum=b[i];
+		for (j=i+1;j<n;j++) sum -= a[i][j]*b[j];
+		b[i]=sum/a[i][i];
+	}
+}
+
+// Return the entry-wise 1-norm of mat: the sum of |mat[i][j]| over all cells.
+MDOUBLE getMatrixNorm(const VVdouble &mat) {
+	MDOUBLE total = 0.0;
+	for (size_t i=0; i<mat.size(); ++i)
+		for (size_t j=0; j<mat[i].size(); ++j)
+			total += fabs(mat[i][j]);
+	return total;
+}
+
+/********************************************************************************************
+*********************************************************************************************/
+// Resize a 4-dimensional vector to dim1 x dim2 x dim3 x dim4; the two
+// innermost dimensions are zero-filled via resizeMatrix.
+void resize_VVVV(int dim1, int dim2, int dim3, int dim4, VVVVdouble& vetor){
+	vetor.resize(dim1);
+	for (size_t i=0;i<vetor.size();++i){
+		vetor[i].resize(dim2);
+		for (size_t j=0;j<vetor[i].size();++j)
+			resizeMatrix(vetor[i][j],dim3,dim4);
+	}
+}
+/********************************************************************************************
+*********************************************************************************************/
+// Resize a 3-dimensional vector to dim1 x dim2 x dim3 (the inner matrices
+// are zero-filled via resizeMatrix).
+void resize_VVV(int dim1, int dim2, int dim3, VVVdouble& vetor){
+	vetor.resize(dim1);
+	for (size_t i=0;i<vetor.size();++i)
+		resizeMatrix(vetor[i],dim2,dim3);
+}
+
+
+
diff --git a/libs/phylogeny/matrixUtils.h b/libs/phylogeny/matrixUtils.h
new file mode 100644
index 0000000..0eb7174
--- /dev/null
+++ b/libs/phylogeny/matrixUtils.h
@@ -0,0 +1,148 @@
+#ifndef ___MATRIX_UTIL_H
+#define ___MATRIX_UTIL_H
+
+#include "definitions.h"
+#include "logFile.h"
+#include "errorMsg.h"
+#include <string>
+#include <vector>
+#include <fstream>
+#include <iostream>
+
+class sequenceContainer;
+using namespace std;
+
+
+
+void printMatrix(const VVdouble &mat, ostream &out);
+void printMatrix(const VVint &mat, ostream &out) ;
+
+void readMatrixFromFile(VVdouble &mat,string fileName);
+
+Vdouble getDiagonalFromMatrix(VVdouble &mat);
+Vdouble getSubDiagonalFromMatrix(VVdouble &mat);
+
+//get the first norm sum{abs(Mij)}
+MDOUBLE getMatrixNorm(const VVdouble &mat);
+
+// Resize mat to rows x columns, zero-filling every cell.
+template<typename _T>
+void resizeMatrix(vector<vector< _T> > &mat, int rows, int columns){
+	mat.assign(rows, vector< _T>(columns, _T(0.0)));
+}
+
+// Turn m into the n x n identity matrix (resizeMatrix zero-fills, so only
+// the diagonal needs to be set afterwards).
+template<typename _T>
+void unitMatrix(vector<vector< _T> > &m, int n){
+	resizeMatrix(m,n,n);
+	for (int i=0; i<n; ++i)
+		m[i][i]=1;
+}
+
+// Set every existing cell of m to zero; the dimensions are unchanged.
+template<typename _T>
+void zeroMatrix(vector<vector< _T> > &m){
+	for (size_t i=0; i < m.size(); ++i)
+		for (size_t j=0; j < m[i].size(); ++j)
+			m[i][j]=0;
+}
+
+// Set every existing cell of m to one; the dimensions are unchanged.
+template<typename _T>
+void oneMatrix(vector<vector< _T> > &m){
+	for (size_t i=0; i < m.size(); ++i)
+		for (size_t j=0; j < m[i].size(); ++j)
+			m[i][j]=1;
+}
+
+
+//assumes that #columns in mat1 = #rows in mat2
+// Returns the matrix product mat1 * mat2, sized (#rows of mat1) x
+// (#columns of mat2). The sizing and loop bounds are correct for
+// rectangular inputs, not only square ones. Reports an error if either
+// input is empty.
+template<typename _T>
+vector<vector< _T> > multiplyMatrixes(vector<vector< _T> > &mat1, vector<vector< _T> > &mat2){
+	vector<vector< _T> > mat;
+	if ((mat1.size()==0) || (mat2.size() ==0))
+		errorMsg::reportError("Error in multiplyMatrixes, one of the matrices inputted is of size 0");
+	int numRows = mat1.size();        // rows of the result
+	int numColumns = mat2[0].size();  // columns of the result
+	int innerDim = mat2.size();       // == #columns of mat1
+	resizeMatrix(mat,numRows,numColumns);
+	for (int i=0; i<numRows; i++){
+		for (int j=0; j<numColumns;j++){
+			for (int k=0;k<innerDim;k++){
+				mat[i][j]+=mat1[i][k]*mat2[k][j];
+			}
+		}
+	}
+	return mat;
+}
+
+// Return a copy of mat with every element multiplied by scalar.
+template<typename _T>
+vector<vector< _T> > multiplyMatrixByScalar(const vector<vector< _T> > &mat, MDOUBLE scalar) {
+	vector<vector< _T> > scaled = mat;
+	for (size_t i=0; i<scaled.size(); ++i)
+		for (size_t j=0; j<scaled[i].size(); ++j)
+			scaled[i][j]*=scalar;
+	return scaled;
+}
+
+template<typename _T>
+vector<vector< _T> > add(const vector<vector< _T> > &mat1,const vector<vector< _T> > &mat2){
+ if (mat1.size()!=mat2.size()) errorMsg::reportError("different sized matrices in matrixUtils::add");
+ vector<vector< _T> > newMat(mat1.size());
+ for (int i=0;i<mat1.size();i++){
+ if (mat1[i].size()!=mat2[i].size()) errorMsg::reportError("different sized matrices in matrixUtils::add");
+ newMat[i].resize(mat1[i].size());
+ for (int j=0;j<mat1.size();j++){
+ newMat[i][j]=mat1[i][j]+mat2[i][j];
+ }
+ }
+ return newMat;
+}
+
+// Print vec to out, one element per line when printVertical is true,
+// otherwise space-separated; a final endl is always written.
+template<typename _T>
+void printVec(vector< _T> &vec,ostream &out=cout,bool printVertical=true) {
+	const char* sep = printVertical ? "\n" : " ";
+	for (size_t i=0; i<vec.size(); ++i)
+		out<< vec[i] << sep;
+	out<<endl;
+}
+
+
+
+VVdouble transpose(const VVdouble &mat);
+VVdouble subtract(const VVdouble &mat1,const VVdouble &mat2);
+VVdouble reverseSign(const VVdouble &mat1);
+
+void findMaxInVector(const Vdouble &vec, MDOUBLE &maxValue, int &argmax) ;
+void findMinInVector(const Vdouble &vec, MDOUBLE &minValue, int &argmin) ;
+MDOUBLE averageElementInVector(const Vdouble &vec) ;
+void appendBinaryVectors(vector <int> &vec1, const vector <int> &vec2);
+void appendVectors(Vint &vec1, const Vint &vec2);
+Vint complementBinaryVec(vector <int>&bufferVec) ; // returns complementary binary vector
+void readDoubleVecFromFile(Vdouble &vec,string fileName); //reads a vertical vector (separated by \n)
+
+void normalize(Vdouble &vec);
+void scaleByAverage(Vdouble &vec);
+
+
+//solve nxn linear equations of the form Ax=b; return x;
+Vdouble solveLinearEquations(VVdouble A,Vdouble b);
+// functions from numerical recipes that solve nxn linear equations
+void lubksb(VVdouble &a, Vdouble &indx, Vdouble &b);
+void ludcmp(VVdouble &a, Vdouble &indx, MDOUBLE &d);
+
+void resize_VVVV(int dim1, int dim2, int dim3, int dim4, VVVVdouble& vetor);
+void resize_VVV(int dim1, int dim2, int dim3, VVVdouble& vetor);
+
+
+
+
+#endif
diff --git a/libs/phylogeny/mixtureDistribution.cpp b/libs/phylogeny/mixtureDistribution.cpp
new file mode 100644
index 0000000..ab29188
--- /dev/null
+++ b/libs/phylogeny/mixtureDistribution.cpp
@@ -0,0 +1,311 @@
+#include "mixtureDistribution.h"
+#include "generalGammaDistributionLaguerre.h"
+#include "talRandom.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+
+#include <cmath>
+
+
// Construct a mixture from ready-made gamma components.
// Each component is deep-copied (cloned), so the caller keeps ownership of
// the pointers in `components`. componentsProb must sum to 1.0 (enforced by
// setComponentsProb).
// NOTE(review): the gammaType argument is accepted but never used in this
// overload -- each clone keeps whatever quadrature type its source had;
// confirm this is intended.
mixtureDistribution::mixtureDistribution(const vector<generalGammaDistribution*>& components, const Vdouble& componentsProb, quadratureType gammaType)
{
	if (components.size() < 1)
		errorMsg::reportError("the number of Gamma components must be positive");

	_components.clear();
	for (int i = 0; i < components.size(); ++i)
	{
		// clone() returns a base-class pointer; the stored objects are
		// generalGammaDistribution-derived, hence the static_cast
		generalGammaDistribution* comp = static_cast<generalGammaDistribution*>(components[i]->clone());
		_components.push_back(comp);
	}

	_globalRate = 1.0;
	setComponentsProb(componentsProb);
}
+
+
+//init the mixture with componentsNum components - the alpha, beta, and probability for each component is assigned "randomly"
+mixtureDistribution::mixtureDistribution(int componentsNum, int categoriesNumInComponent, quadratureType gammaType/*=LAGUERRE*/, MDOUBLE maxAlpha/*=5.0*/, MDOUBLE maxBeta/*=5.0*/)
+{
+ if (componentsNum < 1)
+ errorMsg::reportError("the number of Gamma components must be positive");
+
+ _components.clear();
+ Vdouble componentsProb(componentsNum, 0);
+ for (int i = 0; i < componentsNum; ++i)
+ {
+ MDOUBLE alpha = talRandom::giveRandomNumberBetweenZeroAndEntry(maxAlpha);
+ MDOUBLE beta = talRandom::giveRandomNumberBetweenZeroAndEntry(maxBeta);
+ componentsProb[i] = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
+ generalGammaDistribution* pComp;
+ switch (gammaType)
+ {
+ case LAGUERRE:
+ pComp = new generalGammaDistributionLaguerre(alpha, beta, categoriesNumInComponent);
+ break;
+ case QUANTILE:
+ pComp = new generalGammaDistribution(alpha, beta, categoriesNumInComponent);
+ break;
+ default:
+ errorMsg::reportError("unknown quadrature type in mixtureDistribution");
+ }
+ _components.push_back(pComp);
+ }
+
+ scaleVec(componentsProb, 1.0/componentsNum);
+ setComponentsProb(componentsProb);
+ _globalRate = 1.0;
+}
+//init the mixture with componentsNum components - the alpha, beta, and probability for each component is assigned with given values
+mixtureDistribution::mixtureDistribution(int componentsNum, int categoriesNumInComponent,Vdouble AlphaInit ,Vdouble BetaInit, Vdouble componentProbInit ,quadratureType gammaType/*=LAGUERRE*/, MDOUBLE maxAlpha/*=5.0*/, MDOUBLE maxBeta/*=5.0*/)
+{
+ if (componentsNum < 1)
+ errorMsg::reportError("the number of Gamma components must be positive");
+
+ _components.clear();
+ Vdouble componentsProb(componentsNum, 0);
+ for (int i = 0; i < componentsNum; ++i)
+ {
+ MDOUBLE alpha = AlphaInit[i];
+ MDOUBLE beta = BetaInit[i];
+ componentsProb[i] = componentProbInit[i];
+ generalGammaDistribution* pComp;
+ switch (gammaType)
+ {
+ case LAGUERRE:
+ pComp = new generalGammaDistributionLaguerre(alpha, beta, categoriesNumInComponent);
+ break;
+ case QUANTILE:
+ pComp = new generalGammaDistribution(alpha, beta, categoriesNumInComponent);
+ break;
+ default:
+ errorMsg::reportError("unknown quadrature type in mixtureDistribution");
+ }
+ _components.push_back(pComp);
+ }
+
+ scaleVec(componentsProb, 1.0/componentsNum);
+ setComponentsProb(componentsProb);
+ _globalRate = 1.0;
+}
+
+mixtureDistribution::mixtureDistribution(const mixtureDistribution& other)
+: _componentsWeight(other._componentsWeight),
+ _globalRate(other._globalRate),
+ _totalWeight(other._totalWeight)
+{
+ _components.clear();
+ for (int i = 0; i < other.getComponentsNum(); ++i)
+ {
+ generalGammaDistribution* comp = static_cast<generalGammaDistribution*>(other._components[i]->clone());
+ _components.push_back(comp);
+ }
+}
+
+
+mixtureDistribution& mixtureDistribution::operator=(const mixtureDistribution &otherDist)
+{
+ _globalRate = otherDist._globalRate;
+ _componentsWeight = otherDist._componentsWeight;
+ _totalWeight = otherDist._totalWeight;
+ if (this != &otherDist) // Check for self-assignment
+ {
+ for (int i = 0; i < getComponentsNum(); ++i)
+ {
+ if (_components[i] != NULL)
+ {
+ generalGammaDistribution* pComp = static_cast<generalGammaDistribution*>(otherDist.getComponent(i)->clone());
+ delete _components[i];
+ _components[i] = pComp;;
+ }
+ }
+ }
+ return *this;
+}
+
+
+void mixtureDistribution::clear()
+{
+ for (int i = 0; i < getComponentsNum(); ++i)
+ {
+ if (_components[i] != NULL)
+ {
+ delete _components[i];
+ _components[i] = NULL;
+ }
+ }
+ _components.clear();
+}
+
+
+mixtureDistribution::~mixtureDistribution()
+{
+ clear();
+}
+
+const int mixtureDistribution::categories() const
+{
+ int res = 0;
+ for (int i = 0; i < getComponentsNum(); ++i)
+ {
+ res += _components[i]->categories();
+ }
+ return res;
+}
+
+void mixtureDistribution::setComponentsProb(const Vdouble& componentsProb)
+{
+ if (getComponentsNum() != componentsProb.size())
+ errorMsg::reportError("the number of Gamma components is not the same as the number of probabilities");
+ _totalWeight = 0.0;
+ for (int i = 0; i < componentsProb.size(); ++i)
+ _totalWeight += componentsProb[i];
+ if (!DEQUAL(_totalWeight, 1.0))
+ errorMsg::reportError("the sum of components probabilities must sum to 1.0");
+ _componentsWeight = componentsProb;
+}
+
+
+void mixtureDistribution::change_number_of_categoriesPerComp(int in_number_of_categories)
+{
+ for (int i = 0; i <getComponentsNum(); ++i)
+ _components[i]->change_number_of_categories(in_number_of_categories);
+}
+
//change_number_of_components: only growing by exactly one component is
//supported: when in_number_of_components == getComponentsNum() + 1 the first
//component is duplicated and the weights adjusted so the two identical
//copies share the first component's probability. Any other change is an
//error. (The condition below tests current == requested - 1.)
void mixtureDistribution::change_number_of_components(const int in_number_of_components)
{
	if (getComponentsNum() == in_number_of_components)
		return; // already the requested size -- nothing to do
	else if (getComponentsNum() == in_number_of_components - 1)
	{
		//duplicate the first component
		normalizeProbabilities();
		generalGammaDistribution* comp = static_cast<generalGammaDistribution*>(_components[0]->clone());
		_components.push_back(comp);
		//adjust the components probabilities so that the probs of the
		//two identical components (i.e., 0 and the new Comp) are equal
		_componentsWeight[0] /= 2;
		_componentsWeight.push_back(_componentsWeight[0]);
		normalizeProbabilities();
	}
	else
		errorMsg::reportError("cannot change the number of components in mixtureDistribution::change_number_of_components()");
}
+
+
+const MDOUBLE mixtureDistribution::getCumulativeProb(const MDOUBLE x) const
+{
+ MDOUBLE res = 0.0;
+ for (int i = 0; i < getComponentsNum(); ++i)
+ res += _components[i]->getCumulativeProb(x) * getComponentProb(i);
+ return res;
+}
+
+const MDOUBLE mixtureDistribution::rates(const int category) const
+{
+ if (category > categories() - 1)
+ errorMsg::reportError("the required category does not exist!");
+ int componentNum, categoryInComponent, totalCat = 0;
+ for (int i = 0; i < getComponentsNum(); ++i)
+ {
+ if (category < _components[i]->categories() + totalCat)
+ {
+ componentNum = i;
+ categoryInComponent = category - totalCat;
+ break;
+ }
+ totalCat += _components[i]->categories();
+ }
+ return _components[componentNum]->rates(categoryInComponent) * _globalRate;
+}
+
+const MDOUBLE mixtureDistribution::ratesProb(const int category) const
+{
+ if (category > categories() - 1)
+ errorMsg::reportError("there required category does not exist!");
+ int componentNum, categoryInComponent, totalCat = 0;
+ for (int i = 0; i < getComponentsNum(); ++i)
+ {
+ if (category < _components[i]->categories() + totalCat)
+ {
+ componentNum = i;
+ categoryInComponent = category - totalCat;
+ break;
+ }
+ totalCat += _components[i]->categories();
+ }
+ return getComponentProb(componentNum) * _components[componentNum]->ratesProb(categoryInComponent);
+}
+
+
+void mixtureDistribution::setMixtureParameters(const Vdouble& alphaVec, const Vdouble& betaVec, const Vdouble& componentsProb)
+{
+ if (alphaVec.size() != getComponentsNum())
+ errorMsg::reportError("the size of the alphas vector is not identical to the number of components");
+ if (betaVec.size() != getComponentsNum())
+ errorMsg::reportError("the size of the batas vector is not identical to the number of components");
+ if (componentsProb.size() != getComponentsNum())
+ errorMsg::reportError("the size of the components probabilities vector is not identical to the number of components");
+
+ setComponentsProb(componentsProb);
+ int categoriesInComponent = _components[0]->categories();
+ for (int i = 0; i < getComponentsNum(); ++i)
+ _components[i]->setGammaParameters(categoriesInComponent, alphaVec[i], betaVec[i]);
+}
+
+//the following functions set the components probabilities.
+//Note, that the new prob is not inWeight, but is scaled so that the total probabilities are 1.0
+void mixtureDistribution::setComponentWeight(MDOUBLE inWeight, const int componentNum, const MDOUBLE minWeight/*=0.01*/)
+{
+ if((inWeight<0.0) || (inWeight>1.0)){
+ errorMsg::reportError("the probability assignment is not [0,1]");
+ }
+ if (inWeight < minWeight)
+ inWeight = minWeight;
+ MDOUBLE otherProbs = 1-inWeight;
+ Vdouble probs(getComponentsNum(), 0.0);
+ MDOUBLE sumOther = 0.0;
+ int i;
+ for (i = 0; i < getComponentsNum(); ++i)
+ {
+ if (i != componentNum)
+ sumOther += _componentsWeight[i];
+ }
+ MDOUBLE factor = otherProbs / sumOther;
+ for (i = 0; i < getComponentsNum(); ++i)
+ {
+ probs[i] = _componentsWeight[i] * factor ;
+ }
+ probs[componentNum] = inWeight;
+ setComponentsProb(probs);
+
+ //_totalWeight -= _componentsWeight[componentNum];
+ // _componentsWeight[componentNum] = inWeight;
+ //_totalWeight += _componentsWeight[componentNum];
+}
+
+//scale the components weights so that they sum to 1.0.
+void mixtureDistribution::normalizeProbabilities()
+{
+ if (_componentsWeight.size() != getComponentsNum())
+ errorMsg::reportError("problem in mixtureDistribution::normalizeProbabilities()");
+ int i;
+ for(i = 0; i < getComponentsNum(); ++i)
+ {
+ _componentsWeight[i] /= _totalWeight;
+ }
+ _totalWeight = 1.0;
+}
+
+void mixtureDistribution::printParams(ostream& outF)
+{
+ MDOUBLE avgRate = 0.0;
+ for (int k = 0; k < getComponentsNum(); ++k)
+ {
+ outF << "comp="<<k<<" Alp/Beta= "<<getAlpha(k)/getBeta(k)<<" alpha= "<<getAlpha(k) << " beta= " <<getBeta(k)<<" Prob= "<<getComponentProb(k)<<endl;
+ avgRate += (getAlpha(k) / getBeta(k)) * getComponentProb(k);
+ }
+ outF<<"# The prior average rate is: " <<avgRate<<endl;
+}
\ No newline at end of file
diff --git a/libs/phylogeny/mixtureDistribution.h b/libs/phylogeny/mixtureDistribution.h
new file mode 100644
index 0000000..1de0a42
--- /dev/null
+++ b/libs/phylogeny/mixtureDistribution.h
@@ -0,0 +1,67 @@
+#ifndef ___MIXTURE_DIST
+#define ___MIXTURE_DIST
+/************************************************************
+The mixture distribution is combined of several gamma distributions (components).
+Each one of the gamma component has its own probability of occurance = Hi,
+such that the sum of Hi equals 1.0.
+The categories probabilities are the probability of each component multiply by the category probabilty in the component.
+In case the Laguerre option is on:
+the actuall number of cateories (per component) can be lower than the requested number of categories.
+************************************************************/
+#include "definitions.h"
+#include "generalGammaDistribution.h"
+
// A mixture of several generalGammaDistribution components, each with its
// own weight; the weights (normalized by _totalWeight) sum to 1.0.
class mixtureDistribution : public distribution {
public:
	// Build from ready-made components; each one is cloned, and
	// componentsProb must sum to 1.0.
	explicit mixtureDistribution(const vector<generalGammaDistribution*>& components, const Vdouble& componentsProb, quadratureType gammaType);
	// Build componentsNum components with randomly drawn alpha/beta/probability.
	explicit mixtureDistribution(int componentsNum, int categoriesNumInComponent, quadratureType gammaType = LAGUERRE, MDOUBLE maxAlpha = 15.0, MDOUBLE maxBeta = 15.0);
	// Build componentsNum components from the given alpha/beta/probability vectors.
	explicit mixtureDistribution(int componentsNum, int categoriesNumInComponent,Vdouble AlphaInit ,Vdouble BetaInit, Vdouble componentProbInit ,quadratureType gammaType = QUANTILE, MDOUBLE maxAlpha = 15.0, MDOUBLE maxBeta = 15.0);

	mixtureDistribution(const mixtureDistribution& other); // deep copy: components are cloned

	mixtureDistribution& operator=(const mixtureDistribution &otherDist);
	virtual distribution* clone() const { return new mixtureDistribution(*this); }
	virtual ~mixtureDistribution();

	//get+set the parameters of the mixture
	void setMixtureParameters(const Vdouble& alphaVec, const Vdouble& betaVec, const Vdouble& componentsProb);
	const generalGammaDistribution* getComponent(int componentNum) const {return _components[componentNum];}
	const int getComponentsNum() const {return _components.size();}
	// total number of categories, summed over all components
	const int categories() const;
	//change_number_of_categoriesPerComp: change the number of categorites for each component. The total number of categories will be (in_number_of_categories*componentNum)
	void change_number_of_categoriesPerComp(int in_number_of_categories);
	// only growing by exactly one component (duplicating the first) is supported
	void change_number_of_components(const int in_number_of_components);
	const int categoriesForOneComponent() const {return _components[0]->categories();}
	MDOUBLE getAlpha(int componentNum) const {return _components[componentNum]->getAlpha();}
	void setAlpha(MDOUBLE newAlpha, int componentNum) {_components[componentNum]->setAlpha(newAlpha);}
	MDOUBLE getBeta(int componentNum) const {return _components[componentNum]->getBeta();}
	void setBeta(MDOUBLE newBeta, int componentNum) {_components[componentNum]->setBeta(newBeta);}
	void setGammaParameters(int numOfCategories ,MDOUBLE alpha, MDOUBLE beta, int componentNum) {_components[componentNum]->setGammaParameters(numOfCategories ,alpha, beta);}
	// normalized probability of one component (its weight / total weight)
	const MDOUBLE getComponentProb(int componentNum) const {return _componentsWeight[componentNum] / _totalWeight;}
	void setComponentsProb(const Vdouble& componentsProb);
	void setGlobalRate(const MDOUBLE r) {_globalRate = r;}
	MDOUBLE getGlobalRate() const {return _globalRate;}

	//the following function set the components weights.
	//Note that the new component prob is not inWeight, but is scaled so that the total probabilities are 1.0
	void setComponentWeight(MDOUBLE inWeight, const int componentNum, const MDOUBLE minWeight =0.01);
	const MDOUBLE getComponentWeight(int componentNum) const {return _componentsWeight[componentNum];}
	//scale the components weights so that they sum to 1.0.
	void normalizeProbabilities();

	//get distribution statistics
	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
	virtual const MDOUBLE rates(const int category) const;
	virtual const MDOUBLE ratesProb(const int i) const;

	void printParams(ostream& outF );

private:
	void clear(); // delete all owned components
private:
	vector<generalGammaDistribution*> _components; // owned; released in clear()
	Vdouble _componentsWeight; // un-normalized component weights
	MDOUBLE _globalRate; // multiplies every rate returned by rates()
	MDOUBLE _totalWeight; //holds the sum of the components probabilities. This is saved so that we don't need to sum all weight each time getProb() is called
};
+#endif
diff --git a/libs/phylogeny/molphyFormat.cpp b/libs/phylogeny/molphyFormat.cpp
new file mode 100644
index 0000000..8956ae2
--- /dev/null
+++ b/libs/phylogeny/molphyFormat.cpp
@@ -0,0 +1,85 @@
+// $Id: molphyFormat.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "molphyFormat.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+
+sequenceContainer molphyFormat::read(istream &infile, const alphabet* alph) {
+ sequenceContainer mySeqData = readUnAligned(infile, alph);
+ mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
+ return mySeqData;
+}
// Read MOLPHY-format sequences without requiring equal lengths.
// Expected layout: a first line "<numSeq> <seqLen>", then for each sequence
// a name line followed by as many lines as needed to reach seqLen characters.
sequenceContainer molphyFormat::readUnAligned(istream &infile, const alphabet* alph) {

	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile,seqFileData);
	if (seqFileData.empty()){
		errorMsg::reportError("unable to open file, or file is empty in molphy format");
	}
	vector<string>::iterator currentLinePosition = seqFileData.begin();

	// parse "<numSeq> <seqLen>" from the first line
	string::const_iterator itStr = seqFileData.begin()->begin();
	string::const_iterator itStrEnd = seqFileData.begin()->end();

	int f_numSeq;
	bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
	if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading MOLPHY sequence format");
	int f_seqLength;
	bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
	if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading MOLPHY sequence format");
	currentLinePosition++; // we read the first line.
	// NOTE(review): f_numSeq is parsed but never used below -- the loop simply
	// reads sequences until the end of the file.

//---------------------------------------------------------------------
	sequenceContainer mySeqData;

//---------------------------------------------------------------------
//	vector<sequenceContainer::sequenceDatum*> vec;
//	seqDataPtr->getSequenceDatumPtrVectorNonConst(vec);

	int localID=-1;

	vector<string>::const_iterator it1 = seqFileData.begin();
	++it1; //skipping the first line that was read already.
	while (it1!= seqFileData.end()) {
		// NOTE(review): localID is also incremented for empty lines that are
		// skipped just below, leaving gaps in the ids -- confirm intended.
		localID++;
		if (it1->empty()) {
			it1++;
			continue; // empty line continue
		}
		// read the name.
		string name(*it1);
		it1++;

		// accumulate sequence lines until at least f_seqLength characters are
		// collected (size() is unsigned; this assumes f_seqLength >= 0)
		string tmpString;
		while (it1 != seqFileData.end()) {
			if (tmpString.size() < f_seqLength) {
				tmpString+=*it1;
				++it1;
			}
			else break;
		}

		mySeqData.add(sequence(tmpString,name,"",localID,alph));

	}
	return mySeqData;
}
+
+
+
+
+void molphyFormat::write(ostream &out, const sequenceContainer& sd) {
+ out<<sd.numberOfSeqs()<<" "<<sd.seqLen()<<endl;
+ for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
+ out<<it5->name()<<endl;
+ string seqString = it5->toString();
+ int k=0;
+ for (string::const_iterator cPos=seqString.begin() ; cPos != seqString.end() ; cPos ++,k++ ) {
+ if (k>0 && ((k%60)==0)) out<<endl;
+ out<<*cPos;
+ }
+ out<<endl;
+ }
+}
+
+
+
diff --git a/libs/phylogeny/molphyFormat.h b/libs/phylogeny/molphyFormat.h
new file mode 100644
index 0000000..a2b4897
--- /dev/null
+++ b/libs/phylogeny/molphyFormat.h
@@ -0,0 +1,47 @@
+// $Id: molphyFormat.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___MOLPHY_FORMAT
+#define ___MOLPHY_FORMAT
+
+#include "sequenceContainer.h"
+
// Reader/writer for the MOLPHY sequence file format (see the example below).
class molphyFormat{
public:
	// read: all sequences must end up the same length (checked after parsing)
	static sequenceContainer read(istream &infile, const alphabet* alph);
	// write: header line "<numSeq> <seqLen>", then name + 60-char-wrapped data
	static void write(ostream &out, const sequenceContainer& sd);
	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
};
+
+#endif
+
+/* EXAMPLE OF MOLPHY FORMAT:
+
+6 128
+Langur
+KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQIN
+SRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVS
+QYVKGCGV
+Baboon
+KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQIN
+SHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVS
+QYVQGCGV
+Human
+KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQIN
+SRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVR
+QYVQGCGV
+Rat
+KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQIN
+SRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLS
+GYIRNCGV
+Cow
+KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQIN
+SKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVS
+SYVEGCTL
+Horse
+KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLN
+NKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLS
+EYLASCNL
+
+*/
+
diff --git a/libs/phylogeny/mtREV24.dat.q b/libs/phylogeny/mtREV24.dat.q
new file mode 100644
index 0000000..c4653f8
--- /dev/null
+++ b/libs/phylogeny/mtREV24.dat.q
@@ -0,0 +1,35 @@
+" "
+" 23.18 "
+" 26.95 13.24 "
+" 17.67 1.90 794.38 "
+" 59.93 103.33 58.94 1.90 "
+" 1.90 220.99 173.56 55.28 75.24 "
+" 9.77 1.90 63.05 583.55 1.90 313.56 "
+" 120.71 23.03 53.30 56.77 30.71 6.75 28.28 "
+" 13.90 165.23 496.13 113.99 141.49 582.40 49.12 1.90 "
+" 96.49 1.90 27.10 4.34 62.73 8.34 3.31 5.98 12.26 "
+" 25.46 15.58 15.16 1.90 25.65 39.70 1.90 2.41 11.49 329.09 "
+" 8.36 141.40 608.70 2.31 1.90 465.58 313.86 22.73 127.67 19.57 14.88 "
+" 141.88 1.90 65.41 1.90 6.18 47.37 1.90 1.90 11.97 517.98 537.53 91.37 "
+" 6.37 4.69 15.20 4.98 70.80 19.11 2.67 1.90 48.16 84.67 216.06 6.44 90.82 "
+" 54.31 23.64 73.31 13.43 31.26 137.29 12.83 1.90 60.97 20.63 40.10 50.10 18.84 17.31 "
+" 387.86 6.04 494.39 69.02 277.05 54.11 54.71 125.93 77.46 47.70 73.61 105.79 111.16 64.29 169.90 "
+" 480.72 2.08 238.46 28.01 179.97 94.93 14.82 11.17 44.78 368.43 126.40 136.33 528.17 33.85 128.22 597.21 "
+" 1.90 21.95 10.68 19.86 33.60 1.90 1.90 10.92 7.08 1.90 32.44 24.00 21.71 7.84 4.21 38.58 9.99 "
+" 6.48 1.90 191.36 21.21 254.77 38.82 13.12 3.21 670.14 25.01 44.15 51.17 39.96 465.58 16.21 64.92 38.73 26.25 "
+" 195.06 7.64 1.90 1.90 1.90 19.00 21.14 2.53 1.90 1222.94 91.67 1.90 387.54 6.35 8.23 1.90 204.54 5.37 1.90 "
+" 0.072 0.019 0.039 0.019 0.006 0.025 0.024 0.056 0.028 0.088 0.169 "
+" 0.023 0.054 0.061 0.054 0.072 0.086 0.029 0.033 0.043 "
+" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
+" S_ij = S_ji and PI_i for the mtREV24 model (Adachi and Hasegawa 1996). "
+" The PI's used to sum to 0.999 and I changed one of the freq from 0.168 "
+" into 0.169 so that the sum is 1. Prepared by Z. Yang according to "
+" data sent by Dr M. Hasegawa. This matrix was obtained from the 12 "
+" mitochondrial proteins encoded by the same strand of the DNA from a "
+" diverse range of species including bird, fish, frog, lamprey, as well "
+" as mammals (see Adachi and Hasegawa 1996 for details). The other "
+" matrix (mtmam.dat) included in the package is based on the same "
+" proteins from mammals only. "
+" Adachi, J. and Hasegawa, M. (1996) MOLPHY version 2.3: programs for "
+" molecular phylogenetics based on maximum likelihood. Computer Science "
+" Monographs of Institute of Statistical Mathematics 28:1-150. "
diff --git a/libs/phylogeny/mulAlphabet.cpp b/libs/phylogeny/mulAlphabet.cpp
new file mode 100644
index 0000000..bfdfd1b
--- /dev/null
+++ b/libs/phylogeny/mulAlphabet.cpp
@@ -0,0 +1,175 @@
+// $Id: mulAlphabet.cpp 1927 2007-04-04 16:44:23Z privmane $
+
+#include "mulAlphabet.h"
+#include "distribution.h"
+#include "errorMsg.h"
+#include <iostream>
+#include "logFile.h"
+
+
+mulAlphabet::mulAlphabet(const alphabet* baseAlphabet, int mulFactor) :
+_baseAlphabet(baseAlphabet->clone()),
+_mulFactor(mulFactor),
+_size(baseAlphabet->size() * mulFactor)
+{}
+
+mulAlphabet::mulAlphabet(const mulAlphabet& other) :
+_baseAlphabet(other._baseAlphabet->clone()),
+_mulFactor(other._mulFactor),
+_size(other._size)
+{}
+
+mulAlphabet::~mulAlphabet()
+{
+ if (_baseAlphabet) delete (_baseAlphabet);
+}
+
+mulAlphabet& mulAlphabet::operator=(const mulAlphabet &other)
+{
+ if (_baseAlphabet) delete (_baseAlphabet);
+ _baseAlphabet = other._baseAlphabet->clone();
+ _mulFactor = other._mulFactor;
+ _size = other._size;
+ return (*this);
+}
+
+int mulAlphabet::unknown() const
+{
+ return (convertFromBasedAlphaInt(_baseAlphabet->unknown()));
+}
+
+int mulAlphabet::gap() const
+{
+ return (convertFromBasedAlphaInt(_baseAlphabet->gap()));
+}
+
+int mulAlphabet::stringSize() const
+{
+ return _baseAlphabet->stringSize();
+}
+
+bool mulAlphabet::isSpecific(const int id) const
+{
+ if (id >= _size)
+ return false;
+ else
+ return _baseAlphabet->isSpecific(convertToBasedAlphaInt(id));
+}
+
+/* The first _size characters should be first. The rest of the characters aren't multiplied.
+For example, when using nucleotides as the based alphabet and _mulFactor = 2 :
+0 A0
+1 C0
+2 G0
+3 T0
+4 A1
+5 C1
+6 G1
+7 T1
+8 A
+9 C
+10 G
+11 T
+12 U
+13 R
+14 Y
+15 K
+16 M
+17 S
+18 W
+19 B
+20 D
+21 H
+22 V
+23 N
+-1 -
+*/
+
+string mulAlphabet::fromInt(const int id) const
+{
+ // category and categoryName are for debug purpose
+ int category(_mulFactor);
+ if (id>=0)
+ category = min(id / _baseAlphabet->size() , _mulFactor) ;
+ string categoryName("");
+ categoryName = int2string(category);
+ int inCategoryId = convertToBasedAlphaInt(id);
+ return (_baseAlphabet->fromInt(inCategoryId) + categoryName);
+}
+
+int mulAlphabet::convertFromBasedAlphaInt(int id) const
+{
+ if (id < 0)
+ return (id);
+
+ return (id + _size);
+}
+
+int mulAlphabet::fromChar(const string& str, const int pos) const
+{
+ int id = _baseAlphabet->fromChar(str,pos);
+ return (convertFromBasedAlphaInt(id));
+}
+
+
+vector<int> mulAlphabet::fromString(const string &str) const
+{
+ vector<int> result = _baseAlphabet->fromString(str);
+ vector<int>::iterator itr = result.begin();
+ for (; itr != result.end(); ++itr)
+ *itr = convertFromBasedAlphaInt(*itr);
+
+ return (result);
+}
+
+
+int mulAlphabet::convertToBasedAlphaInt(int id) const
+{
+ if (id<0)
+ return (id);
+ if (id >= _size)
+ return (id - _size);
+
+ return (id % _baseAlphabet->size());
+}
+
+
+
+int mulAlphabet::relations(const int charInSeq, const int charToCheck) const
+{
+ int baseAlphabetSize = _baseAlphabet->size();
+ int categoryInSeq(_mulFactor);
+ if (charInSeq>=0)
+ categoryInSeq = min(charInSeq/baseAlphabetSize , _mulFactor);
+
+ int categoryToCheck(_mulFactor);
+ if (charToCheck>=0)
+ categoryToCheck = min(charToCheck/baseAlphabetSize , _mulFactor);
+
+ if (categoryToCheck == _mulFactor)
+ LOG(4,<<"mulAlphabet::relations charToCheck should belong to category < _mulFactor = " << _mulFactor << endl);
+
+ if ((categoryInSeq == categoryToCheck) || (categoryInSeq == _mulFactor))
+ return _baseAlphabet->relations(convertToBasedAlphaInt(charInSeq),convertToBasedAlphaInt(charToCheck));
+
+ return 0;
+}
+
+
+int mulAlphabet::compareCategories(int charA, int charB) const
+{
+ int baseAlphabetSize = _baseAlphabet->size();
+ int categoryA(_mulFactor);
+ if (categoryA>=0)
+ categoryA = min(charA/baseAlphabetSize,_mulFactor);
+
+ int categoryB(_mulFactor);
+ if (categoryB>=0)
+ categoryB = min(charB/baseAlphabetSize,_mulFactor);
+
+ if (categoryA<categoryB)
+ return 1;
+ else if (categoryB<categoryA)
+ return -1;
+ return (0);
+}
diff --git a/libs/phylogeny/mulAlphabet.h b/libs/phylogeny/mulAlphabet.h
new file mode 100644
index 0000000..3f98d02
--- /dev/null
+++ b/libs/phylogeny/mulAlphabet.h
@@ -0,0 +1,51 @@
+// $Id: mulAlphabet.h 1901 2007-03-15 13:21:06Z nimrodru $
+
+// version 1.01
+// last modified 1 Jan 2004
+
+#ifndef ___MUL_ALPHABET_H
+#define ___MUL_ALPHABET_H
+
+#include "definitions.h"
+#include "alphabet.h"
+#include "someUtil.h"
+
// An alphabet whose characters are the base alphabet's characters replicated
// once per category (e.g. nucleotides x 2 -> A0,C0,G0,T0,A1,C1,G1,T1, then
// the non-multiplied base characters shifted past all category blocks).
class mulAlphabet : public alphabet {

public:
	// baseAlphabet is cloned; total size = baseAlphabet->size() * mulFactor
	mulAlphabet(const alphabet* baseAlphabet, int mulFactor);
	mulAlphabet(const mulAlphabet& other);
	virtual ~mulAlphabet();
	virtual alphabet* clone() const { return new mulAlphabet(*this); }
	mulAlphabet& operator=(const mulAlphabet &other);

	int unknown() const ;
	int gap() const;

	int size() const {return _size;}
	int stringSize() const ;
	bool isSpecific(const int id) const ;

	int fromChar(const string& str, const int pos) const;
	vector<int> fromString(const string& str) const;

	// base character followed by its category index (e.g. "A0", "C1")
	string fromInt(const int id) const;

	// non-zero iff the characters' categories are compatible and their base
	// characters relate
	int relations(const int charInSeq, const int charToCheck) const;
	// 1 / -1 / 0 when charA's category is before / after / equal to charB's
	int compareCategories(int charA, int charB) const;
	const alphabet* getBaseAlphabet() const {return _baseAlphabet;}

public:
	// shift a base-alphabet id into this alphabet (negative ids unchanged)
	int convertFromBasedAlphaInt(int id) const;
	// map one of our ids back to the base alphabet
	int convertToBasedAlphaInt(int id) const;

private:
	alphabet* _baseAlphabet; // This alphabet must use single characters, i.e. - not codon. (or we will have to add to every alphabet a member which holds its character's size)
	int _mulFactor ; // number of times that the alphabet is multiplied by = Number of categories (g in Galtier paper)
	int _size ; // this is simply the _baseAlphabet->size() * _mulFactor


};
+
+#endif
+
diff --git a/libs/phylogeny/multipleStochasticProcess.cpp b/libs/phylogeny/multipleStochasticProcess.cpp
new file mode 100644
index 0000000..7a9a0ed
--- /dev/null
+++ b/libs/phylogeny/multipleStochasticProcess.cpp
@@ -0,0 +1,38 @@
+#include "multipleStochasticProcess.h"
+#include "errorMsg.h"
+
+multipleStochasticProcess::multipleStochasticProcess()
+{
+}
+
+
+multipleStochasticProcess::~multipleStochasticProcess()
+{
+}
+
+
+void multipleStochasticProcess::copy(const multipleStochasticProcess *pOther)
+{
+ _spVec = pOther->_spVec;
+ _spProb = pOther->_spProb;
+}
+
+
+MDOUBLE multipleStochasticProcess::getProb(int spPlace) const {
+ if (spPlace >= _spProb.size())
+ errorMsg::reportError("error in multipleStochasticProcess::getProb");
+ return _spProb[spPlace];
+}
+
+stochasticProcess* multipleStochasticProcess::getSp(int spPlace) {
+ if (spPlace >= _spVec.size())
+ errorMsg::reportError("error in multipleStochasticProcess::getSp");
+ return &_spVec[spPlace];
+}
+
+void multipleStochasticProcess::setSpVec(vector<stochasticProcess>& spVec)
+{
+ _spVec.clear();
+ _spVec = spVec;
+}
+
diff --git a/libs/phylogeny/multipleStochasticProcess.h b/libs/phylogeny/multipleStochasticProcess.h
new file mode 100644
index 0000000..c644b4e
--- /dev/null
+++ b/libs/phylogeny/multipleStochasticProcess.h
@@ -0,0 +1,23 @@
+#ifndef _MULTIPLE_STOCHASTIC_PROCESS
+#define _MULTIPLE_STOCHASTIC_PROCESS
+
+#include "stochasticProcess.h"
+
+
// Holds a collection of stochastic processes together with a probability
// for each one (_spVec and _spProb are kept index-aligned).
class multipleStochasticProcess {
public:
	multipleStochasticProcess();
	virtual ~multipleStochasticProcess();
	// probability of the process at index spPlace (errors if out of range)
	virtual MDOUBLE getProb(int spPlace) const;
	// pointer to the process at index spPlace (errors if out of range)
	virtual stochasticProcess* getSp(int spPlace);
	virtual int getSPVecSize() const {return _spVec.size();}
	// replace the stored processes with a copy of spVec
	virtual void setSpVec(vector<stochasticProcess>& spVec);


protected:
	// copy both processes and probabilities from pOther
	virtual void copy(const multipleStochasticProcess * pOther);
protected:
	vector<stochasticProcess> _spVec; // the processes
	Vdouble _spProb; // probability of each process
};
+#endif
diff --git a/libs/phylogeny/nexusFormat.cpp b/libs/phylogeny/nexusFormat.cpp
new file mode 100644
index 0000000..b9bb166
--- /dev/null
+++ b/libs/phylogeny/nexusFormat.cpp
@@ -0,0 +1,152 @@
+// $Id: nexusFormat.cpp 5987 2009-03-18 18:13:53Z itaymay $
+
+#include "nexusFormat.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+#include <map>
+
+sequenceContainer nexusFormat::read(istream &infile, const alphabet* pAlph) {
+ sequenceContainer mySeqData = readUnAligned(infile, pAlph);
+ mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
+ return mySeqData;
+}
+
+sequenceContainer nexusFormat::readUnAligned(istream &infile, const alphabet* pAlph) { // read (possibly unaligned) sequences from a NEXUS stream
+	if (!infile) {
+		errorMsg::reportError("unable to read nexus format, could not open file");
+	}
+	sequenceContainer mySeqData;
+
+	vector<string> seqFileData;
+	putFileIntoVectorStringArray(infile,seqFileData);
+
+	vector<string>::const_iterator it1 = seqFileData.begin();
+	// make sure that the first 6 chars in the first line are #NEXUS (case-insensitive)
+	if (it1->size()<6) errorMsg::reportError("first word in a nexus sequence file format must be #NEXUS",1);
+	if ( ((*it1)[0] != '#')
+		|| (((*it1)[1] != 'N') && ((*it1)[1] != 'n'))
+		|| (((*it1)[2] != 'E') && ((*it1)[2] != 'e'))
+		|| (((*it1)[3] != 'X') && ((*it1)[3] != 'x'))
+		|| (((*it1)[4] != 'U') && ((*it1)[4] != 'u'))
+		|| (((*it1)[5] != 'S') && ((*it1)[5] != 's')) ) {
+		errorMsg::reportError("first word in a nexus sequence file format must be #NEXUS",1);
+	}
+	it1++;
+
+	while ( (it1 != seqFileData.end()) && ((*it1).find("matrix") == string::npos) && ((*it1).find("MATRIX") == string::npos))
+	{ //scan for the "matrix" keyword; test the iterator BEFORE dereferencing it
+		++it1;
+	}
+
+	int localid=0;
+	//the loop above stops either at the "matrix" keyword or at the end of the file,
+	//so a non-end iterator here means the keyword was found
+	if (it1 != seqFileData.end())
+	{
+		//taken from clustalFormat:
+		//In case of a codon alphabet we cannot add a sequence that is not divisible by 3.
+		//In this case the last nucleotides in each line (zero, one or two)
+		//should be saved. The next time the same sequence name appears -
+		//these saved nucleotides are added to the beginning of the line.
+		map<string ,string> stringsToAdd;
+
+
+		for (++it1; it1 != seqFileData.end() ; ++it1)
+		{
+			if (((*it1).find("end;") != string::npos) || ((*it1).find("END;") != string::npos))
+				break;
+			if (it1->empty() || ((*it1).find(';') != string::npos))
+			{ // empty line - continue
+				continue;
+			}
+			// (the per-line sequence object that used to be constructed here was unused and has been removed)
+
+			string taxonName;
+			string remark;
+			string stringSeq;
+			bool beforeName = true;
+			string::const_iterator stringIt = (it1)->begin();
+			for (; stringIt != (it1)->end(); ++stringIt)
+			{ //first loop finds the taxon name
+				if ( ((*stringIt) == ' ') || ((*stringIt) == '\t'))
+					if (beforeName == true)
+						continue; //spaces before taxon name are legal
+					else
+						break; //A space marks the end of the taxon name
+				else
+				{
+					taxonName += (*stringIt);
+					beforeName = false;
+				}
+			}
+
+			//check if a new sequence.
+			//if the name already exists then init stringSeq with the nucleotides left over from the previous line of the same sequence
+			if (stringsToAdd.find(taxonName)!=stringsToAdd.end())
+				stringSeq = stringsToAdd[taxonName];
+
+			for (; stringIt != (it1)->end(); ++stringIt)
+			{ //second loop finds the sequence
+				if ( ((*stringIt)==' ') || ((*stringIt) == '\t'))
+					continue;
+				else stringSeq += (*stringIt);
+			}
+
+			//when the alphabet is codon, stringSeq must be divisible by 3.
+			// 1. save the remainder (0,1 or 2 last nucleotides) in stringToAdd
+			// 2. substr the remainder from the sequence line.
+			// 3. keep stringToAdd in the map (keyed by name) to be added later.
+			string stringToAdd="";
+			if (pAlph->size()>=60){ // codon?
+				if ((stringSeq.size()%3)==1){ //add the last nucleotide to the next line
+					stringToAdd += stringSeq[stringSeq.size()-1];
+					stringSeq = stringSeq.substr(0,stringSeq.size()-1);
+				}
+				if ((stringSeq.size() % 3) == 2){ //add the 2 last nucleotides to the next line
+					stringToAdd+=stringSeq[stringSeq.size()-2];
+					stringToAdd+=stringSeq[stringSeq.size()-1];
+					stringSeq = stringSeq.substr(0, stringSeq.size() - 2);
+				}
+			}
+			stringsToAdd[taxonName] = stringToAdd; //update the map with the new stringToAdd
+			//add sequence to container
+			int id = mySeqData.getId(taxonName, false);
+			if (id==-1) { // new sequence.
+				mySeqData.add(sequence(stringSeq, taxonName,remark,localid, pAlph));
+				localid++;
+			}
+			else {// the sequence is already there...
+				sequence tmp(stringSeq,taxonName, remark, id, pAlph);
+				mySeqData[id].operator += (tmp);
+			}
+		}
+	}
+	else
+	{
+		errorMsg::reportError("no sequence data in nexus file - no matrix keyword found");
+	}
+
+	return mySeqData;
+}
+
+void nexusFormat::write(ostream &out, const sequenceContainer& sc) {
+ //vector<string> gfr = sd.getGeneralFileRemarks();
+ //if (gfr.empty()) out<<";;\n;;\n";
+ //for (vector<string>::const_iterator k=gfr.begin() ; k != gfr.end() ; ++k )
+ // out<<(*k)<<endl;
+ out<<"#NEXUS"<<endl;
+ out<<"begin data;"<<endl;
+ out<<"dimensions ntax="<<sc.numberOfSeqs()<<" nchar="<<sc.seqLen() <<";"<<endl;
+ if (sc.alphabetSize() == 4)
+ out<<"format datatype=dna gap=-;"<<endl;
+ else
+ out<<"format datatype=protein gap=-;"<<endl;
+ out<<"matrix"<<endl;
+
+ for (sequenceContainer::constTaxaIterator itSeq=sc.constTaxaBegin();itSeq!=sc.constTaxaEnd();++itSeq) {
+ out<<"\t"<<itSeq->name()<<"\t"<<itSeq->toString()<<endl;
+ }
+ out<<";"<<endl;
+ out<<"end;"<<endl;
+}
+
diff --git a/libs/phylogeny/nexusFormat.h b/libs/phylogeny/nexusFormat.h
new file mode 100644
index 0000000..08987a8
--- /dev/null
+++ b/libs/phylogeny/nexusFormat.h
@@ -0,0 +1,43 @@
+// $Id: nexusFormat.h 5158 2008-11-06 17:44:08Z itaymay $
+
+#ifndef ___NEXUS_FORMAT
+#define ___NEXUS_FORMAT
+
+#include "sequenceContainer.h"
+
+class nexusFormat{
+public:
+ static sequenceContainer read(istream &infile, const alphabet* alph);
+ static void write(ostream &out, const sequenceContainer& sd);
+ //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
+ static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
+};
+
+#endif
+
+/* EXAMPLE OF THE FORMAT:
+#NEXUS
+
+begin data;
+ dimensions ntax=6 nchar=128;
+ format datatype=Protein gap=-;
+ matrix
+ Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL
+ Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV
+ Human KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV
+ Rat KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV
+ Cow KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL
+ Baboon KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV
+ ;
+end;
+
+NOTE!!!!
+The sequences can also be ordered in an "interleaved" way:
+Horse KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGS
+Langur KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDES
+
+Horse SDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL
+Langur TDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV
+*/
+
+
diff --git a/libs/phylogeny/nj.cpp b/libs/phylogeny/nj.cpp
new file mode 100644
index 0000000..6809108
--- /dev/null
+++ b/libs/phylogeny/nj.cpp
@@ -0,0 +1,410 @@
+// $Id: nj.cpp 962 2006-11-07 15:13:34Z privmane $
+
+// version 1.00
+// last modified 3 Nov 2002
+
+#include "nj.h"
+#include "errorMsg.h"
+#include "logFile.h"
+#include "treeUtil.h"
+#include <cassert>
+#include <algorithm>
+#include <map>
+using namespace std;
+
+
+//------------------------------------------
+// general outline:
+// we follow Swofford's book, "Molecular Systematics" pg489.
+// currentNodes is the vector of the nodes that are "in process".
+// in the beginning, these are all the leaves. Once 2 leaves are separated,
+// they are excluded from currentNodes, and their father is added to currentNodes.
+// we (almost) finish the algorithm when currentNodes's size is 3. (i.e., we know the topology).
+// thus when we start from an evolutionary tree, all we do, is to construct a star (start) tree
+//------------------------------------------
+
+
+
+
+//------------------------------------------
+// constructor and start
+//------------------------------------------
+tree NJalg::computeTree(VVdouble distances,const vector<string>& names, const tree * const constriantTree /*= NULL*/){
+ assert(distances.size() == names.size());
+ tree resTree = startingTree(names);
+ if (distances.size()<3) return resTree;
+ vector<tree::nodeP> currentNodes;
+ resTree.getAllLeaves(currentNodes,resTree.getRoot());
+ if (constriantTree) {
+ njConstraint njc(resTree, *constriantTree);
+ while (currentNodes.size() >= 3) NJiterate(resTree,currentNodes,distances, njc);
+ } else {
+ while (currentNodes.size() >= 3) NJiterate(resTree,currentNodes,distances);
+ }
+ resTree.create_names_to_internal_nodes();
+ LOGDO(5,resTree.output(myLog::LogFile()));
+ return resTree;
+}
+
+tree NJalg::startingTree(const vector<string>& names) {
+ return starTree(names);
+}
+
+tree NJalg::startingTree(const tree& inTree) {
+ tree et;
+ et.createRootNode();
+ vector<tree::nodeP> allLeaves;
+ inTree.getAllLeaves(allLeaves,inTree.getRoot());
+
+ vector<string> names(allLeaves.size());
+ for (int k = 0 ; k < allLeaves.size(); ++k)
+ names[k]=allLeaves[k]->name();
+
+ return startingTree(names);
+}
+
+void NJalg::updateBranchDistance(const VVdouble& distanceTable,
+ const Vdouble& rValues,
+ tree::nodeP nodeNew,
+ tree::nodeP nodeI,
+ tree::nodeP nodeJ,
+ int Iplace,
+ int Jplace) {
+ MDOUBLE dis= (Iplace<Jplace) ? distanceTable[Iplace][Jplace] : distanceTable[Jplace][Iplace];
+ MDOUBLE DisI_new = dis/2.0;
+ MDOUBLE tmp = rValues[Iplace] - rValues[Jplace];
+ tmp/= ( 2.0*(distanceTable.size()-2) );
+ DisI_new = DisI_new+ tmp;
+ MDOUBLE DisJ_new = dis - DisI_new;
+ if (DisI_new<tree::SHORT_LENGTH_VALUE) DisI_new=tree::SHORT_LENGTH_VALUE; // no negative..
+ if (DisJ_new<tree::SHORT_LENGTH_VALUE) DisJ_new=tree::SHORT_LENGTH_VALUE; // no negative..
+ nodeI->setDisToFather(DisI_new);
+ nodeJ->setDisToFather(DisJ_new);
+}
+
+void NJalg::NJiterate(tree& et,
+ vector<tree::nodeP>& currentNodes,
+ VVdouble& distanceTable) {
+ Vdouble rVector = calc_r_values(currentNodes,distanceTable);//CHECK2
+
+ if (currentNodes.size() == 3) {
+ update3taxaLevel(distanceTable,rVector,currentNodes);
+ currentNodes.clear();
+ return;
+ }
+
+ int minRaw,minCol;
+ calc_M_matrix(currentNodes,distanceTable,rVector,minRaw,minCol);//CHECK3
+ tree::nodeP nodeI = currentNodes[minRaw];
+ tree::nodeP nodeJ = currentNodes[minCol];
+ tree::nodeP theNewNode;
+ theNewNode= SeparateNodes(et,nodeI,nodeJ);
+ //CHECK4
+ updateBranchDistance(distanceTable,rVector,theNewNode,nodeI,nodeJ,minRaw,minCol);
+ //CHECK6
+ et.create_names_to_internal_nodes();
+ UpdateDistanceTableAndCurrentNodes(currentNodes,distanceTable,nodeI,nodeJ,theNewNode,minRaw,minCol);
+}
+
+void NJalg::NJiterate(tree& et,
+ vector<tree::nodeP>& currentNodes,
+ VVdouble& distanceTable,
+ njConstraint& njc) {
+ Vdouble rMatrix = calc_r_values(currentNodes,distanceTable);//CHECK2
+
+ if (currentNodes.size() == 3) {
+ update3taxaLevel(distanceTable,rMatrix,currentNodes);
+ currentNodes.clear();
+ return;
+ }
+
+ int minRaw,minCol;
+ calc_M_matrix(currentNodes,distanceTable,rMatrix,minRaw,minCol, njc);//CHECK3
+ tree::nodeP nodeI = currentNodes[minRaw];
+ tree::nodeP nodeJ = currentNodes[minCol];
+ tree::nodeP theNewNode;
+ theNewNode= SeparateNodes(et,nodeI,nodeJ);
+ njc.join(nodeI, nodeJ, theNewNode);
+ //CHECK4
+ updateBranchDistance(distanceTable,rMatrix,theNewNode,nodeI,nodeJ,minRaw,minCol);
+ //CHECK6
+ et.create_names_to_internal_nodes();
+ UpdateDistanceTableAndCurrentNodes(currentNodes,distanceTable,nodeI,nodeJ,theNewNode,minRaw,minCol);
+ LOGDO(15,et.output(myLog::LogFile(),tree::ANCESTORID));
+
+}
+
+
+
+Vdouble NJalg::calc_r_values(vector<tree::nodeP>& currentNodes,
+ const VVdouble& distanceTable) {
+ Vdouble r_values(currentNodes.size(),0.0);
+ for (int i=0; i <r_values.size();++i) {
+ for (int j =0; j < r_values.size();++j) {
+ MDOUBLE dis= (i<j) ? distanceTable[i][j] : distanceTable[j][i];
+ r_values[i] += dis;
+ }
+ }
+ return r_values;
+}
+
+void NJalg::calc_M_matrix(vector<tree::nodeP>& currentNodes,
+ const VVdouble& distanceTable,
+ const Vdouble & r_values,
+ int& minRaw,int& minCol){
+ MDOUBLE min = VERYBIG;
+ for (int i=0; i < currentNodes.size();++i){
+ for (int j =i+1; j < currentNodes.size();++j) {
+ MDOUBLE dis= (i<j) ? distanceTable[i][j] : distanceTable[j][i];
+ MDOUBLE tmp = dis-(r_values[i]+r_values[j])/(currentNodes.size()-2);
+ if (tmp<min) {minRaw = i;minCol=j;min=tmp;}
+
+ }
+ }
+}
+
+void NJalg::calc_M_matrix(vector<tree::nodeP>& currentNodes,
+ const VVdouble& distanceTable,
+ const Vdouble & r_values,
+ int& minRaw,int& minCol,
+ const njConstraint& njc){
+ MDOUBLE min = VERYBIG;
+ MDOUBLE min_noc = VERYBIG;
+ int minRaw_noc=-1,minCol_noc=-1;
+ for (int i=0; i < currentNodes.size();++i){
+ for (int j =i+1; j < currentNodes.size();++j) {
+ if (njc.isCompatible(currentNodes[i],currentNodes[j])) {
+ MDOUBLE dis= (i<j) ? distanceTable[i][j] : distanceTable[j][i];
+ MDOUBLE tmp = dis-(r_values[i]+r_values[j])/(currentNodes.size()-2);
+ if (tmp<min) {minRaw = i;minCol=j;min=tmp;}
+ }
+ LOGDO(10,{
+ MDOUBLE dis= (i<j) ? distanceTable[i][j] : distanceTable[j][i];
+ MDOUBLE tmp = dis-(r_values[i]+r_values[j])/(currentNodes.size()-2);
+ if (tmp<min_noc) {minRaw_noc = i;minCol_noc=j;min_noc=tmp;}
+ });
+
+ }
+ }
+ LOGDO(10, {if (min_noc != min) {myLog::LogFile()
+ << "NJ-constratin changes outcome " <<
+ currentNodes[minRaw_noc]->name()<<","<<currentNodes[minCol_noc]->name() <<"-> " <<
+ currentNodes[minRaw] ->name()<<","<<currentNodes[minCol] ->name()<<
+ " ("<<min-min_noc<<")"<<endl;
+ njc.isCompatible(currentNodes[minRaw_noc], currentNodes[minCol_noc], true);
+ myLog::LogFile() << njc <<endl;
+ }
+ });
+}
+
+tree::nodeP NJalg::SeparateNodes(tree& et, tree::nodeP node1,
+ tree::nodeP node2) {
+ if (node1->father() != node2->father())
+ errorMsg::reportError(" error in function NJalg::SeparateNodes - nodes don't have the same father");
+
+ tree::nodeP fatherNode = node1->father();
+
+ tree::nodeP theNewNode = et.createNode(fatherNode,et.getNodesNum());
+ node1->setFather(theNewNode);
+ theNewNode->setSon(node1);
+ node2->setFather(theNewNode);
+ theNewNode->setSon(node2);
+
+ // remove from son list of father node.
+ fatherNode->removeSon(node1);
+
+ fatherNode->removeSon(node2);
+ return theNewNode;
+}
+
+void NJalg::update3taxaLevel(VVdouble& distanceTable,Vdouble & r_values,
+ vector<tree::nodeP>& currentNodes) {
+ // update the distance of the 3 taxa that are left in the end, to the root.
+
+ MDOUBLE dis0root = distanceTable[0][1]/2+0.5*(r_values[0]-r_values[1]);
+ MDOUBLE dis1root = distanceTable[0][1]/2+0.5*(r_values[1]-r_values[0]);
+ MDOUBLE dis2root = distanceTable[0][2]/2+0.5*(r_values[2]-r_values[0]);
+ if (dis0root<tree::SHORT_LENGTH_VALUE) dis0root=tree::SHORT_LENGTH_VALUE; // no negative..
+ if (dis1root<tree::SHORT_LENGTH_VALUE) dis1root=tree::SHORT_LENGTH_VALUE; // no negative..
+ if (dis2root<tree::SHORT_LENGTH_VALUE) dis2root=tree::SHORT_LENGTH_VALUE; // no negative..
+ currentNodes[0]->setDisToFather(dis0root);
+ currentNodes[1]->setDisToFather(dis1root);
+ currentNodes[2]->setDisToFather(dis2root);
+}
+
+void NJalg::UpdateDistanceTableAndCurrentNodes(vector<tree::nodeP>& currentNodes,
+ VVdouble& distanceTable,
+ tree::nodeP nodeI,
+ tree::nodeP nodeJ,
+ tree::nodeP theNewNode,
+ int Iplace,
+ int Jplace) {
+ // Iplace is the place of i in the "old" currentNodes vector
+ int i,j;
+ // updating currentNodes
+ vector<tree::nodeP> newCurrentNode= currentNodes;
+
+ vector<tree::nodeP>::iterator vec_iter1=remove(
+ newCurrentNode.begin(),newCurrentNode.end(),nodeI );
+ newCurrentNode.erase(vec_iter1,newCurrentNode.end());
+
+ vector<tree::nodeP>::iterator vec_iter2=remove(
+ newCurrentNode.begin(),newCurrentNode.end(),nodeJ );
+ newCurrentNode.erase(vec_iter2,newCurrentNode.end());
+
+ newCurrentNode.push_back(theNewNode);
+
+ map<tree::nodeP,int> nodeIntMap1;
+ for (int z=0; z<currentNodes.size();++z) {
+ nodeIntMap1.insert(map<tree::nodeP,int>::value_type(currentNodes[z],z));
+ }
+
+ VVdouble newDisTable;
+ newDisTable.resize(newCurrentNode.size());
+ for (int z1=0;z1<newDisTable.size();++z1) newDisTable[z1].resize(newCurrentNode.size(),0.0);
+
+// updating the table
+ for (i=0; i < newCurrentNode.size(); i++) {
+ for (j=i+1; j < newCurrentNode.size() ; j++) {
+ if ((i!=newCurrentNode.size()-1) && (j!=newCurrentNode.size()-1)) {// both old nodes
+ int oldI = nodeIntMap1[newCurrentNode[i]];
+ int oldJ = nodeIntMap1[newCurrentNode[j]];
+ MDOUBLE dis= (oldI<oldJ) ? distanceTable[oldI][oldJ] : distanceTable[oldJ][oldI];
+ newDisTable[i][j] = dis;
+ } //else if (i==newCurrentNode.size()-1) { // i is new
+ // newDisTable[i][j] = (dis(Iplace,NewOldPlaces[j])+dis(Jplace,NewOldPlaces[j])-dis(Iplace,Jplace))/2.0;
+ //}
+ else if (j==newCurrentNode.size()-1) { // j is new
+ int oldI = Iplace;
+ int oldJ = Jplace;
+ int oldK = nodeIntMap1[newCurrentNode[i]];
+ MDOUBLE disIK= (oldI<oldK) ? distanceTable[oldI][oldK] : distanceTable[oldK][oldI];
+ MDOUBLE disIJ= (oldI<oldJ) ? distanceTable[oldI][oldJ] : distanceTable[oldJ][oldI];
+ MDOUBLE disJK= (oldJ<oldK) ? distanceTable[oldJ][oldK] : distanceTable[oldK][oldJ];
+ newDisTable[i][j] = 0.5*(disIK+disJK-disIJ); //EQ. 43 SWOFFORD PAGE 489.
+ }
+ }
+ }
+
+ currentNodes=newCurrentNode;
+ distanceTable=newDisTable;
+}
+
+/*
+NJalg::NJalg(){
+ _myET = NULL;
+}
+
+
+
+//-----------------------------
+// The algorithm
+//-----------------------------
+
+void NJalg::GetDisTable(const sequenceContainer& sd,const vector<MDOUBLE> * weights) {
+
+ VVresize(_startingDistanceTable,distanceTable.size(),distanceTable.size());// for printing stuff later.
+ VVresize(LTable,distanceTable.size(),distanceTable.size());// for printing stuff later.
+
+ int i,j;
+ _nodeNames.resize(currentNodes.size());
+ for ( i=0; i < currentNodes.size(); i++) {
+ _nodeNames[i] =(currentNodes[i]->name());
+ for ( j=i+1; j < currentNodes.size(); j++) {
+ MDOUBLE tempDis = -2000.0;
+ MDOUBLE resLikelihood;
+ int seqnodeI_ID = sd.getId(currentNodes[i]->name());
+ int seqnodeJ_ID = sd.getId(currentNodes[j]->name());
+ const sequence& snodeI = *sd.getSeqPtr(seqnodeI_ID,true);
+ const sequence& snodeJ = *sd.getSeqPtr(seqnodeJ_ID,true);
+ tempDis = _cd->giveDistance(snodeI,snodeJ,weights,&resLikelihood);
+ distanceTable[i][j] = tempDis;
+ LTable[i][j] = resLikelihood;
+ }
+ }
+ if (myLog::LogLevel()>4) {
+ for (i=0; i < currentNodes.size(); i++) {
+ for (j=i+1; j < currentNodes.size(); j++) {
+ LOG(100,<<"nj distance ["<<i<<"]["<<j<<"] ="<<distanceTable[i][j]<<endl);
+ }
+ }
+ }
+ //if (myLog::LogLevel()>4) {
+ // for (i=0; i < currentNodes.size(); i++) {
+ // for (j=i+1; j < currentNodes.size(); j++) {
+ // LOG(4,<<"nj likelihood for distance["<<i<<"]["<<j<<"] ="<<LTable[i][j]<<endl);
+ // }
+ // }
+ //}
+ // for printing stuff later.
+ for (int tmp1=0; tmp1<distanceTable.size();++tmp1)
+ for (int tmp2=0; tmp2<distanceTable.size();++tmp2)
+ _startingDistanceTable[tmp1][tmp2] = distanceTable[tmp1][tmp2];
+}
+
+
+
+
+
+
+void NJalg::NJiterate() {
+ getMmatrixFromDistanceTable();
+ int minRaw,minCol;
+ findMinM(minRaw,minCol);
+
+ tree::nodeP nodeI = currentNodes[minRaw];
+ tree::nodeP nodeJ = currentNodes[minCol];
+ tree::nodeP theNewNode;
+ theNewNode= SeparateNodes(nodeI,nodeJ);
+
+ //CHECK4
+
+ updateBranchDistance(theNewNode,nodeI,nodeJ,minRaw,minCol);
+ //CHECK6
+
+ UpdateDistanceTableAndCurrentNodes(nodeI,nodeJ,theNewNode,minRaw,minCol);
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//CHECK1
+//cout<<"\n-----------------------------------------------"<<endl;
+//for (int h=0; h < currentNodes.size(); h++) cout<<currentNodes[h]->name()<<" = "<<h<<endl;
+
+//CHECK2
+// for (int i =0; i < r_values.size();++i) cout<<"r["<<i<<"] = "<<r_values[i]<<endl;
+
+//CHECK3
+// for (i =0; i < currentNodes.size();++i)
+// for (int j =i+1; j <currentNodes.size();++j)
+// cout<<"M["<<i<<"]["<<j<<"] = "<<Mmatrix[i][j]<<endl;
+
+//CHECK4
+// string htuname = "HTU";
+// char k = 'a'+currentNodes.size();
+// htuname+=k;
+// theNewNode->SetName(htuname);
+
+//CHECK5
+//_myET->getRoot()->SetName("RootOfStar");
+
+//CHECK6
+// et.output(cout,et.getRoot(),tree::ANCESTOR);
+
+
+
+
+
+*/
diff --git a/libs/phylogeny/nj.h b/libs/phylogeny/nj.h
new file mode 100644
index 0000000..3e437df
--- /dev/null
+++ b/libs/phylogeny/nj.h
@@ -0,0 +1,90 @@
+// $Id: nj.h 962 2006-11-07 15:13:34Z privmane $
+
+// version 1.00
+// last modified 3 Nov 2002
+
+#ifndef ___NJ
+#define ___NJ
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "njConstrain.h"
+#include "distances2Tree.h"
+using namespace std;
+
+class NJalg : public distances2Tree {
+public:
+ virtual NJalg* clone() const {return new NJalg(*this);}
+  // changed from computeNJtree to computeTree for compatibility with "distances2Tree"
+ virtual tree computeTree(VVdouble distances, const vector<string>& names, const tree * const constriantTree = NULL);
+ tree startingTree(const vector<string>& names);
+ tree startingTree(const tree& inTree);
+ void NJiterate(tree& et,vector<tree::nodeP>& currentNodes,
+ VVdouble& distanceTable);
+ void NJiterate(tree& et,vector<tree::nodeP>& currentNodes,
+ VVdouble& distanceTable, njConstraint& njc);
+ void calc_M_matrix(vector<tree::nodeP>& currentNodes,
+ const VVdouble& distanceTable,
+ const Vdouble & r_values,
+ int& minRaw,int& minCol);
+ void calc_M_matrix(vector<tree::nodeP>& currentNodes,
+ const VVdouble& distanceTable,
+ const Vdouble & r_values,
+ int& minRaw,int& minCol, const njConstraint& njc);
+ Vdouble calc_r_values(vector<tree::nodeP>& currentNodes,const VVdouble& distanceTable);
+ tree::nodeP SeparateNodes(tree& et,tree::nodeP node1,tree::nodeP node2);
+ void update3taxaLevel(VVdouble& distanceTable,Vdouble & r_values,vector<tree::nodeP>& currentNodes);
+ void updateBranchDistance(const VVdouble& disT,
+ const Vdouble& rValues,
+ tree::nodeP nodeNew,
+ tree::nodeP nodeI,
+ tree::nodeP nodeJ,
+ int Iplace, int Jplace);
+
+ void UpdateDistanceTableAndCurrentNodes(vector<tree::nodeP>& currentNodes,
+ VVdouble& distanceTable,
+ tree::nodeP nodeI,
+ tree::nodeP nodeJ,
+ tree::nodeP theNewNode,
+ int Iplace, int Jplace);
+
+};
+
+/*
+ //explicit NJalg(const tree& inTree, const computeDistance* cd);
+ explicit NJalg();
+ tree getNJtree() const {return *_myET;}// return a copy...
+ void computeTree(const sequenceContainer& sd,const computeDistance* cd,const vector<MDOUBLE> * weights = NULL);
+ VVdouble getDistanceTable(vector<string>& names) {
+ names.erase(names.begin(),names.end());
+ names = _nodeNames;
+ return _startingDistanceTable;}
+ VVdouble getLTable(vector<string>& names) {
+ names.erase(names.begin(),names.end());
+ names = _nodeNames;
+ return LTable;}
+private:
+ //void starTreeFromInputTree(const tree& inTree);
+ void starTreeFromInputsequenceContainer(const sequenceContainer& sd);
+ void GetDisTable(const sequenceContainer& sd,const vector<MDOUBLE> * weights);
+ MDOUBLE dis(const int i, const int j) const{
+ return (i<j) ? distanceTable[i][j] : distanceTable[j][i];
+ }
+ void findMinM(int& minRaw,int& minCol);
+
+
+ tree* _myET;
+ VVdouble distanceTable;
+ VVdouble Mmatrix;
+ Vdouble r_values;
+ vector<tree::nodeP> currentNodes;
+ const computeDistance* _cd;
+
+ VVdouble _startingDistanceTable; // for printing etc... not used by the algorithm.
+ vector<string> _nodeNames; // for printing etc... not used by the algorithm.
+ VVdouble LTable;// for printing etc... not used by the algorithm.
+
+*/
+#endif
+
+
diff --git a/libs/phylogeny/njConstrain.cpp b/libs/phylogeny/njConstrain.cpp
new file mode 100644
index 0000000..3b169fd
--- /dev/null
+++ b/libs/phylogeny/njConstrain.cpp
@@ -0,0 +1,130 @@
+// $Id: njConstrain.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include <cassert>
+#include "njConstrain.h"
+#include "logFile.h"
+
+
+
+njConstraint::njConstraint(const tree& starttree, const tree& constraintTree):_cTree(constraintTree), _interTreeMap(){
+ vector<tree::nodeP> currentNodes;
+ starttree.getAllLeaves(currentNodes,starttree.getRoot());
+ vector<tree::nodeP> constraintNodes;
+ _cTree.getAllLeaves(constraintNodes,_cTree.getRoot());
+ assert(currentNodes.size()==constraintNodes.size());
+
+ map<string,tree::nodeP> name2Node;
+ for (vector<tree::nodeP>::iterator vec_iter=constraintNodes.begin();vec_iter!=constraintNodes.end();++vec_iter){
+ // name2Node[test];//=*vec_iter;
+ name2Node[(*vec_iter)->name()]=*vec_iter;
+ }
+
+ for (vector<tree::nodeP>::iterator vec_iter2=currentNodes.begin();vec_iter2!=currentNodes.end();++vec_iter2){
+    assert(name2Node.find((*vec_iter2)->name()) != name2Node.end()); // can't find the taxon in the constraint tree!
+ _interTreeMap[*vec_iter2]=name2Node[(*vec_iter2)->name()];
+ }
+}
+
+
+bool njConstraint::isCompatible(const tree::nodeP& n1, const tree::nodeP& n2, const bool verbose) const
+{
+ bool compatible;
+ assert( _interTreeMap.find(n1) != _interTreeMap.end()); // cant find the taxa in the map!
+ assert( _interTreeMap.find(n2) != _interTreeMap.end()); // cant find the taxa in the map!
+
+ tree::nodeP s1=_interTreeMap.find(n1)->second;
+ tree::nodeP s2=_interTreeMap.find(n2)->second;
+
+ if (s1==_cTree.getRoot()) { // we are asking undirected questions from a directed tree
+ compatible = (s2 != _cTree.getRoot()) && (s2->father() != _cTree.getRoot()) && (s2->father()->father() == _cTree.getRoot());
+ if (verbose) LOG(11,<<"isCompatible - s1 is root"<<endl);
+ } else if (s2==_cTree.getRoot()) { // we are asking undirected questions from a directed tree
+ compatible = (s1 != _cTree.getRoot()) && (s1->father() != _cTree.getRoot()) && (s1->father()->father() == _cTree.getRoot());
+ if (verbose) LOG(11,<<"isCompatible - s2 is root"<<endl);
+ } else {
+ compatible = (s1->father()==s2->father());
+ }
+
+ if (verbose) LOG(11,<<"isCompatible:" <<s1->name()<<" + "<<s2->name()<<"-->" <<compatible<< endl);
+ return (compatible);
+}
+
+tree::nodeP joinNodesToSubtree(tree& t,tree::nodeP& s1, tree::nodeP& s2) // detach s1+s2 from their common father and hang them under a new internal node; returns that node
+{
+  assert (s1->father()==s2->father()); // we can only do this if both nodes have same father
+
+  LOG(10,<<endl<<s1->name()<<" and "<<s2->name()<<endl);
+
+  tree::nodeP fatherNode=s1->father();
+
+  if (fatherNode->getNumberOfSons()==2) {
+    //	  fatherNode->sons.clear();
+    return (fatherNode); // no splitting needed
+  }
+
+  if (s1->father()==t.getRoot() && t.getRoot()->getNumberOfSons()==3) { // no split needed, but the root needs to change
+
+    LOG(10,<<"************************* spacial case of constratin join"<<endl);
+    LOGDO(10,t.output(myLog::LogFile(),tree::ANCESTORID));
+    LOG(10,<<endl<<s1->name()<<" and "<<s2->name()<<endl);
+    LOG(10,<<endl<<s1->father()->name()<<" and father "<<s2->father()->name()<<endl);
+
+    tree::nodeP newFatherNode = s1->father();
+    for (int i=0; i<3; ++i)
+      if (t.getRoot()->getSon(i)!= s1 && t.getRoot()->getSon(i)!= s2){
+	t.rootAt(t.getRoot()->getSon(i)); // re-root at the third son
+	LOGDO(10,t.output(myLog::LogFile(),tree::ANCESTORID));
+	LOG(10,<<endl<<endl);
+	return (newFatherNode); // this is the new root;
+      }
+  }
+
+  tree::nodeP newNode = t.createNode(fatherNode, t.getNodesNum());
+  newNode->setSon(s1);
+  newNode->setSon(s2);
+  newNode->claimSons();
+
+
+  int k = fatherNode->getNumberOfSons();
+  fatherNode->removeSon(s1);
+  fatherNode->removeSon(s2);
+  assert (k == fatherNode->getNumberOfSons()+2); // both s1 and s2 should have been removed (was "k=", an assignment that always passed and clobbered k)
+  //	fatherNode->sons.resize(k);
+
+  t.updateNumberofNodesANDleaves();
+  t.create_names_to_internal_nodes();
+  return(newNode);
+}
+
+void njConstraint::join(const tree::nodeP& n1, const tree::nodeP& n2, const tree::nodeP& newFather)
+{
+ assert(_interTreeMap.find(n1) != _interTreeMap.end()); // cant find the taxa in the map!
+ assert(_interTreeMap.find(n2) != _interTreeMap.end()); // cant find the taxa in the map!
+ assert(_interTreeMap.find(newFather) == _interTreeMap.end()); // should not find the new father in the map!
+ assert(isCompatible(n1,n2));
+
+ // tree::nodeP origFather=_interTreeMap.find(n1)->father();
+
+ // do tree things
+ LOG(10,<<endl<<n1->name()<<" AND "<<n2->name()<<endl);
+ tree::nodeP newNode=joinNodesToSubtree(_cTree, _interTreeMap[n1], _interTreeMap[n2]);
+
+
+ _interTreeMap.erase(n1);
+ _interTreeMap.erase(n2);
+ _interTreeMap[newFather]=newNode;
+
+
+ LOGDO(17,_cTree.output(myLog::LogFile()));
+
+}
+void njConstraint::output(ostream &out) const{
+ _cTree.output(out,tree::ANCESTORID);
+ out <<endl;
+}
+
+ostream &operator<<(ostream &out, const njConstraint &c){
+ c.output(out);
+ return(out);
+}
diff --git a/libs/phylogeny/njConstrain.h b/libs/phylogeny/njConstrain.h
new file mode 100644
index 0000000..b063bac
--- /dev/null
+++ b/libs/phylogeny/njConstrain.h
@@ -0,0 +1,29 @@
+// $Id: njConstrain.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___NJ_CONSTRAINT
+#define ___NJ_CONSTRAINT
+
+#include <map>
+
+
+#include "sequenceContainer.h"
+#include "tree.h"
+using namespace std;
+
+class njConstraint {
+public:
+ njConstraint(const tree& starttree, const tree& constraintTree);
+ bool isCompatible(const tree::nodeP& n1, const tree::nodeP& n2, const bool verbose=false) const;
+ void join(const tree::nodeP& n1, const tree::nodeP& n2, const tree::nodeP& newFather);
+ void output(ostream &out) const;
+
+private:
+ tree _cTree; // constriant tree
+ map<tree::nodeP,tree::nodeP> _interTreeMap;
+
+
+};
+
+ostream &operator<<(ostream &out, const njConstraint &c);
+
+#endif // ___NJ_CONSTRAINT
diff --git a/libs/phylogeny/normalDist.cpp b/libs/phylogeny/normalDist.cpp
new file mode 100644
index 0000000..8a32ece
--- /dev/null
+++ b/libs/phylogeny/normalDist.cpp
@@ -0,0 +1,67 @@
+// $Id: normalDist.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "normalDist.h"
+#include <cmath>
+
+/*
+ This function evaluates the standard normal cumulative distribution function for N(0,1):
+ the integral from -infinity to x of exp(-t^2/2)/sqrt(2*pi) dt (adapted from the web), using
+ the approximation of Milton Abramowitz and Irene A. Stegun,
+ Handbook of Mathematical Functions,
+ National Bureau of Standards, 1964.
+ */
+MDOUBLE Phi(MDOUBLE x)
+{
+ if (x>6.0) return 1;
+ if (x<-6.0) return 0;
+ MDOUBLE b1=0.31938153;
+ MDOUBLE b2=-0.356563782;
+ MDOUBLE b3=1.781477937;
+ MDOUBLE b4=-1.821255978;
+ MDOUBLE b5=1.330274429;
+ MDOUBLE p=0.2316419;
+ MDOUBLE c2=0.3989423;
+ MDOUBLE a=fabs(x);
+ MDOUBLE t=1.0/(1.0+a*p);
+ MDOUBLE b=c2*exp((-x)*(x/2.0));
+ MDOUBLE n=((((b5*t+b4)*t+b3)*t+b2)*t+b1)*t;
+ n=1.0-b*n;
+ if (x<0.0) n=1.0-n;
+ return n;
+}
+
+/*
+ Computes the inverse normal distribution function (downloaded from the web)
+ i.e. computes x when c=Phi(x)
+ */
+MDOUBLE normsinv(MDOUBLE p)
+{
+ if (p<EPSILON) return VERYSMALL;
+ if ((1-p)<EPSILON)return VERYBIG;
+ MDOUBLE x(0.0);
+ MDOUBLE q, r;
+ if ((0 < p ) && (p < P_LOW))
+ {
+ q = sqrt(-2*log(p));
+ x = (((((C1*q+C2)*q+C3)*q+C4)*q+C5)*q+C6) / ((((D1*q+D2)*q+D3)*q+D4)*q+1);
+ }
+ else
+ {
+ if ((P_LOW <= p) && (p <= P_HIGH))
+ {
+ q = p - 0.5;
+ r = q*q;
+ x = (((((A1*r+A2)*r+A3)*r+A4)*r+A5)*r+A6)*q /(((((B1*r+B2)*r+B3)*r+B4)*r+B5)*r+1);
+ }
+ else
+ {
+ if ((P_HIGH < p)&&(p < 1))
+ {
+ q = sqrt(-2*log(1-p));
+ x = -(((((C1*q+C2)*q+C3)*q+C4)*q+C5)*q+C6) / ((((D1*q+D2)*q+D3)*q+D4)*q+1);
+ }
+ }
+ }
+ return x;
+}
+
+
diff --git a/libs/phylogeny/normalDist.h b/libs/phylogeny/normalDist.h
new file mode 100644
index 0000000..f446e3f
--- /dev/null
+++ b/libs/phylogeny/normalDist.h
@@ -0,0 +1,35 @@
+// $Id: normalDist.h 962 2006-11-07 15:13:34Z privmane $
+#ifndef ___NORMAL_DIST
+#define ___NORMAL_DIST
+
+#include "definitions.h"
+
+
+#define A1 (-3.969683028665376e+01)
+#define A2 2.209460984245205e+02
+#define A3 (-2.759285104469687e+02)
+#define A4 1.383577518672690e+02
+#define A5 (-3.066479806614716e+01)
+#define A6 2.506628277459239e+00
+#define B1 (-5.447609879822406e+01)
+#define B2 1.615858368580409e+02
+#define B3 (-1.556989798598866e+02)
+#define B4 6.680131188771972e+01
+#define B5 (-1.328068155288572e+01)
+#define C1 (-7.784894002430293e-03)
+#define C2 (-3.223964580411365e-01)
+#define C3 (-2.400758277161838e+00)
+#define C4 (-2.549732539343734e+00)
+#define C5 4.374664141464968e+00
+#define C6 2.938163982698783e+00
+#define D1 7.784695709041462e-03
+#define D2 3.224671290700398e-01
+#define D3 2.445134137142996e+00
+#define D4 3.754408661907416e+00
+#define P_LOW 0.02425
+/* P_high = 1 - p_low*/
+#define P_HIGH 0.97575
+
+MDOUBLE Phi(MDOUBLE x);
+MDOUBLE normsinv(MDOUBLE p);
+#endif
diff --git a/libs/phylogeny/nucJC.cpp b/libs/phylogeny/nucJC.cpp
new file mode 100644
index 0000000..e1f5414
--- /dev/null
+++ b/libs/phylogeny/nucJC.cpp
@@ -0,0 +1,5 @@
+// $Id: nucJC.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "nucJC.h"
+
+
diff --git a/libs/phylogeny/nucJC.h b/libs/phylogeny/nucJC.h
new file mode 100644
index 0000000..404d490
--- /dev/null
+++ b/libs/phylogeny/nucJC.h
@@ -0,0 +1,53 @@
+// $Id: nucJC.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___NUC_JC
+#define ___NUC_JC
+
+#include <cmath>
+#include "replacementModel.h"
+
+// Precomputed Jukes-Cantor constants for an alphabet of size 4, shared by
+// the Pij_t / dPij_dt / d2Pij_dt2 formulas in nucJC below.
+namespace nucDef {
+ const MDOUBLE Alp = 4.0;
+ const MDOUBLE odAl = 1.0/Alp; // one divided by alphabet
+ const MDOUBLE om_odAl = 1.0-odAl; // one minus odAl;
+ const MDOUBLE alDiv_omalp = Alp/(Alp-1.0); // Alp/(Alp-1): the JC rate factor
+ const MDOUBLE m_alDiv_omalp = -alDiv_omalp;
+}
+
+// Jukes-Cantor (1969) nucleotide replacement model: all substitutions
+// equally likely, uniform 0.25 equilibrium frequencies. The commented
+// formulas show the familiar 4/3 constants that the nucDef names encode.
+class nucJC : public replacementModel {
+public:
+ const int alphabetSize() const {return 4;}
+
+ virtual replacementModel* clone() const { return new nucJC(*this); }
+
+ explicit nucJC(){};
+ // transition probability i->j after branch length d
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
+// return ((i==j) ? 0.25+0.75*exp(-4.0/3.0*d): 0.25-0.25*exp(-4.0/3.0*d));
+ return ((i==j) ? nucDef::odAl+nucDef::om_odAl*exp(nucDef::m_alDiv_omalp*d): nucDef::odAl-nucDef::odAl*exp(nucDef::m_alDiv_omalp*d));
+ }
+
+ // first derivative of Pij_t with respect to d
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+// return ((i==j) ? -exp(-4.0/3.0*d): exp(-4.0/3.0*d)/3.0);
+ return ((i==j) ? -exp(nucDef::m_alDiv_omalp*d): exp(nucDef::m_alDiv_omalp*d)/(nucDef::Alp-1));
+ }
+ const MDOUBLE freq(const int i) const {return 0.25;}; // uniform equilibrium
+
+ // second derivative of Pij_t with respect to d
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+ // return ((i==j) ? 4.0/3.0*exp(-4.0/3.0*d): -4.0/3.0*exp(-4.0/3.0*d));
+ return ((i==j) ? nucDef::alDiv_omalp*exp(nucDef::m_alDiv_omalp*d): nucDef::m_alDiv_omalp*exp(nucDef::m_alDiv_omalp*d));
+ }
+
+ // instantaneous rate matrix entry (rows sum to zero)
+ const MDOUBLE Q(const int i, const int j) const {
+ return ((i == j) ? ( - 1.0) : (1.0 / 3.0));
+ }
+
+
+};
+
+#endif
+
+// note: according to the new C++ rules, the clone function should be like this:
+// virtual nucJC* clone() const { return new nucJC(*this); }
+// however, not all compiler support it yet. look at More Effective C++ page 126.
+
+
diff --git a/libs/phylogeny/nucleotide.cpp b/libs/phylogeny/nucleotide.cpp
new file mode 100644
index 0000000..b458efe
--- /dev/null
+++ b/libs/phylogeny/nucleotide.cpp
@@ -0,0 +1,122 @@
+// $Id: nucleotide.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "nucleotide.h"
+#include "errorMsg.h"
+
+
+// Builds the 4x16 ambiguity table: _relation[s][t] is 1 iff concrete
+// nucleotide s (A,C,G,T = 0..3) is compatible with the (possibly
+// ambiguous) character code t produced by fromChar.
+nucleotide::nucleotide() {
+ _relation.resize(4);
+ for (int i=0; i < _relation.size(); ++i) {
+ _relation[i].resize(16);
+ }
+ for (int s=0;s<4;++s) {
+ for (int t=0;t<16;++t){
+ _relation[s][t]=relationsInternal(s,t);
+ }
+ }
+}
+
+// One-letter alphabet: decoding position pos of str is just decoding
+// the single character found there.
+int nucleotide::fromChar(const string& str, const int pos) const {
+ const char symbol = str[pos];
+ return fromChar(symbol);
+}
+
+// Translates an entire character string into the internal vector-of-int
+// encoding, one code per character.
+vector<int> nucleotide::fromString(const string &str) const {
+ vector<int> codes;
+ codes.reserve(str.size());
+ for (int pos = 0; pos < str.size(); ++pos) {
+ codes.push_back(fromChar(str[pos]));
+ }
+ return codes;
+}
+
+// Maps one nucleotide character (case-insensitive, IUPAC ambiguity codes
+// included) to its internal code: A/C/G/T -> 0..3, U -> 4, ambiguity
+// codes -> 5..14, unknown (N, ?, *, X) -> 15, gap (-,_) -> -1, and
+// '.' -> -3 (placeholder meaning "same character as the line above").
+// Any other character aborts via errorMsg::reportError.
+int nucleotide::fromChar(const char s) const {
+ switch (s) {
+ case 'A' : case'a' : return 0 ; break;// A = adenine
+ case 'C' : case'c' : return 1 ; break;// C = cytosine
+ case 'G' : case'g' : return 2 ; break;// G = guanine
+ case 'T' : case't' : return 3 ; break;// T = thymine
+ case 'U' : case'u' : return 4 ; break; // U = uracil
+ case 'R' : case'r' : return 5 ; break;// R = purine (same as [GA])
+ case 'Y' : case'y' : return 6 ; break;// Y = pyrimidine (same as [TC])
+ case 'K' : case'k' : return 7 ; break;// K = keto (same as [GT])
+ case 'M' : case'm' : return 8 ; break;// M = amino (same as [AC])
+ case 'S' : case's' : return 9 ; break;// S = strong (same as [GC])
+ case 'W' : case'w' : return 10; break;// W = weak (same as [AT])
+ case 'B' : case'b' : return 11; break;// B = (same as [GTC])
+ case 'D' : case'd' : return 12; break;// D = (same as [GAT])
+ case 'H' : case'h' : return 13; break;// H = (same as [ACT])
+ case 'V' : case'v' : return 14; break;// V = (same as [GCA])
+ case 'N' : case'n' : return 15; break;// N = any (same as [ACGT])
+ case '?' : case'*' : return 15; break;
+ case '-' : case'_' : return -1; break;
+ case 'x' : case'X' : return 15; break;
+ case '.' : return -3; break; // . is used in some sequence files as the character just in the line above...
+ default:
+ vector<string> err;
+ err.push_back(" The nucleotide sequences contained the character: ");
+ err[0]+=s;
+ err.push_back(" The nucleotide was not one of the following: ");
+ err.push_back("A, C, G, T, X, -, ?");
+ err.push_back("a, c, g, t, x, _, *");
+ errorMsg::reportError(err);
+ }
+ return -99; // not reached in practice (reportError terminates per its docs)
+}
+
+// Maps an internal code back to its one-letter string representation.
+string nucleotide::fromInt(const int id) const {
+ return string(1, fromIntInternal(id));
+}
+
+// Inverse of fromChar for single characters: internal code -> uppercase
+// letter (0..3 -> ACGT, -1 -> '-', 4 -> 'U', 5..15 -> ambiguity codes).
+// Unrecognized codes abort via errorMsg::reportError.
+char nucleotide::fromIntInternal(const int in_id) const {
+ switch (in_id) {
+ case 0 : return 'A' ; break;
+ case 1 : return 'C' ; break;
+ case 2 : return 'G' ; break;
+ case 3 : return 'T' ; break;
+ case -1: return '-' ; break;
+ case 4 : return 'U'; break;
+ case 5 : return 'R'; break;
+ case 6 : return 'Y'; break;
+ case 7 : return 'K'; break;
+ case 8 : return 'M'; break;
+ case 9 : return 'S'; break;
+ case 10 : return 'W'; break;
+ case 11 : return 'B'; break;
+ case 12 : return 'D'; break;
+ case 13 : return 'H'; break;
+ case 14 : return 'V'; break;
+ case 15 : return 'N'; break;
+ default:
+ vector<string> err;
+ err.push_back(" unable to print nucleotide. nucleotide was not one of the following: ");
+ err.push_back("A, C, G, T, -, ?");
+ err.push_back("a, c, g, t, _, *");
+ errorMsg::reportError(err); // make the program quit
+ }//end of switch
+ return '!' ; // for the lousy compiler
+}
+
+// Returns 1 iff concrete nucleotide ctc is one of the bases represented
+// by the (possibly ambiguous) code charInSeq, else 0. Used to fill the
+// _relation table in the constructor (ctc ranges over 0..3 there, so the
+// 'case 4' U branch can only match when called with ctc==4 directly).
+int nucleotide::relationsInternal(const int ctc,const int charInSeq
+ ) const{ //ctc=charToCheck
+ switch (charInSeq){
+ case 0 : if (ctc==0) return 1 ; break;// A = adenine
+ case 1 : if (ctc==1) return 1 ; break;// C = cytosine
+ case 2 : if (ctc==2) return 1 ; break;// G = guanine
+ case 3 : if (ctc==3) return 1 ; break;// T = thymine
+ case 4 : if (ctc==4) return 1 ; break; // U = uracil
+ case 5 : if (ctc==2||ctc==0) return 1 ; break;// R = purine (same as [GA])
+ case 6 : if (ctc==3||ctc==1) return 1 ; break;// Y = pyrimidine (same as [TC])
+ case 7 : if (ctc==2||ctc==3) return 1 ; break;// K = keto (same as [GT])
+ case 8 : if (ctc==0||ctc==1) return 1 ; break;// M = amino (same as [AC])
+ case 9 : if (ctc==2||ctc==1) return 1 ; break;// S = (same as [GC])
+ case 10: if (ctc==0||ctc==3) return 1 ; break;// W = (same as [AT])
+ case 11: if (ctc==2||ctc==3||ctc==1) return 1 ; break;// B = (same as [GTC])
+ case 12: if (ctc==2||ctc==0||ctc==3) return 1 ; break;// D = (same as [GAT])
+ case 13: if (ctc==0||ctc==1||ctc==3) return 1 ; break;// H = (same as [ACT])
+ case 14: if (ctc==2||ctc==1||ctc==0) return 1 ; break;// V = (same as [GCA])
+ case 15: if (ctc==0||ctc==1||ctc==2||ctc==3) return 1 ; break;// N = any (same as [ACGT])
+ };
+ return 0;
+};
+
diff --git a/libs/phylogeny/nucleotide.h b/libs/phylogeny/nucleotide.h
new file mode 100644
index 0000000..b82f2e2
--- /dev/null
+++ b/libs/phylogeny/nucleotide.h
@@ -0,0 +1,110 @@
+// $Id: nucleotide.h 1901 2007-03-15 13:21:06Z nimrodru $
+
+#ifndef ___NUCLEOTIDE_H
+#define ___NUCLEOTIDE_H
+
+#include <cassert>
+#include "definitions.h"
+#include "alphabet.h"
+
+/* =======================================================================
+This is the nucleotide class. It is derived from the class alphabet.
+All alphabets are internally stored as integers. So what has to be implemented
+is a way to translate from strings to arrays (vectors) of integers and back.
+
+Starting with the easiest functions to explain:
+size() gives the size of the alphabet: 4 in this case.
+stringSize() says whether it is a one-letter code (unlike codon, which is a 3-letter code).
+
+clone() is a general mechanism in C++. The idea is that if you have a derived class
+and a pointer to the base class, you may want to self-copy the derived class.
+In such a case you use the clone() mechanism. Ref: More Effective C++, page 126.
+
+int unknown(): sometimes one doesn't know if it is A, C, G, or T. In such case we use
+the int that represents unknown. In this class it is set to 15. This is used for example
+when gap characters are converted to unknown characters.
+
+
+int fromChar(const string& str, const int pos) and int fromChar(const char s)
+give the same answer: there is a map from integers to characters.
+For example, A is zero, C is 1, etc. However, the function fromChar(const char s)
+is specific to nucleotide and to amino - because these are one letter alphabet.
+For codon - this function won't work. This is why the general function is
+in the form int fromChar(const string& str, const int pos);
+In the case of codon - it will read 3 letters each time.
+=========================================================================*/
+
+
+
+class nucleotide : public alphabet {
+public:
+ explicit nucleotide();
+ virtual ~nucleotide() {}
+ virtual alphabet* clone() const { return new nucleotide(*this); }
+ int unknown() const {return 15;} // same code as 'N' (any base)
+ int gap() const {return -1;}
+ int size() const {return 4;} // A, C, G, T
+ int stringSize() const {return 1;} // one letter code.
+
+ int fromChar(const string& str, const int pos) const;
+ int fromChar(const char s) const;
+ vector<int> fromString(const string& str) const;
+
+ string fromInt(const int id) const;
+ // 1 iff charToCheck (0..3) is compatible with the possibly-ambiguous
+ // charInSeq code; see the explanation and table below.
+ int relations(const int charInSeq, const int charToCheck) const{ // see explanation below
+ assert (charInSeq != -1);//gaps in the sequences
+ return _relation[charToCheck][charInSeq];
+ }
+
+ // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
+ // in this special case, in fact it will be true also for U which is coded by 4.
+ // this is why it is <= size.
+ bool isSpecific(const int id) const {return (id>=0 && id <= size());}
+
+private:
+ VVint _relation; // compatibility table filled once by the constructor
+ char fromIntInternal(const int in_id) const;
+ int relationsInternal(const int ctc,const int charInSeq) const;
+
+};
+
+
+
+#endif
+
+
+// Explanation about relations:
+// Sometimes the sequences contain letters like R which means G or A.
+// When calculating the likelihood of such sequences we have to take this into acount.
+// For example, the tree : A
+/* / \
+ t1 / \ t2
+ / \
+ R A
+
+ L = P(A)*P(A->A)(t1)*P(A->A)(t2) + P(A)*P(A->G)(t1)*P(A->A)(t2)
+ = P(A)*P(A->A)(t2)* [ P(A->A)(t1) + P(A->G)(t1) ]
+
+ Note that we don't divide it by 2.
+
+ VVint _relation keeps this information :
+
+ A C G T
+ A 1 0 0 0
+ C 0 1 0 0
+ G 0 0 1 0
+ T 0 0 0 1
+ U 0 0 0 1
+ R 1 0 1 0
+ Y 0 1 0 1
+ K
+ .
+ .
+ .
+*/
+
+
+
+
+
+
diff --git a/libs/phylogeny/nucleotide_amir.cpp b/libs/phylogeny/nucleotide_amir.cpp
new file mode 100644
index 0000000..2df93b1
--- /dev/null
+++ b/libs/phylogeny/nucleotide_amir.cpp
@@ -0,0 +1,139 @@
+// $Id: nucleotide.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "nucleotide.h"
+#include "errorMsg.h"
+
+
+/*nucleotide::nucleotide() {
+ _relation.resize(4);
+ for (int i=0; i < _relation.size(); ++i) {
+ _relation[i].resize(16);
+ }
+ for (int s=0;s<4;++s) {
+ for (int t=0;t<16;++t){
+ _relation[s][t]=relationsInternal(s,t);
+ }
+ }
+}
+*/
+// Variant of the nucleotide constructor that treats the gap as a fifth
+// concrete character: builds a 5x17 table (codes 0..4 = A,C,G,T,gap;
+// column 16 = 'U' in this file's fromChar encoding).
+nucleotide::nucleotide() {
+ _relation.resize(5);
+ for (int i=0; i < _relation.size(); ++i) {
+ _relation[i].resize(17);
+ }
+ for (int s=0;s<5;++s) {
+ for (int t=0;t<17;++t){
+ _relation[s][t]=relationsInternal(s,t);
+ }
+ }
+}
+// One-letter alphabet: decode the single character at position pos.
+int nucleotide::fromChar(const string& str, const int pos) const {
+ const char symbol = str[pos];
+ return fromChar(symbol);
+}
+
+// Translates an entire character string into the internal integer codes.
+vector<int> nucleotide::fromString(const string &str) const {
+ vector<int> codes;
+ codes.reserve(str.size());
+ for (int pos = 0; pos < str.size(); ++pos) {
+ codes.push_back(fromChar(str[pos]));
+ }
+ return codes;
+}
+
+// Gap-as-character variant of fromChar: '-'/'_' map to 4 (the slot that
+// holds 'U' in the standard nucleotide class) and 'U'/'u' map to 16;
+// everything else matches the standard encoding (A/C/G/T -> 0..3,
+// ambiguity -> 5..14, unknown -> 15, '.' -> -3).
+int nucleotide::fromChar(const char s) const {
+ switch (s) {
+ case 'A' : case'a' : return 0 ; break;// A = adenine
+ case 'C' : case'c' : return 1 ; break;// C = cytosine
+ case 'G' : case'g' : return 2 ; break;// G = guanine
+ case 'T' : case't' : return 3 ; break;// T = thymine
+ case '-' : case'_' : return 4 ; break; // gap, recoded as a concrete character
+ //case 'U' : case'u' : return 4 ; break; // U = uracil
+ case 'R' : case'r' : return 5 ; break;// R = purine (same as [GA])
+ case 'Y' : case'y' : return 6 ; break;// Y = pyrimidine (same as [TC])
+ case 'K' : case'k' : return 7 ; break;// K = keto (same as [GT])
+ case 'M' : case'm' : return 8 ; break;// M = amino (same as [AC])
+ case 'S' : case's' : return 9 ; break;// S = strong (same as [GC])
+ case 'W' : case'w' : return 10; break;// W = weak (same as [AT])
+ case 'B' : case'b' : return 11; break;// B = (same as [GTC])
+ case 'D' : case'd' : return 12; break;// D = (same as [GAT])
+ case 'H' : case'h' : return 13; break;// H = (same as [ACT])
+ case 'V' : case'v' : return 14; break;// V = (same as [GCA])
+ case 'N' : case'n' : return 15; break;// N = any (same as [ACGT])
+ case '?' : case'*' : return 15; break;
+ //case '-' : case'_' : return -1; break;
+ case 'U' : case'u' : return 16; break;
+ case 'x' : case'X' : return 15; break;
+ case '.' : return -3; break; // . is used in some sequence files as the character just in the line above...
+ default:
+ vector<string> err;
+ err.push_back(" The nucleotide sequences contained the character: ");
+ err[0]+=s;
+ err.push_back(" The nucleotide was not one of the following: ");
+ err.push_back("A, C, G, T, X, -, ?");
+ err.push_back("a, c, g, t, x, _, *");
+ errorMsg::reportError(err);
+ }
+ return -99; // not reached in practice (reportError terminates per its docs)
+}
+
+// Maps an internal code back to its one-letter string representation.
+string nucleotide::fromInt(const int id) const {
+ return string(1, fromIntInternal(id));
+}
+
+// Inverse of this file's fromChar: 0..3 -> ACGT, 4 -> '-' (gap variant),
+// 16 -> 'U', 5..15 -> ambiguity codes. Unknown codes abort via
+// errorMsg::reportError.
+char nucleotide::fromIntInternal(const int in_id) const {
+ switch (in_id) {
+ case 0 : return 'A' ; break;
+ case 1 : return 'C' ; break;
+ case 2 : return 'G' ; break;
+ case 3 : return 'T' ; break;
+ //case -1: return '-' ; break;
+ //case 16 : return '-' ; break;
+ //case 4 : return 'U'; break;
+ case 16 : return 'U' ; break;
+ case 4 : return '-'; break;
+ case 5 : return 'R'; break;
+ case 6 : return 'Y'; break;
+ case 7 : return 'K'; break;
+ case 8 : return 'M'; break;
+ case 9 : return 'S'; break;
+ case 10 : return 'W'; break;
+ case 11 : return 'B'; break;
+ case 12 : return 'D'; break;
+ case 13 : return 'H'; break;
+ case 14 : return 'V'; break;
+ case 15 : return 'N'; break;
+ default:
+ vector<string> err;
+ err.push_back(" unable to print nucleotide. nucleotide was not one of the following: ");
+ err.push_back("A, C, G, T, -, ?");
+ err.push_back("a, c, g, t, _, *");
+ errorMsg::reportError(err); // make the program quit
+ }//end of switch
+ return '!' ; // for the lousy compiler
+}
+
+// Returns 1 iff concrete character ctc (0..4 in this variant) matches the
+// possibly-ambiguous code charInSeq. Case 16 makes 'U' compatible only
+// with itself; note case 4 here compares against ctc==4, which in this
+// file's encoding is the gap character (see fromChar above).
+int nucleotide::relationsInternal(const int ctc,const int charInSeq
+ ) const{ //ctc=charToCheck
+ switch (charInSeq){
+ case 0 : if (ctc==0) return 1 ; break;// A = adenine
+ case 1 : if (ctc==1) return 1 ; break;// C = cytosine
+ case 2 : if (ctc==2) return 1 ; break;// G = guanine
+ case 3 : if (ctc==3) return 1 ; break;// T = thymine
+ case 4 : if (ctc==4) return 1 ; break; // gap in this file's encoding
+ case 5 : if (ctc==2||ctc==0) return 1 ; break;// R = purine (same as [GA])
+ case 6 : if (ctc==3||ctc==1) return 1 ; break;// Y = pyrimidine (same as [TC])
+ case 7 : if (ctc==2||ctc==3) return 1 ; break;// K = keto (same as [GT])
+ case 8 : if (ctc==0||ctc==1) return 1 ; break;// M = amino (same as [AC])
+ case 9 : if (ctc==2||ctc==1) return 1 ; break;// S = (same as [GC])
+ case 10: if (ctc==0||ctc==3) return 1 ; break;// W = (same as [AT])
+ case 11: if (ctc==2||ctc==3||ctc==1) return 1 ; break;// B = (same as [GTC])
+ case 12: if (ctc==2||ctc==0||ctc==3) return 1 ; break;// D = (same as [GAT])
+ case 13: if (ctc==0||ctc==1||ctc==3) return 1 ; break;// H = (same as [ACT])
+ case 14: if (ctc==2||ctc==1||ctc==0) return 1 ; break;// V = (same as [GCA])
+ case 15: if (ctc==0||ctc==1||ctc==2||ctc==3) return 1 ; break;// N = any (same as [ACGT])
+ case 16 : if (ctc==16) return 1 ; break;// gap
+ };
+ return 0;
+};
+
diff --git a/libs/phylogeny/nucleotide_amir.h b/libs/phylogeny/nucleotide_amir.h
new file mode 100644
index 0000000..0db118d
--- /dev/null
+++ b/libs/phylogeny/nucleotide_amir.h
@@ -0,0 +1,111 @@
+// $Id: nucleotide.h 1901 2007-03-15 13:21:06Z nimrodru $
+
+#ifndef ___NUCLEOTIDE_H
+#define ___NUCLEOTIDE_H
+
+#include <cassert>
+#include "definitions.h"
+#include "alphabet.h"
+
+/* =======================================================================
+This is the nucleotide class. It is derived from the class alphabet.
+All alphabets are internally stored as integers. So what has to implement
+is a way to translate from strings to array (vector) of integers and back.
+
+Starting with the easiest functions to explain:
+size() gives the size of the alphabet: 4 in this case.
+stringSize() say if it is a one letter code (unlike codon which is 3 letters code).
+
+clone() is a general machanism in C++. The idea is that if you have a derived class,
+and a pointer to the base class, and you want to self-copy the derived class.
+In such case you use the clone() machanism. Ref: More effective C++ page. 126.
+
+int unknown(): sometimes one doesn't know if it is A, C, G, or T. In such case we use
+the int that represents unknown. In this class it is set to 15. This is used for example
+when gap characters are converted to unknown characters.
+
+
+int fromChar(const string& str, const int pos) and int fromChar(const char s)
+give the same answer: there is a map from integers to characters.
+For example, A is zero, C is 1, etc. However, the function fromChar(const char s)
+is specific to nucleotide and to amino - because these are one letter alphabet.
+For codon - this function won't work. This is why the general function is
+in the form int fromChar(const string& str, const int pos);
+In the case of codon - it will read 3 letters each time.
+=========================================================================*/
+
+
+
+class nucleotide : public alphabet {
+public:
+ explicit nucleotide();
+ virtual ~nucleotide() {}
+ virtual alphabet* clone() const { return new nucleotide(*this); }
+ int unknown() const {return 15;} // same code as 'N' (any base)
+ // NOTE(review): gap() returns -1, but this variant's fromChar maps '-'
+ // to 4 — confirm which code callers are expected to see for gaps.
+ int gap() const {return -1;}
+ //int size() const {return 4;}
+ int size() const {return 5;} // A, C, G, T plus gap-as-character
+ int stringSize() const {return 1;} // one letter code.
+
+ int fromChar(const string& str, const int pos) const;
+ int fromChar(const char s) const;
+ vector<int> fromString(const string& str) const;
+
+ string fromInt(const int id) const;
+ // 1 iff charToCheck is compatible with the possibly-ambiguous
+ // charInSeq code; see the explanation and table below.
+ int relations(const int charInSeq, const int charToCheck) const{ // see explanation below
+ assert (charInSeq != -1);//gaps in the sequences
+ return _relation[charToCheck][charInSeq];
+ }
+
+ // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
+ // in this speical case, in fact it will be true also for U which is coded by 4.
+ // this is why it is <= size.
+ bool isSpecific(const int id) const {return (id>=0 && id <= size());}
+
+private:
+ VVint _relation; // compatibility table filled once by the constructor
+ char fromIntInternal(const int in_id) const;
+ int relationsInternal(const int ctc,const int charInSeq) const;
+
+};
+
+
+
+#endif
+
+
+// Explanation about relations:
+// Sometimes the sequences contain letters like R which means G or A.
+// When calculating the likelihood of such sequences we have to take this into acount.
+// For example, the tree : A
+/* / \
+ t1 / \ t2
+ / \
+ R A
+
+ L = P(A)*P(A->A)(t1)*P(A->A)(t2) + P(A)*P(A->G)(t1)*P(A->A)(t2)
+ = P(A)*P(A->A)(t2)* [ P(A->A)(t1) + P(A->G)(t1) ]
+
+ Note that we don't divide it by 2.
+
+ VVint _relation keeps this information :
+
+ A C G T
+ A 1 0 0 0
+ C 0 1 0 0
+ G 0 0 1 0
+ T 0 0 0 1
+ U 0 0 0 1
+ R 1 0 1 0
+ Y 0 1 0 1
+ K
+ .
+ .
+ .
+*/
+
+
+
+
+
+
diff --git a/libs/phylogeny/numRec.cpp b/libs/phylogeny/numRec.cpp
new file mode 100644
index 0000000..58e960d
--- /dev/null
+++ b/libs/phylogeny/numRec.cpp
@@ -0,0 +1,498 @@
+// $Id: numRec.cpp 5990 2009-03-19 10:21:20Z privmane $
+
+#include "numRec.h"
+#include "matrixUtils.h"
+#include <cassert>
+#include <algorithm>
+
+#ifndef VERBOS
+#define VERBOS
+#endif
+
+// Checks that v is numerically symmetric (assumed square: the inner loop
+// indexes columns with v.size()). A |v[i][j]-v[j][i]| above epsilon
+// aborts via errorMsg::reportError; smaller asymmetries are silently
+// repaired by copying the lower triangle over the upper one.
+void validateSym(VVdouble & v) {
+ const MDOUBLE epsilon = 0.00000001;
+ for (int i=0; i < v.size(); ++i) {
+ for (int j=i+1; j < v.size(); ++j) {
+ if (fabs(v[i][j] - v[j][i])> epsilon) {
+ LOG(5,<<"v["<<i<<"]["<<j<<"]="<<v[i][j]<<endl);
+ LOG(5,<<"v["<<j<<"]["<<i<<"]="<<v[j][i]<<endl);
+
+ errorMsg::reportError("trying to find eigen values to non-sym matrix");
+ }
+ else v[i][j] = v[j][i]; // force exact symmetry
+ }
+ }
+}
+
+// Cyclic Jacobi eigen-decomposition of the symmetric matrix Insym.
+// On convergence returns 0 with the eigenvalues in EigenValues and the
+// eigenvectors accumulated in RightEigenV; Insym is destroyed (its
+// off-diagonal entries are annihilated by the rotations). Aborts via
+// errorMsg::reportError if MaxNumberOfSweeps is exceeded.
+int MyJacobi(VVdouble &Insym, VVdouble &RightEigenV, Vdouble &EigenValues) {
+ validateSym(Insym);
+ const int MaxNumberOfSweeps = 100000;
+ VVdouble& v = RightEigenV;
+ VVdouble& a = Insym;
+ Vdouble& d = EigenValues;
+ //CheckSizeAndTypeAndResizeIfNessary();
+ int i,j;
+ const int size = v.size();
+
+ // preparing V to be the indentity matrix
+ for (i=0; i<size; ++i) {
+ for (int j=0; j<size ; ++j) v[i][j]=0.0;
+ v[i][i] = 1.0;
+ }
+
+
+ for (i=0 ; i<size; ++i ) {
+ d[i] = a[i][i];
+ }
+
+ MDOUBLE sm = 0.0; // sm is the sum of the off-diagonal elements
+ int ip, iq;
+ for (i = 0; i< MaxNumberOfSweeps ; ++i) {
+ sm = 0.0;
+ for (ip = 0; ip<size ; ++ip) {
+ for (iq = ip+1; iq <size; ++iq) sm +=fabs (a[ip][iq]);
+ }
+ //if(i%300==0)
+ // LOG(5,<<"sm= "<<sm<<endl);
+ if (sm == 0.0) return 0; // the program is suppose to return here, after some rounds of i.
+ // for the first few sweeps only rotate pairs above a threshold
+ MDOUBLE tresh;
+ if (i<3) tresh = 0.2 * sm / (size*size); else tresh = 0.0;
+
+ MDOUBLE g;
+ for (ip=0 ; ip<size; ++ip) {
+ for (iq = ip+1 ; iq<size; ++iq) {
+ g = 100.0*fabs(a[ip][iq]);
+
+#ifdef VERBOS
+ if (g<10e-50) {
+ LOG(5,<<"small g!"<<endl);
+ if ((i>3 && (fabs(d[ip]+g) == fabs(d[ip])) && (fabs(d[iq]+g)==fabs(d[iq])))==false) {
+ LOG(5,<<"g is small: "<<g<< "yes, it is not zeroed"<<endl);
+ LOG(5,<<"because d[ip] is: "<<d[ip]<<" and d[iq] is: "<<d[iq]<<endl);
+ LOG(5,<<"ip is: "<<ip<<" iq is: "<<iq<<endl);
+ }
+ }
+#endif //VERBOS
+ // after 4 sweeps: zero entries that are negligible at machine precision
+ if (i>3 && (fabs(d[ip]+g) == fabs(d[ip])) && (fabs(d[iq]+g)==fabs(d[iq])) ) {
+ a[ip][iq] = 0.0;
+ }
+ else if (fabs(a[ip][iq]) > tresh) {
+ // compute the Jacobi rotation (c, s) that annihilates a[ip][iq]
+ MDOUBLE h;
+ MDOUBLE t;
+ MDOUBLE theta;
+ h = d[iq]-d[ip];
+ // assert(h!=0);
+ if (fabs(h) + g == fabs(h)) {
+ assert(h!=0);
+ t = a[ip][iq] / h;
+ }
+ else {
+ theta = 0.5*h/(a[ip][iq]);
+ t = 1.0 / (fabs(theta)+sqrt(1.0+theta*theta));
+ if (theta<0.0) t = -t;
+ }
+ MDOUBLE c,s;
+ c = 1.0 / sqrt(1.0+t*t);
+ s = t*c;
+ MDOUBLE tau;
+ tau = s/ (1.0 + c);
+ h = t * a[ip][iq];
+
+ d[ip] = d[ip] - t * a[ip][iq];
+ d[iq] = d[iq] + t * a[ip][iq];
+ a[ip][iq]=0.0;
+ // apply the rotation to the three affected strips of a
+ MDOUBLE tmp1, tmp2;
+ for (j = 0; j < ip; ++j) {
+ tmp1 = a[j][ip] - s*(a[j][iq]+a[j][ip]*tau); // updating the above element of a...
+ tmp2 = a[j][iq] + s*(a[j][ip]-a[j][iq]*tau);
+ a[j][ip] = tmp1;
+ a[j][iq] = tmp2;
+ }
+
+ for (j = ip+1;j<iq; ++j) {
+ tmp1 = a[ip][j] - s*(a[j][iq]+a[ip][j]*tau); // updating the above element of a..
+ tmp2 = a[j][iq] + s*(a[ip][j]-a[j][iq]*tau);
+ a[ip][j] = tmp1;
+ a[j][iq] = tmp2;
+ }
+
+ for (j = iq+1; j< size ; ++j) {
+ tmp1 = a[ip][j] - s*(a[iq][j]+a[ip][j]*tau); // updating the above element of a..
+ tmp2 = a[iq][j] + s*(a[ip][j]-a[iq][j]*tau);
+ a[ip][j] = tmp1;
+ a[iq][j] = tmp2;
+ }
+
+ // accumulate the rotation into the eigenvector matrix
+ for (j = 0; j< size ; ++j) {
+ tmp1 = v[j][ip] - s*(v[j][iq]+v[j][ip]*tau); // updating v
+ tmp2 = v[j][iq] + s*(v[j][ip]-v[j][iq]*tau);
+ v[j][ip] = tmp1;
+ v[j][iq] = tmp2;
+ }
+ } // end of "else if (fabs(a[ip][iq] > tresh)"
+ } // end of for (iq = ...
+ } // end of for (ip = ...
+ } // end of for (i = 0; i< MaxNumberOfSweeps ; ++i) {
+ vector<string> err;
+ err.push_back("problems in function MyJacobi. more than MaxNumberOfSweeps were necesary.");
+ errorMsg::reportError(err);
+
+ return -1;
+} //end of function
+
+
+
+
+
+///////////////////////////////////////////
+//Adi cahnges //////////////////////////
+/////////////////////////////////////////
+// Returns |a| carrying the sign of b: +|a| when b>0, otherwise -|a|.
+MDOUBLE sign(MDOUBLE a,MDOUBLE b){
+ if (b > 0) {
+ return fabs(a);
+ }
+ return -fabs(a);
+}
+
+// Euclidean length sqrt(a^2 + b^2). Squares are computed by direct
+// multiplication rather than pow(x,2): pow is a general transcendental
+// call and is far slower for a fixed small integer exponent.
+// (No overflow guard, matching the original behavior.)
+MDOUBLE pythag(const MDOUBLE a, const MDOUBLE b){
+ return sqrt(a*a + b*b);
+}
+
+
+// Householder reduction of the symmetric matrix mat to tridiagonal form,
+// accumulating the orthogonal transformation in Q (built as an n x n
+// matrix). mat is overwritten; on return its diagonal/subdiagonal hold
+// the tridiagonal form consumed by the matching QL routine.
+void houseHolder(VVdouble &mat,VVdouble &Q){
+ MDOUBLE sigma=0,H,sqrtSigma,K=0,tmp;
+ int c,r,j,i,n = mat.size();
+ // Allocate Q as an n x n zero matrix. (The original additionally ran
+ // `Q.resize(n)` inside a loop n times — a no-op almost certainly meant
+ // to be `Q[i].resize(n)`, which the per-row resize below already does —
+ // so that redundant loop has been removed.)
+ Q.resize(n);
+ for (i=0;i<n;i++)
+ Q[i].resize(n,0.0);
+ Vdouble p,q,u;
+ p.resize(n,0.0);
+ q.resize(n,0.0);
+ u.resize(n,0.0);
+ for (i=n-1;i>1;i--){
+ sigma=0; //init sigma
+ K=0; //init K
+
+ for(j=0;j<i;j++)
+ sigma+= mat[i][j]*mat[i][j]; //compute sigma: O(n)
+
+ sqrtSigma = mat[i][i-1]>=0.0 ? sqrt(sigma) : -sqrt(sigma); //compute sqrt of sigma +/-
+
+ H=sigma+mat[i][i-1]*sqrtSigma; //comute H = 0.5*|u|^2. until here O(n)
+
+ /***createing U*******/
+ for(r=0;r<i;r++) { //update vector u with row i the matrix until i; //takes O(n)
+ Q[i][r]= u[r] = mat[i][r];
+ Q[r][i] = u[r]/H;
+ }
+ u[i-1]+=sqrtSigma; //update element (i,i-1)
+ Q[i][i-1]=u[i-1];
+ Q[i-1][i]=u[i-1]/H;
+ for(r=i;r<n;r++) //update elemnts (i,j) =0 for j>=i.
+ u[r]=0.0;
+ /***********************/
+ for(r=0;r<n;r++){ //compute vector p O(n^2)
+ p[r]=0.0;
+ for (c=0;c<i;c++)
+ p[r]+=mat[r][c]*u[c]; //compute AU
+ p[r]/=H; // ->AU/H
+ }
+
+ for(r=0;r<i;r++) // compure K O(n)
+ K+=u[r]*p[r];
+ K/=(2*H);
+ // cout<<"K is: "<<K<<endl;
+
+ for(r=0;r<n;r++) //compute vector q O(n)
+ q[r]=p[r]-K*u[r];
+
+ for(r=0;r<=i;r++) {//update matrix O(n^2) only part of the matrix
+ for(c=0;c<=i;c++)
+ mat[r][c]-=q[r]*u[c]+u[r]*q[c];
+ }
+
+ }
+ // accumulate the stored Householder vectors into the orthogonal matrix Q
+ for (i=0;i<n;i++){
+ for(j=0;j<i;j++){
+ tmp=0;
+ for(c=0;c<i;c++)
+ tmp+=Q[i][c]*Q[c][j];
+ for(c=0;c<i;c++)
+ Q[c][j]-=tmp*Q[c][i];
+ }
+ Q[i][i]=1;
+ for(j=0;j<i;j++)
+ Q[j][i]=Q[i][j]=0.0;
+ }
+}
+
+// Householder reduction of the real symmetric matrix a to tridiagonal
+// form (Numerical Recipes tred2). On return d holds the diagonal, e the
+// off-diagonal (e[0]=0), and a is replaced by the orthogonal matrix Q
+// needed to recover the eigenvectors in the subsequent QL step.
+void tred2(VVdouble &a, Vdouble &d, Vdouble &e) //a = symmetricMatrix,d = diagonal,e = offdiagonal
+{
+ int l,k,j,i;
+ MDOUBLE scale,hh,h,g,f;
+
+ int n=d.size();
+ for (i=n-1;i>0;i--) {
+ l=i-1;
+ h=scale=0.0;
+ if (l > 0) {
+ // scale the row to avoid under/overflow in the norm computation
+ for (k=0;k<l+1;k++)
+ scale += fabs(a[i][k]);
+ if (scale == 0.0)
+ e[i]=a[i][l]; // row already tridiagonal: skip transformation
+ else {
+ for (k=0;k<l+1;k++) {
+ a[i][k] /= scale;
+ h += a[i][k]*a[i][k];
+ }
+ f=a[i][l];
+ g=(f >= 0.0 ? -sqrt(h) : sqrt(h));
+ e[i]=scale*g;
+ h -= f*g;
+ a[i][l]=f-g; // store the Householder vector in row i
+ f=0.0;
+ for (j=0;j<l+1;j++) {
+ // Next statement can be omitted if eigenvectors not wanted
+ a[j][i]=a[i][j]/h;
+ g=0.0;
+ for (k=0;k<j+1;k++)
+ g += a[j][k]*a[i][k];
+ for (k=j+1;k<l+1;k++)
+ g += a[k][j]*a[i][k];
+ e[j]=g/h;
+ f += e[j]*a[i][j];
+ }
+ hh=f/(h+h);
+ for (j=0;j<l+1;j++) {
+ f=a[i][j];
+ e[j]=g=e[j]-hh*f;
+ for (k=0;k<j+1;k++)
+ a[j][k] -= (f*e[k]+g*a[i][k]);
+ }
+ }
+ } else
+ e[i]=a[i][l];
+ d[i]=h;
+ }
+ // Next statement can be omitted if eigenvectors not wanted
+ d[0]=0.0;
+ e[0]=0.0;
+ // Contents of this loop can be omitted if eigenvectors not
+ // wanted except for statement d[i]=a[i][i];
+ for (i=0;i<n;i++) {
+ l=i;
+ if (d[i] != 0.0) {
+ for (j=0;j<l;j++) {
+ g=0.0;
+ for (k=0;k<l;k++)
+ g += a[i][k]*a[k][j];
+ for (k=0;k<l;k++)
+ a[k][j] -= g*a[k][i];
+ }
+ }
+ d[i]=a[i][i];
+ a[i][i]=1.0;
+ for (j=0;j<l;j++) a[j][i]=a[i][j]=0.0;
+ }
+}
+
+//called if houseHolder was used - the modified QL implementation corresponding to the modified implementation of householder
+/*
+void QL(Vdouble &d, Vdouble &e, VVdouble &z){
+ int m,l,iter,i,k;
+ MDOUBLE s,r,p,g,f,dd,c,b;
+
+ int n=d.size();
+//* for (i=1;i<n;i++) e[i-1]=e[i];
+//* e[n-1]=0.0;
+//* e.push_back(0);//since in my algorithm I return an n-1 sized e
+ for (l=0;l<n;l++) {
+ iter=0;
+ do {
+ for (m=l;m<n-1;m++) {
+ dd=fabs(d[m])+fabs(d[m+1]);
+ if (fabs(e[m])+dd == dd) break;
+ }
+ if (m != l) {
+ if (iter++ == 30) errorMsg::reportError("Too many iterations in QL");
+ g=(d[l+1]-d[l])/(2.0*e[l]);
+ r=pythag(g,1.0);
+ g=d[m]-d[l]+e[l]/(g+sign(r,g));
+ s=c=1.0;
+ p=0.0;
+ for (i=m-1;i>=l;i--) {
+ f=s*e[i];
+ b=c*e[i];
+ e[i+1]=(r=pythag(f,g));
+ if (r == 0.0) {
+ d[i+1] -= p;
+ e[m]=0.0;
+ break;
+ }
+ s=f/r;
+ c=g/r;
+ g=d[i+1]-p;
+ r=(d[i]-g)*s+2.0*c*b;
+ d[i+1]=g+(p=s*r);
+ g=c*r-b;
+ // Next loop can be omitted if eigenvectors not wanted
+ for (k=0;k<n;k++) {
+ f=z[k][i+1];
+ z[k][i+1]=s*z[k][i]+c*f;
+ z[k][i]=c*z[k][i]-s*f;
+ }
+ }
+ if (r == 0.0 && i >= l) continue;
+ d[l] -= p;
+ e[l]=g;
+ e[m]=0.0;
+ }
+ } while (m != l);
+ }
+}
+*/
+
+
+//called if tred2 was used - the original QL implementation from numerical recepies
+// QL algorithm with implicit shifts (Numerical Recipes tqli) on a
+// tridiagonal matrix: d = diagonal, e = off-diagonal as produced by
+// tred2. On return d holds the eigenvalues and the columns of z the
+// eigenvectors (z must enter as the orthogonal matrix from tred2).
+// Aborts via errorMsg::reportError after 30 shifts per eigenvalue.
+void QL(Vdouble &d, Vdouble &e, VVdouble &z){
+ int m,l,iter,i,k;
+ MDOUBLE s,r,p,g,f,dd,c,b;
+
+ int n=d.size();
+ // renumber e so e[i] is the subdiagonal below d[i] (tred2 convention)
+ for(i=1;i<n;i++){
+ e[i-1]=e[i];
+ }
+ e[n-1]=0.0;
+ for(l=0;l<n;l++){
+ iter=0;
+ do {
+ // look for a single negligible subdiagonal element to split the matrix
+ for(m=l;m<n-1;m++){
+ dd=fabs(d[m])+fabs(d[m+1]);
+ if(fabs(e[m])+dd == dd) break;
+ }
+ if(m!=l){
+ if(iter++==30){
+ errorMsg::reportError("too many iteration in QL");
+ }
+ // form the implicit shift from the leading 2x2 block
+ g=(d[l+1]-d[l])/(2.0*e[l]);
+ r=pythag(g,1.0);
+ g=d[m]-d[l]+e[l]/(g+sign(r,g));
+ s=c=1.0;
+ p=0.0;
+ // plane rotations to restore tridiagonal form
+ for(i=m-1;i>=l;i--){
+ f=s*e[i];
+ b=c*e[i];
+ e[i+1]=(r=pythag(f,g));
+ if(r==0.0){
+ d[i+1]-=p;
+ e[m]=0.0;
+ break;
+ }
+ s=f/r;
+ c=g/r;
+ g=d[i+1]-p;
+ r=(d[i]-g)*s+2.0*c*b;
+ d[i+1]=g+(p=s*r);
+ g=c*r-b;
+ // accumulate the rotation into the eigenvector matrix
+ for(k=0;k<n;k++){
+ f=z[k][i+1];
+ z[k][i+1]=s*z[k][i]+c*f;
+ z[k][i]=c*z[k][i]-s*f;
+ }
+ }
+ if(r==0 && i>=l) continue;
+ d[l]-=p;
+ e[l]=g;
+ e[m]=0.0;
+ }
+ }
+ while(m!=l);
+ }
+}
+
+
+
+/************************************************************************/
+//diaganol will be eigen values and fill matrix of eigen vectors. */
+/************************************************************************/
+
+//A modified implementation for eigen analysis, using the house holder function.
+/*
+void computeEigenSystem(VVdouble &symmetricMatrix,VVdouble &eigenVectros,Vdouble &diagonal){
+
+ houseHolder(symmetricMatrix,eigenVectros);
+
+ Vdouble offdiagonal;
+ offdiagonal.resize(symmetricMatrix.size());
+ for (int i=0; i<symmetricMatrix.size(); i++){
+ diagonal[i]=symmetricMatrix[i][i];
+ }
+ for (int i2=0; i2<symmetricMatrix.size()-1; i2++){
+ offdiagonal[i2]=symmetricMatrix[i2+1][i2];
+ }
+
+ QL(diagonal,offdiagonal,eigenVectros);
+ return;
+}
+*/
+
+//Uses original implementation of tred2 function for eigen analysis, copied from numerical recepies p474.
+// Eigen analysis of a real symmetric matrix: Householder tridiagonal
+// reduction (tred2) followed by QL with implicit shifts. On return
+// `diagonal` holds the eigenvalues and `eigenVectros` the eigenvectors.
+// Note: symmetricMatrix is overwritten by tred2.
+void computeEigenSystem(VVdouble &symmetricMatrix,VVdouble &eigenVectros,Vdouble &diagonal){
+
+ Vdouble offDiag(symmetricMatrix.size());
+
+ tred2(symmetricMatrix,diagonal,offDiag);
+
+ // tred2 left the orthogonal transformation in symmetricMatrix;
+ // QL accumulates its rotations on top of it.
+ eigenVectros = symmetricMatrix;
+
+ QL(diagonal,offDiag,eigenVectros);
+}
+
+
+// the following two functions used for Kolomogorov-Smirnoff test
+// One-sample Kolmogorov-Smirnov test (Numerical Recipes ksone style):
+// computes the maximum distance between the running sum of observedDist
+// (sorted in place) and the empirical step function j/n, then converts
+// the distance to a p-value via computeProbForKS.
+// NOTE(review): observedDist entries are accumulated as a CDF, so they
+// appear to be expected as probability masses — confirm against callers.
+// The empiricalDist parameter is not used in this implementation.
+MDOUBLE performKSTest(const uniformDistribution& empiricalDist, Vdouble& observedDist)
+{
+ MDOUBLE pVal = 0.0;
+ MDOUBLE distance = 0.0;
+
+ int j;
+ MDOUBLE dt,en,fn,fo = 0.0;
+
+ int n = observedDist.size();
+ sort(observedDist.begin(),observedDist.end());
+ en = n;
+ MDOUBLE cdfObserved = 0.0;
+ for(j = 0; j < n; ++j){
+ cdfObserved+=observedDist[j];
+ fn = (j+1)/en; // empirical step function at this point
+ dt = max(fabs(fo-cdfObserved),fabs(fn-cdfObserved)); // KS distance at the step
+ if(dt > distance)
+ distance = dt;
+ fo = fn;
+ }
+ en = sqrt(en);
+ // asymptotic correction for finite n (NR formula)
+ pVal = computeProbForKS((en+0.12+0.11/en)*distance);
+ return pVal;
+}
+
+// function called only by performKSTest
+// Kolmogorov-Smirnov probability function Q_KS (Numerical Recipes probks):
+// alternating series 2*sum_{j>=1} (-1)^(j-1) exp(-2 j^2 x^2), truncated
+// when a term is negligible relative to the running sum.
+MDOUBLE computeProbForKS (const MDOUBLE QsParam)
+{
+ const MDOUBLE EPS1 = 1.0e-6,EPS2 = 1.0e-16;
+ int j;
+ MDOUBLE a2,fac = 2.0, sum = 0.0, term, termbf = 0.0;
+
+ a2 = -2.0*QsParam*QsParam;
+ for(j = 1; j <= 100; ++j){
+ term = fac*exp(a2*j*j);
+ sum += term;
+ if(fabs(term) <= EPS1*termbf || fabs(term) <= EPS2*sum)
+ return sum; // series converged
+ fac = -fac; // alternate signs
+ termbf = fabs(term); // remember previous term's magnitude
+ }
+ return 1.0; //get here only by failing to converge
+}
diff --git a/libs/phylogeny/numRec.h b/libs/phylogeny/numRec.h
new file mode 100644
index 0000000..2c52602
--- /dev/null
+++ b/libs/phylogeny/numRec.h
@@ -0,0 +1,275 @@
+// $Id: numRec.h 5790 2009-01-19 22:29:26Z rubi $
+
+// version 1.00
+// last modified 2 Nov 2002
+
+#ifndef ___NUM_REC
+#define ___NUM_REC
+
+#include <cmath>
+#include <cassert>
+#include <iostream>
+using namespace std;
+#include "definitions.h"
+#include "errorMsg.h"
+#include "uniformDistribution.h"
+#include "logFile.h"
+
+//#define VERBOS
+#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
+
+//========================== function brent =========================================
+// One-dimensional minimization without derivatives (Numerical Recipes "brent",
+// ch. 10.2). ax, bx, cx presumably bracket a minimum as in NR (not checked
+// here). The minimum is isolated to a fractional precision of about tol using
+// parabolic interpolation through the three best points, with golden-section
+// steps as fallback. Returns the minimal function value and stores its
+// abscissa in *xmin. After ITMAX iterations without convergence it calls
+// errorMsg::reportError, which quits the program.
+template <typename regF>
+MDOUBLE brent(MDOUBLE ax, MDOUBLE bx, MDOUBLE cx, regF f, MDOUBLE tol,
+ MDOUBLE *xmin) {
+
+ const int ITMAX = 100;
+ const MDOUBLE CGOLD = 0.3819660f; // golden-section ratio
+ const MDOUBLE ZEPS = 1.0e-10f; // protects tol1 when x is near zero
+
+ int iter;
+ MDOUBLE a,b,d=0.0,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm;
+ MDOUBLE e=0.0; // distance moved on the step before last
+
+ a=(ax < cx ? ax : cx);
+ b=(ax > cx ? ax : cx);
+ x=w=v=bx;
+ fw=fv=fx=f(x);
+ LOG(10,<<"brent, f("<<x<<")="<<fx<<endl);
+ for (iter=1;iter<=ITMAX;iter++) {
+ xm=0.5*(a+b);
+ tol2=2.0*(tol1=tol*fabs(x)+ZEPS);
+ if (fabs(x-xm) <= (tol2-0.5*(b-a))) { // convergence test
+ *xmin=x;
+ return fx;
+ }
+ if (fabs(e) > tol1) {
+ // try a trial parabolic fit through x, w, v
+ r=(x-w)*(fx-fv);
+ q=(x-v)*(fx-fw);
+ p=(x-v)*q-(x-w)*r;
+ q=2.0*(q-r);
+ if (q > 0.0) p = -p;
+ q=fabs(q);
+ etemp=e;
+ e=d;
+ // reject the parabolic step if it is too big or falls outside (a,b)
+ if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x))
+ d=CGOLD*(e=(x >= xm ? a-x : b-x));
+ else {
+ d=p/q;
+ u=x+d;
+ if (u-a < tol2 || b-u < tol2)
+ d=SIGN(tol1,xm-x);
+ }
+ } else {
+ d=CGOLD*(e=(x >= xm ? a-x : b-x)); // golden-section step
+ }
+ u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d));
+ fu=f(u); // the single function evaluation per iteration
+ LOG(10,<<"brent, f("<<u<<")="<<fu<<endl);
+ if (fu <= fx) {
+ // u is the new best point; shift the bookkeeping accordingly
+ if (u >= x) a=x; else b=x;
+ v=w;w=x;x=u;
+ fv=fw;fw=fx; fx=fu;
+ } else {
+ if (u < x) a=u; else b=u;
+ if (fu <= fw || w == x) {
+ v=w;
+ w=u;
+ fv=fw;
+ fw=fu;
+ } else if (fu <= fv || v == x || v == w) {
+ v=u;
+ fv=fu;
+ }
+ }
+ }
+ errorMsg::reportError(" too many iterations in function, brent. "); // also quit the program
+ return -1;
+}
+
+// ===================================== function dbrent ========================================
+/* The efficiency of this function for likelihood computations can be improved by replacing
+ functors regF f and dF df with one objects that preforms the likelihood computation once
+ and produces both L(t) and dL(t)/dt. This object will provide methods:
+ MDOUBLE f(MDOUBLE x)
+ MDOUBLE df(MDOUBLE x)
+*/
+// One-dimensional minimization using the derivative (Numerical Recipes
+// "dbrent", ch. 10.3). (ax,bx,cx) is the bracketing triplet, f the function
+// and df its derivative; the minimum is refined to fractional precision ~tol.
+// Returns the minimal value and stores its abscissa in *xmin. Differences
+// from stock NR are marked inline; calls errorMsg::reportError (which quits)
+// after ITMAX iterations.
+
+#define ITMAX 100
+#define ZEPS 1.0e-10
+#define MOV3(a,b,c, d,e,f) (a)=(d);(b)=(e);(c)=(f);
+
+template <typename regF, typename dF>
+MDOUBLE dbrent(MDOUBLE ax, MDOUBLE bx, MDOUBLE cx, regF f,
+ dF df, MDOUBLE tol, MDOUBLE *xmin) {
+
+ int iter,ok1,ok2;
+ MDOUBLE a,b,d=0.0,d1,d2,du,dv,dw,dx,e=0.0;
+ MDOUBLE fu,fv,fw,fx,olde,tol1,tol2,u,u1,u2,v,w,x,xm;
+
+ a=(ax < cx ? ax : cx);
+ b=(ax > cx ? ax : cx);
+ //ensuring x is between a and b (local change vs. stock NR, which assumes it)
+ if (bx>b) { x=w=v=b;b=bx;}
+ else if (bx<a) {x=w=v=a; a=bx;}
+ else x=w=v=bx;
+
+ fw=fv=fx=f(x);
+ assert(fv==fv);// throw an exception if answer is nan = not a number.
+ dw=dv=dx=df(x);
+
+ for (iter=1;iter<=ITMAX;iter++) {
+ xm=0.5*(a+b);
+#ifdef VERBOS
+ //if (iter>10) cout<<"iteration: "<<iter<<" xm = "<<xm<<" x= "<<x<<" a= "<<a<<" b= "<<b<<" fx= "<<fx<<endl;
+#endif
+ tol1=tol*fabs(x)+ZEPS;
+ tol2=2.0*tol1;
+
+ if (fabs(x-xm) <= (tol2-0.5*(b-a))) { // convergence test
+ *xmin=x;
+ return fx;
+ }
+ if (fabs(e) > tol1) {
+ // secant steps through (x,w) and (x,v), using derivative values
+ d1=2.0*(b-a);
+ d2=d1;
+ if (dw != dx) d1=(w-x)*dx/(dx-dw);
+ if (dv != dx) d2=(v-x)*dx/(dx-dv);
+ u1=x+d1;
+ u2=x+d2;
+ // a step is acceptable if it stays in (a,b) and moves downhill
+ ok1 = (a-u1)*(u1-b) > 0.0 && dx*d1 <= 0.0;
+ ok2 = (a-u2)*(u2-b) > 0.0 && dx*d2 <= 0.0;
+ olde=e;
+ e=d;
+ if (ok1 || ok2) {
+ if (ok1 && ok2)
+ d=(fabs(d1) < fabs(d2) ? d1 : d2);
+ else if (ok1)
+ d=d1;
+ else
+ d=d2;
+ if (fabs(d) <= fabs(0.5*olde)) {
+ u=x+d;
+ if (u-a < tol2 || b-u < tol2)
+ d=SIGN(tol1,xm-x);
+ } else {
+ // secant step not converging fast enough: bisect instead
+ d=0.5*(e=(dx >= 0.0 ? a-x : b-x));
+ }
+ } else {
+ d=0.5*(e=(dx >= 0.0 ? a-x : b-x));
+ }
+ } else {
+ d=0.5*(e=(dx >= 0.0 ? a-x : b-x));
+ }
+ if (fabs(d) >= tol1) {
+ u=x+d;
+ fu=f(u);
+ } else {
+ u=x+SIGN(tol1,d);
+ if (u<ax) u=x; // MY LATEST ADDITION! (local change vs. stock NR -- keeps u from stepping below ax)
+ fu=f(u);
+ if (fu > fx) {
+ // the minimal step went uphill: we are done
+ *xmin=x;
+ return fx;
+ }
+ }
+ du=df(u);
+ if (fu <= fx) {
+ if (u >= x) a=x; else b=x;
+ MOV3(v,fv,dv, w,fw,dw)
+ MOV3(w,fw,dw, x,fx,dx)
+ MOV3(x,fx,dx, u,fu,du)
+ } else {
+ if (u < x) a=u; else b=u;
+ if (fu <= fw || w == x) {
+ MOV3(v,fv,dv, w,fw,dw)
+ MOV3(w,fw,dw, u,fu,du)
+ } else if (fu < fv || v == x || v == w) {
+ MOV3(v,fv,dv, u,fu,du)
+ }
+ }
+
+ }
+ errorMsg::reportError("Too many iterations in routine dbrent"); // also quit the program
+ return -1;
+}
+
+/*================================== function rtbis =========================================
+//Using bisection, find the root of the function func known to lie between
+x1 and x2. The returned root is refined until its accuracy is +- xacc.
+Requires func(x1) and func(x2) to have opposite signs; otherwise reportError
+(which quits the program) is called.
+*/
+template <typename regF>
+MDOUBLE rtbis(regF func,MDOUBLE x1, MDOUBLE x2, MDOUBLE xacc) {
+ const int max_number_of_iter = 100;
+
+ MDOUBLE f = func(x1);
+ MDOUBLE fmid = func(x2); // NOTE: shadowed by the loop-local fmid below; only used for the sign check
+ if (f*fmid >=0.0) {
+ errorMsg::reportError(" error in function rtbis, root must be bracketed for bisection in rtbis ");
+ // also quit the program
+ }
+
+ // orient the search so that the interval always starts at the negative side
+ MDOUBLE dx, rtb;
+ if (f<0.0) {
+ dx = x2-x1;
+ rtb = x1;
+ }
+ else {
+ dx = x1-x2;
+ rtb = x2;
+ }
+
+
+ for (int j=1; j <= max_number_of_iter; ++j) {
+ dx *= 0.5; // halve the interval each iteration
+ MDOUBLE xmid = rtb+dx;
+ MDOUBLE fmid = func(xmid);
+ if (fmid <= 0.0) rtb = xmid;
+ if ((fabs(dx) < xacc) || (fmid == 0.0)) return rtb;
+ }
+ errorMsg::reportError("Error in function rtbis..."); // also quit the program...
+ return -1.0;
+}
+
+//Given a function func and an initial guessed range (x1,x2), the routine expands the range
+//geometrically until a root is bracketed by the returned values x1 and x2 (in which case zbrac returns true)
+//or until the range becomes unacceptably large (in which case zbrac returns false).
+//(cf. Numerical Recipes "zbrac". x1 and x2 are in/out parameters: on success they bracket a sign change.)
+template <typename regF>
+bool zbrac(regF func, MDOUBLE &x1, MDOUBLE &x2) {
+ const int NTRY=50;
+ const MDOUBLE FACTOR= 1.6;
+ int j;
+ MDOUBLE f1,f2;
+
+ if (x1 == x2)
+ errorMsg::reportError("Bad initial range in zbrac");
+ f1 = func(x1);
+ f2 = func(x2);
+ for (j = 0; j < NTRY; j++)
+ {
+ if (f1 * f2 < 0.0) // opposite signs: a root lies in [x1,x2]
+ return true;
+ // move the endpoint with the smaller |f| outward, away from the other
+ if (fabs(f1) < fabs(f2))
+ f1=func(x1 += FACTOR*(x1-x2));
+ else
+ f2=func(x2 += FACTOR*(x2-x1));
+ }
+ return false;
+}
+
+// ================================ declarations of numerical helpers (defined in numRec.cpp) ======================================
+
+int MyJacobi(VVdouble &Insym, VVdouble &RightEigenV, Vdouble &EigenValues);
+MDOUBLE sign(MDOUBLE a,MDOUBLE b);
+MDOUBLE pythag(const MDOUBLE a, const MDOUBLE b);
+void houseHolder(VVdouble &mat,VVdouble &Q);
+void tred2(VVdouble &a, Vdouble &d, Vdouble &e);
+void QL(Vdouble &d, Vdouble &e, VVdouble &z);
+void computeEigenSystem(VVdouble &symmetricMatrix,VVdouble &eigenVectros,Vdouble &diagonal);
+MDOUBLE performKSTest(const uniformDistribution& empiricalDist, Vdouble& observedDist); // perform Kolomogorov-Smirnoff test
+MDOUBLE computeProbForKS (const MDOUBLE QsParam); // function called only by performKSTest
+
+
+
+#endif
+
diff --git a/libs/phylogeny/nyCodonModel.cpp b/libs/phylogeny/nyCodonModel.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/libs/phylogeny/nyCodonModel.h b/libs/phylogeny/nyCodonModel.h
new file mode 100644
index 0000000..4ce2ba4
--- /dev/null
+++ b/libs/phylogeny/nyCodonModel.h
@@ -0,0 +1,65 @@
+#ifndef _NY_CODON_MODEL
+#define _NY_CODON_MODEL
+
+#include "replacementModel.h"
+#include "fromQtoPt.h"
+#include "codon.h"
+#include "sequenceContainer.h"
+
+// Codon substitution model parameterized by a synonymous rate, a
+// non-synonymous rate and a transition/transversion ratio (kappa), with
+// codon frequencies _freq; Pij(t) is obtained via the q2pt machinery.
+class nyCodonModel : public replacementModel {
+public:
+
+ explicit nyCodonModel(const codon &inCodonAlpa, const Vdouble & codonFreq, bool bFillQ2pMatrix, const MDOUBLE synRate=1.0, const MDOUBLE nonsynRate=1.0, const MDOUBLE kappa=1.0);
+ virtual ~nyCodonModel();
+ // NOTE(review): inCodonAlpa is a constructor parameter name, not a member of
+ // this class -- as written this should not compile; verify against the .cpp
+ // or a member not visible here.
+ const int alphabetSize() const {return inCodonAlpa.size();}
+ virtual replacementModel* clone() const { return new nyCodonModel(*this); }
+ // transition probability and its first/second time derivatives, all
+ // delegated to the precomputed q2pt eigen-decomposition
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
+ return _q2pt.Pij_t(i,j,d);
+ }
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+ return _q2pt.dPij_dt(i,j,d);
+ }
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+ return _q2pt.d2Pij_dt2(i,j,d);
+ }
+ const MDOUBLE freq(const int i) const {return _freq[i];};
+
+ MDOUBLE getKappa() const{return _kappa;}
+ MDOUBLE getSynRate()const {return _synRate;}
+ MDOUBLE getNonsynRate() const {return _nonsynRate;}
+ VVdouble& getQ() {return _Q;}
+
+ // each single-parameter setter funnels through setParams so Q is rebuilt once
+ void setKappa(const MDOUBLE k) {setParams(_synRate, _nonsynRate, k);}
+ void setSynRate(const MDOUBLE synRate) {setParams(synRate, _nonsynRate, _kappa);}
+ void setNonsynRate(const MDOUBLE nonsynRate) {setParams(_synRate, nonsynRate, _kappa);}
+ void setParams(const MDOUBLE synRate, const MDOUBLE nonsynRate, const MDOUBLE kappa);
+
+ MDOUBLE getQij(const int i,const int j)const {return _Q[i][j];}
+ void norm(MDOUBLE scale);
+ MDOUBLE sumPijQij();
+ bool isFillingQ2pMatrix() const {return _bFillQ2pMatrix;};
+ void updateQ();
+
+private:
+ void init(const codon &inCodonAlpa);
+ void homogenousFreq(const codon &inCodonAlpa);
+ //For each codon position calculates the frequency of each type of codon
+ void calcNucFrequenciesPerPosition(const sequenceContainer& scNuc, const codon &inCodonAlpa);
+ MDOUBLE getModelFreq(int fromCodon, int targetCodon); //this is not the codon frequency but the PI that is used in the Q matrix
+ void initCodonFrequencies(const codon &co);
+
+private:
+ Vdouble _freq; //holds the fequncies of codons
+ MDOUBLE _synRate; //syn
+ MDOUBLE _nonsynRate; // nonsyn
+ MDOUBLE _kappa; //tr/tv
+ q2pt _q2pt;
+ VVdouble _Q;
+ bool _bFillQ2pMatrix; // in case this model is part of a "father model" (multiple stochastic process) then
+ //we don't want to fill the Q2P matrix every time we change a parameter.
+ //The father model is responsible to normalize all its stochastic processes together and then fill the Q2P
+
+};
+
+
+#endif
diff --git a/libs/phylogeny/optGammaMixtureEM.cpp b/libs/phylogeny/optGammaMixtureEM.cpp
new file mode 100644
index 0000000..cf805f7
--- /dev/null
+++ b/libs/phylogeny/optGammaMixtureEM.cpp
@@ -0,0 +1,291 @@
+#include "optGammaMixtureEM.h"
+#include "likelihoodComputation.h"
+#include "numRec.h"
+#include "uniDistribution.h"
+
+#include <fstream>
+#include <algorithm>
+#include <ctime>
+using namespace std;
+using namespace likelihoodComputation;
+
+// Keeps non-owning pointers to the data and tree; clones the stochastic
+// process (owned; released in the destructor).
+// BUG FIX: "&sc;" had been corrupted into the HTML entity character U+227B
+// ("≻") by the mail archive; restored to the address-of expression.
+optGammaMixtureEM::optGammaMixtureEM(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree)
+{
+ _pSc = &sc;
+ _pTree = &inTree;
+ _pSp = new stochasticProcess(cur_sp);
+}
+
+// Releases the cloned stochastic process. delete on a NULL pointer is a
+// no-op, so no explicit guard is needed.
+optGammaMixtureEM::~optGammaMixtureEM()
+{
+ delete _pSp;
+ _pSp = NULL;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+//findBestParamManyStarts: Finds the best gammaMixture from many starting points.
+//The function starts form few starting points.
+//For each point it tries to optimize the likellihood doing only a small number of iterations.
+//It then picks the best points (highest likelihood) and continue the maximization for these points only.
+//The best gammaMixture is stored in _sp and the best likelihood is returned.
+//input Parameters:
+//startPointsNum = the number of starting points.
+//bestStartsNum = the number of best points to continue with the full optimization.
+//startIter = the number of iterations to perform with all starting points.
+//maxIterations = the maximum number of iterations to continue with the best points
+//epsilon = for determining convergence in the maximization process.
+// NOTE(review): assumes 1 <= bestStartsNum <= startPointsNum; otherwise the
+// threshold index below is out of range -- confirm with callers.
+MDOUBLE optGammaMixtureEM::findBestParamManyStarts(const int startPointsNum, const int bestStartsNum, const int startIter, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF)
+{
+ vector<mixtureDistribution> distVec;
+ Vdouble likelihoodVec(startPointsNum);
+ mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ //create starting distributions
+ int i;
+ for (i = 0; i < startPointsNum; ++i)
+ {
+ //the first distribution will be the current one
+ if (i == 0)
+ distVec.push_back(*pMixture);
+ else
+ distVec.push_back(mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15));
+ }
+
+ //make a small number of iterations for all random starts
+ for (i = 0; i < distVec.size(); ++i)
+ {
+ likelihoodVec[i] = optimizeParam(&distVec[i], startIter, epsilon, epsilomQopt, pOutF);
+ }
+
+ //sort results and make full optimization only on the best starts
+ Vdouble sortedL = likelihoodVec;
+ sort(sortedL.begin(),sortedL.end());
+ // smallest likelihood among the bestStartsNum best short runs
+ MDOUBLE threshold = sortedL[sortedL.size()- bestStartsNum];
+ // bestL starts at the overall minimum so any fully-optimized start can beat it
+ MDOUBLE bestL = sortedL[0];
+ int bestDistNum = 0;
+ for (i = 0; i < distVec.size(); ++i)
+ {
+ if (likelihoodVec[i] >= threshold)
+ {
+ MDOUBLE newL = optimizeParam(&distVec[i], maxIterations, epsilon, epsilomQopt, pOutF);
+ if (newL > bestL)
+ {
+ bestL = newL;
+ bestDistNum = i;
+ }
+ }
+ }
+ // setDistribution presumably copies the distribution -- distVec is cleared
+ // immediately afterwards; TODO confirm.
+ _pSp->setDistribution(&distVec[bestDistNum]);
+ distVec.clear();
+ return bestL;
+}
+
+
+// EM optimization of the gamma-mixture parameters.
+// Each iteration: (1) maximizeGammaParam computes sufficient statistics and
+// re-estimates alpha/beta/weights; (2) the tree likelihood is recomputed.
+// Stops when the likelihood improvement is below epsilon, when it decreases
+// (the previous parameters are restored), or after maxIterations iterations.
+// Returns the final log-likelihood; the final distribution is copied back
+// into pInDistribution.
+MDOUBLE optGammaMixtureEM::optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF)
+{
+ stochasticProcess inSp(pInDistribution, _pSp->getPijAccelerator());
+ MDOUBLE curL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree, *_pSc, inSp, NULL);
+
+ // (large commented-out exploratory blocks that recomputed the likelihood
+ // per position/component were removed here; see repository history.)
+
+ if (maxIterations == 0)
+ {
+ // BUG FIX: these two statements were originally placed after the return
+ // and were unreachable; report the starting state before returning.
+ LOG(4,<<endl<<endl<<"starting Gamma Mixture EM optimization..."<<endl);
+ printIter(inSp, 0, curL);
+ return curL;
+ }
+
+ MDOUBLE newL = curL;
+ int it;
+ for (it = 0; it < maxIterations; ++it)
+ {
+ stochasticProcess oldSp(inSp); // saved so we can roll back if L decreases
+ maximizeGammaParam(&inSp, epsilomQopt);
+ newL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree, *_pSc, inSp, NULL);
+ if (newL < curL + epsilon)
+ {
+ //the improvement in Likelihood is smaller than epsilon
+ if (newL < curL)
+ { //ERROR - L went Down! restore the previous parameters
+ cerr<<"likelihood went down!"<<endl<<"oldL = "<<curL<<" newL= "<<newL<<" Diff= "<<newL-curL<<endl;
+ if (pOutF != NULL) *pOutF <<"likelihood went down!"<<endl<<"oldL = "<<curL<<" newL= "<<newL<<endl;
+ *pInDistribution = *(static_cast<mixtureDistribution*>(oldSp.distr()));
+ if (pOutF != NULL) *pOutF <<"after Gamma Mixture EM optimization..."<<endl;
+ printIter(inSp, it, curL);
+ return curL;
+ }
+ else
+ {
+ cerr<<"converged!"<<endl;
+ *pInDistribution = *(static_cast<mixtureDistribution*>(inSp.distr()));
+ if (pOutF != NULL) *pOutF <<"after Gamma Mixture EM optimization..."<<endl;
+ printIter(inSp, it, newL);
+ return newL;
+ }
+ }
+ cerr << "iter " << it <<": cur likelihood= " << curL <<" new likelihood= " << newL <<endl;
+ curL = newL;
+ }
+
+ // maxIterations reached without convergence: keep the latest parameters
+ *pInDistribution = *(static_cast<mixtureDistribution*>(inSp.distr()));
+ if (pOutF != NULL) *pOutF <<"after Gamma Mixture EM optimization..."<<endl;
+ printIter(inSp, it, newL);
+ return newL;
+}
+
+
+// E+M convenience step: gather the EM sufficient statistics for the current
+// process, then maximize the gamma-mixture parameters given those statistics.
+void optGammaMixtureEM::maximizeGammaParam(stochasticProcess* pNewSp, MDOUBLE accuracyRtbis)
+{
+ suffStatGammaMixture stats(*pNewSp, *_pSc, *_pTree);
+ stats.computeStatistics();
+ maximizeGammaParam(stats, pNewSp, accuracyRtbis);
+}
+
+// M-step of the EM: given sufficient statistics (Ak, Bk, Mk per component),
+// re-estimate each component's alpha (root of C_evalAlphaEM via findBestAlpha,
+// clamped to [MINIMUM_ALPHA_PARAM, 15]), its beta = alpha * Mk / Ak, and its
+// probability Mk / seqLen. Assumes getAk(k) != 0 for every component --
+// presumably guaranteed by computeStatistics; TODO confirm.
+void optGammaMixtureEM::maximizeGammaParam(const suffStatGammaMixture & stats,
+ stochasticProcess* pNewSp, MDOUBLE accuracyRtbis)
+{
+ MDOUBLE upperBoundAlpha = 15.0;
+ mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(pNewSp->distr());
+ int numComponents = pMixture->getComponentsNum();
+ Vdouble compProb(numComponents), alphaVec(numComponents), betaVec(numComponents);
+ for (int k = 0; k < numComponents; ++k)
+ {
+ alphaVec[k] = findBestAlpha(stats, k, accuracyRtbis, upperBoundAlpha);
+ betaVec[k] = alphaVec[k] * (stats.getMk(k) / stats.getAk(k));
+ compProb[k] = stats.getMk(k) / _pSc->seqLen();
+ }
+ pMixture->setMixtureParameters(alphaVec, betaVec, compProb);
+}
+
+// Logs the current likelihood and the parameters (alpha, beta, mean, weight)
+// of every mixture component at log level 4.
+void optGammaMixtureEM::printIter(const stochasticProcess& inSp, const int it, const MDOUBLE curL)
+{
+ LOG(4, << "iter " << it <<": cur likelihood= " << curL <<endl);
+ mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(inSp.distr());
+ const int compNum = pMixture->getComponentsNum();
+ for (int comp = 0; comp < compNum; ++comp)
+ {
+ LOG(4, << "comp="<<comp<<" Alp/Beta= "<<pMixture->getAlpha(comp)/pMixture->getBeta(comp)<<" alpha= "<<pMixture->getAlpha(comp) << " beta= " <<pMixture->getBeta(comp)<<" Prob= "<<pMixture->getComponentProb(comp)<<endl);
+ }
+}
+
+
+//findBestAlpha: this function finds the alpha which is the root of the function C_evalAlphaEM().
+//BUT - if there is no root in the range (lowerBoundAlpha, upperBoundAlpha)
+//or - the root is higher than upperBoundAlpha - the function returns upperBoundAlpha
+MDOUBLE optGammaMixtureEM::findBestAlpha(const suffStatGammaMixture& stats, const int compNum, const MDOUBLE accuracyRtbis, const MDOUBLE upperBoundAlpha) const
+{
+ MDOUBLE res = upperBoundAlpha;
+ MDOUBLE lowerBoundAlpha = MINIMUM_ALPHA_PARAM;
+ MDOUBLE upperRange = upperBoundAlpha;
+ MDOUBLE lowerRange = lowerBoundAlpha;
+ // construct the evaluator once and reuse it for bracketing and bisection
+ // (it was previously built twice; a stray ";" was also removed)
+ C_evalAlphaEM evaluator(stats, compNum);
+ // zbrac expands [lowerRange, upperRange] geometrically until a sign change is bracketed
+ bool haveRoot = zbrac(evaluator, lowerRange, upperRange);
+ if (haveRoot == true)
+ res = rtbis(evaluator, lowerRange, upperRange, accuracyRtbis);
+ // clamp back into [lowerBoundAlpha, upperBoundAlpha]: zbrac may have
+ // expanded the search range beyond the allowed bounds
+ if (res > upperBoundAlpha)
+ res = upperBoundAlpha;
+ else if (res < lowerBoundAlpha)
+ res = lowerBoundAlpha;
+ return res;
+}
+
+
+// Debugging utility: was meant to compare the rate posteriors of two
+// stochastic processes via relative entropy and the EM Q function.
+// The entire body is commented out, so this function is currently a no-op.
+void optGammaMixtureEM::checkEntropy(stochasticProcess & oldSp, stochasticProcess & newSp)
+{
+ //the entropy is
+ //sigma_r P(r|D,oldSp)*log(P(r|D,oldSp) / P(r|D,newSp))
+ //VVdouble posteriorBefore,posteriorAfter ;
+ //likelihoodComputation::getPosteriorOfRates(*_pTree, *_pSc, oldSp, posteriorBefore, NULL);
+ //likelihoodComputation::getPosteriorOfRates(*_pTree, *_pSc, newSp, posteriorAfter, NULL);
+
+
+ //MDOUBLE entropyAll = 0.0;
+ //MDOUBLE secondTerm= 0.0;
+ //for (int pos = 0; pos < _pSc->seqLen(); ++pos)
+ //{
+ // MDOUBLE entropyPos = 0.0;
+ // for (int cat = 0; cat < oldSp.categories(); ++cat)
+ // {
+ // entropyPos += posteriorBefore[pos][cat] * log(posteriorBefore[pos][cat] / posteriorAfter[pos][cat]);
+ // secondTerm += posteriorBefore[pos][cat] * log(posteriorAfter[pos][cat]);
+ // }
+ // entropyAll += entropyPos;
+ // //cerr <<"Pos Entropy = "<<entropyPos<<endl;
+ //}
+ //cerr <<endl<<endl<<endl;
+ //cerr <<"All Entropy = "<<entropyAll<<endl;
+
+
+ //calculating Q
+ //MDOUBLE QAll = 0.0;
+ //for (int pos = 0; pos < _pSc->seqLen(); ++pos)
+ //{
+ // MDOUBLE QPos = 0.0;
+ // for (int cat = 0; cat < oldSp.categories(); ++cat)
+ // {
+ // stochasticProcess localSp(&uniDistribution(), newSp.getPijAccelerator());
+ // MDOUBLE rate = newSp.rates(cat);
+ // MDOUBLE L_after = likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, localSp, rate);
+ // QPos += posteriorBefore[pos][cat] * log(L_after * newSp.ratesProb(cat));
+ // }
+ // QAll += QPos;
+ // //cerr <<"Pos Q = "<<QPos<<endl;
+ //}
+ //cerr <<endl<<endl<<endl;
+ //cerr <<"Q ALL= "<<QAll<<endl;
+ //cerr <<"secondTerm = "<<secondTerm<<endl;
+
+}
diff --git a/libs/phylogeny/optGammaMixtureEM.h b/libs/phylogeny/optGammaMixtureEM.h
new file mode 100644
index 0000000..be6b20e
--- /dev/null
+++ b/libs/phylogeny/optGammaMixtureEM.h
@@ -0,0 +1,102 @@
+#ifndef ___OPT_GAMMA_MIXTURE_EM
+#define ___OPT_GAMMA_MIXTURE_EM
+/************************************************************
+optGammaMixtureEM class is used to maximize the gammaMixture parameters.
+The parameters to be optimized are the alpha and beta of each component and the components' probabilities.
+In each iteration:
+(1) The sufficient statistics are calculated.
+(2) Based on these statistics the parameters are optimized.
+The procedure stops when no improvement in the tree likelihood is achieved.
+************************************************************/
+#include "definitions.h"
+#include "suffStatGammaMixture.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "tree.h"
+#include "gammaUtilities.h"
+
+#include <cmath>
+
+// Drives the EM optimization described above: holds non-owning pointers to
+// the sequence data and tree, and an owned copy of the stochastic process
+// whose gamma-mixture distribution is being optimized.
+class optGammaMixtureEM{
+
+public:
+ explicit optGammaMixtureEM(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree);
+ virtual ~optGammaMixtureEM();
+
+ //return the logLikelihood. the final distribution is stored in the stochasticProcess
+ MDOUBLE optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF);
+
+ const stochasticProcess* getSp() const {return _pSp;}
+
+ // multi-start optimization: short EM runs from startPointsNum starting
+ // points, then full optimization of the bestStartsNum best of them
+ MDOUBLE findBestParamManyStarts(const int startPointsNum, const int bestStartsNum, const int startIter, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF = NULL);
+
+
+ void maximizeGammaParam(stochasticProcess* pNewSp, MDOUBLE accuracy);
+ void maximizeGammaParam(const suffStatGammaMixture & stats, stochasticProcess* pNewSp, MDOUBLE accuracy);
+private:
+ void printIter(const stochasticProcess& pInSp, const int it, const MDOUBLE curL);
+
+
+ MDOUBLE findBestAlpha(const suffStatGammaMixture& stats, const int compNum, const MDOUBLE accuracy, const MDOUBLE upperBoundAlpha) const;
+
+ void checkEntropy(stochasticProcess & oldSp, stochasticProcess & inSp);
+
+
+private:
+ stochasticProcess* _pSp; // owned copy (deleted in the destructor)
+ const sequenceContainer* _pSc; // non-owning
+ const tree* _pTree; // non-owning
+};
+
+
+
+
+// Functor whose root (in x = alpha) is the EM update equation for the alpha
+// parameter of mixture component compNum, given sufficient statistics
+// Ak, Bk, Mk. Used by zbrac/rtbis in optGammaMixtureEM::findBestAlpha.
+// Cleanup: removed the unused duplicate computations (res1, debugRes) that
+// clang-tidy/compilers flag as dead stores; results are unchanged.
+class C_evalAlphaEM{
+public:
+ explicit C_evalAlphaEM(const suffStatGammaMixture& stats, const int compNum)
+ :_compNum(compNum) {_pStats = &stats;}
+
+public:
+ MDOUBLE operator() (const MDOUBLE x)
+ {
+ MDOUBLE Ak = _pStats->getAk(_compNum);
+ MDOUBLE Bk = _pStats->getBk(_compNum);
+ MDOUBLE Mk = _pStats->getMk(_compNum);
+
+ // zero of: log(x) - digamma(x) + log(Mk/Ak) + Bk/Mk
+ MDOUBLE res = log(x) - gammaDerivative(x) + log(Mk) - log(Ak) + (Bk / Mk);
+ //cerr<<"+++++++ x = "<<x<<" Ak = "<<Ak<<" Bk = "<<Bk<<" Mk = "<<Mk<<" RES = "<<res<<endl;
+// when x is beta (checking)
+// MDOUBLE res = Mk * log(x) - Mk * diGamma(Ak * x / Mk) + Bk;
+ return res;
+ }
+
+private:
+ // asymptotic series variant with a +1/(2x) term; kept for reference, unused
+ MDOUBLE diGammaPlus(MDOUBLE x) const
+ {
+ MDOUBLE res = log(x) + (0.5/x) - (0.083333333333333333333333333333333/(x*x)) + (0.0083333333333333333333333333333333/(x*x*x*x)) - (0.003968253968253968253968253968254/(pow(x, 6)));
+ return res;
+ }
+ // digamma(x) via its asymptotic expansion, accurate for x >= 1
+ MDOUBLE diGamma(MDOUBLE x) const
+ {
+ //if x<1: use the identity digamma(z) = digamma(z+1) - (1/z), see http://mathworld.wolfram.com/DigammaFunction.html
+ if (x < 1)
+ return (diGamma(x+1) - (1.0 / x));
+ MDOUBLE res = log(x) - (1/(2*x)) - (1/(12*x*x)) + (1/(120*pow(x, 4))) - (1/(252*pow(x, 6)));
+ return res;
+ }
+
+ // derivative of log Gamma, i.e. the digamma function
+ MDOUBLE gammaDerivative(MDOUBLE x) const
+ {
+ //MDOUBLE resCheck = (gammln(x+0.001) - gammln(x)) /0.001;
+ MDOUBLE res = diGamma(x);
+ return res;
+ }
+private:
+ const suffStatGammaMixture* _pStats; // non-owning
+ const int _compNum;
+};
+#endif
+
diff --git a/libs/phylogeny/optGammaMixtureLS.cpp b/libs/phylogeny/optGammaMixtureLS.cpp
new file mode 100644
index 0000000..004daca
--- /dev/null
+++ b/libs/phylogeny/optGammaMixtureLS.cpp
@@ -0,0 +1,261 @@
+#include "optGammaMixtureLS.h"
+#include "likelihoodComputation.h"
+#include "numRec.h"
+//#include "optimizer.h"
+//#include "NRconjugateGradient.h"
+
+#include <fstream>
+#include <algorithm>
+#include <ctime>
+using namespace std;
+using namespace likelihoodComputation;
+
+// Keeps non-owning pointers to the data, tree and the process to optimize,
+// plus the upper search bounds for the brent line searches on alpha and beta.
+// BUG FIX: "&sc;" had been corrupted into the HTML entity character U+227B
+// ("≻") by the mail archive; restored to the address-of expression.
+optGammaMixtureLS::optGammaMixtureLS(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, MDOUBLE upperBoundAlpha/*=15.0*/, MDOUBLE upperBoundBeta/*=15.0*/,unObservableData* unObservableData_p)
+{
+ _pSc = &sc;
+ _pTree = &inTree;
+ _pSp = pSp;
+ _upperBoundAlpha = upperBoundAlpha;
+ _upperBoundBeta = upperBoundBeta;
+ _unObservableData_p = unObservableData_p;
+}
+
+
+optGammaMixtureLS::~optGammaMixtureLS()
+{
+ // nothing to release: all pointers held by this class are non-owning
+}
+
+// Convenience overload: optimizes the mixture distribution held by _pSp.
+MDOUBLE optGammaMixtureLS::optimizeParam(const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType)
+{
+ mixtureDistribution * pMix = static_cast<mixtureDistribution*>(_pSp->distr());
+ return optimizeParam(pMix, maxIterations, tol, pWeights, optType);
+}
+
+
+// Dispatches on the requested optimization algorithm. Only the
+// one-dimensional line search is currently implemented (POWELL and
+// CONJUGATE_DERIVATIVES are commented out here and in the header).
+// Cleanup: removed the unreachable "break;" that followed the return.
+MDOUBLE optGammaMixtureLS::optimizeParam(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType)
+{
+ switch (optType)
+ {
+ case ONE_DIM:
+ return optimizeParamOneDim(pMixture, maxIterations, tol, pWeights);
+ //case POWELL:
+ // return optimizeParamPowell(pMixture, maxIterations, tol, pWeights, pOutF);
+ //case CONJUGATE_DERIVATIVES:
+ // return optimizeParamConjugateDeriv(pMixture, maxIterations, tol, pWeights, pOutF);
+ default:
+ errorMsg::reportError("unknown optimization algorithm in optGammaMixtureLS::optimizeParam()");
+ return -1;
+ }
+}
+
+
+//this function finds the best mixture param using a line search maximization. Each time only one parameter is optimized using the regular brent algorithm.
+//CAN BE USED FOR 2 COMPONENTS ONLY (the maximization on components probabilities maximizes only P1, the prob of the first component, while the prob of the second is set to 1-P1)
+//total there are 5 parameters to optimize: alpha1, beta1, alpha2, beta2, and P1
+// brent() minimizes, so each C_eval* functor returns -LL and the result is
+// negated back. After every accepted/rejected step the "missing data"
+// correction is refreshed when _unObservableData_p is set.
+MDOUBLE optGammaMixtureLS::optimizeParamOneDim(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights)
+{
+ MDOUBLE lowerBound = 0.0; // lower bracket for the alpha/beta searches
+
+ MDOUBLE newL = VERYSMALL; //newL is the LL after a single param optimization.
+ MDOUBLE curL = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree,*_pSc,*_pSp,pWeights,_unObservableData_p); //the current LL.
+ MDOUBLE prevIterL = VERYSMALL; //The LL of the previous iteration. the loop quits if the increase in LL between iterations is smaller than tol
+ MDOUBLE bestA=0, bestB=0, bestW = 0;
+
+ for (int it = 0; it < maxIterations; ++it)
+ {
+ prevIterL = curL;
+
+ for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp)
+ {
+ //optimize alpha
+ MDOUBLE oldAlpha = pMixture->getAlpha(comp);
+ newL = -brent(lowerBound,oldAlpha, _upperBoundAlpha, C_evalAlphaMixture(*_pTree,*_pSc,_pSp,comp,pWeights,_unObservableData_p), tol, &bestA);
+ if (newL < curL)
+ {
+ //the Likelihood went down - restore the previous value
+ pMixture->setAlpha(oldAlpha, comp);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(*_pTree,_pSp);
+ }
+ LOG(5, <<"likelihood went down in optGammaMixtureLS::optimizeParam()"<<endl<<"old L= "<<curL<<" newL = "<<newL<<endl);
+ }
+ else
+ {
+ pMixture->setAlpha(bestA, comp);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(*_pTree,_pSp);
+ }
+ curL = newL;
+ LOG(7, <<"iteration: "<<it<<" Optimize alpha comp"<<comp<<" new Likelihood = "<<curL<<endl);
+ }
+
+ //optimize beta
+ MDOUBLE oldBeta = pMixture->getBeta(comp);
+ newL = -brent(lowerBound,oldBeta,_upperBoundBeta, C_evalBetaMixture(*_pTree,*_pSc,_pSp,comp,pWeights,_unObservableData_p), tol, &bestB);
+ if (newL < curL)
+ {
+ //the Likelihood went down - restore the previous value
+ pMixture->setBeta(oldBeta, comp);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(*_pTree,_pSp);
+ }
+ LOG(5, <<"likelihood went down in optGammaMixtureLS::optimizeParam()"<<endl<<"old L= "<<curL<<" newL = "<<newL<<endl);
+ }
+ else
+ {
+ pMixture->setBeta(bestB, comp);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(*_pTree,_pSp);
+ }
+ curL = newL;
+ LOG(7, <<"iteration: "<<it<<" Optimize beta comp"<<comp<<" new Likelihood = "<<curL<<endl);
+ }
+ //optimize components probability.
+ if (pMixture->getComponentsNum() == 1)
+ continue;
+
+ // BUG FIX: these bounds were swapped (upperBound=0.0, lowerBound=1.0)
+ // and the local "lowerBound" shadowed the outer one. brent() orders its
+ // bracket internally so behavior was unaffected, but the names were
+ // misleading; they are now named and valued correctly for a probability.
+ MDOUBLE weightLowerBound = 0.0;
+ MDOUBLE weightUpperBound = 1.0;
+ MDOUBLE oldWeight = pMixture->getComponentWeight(comp);
+ newL = -brent(weightLowerBound, oldWeight, weightUpperBound, C_evalProbMixture(*_pTree,*_pSc, _pSp, comp, pWeights), tol, &bestW);
+ if (newL < curL)
+ {
+ //the Likelihood went down - restore the previous value
+ pMixture->setComponentWeight(oldWeight, comp);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(*_pTree,_pSp);
+ }
+ LOG(5, <<"likelihood went down in optGammaMixtureLS::optimizeParam()"<<endl<<"old L= "<<curL<<" newL = "<<newL<<endl);
+ }
+ else
+ {
+ pMixture->setComponentWeight(bestW, comp);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(*_pTree,_pSp);
+ }
+ curL = newL;
+ LOG(7, <<"iteration: "<<it<<" Optimize Prob"<<" new Likelihood = "<<curL<<endl);
+ }
+ }
+ pMixture->normalizeProbabilities(); // why again ???
+ printIter(pMixture, it, curL);
+ if (curL < prevIterL + tol){
+ return max(curL,prevIterL); // not to reduce likelihood
+ }
+ }
+ return curL;
+}
+
+
+
+/*
+//this function uses a line search maximization. The difference is that it does not use the naive method (optimize each parameter seperatly untill convergence)
+//but uses Powel's quadratically convergent method (Numerical Recipes pp 420).
+//CAN BE USED FOR 2 COMPONENTS ONLY (the maximization on components probabilities maximize only P1, the prob of the first component, while the prob of the second is set to 1-P1)
+//total there are 5 parameters to optimize: alpha1, beta1, alpha2, beta2, and P1
+MDOUBLE optGammaMixtureLS::optimizeParamPowell(mixtureDistribution* pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
+{
+ if (pMixture->getComponentsNum() == 1)
+ return optimizeParam1CompPowel(pMixture, maxIterations, tol, pWeights, pOutF);
+ else return optimizeParamManyCompPowel(pMixture, maxIterations, tol, pWeights, pOutF);
+}
+
+
+MDOUBLE optGammaMixtureLS::optimizeParam1CompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
+{
+ tree tree1(*_pTree);
+ sequenceContainer sc1(*_pSc);
+
+ C_evalGammaMixture optPowell(&tree1, &sc1, _pSp, NULL);
+ optimizer<C_evalGammaMixture> opt(optPowell);
+ Vdouble param(2);
+ param[0] = pMixture->getAlpha(0);
+ param[1] = pMixture->getBeta(0);
+
+ MDOUBLE res = opt.findmin(param);
+ return res;
+}
+
+MDOUBLE optGammaMixtureLS::optimizeParamManyCompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
+{
+ tree tree1(*_pTree);
+ sequenceContainer sc1(*_pSc);
+
+ Vdouble param(pMixture->getComponentsNum() * 3 - 1);
+ int paramNum = 0;
+ for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp)
+ {
+ param[paramNum++] = pMixture->getAlpha(comp);
+ param[paramNum++] = pMixture->getBeta(comp);
+ param[paramNum++] = pMixture->getComponentWeight(comp);
+ }
+ C_evalGammaMixture optPowell(&tree1, &sc1, _pSp, NULL);
+ optimizer<C_evalGammaMixture> opt(optPowell);
+ MDOUBLE res = opt.findmin(param);
+ cerr <<"optimized Powell result = "<< res<<endl;
+ return res;
+}
+*/
+
+/*
+MDOUBLE optGammaMixtureLS::optimizeParamConjugateDeriv(
+ mixtureDistribution * pMixture, const int maxIterations,
+ const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
+{
+ tree tree1(*_pTree);
+ sequenceContainer sc1(*_pSc);
+
+ Vdouble param(pMixture->getComponentsNum() * 3);
+ int paramNum = 0;
+ int comp;
+ for (comp = 0; comp < pMixture->getComponentsNum(); ++comp)
+ {
+ param[paramNum++] = pMixture->getAlpha(comp);
+ param[paramNum++] = pMixture->getBeta(comp);
+ param[paramNum++] = pMixture->getComponentWeight(comp);
+ }
+ C_evalGammaMixture func(&tree1, &sc1, _pSp, pWeights);
+ NRconjugateGradient<C_evalGammaMixture> opt;
+ if (pOutF != NULL)
+ {
+ *pOutF <<endl<<endl<<"starting NRconjugateGradient optimization..."<<endl;
+ printIter(pMixture, 0, 0.0, pOutF);
+ }
+
+ MDOUBLE res = opt.findmin(param, &func, tol);
+
+ paramNum = 0;
+ for (comp = 0; comp < pMixture->getComponentsNum(); ++comp)
+ {
+ pMixture->setAlpha(param[paramNum++], comp);
+ pMixture->setBeta(param[paramNum++], comp);
+ pMixture->setComponentWeight(param[paramNum++], comp);
+ }
+ pMixture->normalizeProbabilities();
+ if (pOutF != NULL)
+ {
+ *pOutF <<endl<<endl<<"after NRconjugateGradient optimization"<<endl;
+ printIter(pMixture, 0, res, pOutF);
+ }
+ cerr <<"optimized Conjugate Deriv result = "<< res<<endl;
+ return res;
+}
+*/
+
+
+// Logs the current likelihood and the parameters (alpha, beta, mean, weight)
+// of every mixture component at log level 4.
+void optGammaMixtureLS::printIter(const mixtureDistribution * pMixture, const int it, const MDOUBLE curL)
+{
+ LOG(4,<< "iter " << it <<": cur likelihood= " << curL <<endl);
+ const int compNum = pMixture->getComponentsNum();
+ for (int comp = 0; comp < compNum; ++comp)
+ {
+ LOG(4, << "comp="<<comp<<" Alp/Beta= "<<pMixture->getAlpha(comp)/pMixture->getBeta(comp)<<" alpha= "<<pMixture->getAlpha(comp) << " beta= " <<pMixture->getBeta(comp)<<" Prob= "<<pMixture->getComponentProb(comp)<<endl);
+ }
+}
diff --git a/libs/phylogeny/optGammaMixtureLS.h b/libs/phylogeny/optGammaMixtureLS.h
new file mode 100644
index 0000000..a1fe8bd
--- /dev/null
+++ b/libs/phylogeny/optGammaMixtureLS.h
@@ -0,0 +1,275 @@
+#ifndef ___OPT_GAMMA_MIXTURE_LS
+#define ___OPT_GAMMA_MIXTURE_LS
+/************************************************************
+optGammaMixtureLS class is used to maximize the gammaMixture parameters via a line search maximization.
+The parameters to be optimized are the alpha and beta of each component and the component probabilities.
+In each iteration:
+all parameters are optimized iteratively
+The procedure stops when no improvement in the tree likelihood is achieved
+************************************************************/
+#include "definitions.h"
+#include "suffStatGammaMixture.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "tree.h"
+#include "gammaUtilities.h"
+#include "likelihoodComputation.h"
+#include "unObservableData.h"
+
+
+
+#include <cmath>
+
class optGammaMixtureLS{
public:
	// Available line-search algorithms. POWELL and CONJUGATE_DERIVATIVES
	// are disabled in this version (see the commented-out code in the .cpp).
	enum optAlg {ONE_DIM/*, POWELL, CONJUGATE_DERIVATIVES*/};

public:
	explicit optGammaMixtureLS(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, MDOUBLE upperBoundAlpha =15.0, MDOUBLE upperBoundBeta =15.0, unObservableData* unObservableData_p=NULL);
	virtual ~optGammaMixtureLS();

	//return the logLikelihood. the final distribution is stored in the stochasticProcess
	MDOUBLE optimizeParam(const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType);
	MDOUBLE optimizeParam(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, optAlg optType);


private:
	// Logs the likelihood and each component's alpha/beta/probability.
	void printIter(const mixtureDistribution * pMixture, const int it, const MDOUBLE curL);

	// Optimizes each parameter in turn with a one-dimensional (brent) search.
	MDOUBLE optimizeParamOneDim(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights);
	//MDOUBLE optimizeParamPowell(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL);
	//MDOUBLE optimizeParamConjugateDeriv(mixtureDistribution *pMixture,
	//	const int maxIterations, const MDOUBLE tol, const Vdouble *pWeights, ofstream* pOutF);

	//MDOUBLE optimizeParam1CompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL);
	//MDOUBLE optimizeParamManyCompPowel(mixtureDistribution * pMixture, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF=NULL);

private:
	// NOTE(review): these look like non-owning pointers stored from the
	// ctor arguments -- confirm against the .cpp before relying on it.
	stochasticProcess* _pSp;
	const sequenceContainer* _pSc;
	const tree* _pTree;
	unObservableData* _unObservableData_p;

	MDOUBLE _upperBoundAlpha;	// upper bracket limit for the alpha line search
	MDOUBLE _upperBoundBeta;	// upper bracket limit for the beta line search
};
+
+//line search classes for brent
+class C_evalAlphaMixture{
+public:
+ C_evalAlphaMixture(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcess* pSp,
+ const int componetNumber,
+ const Vdouble * weights = NULL,
+ unObservableData* unObservableData_p=NULL)
+ : _et(et),_sc(sc),_weights(weights),_pSp(pSp), _compNum(componetNumber)
+ {
+ if(unObservableData_p)
+ _unObservableData_p = unObservableData_p->clone();
+ else
+ _unObservableData_p = NULL;
+ };
+ virtual ~C_evalAlphaMixture(){
+ if(_unObservableData_p) delete _unObservableData_p;
+ }
+
+private:
+ const tree& _et;
+ const sequenceContainer& _sc;
+ const Vdouble * _weights;
+ unObservableData* _unObservableData_p;
+ stochasticProcess* _pSp;
+ const int _compNum;
+public:
+ MDOUBLE operator() (MDOUBLE alpha) {
+ if (_pSp->categories() == 1) {
+ errorMsg::reportError(" one category when trying to optimize alpha");
+ }
+ mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ pMixture->setAlpha(alpha, _compNum);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(_et,_pSp);
+ }
+ MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,*_pSp,_weights,_unObservableData_p);
+#ifdef VERBOS
+ cerr<<"Component = "<<_compNum<<" with alpha = "<<alpha<<" logL = "<<res<<endl;
+#endif
+ return -res;
+ }
+};
+
+class C_evalBetaMixture{
+public:
+ C_evalBetaMixture(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcess* pSp,
+ const int componetNumber,
+ const Vdouble * weights = NULL,
+ unObservableData* unObservableData_p=NULL)
+ : _et(et),_sc(sc),_weights(weights),_pSp(pSp), _compNum(componetNumber)
+ {
+ if(unObservableData_p)
+ _unObservableData_p = unObservableData_p->clone();
+ else
+ _unObservableData_p = NULL;
+ };
+ virtual ~C_evalBetaMixture(){
+ if(_unObservableData_p) delete _unObservableData_p;
+ }
+
+private:
+ const tree& _et;
+ const sequenceContainer& _sc;
+ const Vdouble * _weights;
+ unObservableData* _unObservableData_p;
+ stochasticProcess* _pSp;
+ const int _compNum;
+public:
+ MDOUBLE operator() (MDOUBLE beta) {
+ if (_pSp->categories() == 1) {
+ errorMsg::reportError(" one category when trying to optimize beta");
+ }
+ mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ pMixture->setBeta(beta, _compNum);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(_et,_pSp);
+ }
+ MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,*_pSp,_weights,_unObservableData_p);
+#ifdef VERBOS
+ cerr<<"Component = "<<_compNum<<" with beta = "<<beta<<" logL = "<<res<<endl;
+#endif
+ return -res;
+ }
+};
+
+
+class C_evalProbMixture{
+public:
+ C_evalProbMixture(const tree& et,
+ const sequenceContainer& sc,
+ stochasticProcess* pSp,
+ const int componetNumber,
+ const Vdouble * weights = NULL,
+ unObservableData* unObservableData_p=NULL)
+ : _et(et),_sc(sc),_weights(weights),_pSp(pSp), _compNum(componetNumber)
+ {
+ if(unObservableData_p)
+ _unObservableData_p = unObservableData_p->clone();
+ else
+ _unObservableData_p = NULL;
+ }
+ virtual ~C_evalProbMixture(){
+ if(_unObservableData_p) delete _unObservableData_p;
+ }
+
+private:
+ const tree& _et;
+ const sequenceContainer& _sc;
+ const Vdouble * _weights;
+ stochasticProcess* _pSp;
+ const int _compNum;
+ unObservableData* _unObservableData_p;
+public:
+ MDOUBLE operator() (MDOUBLE w) {
+ mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ pMixture->setComponentWeight(w, _compNum);
+ if(_unObservableData_p){
+ _unObservableData_p->setLforMissingData(_et,_pSp);
+ }
+ MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,_sc,*_pSp,_weights,_unObservableData_p);
+ return -res;
+ }
+};
+
+
+/*
+//the function to optimize using the conjugate Gradient algorithm
+class C_evalGammaMixture {
+public:
+ C_evalGammaMixture(tree* pT,
+ sequenceContainer* pSc,
+ stochasticProcess* pSp,
+ const Vdouble * weights = NULL,
+ const MDOUBLE gradEps = 0.001)
+ : _pTree(pT),_pSc(pSc),_pWeights(weights),_pSp(pSp), _gradEpsilon(gradEps)
+ {};
+
+
+ C_evalGammaMixture() {}
+
+ C_evalGammaMixture& operator= (const C_evalGammaMixture &other)
+ {
+ _pTree = other._pTree;
+ _pSc = other._pSc;
+ _pWeights = other._pWeights;
+ _pSp = other._pSp;
+ _gradEpsilon = other._gradEpsilon;
+ return *this;
+ }
+
+ MDOUBLE operator () (Vdouble ¶m){
+ mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+
+ int paramNum = 0;
+ for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp)
+ {
+ pMixture->setAlpha(param[paramNum++], comp);
+ pMixture->setBeta(param[paramNum++], comp);
+ pMixture->setComponentWeight(param[paramNum++], comp);
+ }
+ pMixture->normalizeProbabilities();
+
+ if (checkOutOfBounds(pMixture) == true)
+ return 1000000000;
+
+ MDOUBLE res = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree,*_pSc,*_pSp,_pWeights);
+ return -res;
+ }
+
+ void dfunc(const Vdouble ¶msIn, Vdouble& grads){
+ if (paramsIn.size() != grads.size())
+ errorMsg::reportError("C_evalGammaMixture::dfunc(): vectors of prameters and gradients are not the same size");
+ Vdouble myx = paramsIn; // temporary vector, since x is const.
+
+ // calc the likelihood at the current point
+ MDOUBLE fa = (*this)(myx);
+
+ // then calc likelihood at param+deltah for each param to approximate the derivative.
+ int curParam;
+ for(curParam=0; curParam < paramsIn.size(); curParam++)
+ {
+ myx[curParam] += _gradEpsilon;
+ MDOUBLE fb = (*this)(myx);
+ grads[curParam] = (fb - fa)/_gradEpsilon;
+ myx[curParam] -= _gradEpsilon;
+ }
+ }
+
+private:
+ bool checkOutOfBounds(mixtureDistribution * pMixture) {
+ for (int comp = 0; comp < pMixture->getComponentsNum(); ++comp)
+ {
+ if ((pMixture->getAlpha(comp) >= 15) || (pMixture->getAlpha(comp) <= 0.05))
+ return true;
+ if ((pMixture->getBeta(comp) >= 15) || (pMixture->getBeta(comp) <= 0.05))
+ return true;
+ if ((pMixture->getComponentProb(comp) > 1.0) || (pMixture->getComponentProb(comp) < 0.0))
+ return true;
+ }
+ return false;
+ }
+
+private:
+ tree* _pTree;
+ sequenceContainer* _pSc;
+ const Vdouble * _pWeights;
+ stochasticProcess* _pSp;
+ MDOUBLE _gradEpsilon; //the epsilon to calculate the gradiante
+};
+*/
+
+
+
+#endif
+
diff --git a/libs/phylogeny/pDistance.h b/libs/phylogeny/pDistance.h
new file mode 100644
index 0000000..8d8fd7d
--- /dev/null
+++ b/libs/phylogeny/pDistance.h
@@ -0,0 +1,37 @@
+// $Id: pDistance.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___P_DISTANCE
+#define ___P_DISTANCE
+
+#include "definitions.h"
+#include "distanceMethod.h"
+/*********************************************************
+p distance computes distance by counting number of differences and dividing by length of seq.
+Weights are an input vector for giving additional weight to positions in the sequences.
+*******************************************************/
+class pDistance : public distanceMethod {
+public:
+ explicit pDistance(){}
+ const MDOUBLE giveDistance( const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL) const {//score is not used here
+ MDOUBLE p =0;
+ if (weights == NULL) {
+ for (int i = 0; i < s1.seqLen() ; ++i) if (s1[i] != s2[i]) p++;
+ p = p/s1.seqLen();
+ } else {
+ MDOUBLE len=0;
+ for (int i = 0; i < s1.seqLen() ; ++i) {
+ len +=((*weights)[i]);
+ if (s1[i] != s2[i]) p+=((*weights)[i]);
+ }
+ p = p/len;
+ }
+ return p;
+ }
+ virtual pDistance* clone() const {return new pDistance(*this);}
+
+};
+
+#endif
diff --git a/libs/phylogeny/pairwiseGammaDistance.cpp b/libs/phylogeny/pairwiseGammaDistance.cpp
new file mode 100644
index 0000000..21da241
--- /dev/null
+++ b/libs/phylogeny/pairwiseGammaDistance.cpp
@@ -0,0 +1,158 @@
+// $Id: pairwiseGammaDistance.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "pairwiseGammaDistance.h"
+#include "numRec.h"
+#include "countTableComponent.h"
+#include "likeDist.h"
+#include "uniDistribution.h"
+#include <cmath>
+
+// Local utility functions
// Initial distance estimate: the ML distance under a rate-homogeneous model
// (uniDistribution), used as the starting point for the distance/alpha
// alternation in giveDistance().
MDOUBLE pairwiseGammaDistance::giveInitialGuessOfDistance(
	const sequence& s1,
	const sequence& s2,
	const vector<MDOUBLE> * weights,
	MDOUBLE* score) const {
	uniDistribution ud;
	// NOTE(review): ud is a stack local; this presumes stochasticProcess
	// copies/clones the distribution it is handed -- confirm, otherwise
	// uniSp must not outlive ud (here it does not).
	stochasticProcess uniSp(&ud,_sp.getPijAccelerator());
	likeDist ld(uniSp);
	return (ld.giveDistance(s1,s2,weights,score));
}
+
// Functor for brent: with the pairwise distance held fixed, evaluates minus
// the log-likelihood as a function of the gamma-ASRV alpha parameter.
class C_eval_gammaMLAlpha{
private:
	const stochasticProcess& _sp;
	const sequence& _s1;
	const sequence& _s2;
	const MDOUBLE _distance;	// the fixed pairwise distance
	const Vdouble* _weights;	// optional per-position weights (may be NULL)
	//	const VVdouble& _posteriorProb; // pos, rate
public:
	C_eval_gammaMLAlpha(const stochasticProcess& sp,
		const sequence& s1,
		const sequence& s2,
		const MDOUBLE distance,
		//		const VVdouble& posteriorProb,
		const Vdouble  * weights): _sp(sp),
		_s1(s1),
		_s2(s2),
		_distance(distance),
		_weights(weights)
		//		_posteriorProb(posteriorProb)
	{};

	// this cast is required as the distribution within the
	// stochasticProcess is kept as the parent "distribution" class that
	// knows nothing of Alpha
	void setAlpha(MDOUBLE alpha) {
		(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	}


	// Returns -logL so that brent (a minimizer) maximizes the likelihood.
	MDOUBLE operator() (MDOUBLE alpha) {
		setAlpha(alpha);
		MDOUBLE likelihood = likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,_distance,_weights);
		LOG(11,<<"check alpha="<<alpha<<", bl="<<_distance<<" gives "<<likelihood<<endl);
		return -likelihood;
	};
};
+
+// returns the best alpha for a given distance
+MDOUBLE pairwiseGammaDistance::optimizeAlphaFixedDist(const sequence& s1,
+ const sequence& s2,
+ stochasticProcess & sp,
+ const MDOUBLE branchL,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score) const { // changes sp.
+ MDOUBLE bestA=0.0;
+ MDOUBLE bestQ=0.0;
+ const MDOUBLE upperBoundOnAlpha = 15.0;
+ const MDOUBLE epsilonAlphaOptimization = 0.01;
+ const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
+ const MDOUBLE bx=cx*0.3;
+ const MDOUBLE ax=0.0;
+
+
+ bestQ = -brent(ax,bx,cx,
+ C_eval_gammaMLAlpha(sp,s1,s2,branchL,weights),
+ epsilonAlphaOptimization,
+ &bestA);
+ (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
+ if (score) *score = bestQ;
+ return bestA;
+}
+
// Functor for brent: evaluates the likelihood of a fixed branch length from
// a pre-computed count table, as a function of the gamma alpha parameter.
// Presumably C_evalLikeDist returns -logL (a value to minimize) -- the
// caller negates brent's result; confirm against likeDist.h.
class C_evalAlphaForPairOfSeq{
private:
	const countTableComponentGam& _ctc;
	stochasticProcess& _sp;
	const MDOUBLE _branchL;	// the fixed branch length (pairwise distance)
public:
	C_evalAlphaForPairOfSeq(const countTableComponentGam& ctc,
		const MDOUBLE branchL,
		stochasticProcess& sp):_ctc(ctc), _sp(sp), _branchL(branchL) {};

	MDOUBLE operator() (MDOUBLE alpha) {
		// cast needed: the stochasticProcess stores the parent
		// "distribution" class, which knows nothing of alpha
		(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
		C_evalLikeDist cev(_ctc,_sp);
		MDOUBLE L=cev(_branchL);
		LOG(10,<<"check alpha="<<alpha<<", bl="<<_branchL<<" gives "<<L<<endl);
		return L;
	};
};
+
+// returns the best alpha for a given distance
+MDOUBLE pairwiseGammaDistance::optimizeAlphaFixedDist(stochasticProcess & sp,
+ const countTableComponentGam & ctc,
+ const MDOUBLE branchL,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score) const { // changes sp.
+ MDOUBLE bestA=0.0;
+ MDOUBLE bestQ=0.0;
+ const MDOUBLE upperBoundOnAlpha = 15.0;
+ const MDOUBLE epsilonAlphaOptimization = 0.01;
+ const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
+ const MDOUBLE bx=cx*0.3;
+ const MDOUBLE ax=0.0;
+
+
+ bestQ = -brent(ax,bx,cx,
+ C_evalAlphaForPairOfSeq(ctc,branchL,sp),
+ epsilonAlphaOptimization,
+ &bestA);
+ (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
+ if (score) *score = bestQ;
+ return bestA;
+}
+
// ML pairwise distance with per-pair alpha optimization. Alternates between
// (1) optimizing alpha for the current distance and (2) re-estimating the
// distance under the new alpha, until the distance change drops below _toll
// or maxIter iterations are reached.
// weights: optional per-position weights (may be NULL)
// score:   if non-NULL, receives the final log-likelihood
// alpha:   if non-NULL, receives the last optimized alpha
const MDOUBLE pairwiseGammaDistance::giveDistance(const sequence& s1,
						  const sequence& s2,
						  const vector<MDOUBLE> * weights,
						  MDOUBLE* score,
						  MDOUBLE* alpha) const {

	MDOUBLE resL = 0.0;
	// start from the ML distance under a rate-homogeneous model
	MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL);

	countTableComponentGam ctc; // from technical reasons.

	// work on a copy so the shared stochastic process is left untouched
	stochasticProcess tmpSp(_sp);

	const int maxIter = 30;
	MDOUBLE newDist = 0.0;
	MDOUBLE lastBestAlpha = 0.0;
	for (int i=0; i < maxIter; ++i) {
		lastBestAlpha = optimizeAlphaFixedDist(s1, s2, tmpSp, currentDistance, weights, &resL); // changes sp.
		LOG(8,<<"lastBestAlpha="<<lastBestAlpha<<"("<<"\t L="<<resL<<"\t");
		likeDist tmpld(tmpSp); // we must create a new ld, that will include the stochastic process with the new alpha
		newDist = tmpld.giveDistance(s1, s2, weights, &resL);
		LOG(8,<<"dist="<<newDist<<"(L="<<resL<<")"<<endl);
		if (fabs(newDist-currentDistance)<_toll) break;	// converged
		currentDistance = newDist;
	}
	if (score) *score = resL;
	if (alpha) *alpha = lastBestAlpha;
	assert (newDist >=0);
	return newDist;
}
+
diff --git a/libs/phylogeny/pairwiseGammaDistance.h b/libs/phylogeny/pairwiseGammaDistance.h
new file mode 100644
index 0000000..e945ff7
--- /dev/null
+++ b/libs/phylogeny/pairwiseGammaDistance.h
@@ -0,0 +1,63 @@
+// $Id: pairwiseGammaDistance.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef PAIRWISE_GAMMA_DISTANCE_H
+#define PAIRWISE_GAMMA_DISTANCE_H
+
+#include "likeDist.h"
+#include "stochasticProcess.h"
+#include "definitions.h"
+#include "sequence.h"
+#include "gammaDistribution.h"
+#include "logFile.h"
+
+#include <cmath>
+using namespace std;
+
+// Finds ML distance with a gamma-ASRV stochasticProcess for a pair of
+// sequences while optimizing the alpha parameter for the given pair of
+// sequences.
+// Was called "njGamma::giveDistanceOptAlphaForPairOfSequences"
class pairwiseGammaDistance : public likeDist {
public:
	explicit pairwiseGammaDistance(const stochasticProcess & sp,
				       const MDOUBLE toll =0.0001,
				       const MDOUBLE maxPairwiseDistance = 5.0)
		: likeDist(sp,toll,maxPairwiseDistance) {}

	explicit pairwiseGammaDistance(stochasticProcess & sp,
				       const MDOUBLE toll =0.0001,
				       const MDOUBLE maxPairwiseDistance = 5.0)
		: likeDist(sp,toll,maxPairwiseDistance) {}

	// ML distance between s1 and s2, alternating distance estimation and
	// alpha optimization (see the .cpp). score: optional out, final
	// likelihood; alpha: optional out, the last optimized alpha.
	const MDOUBLE giveDistance(const sequence& s1,
				   const sequence& s2,
				   const vector<MDOUBLE> * weights = NULL,
				   MDOUBLE* score=NULL,
				   MDOUBLE* alpha=NULL) const;

	virtual pairwiseGammaDistance* clone() const {return new pairwiseGammaDistance(*this);}

	// cast required: the stochasticProcess stores the parent
	// "distribution" class, which knows nothing of alpha
	void setAlpha(MDOUBLE alpha) {
		(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	}


protected:
	// initial distance guess under a rate-homogeneous (uniform) model
	MDOUBLE giveInitialGuessOfDistance(const sequence& s1,
					   const sequence& s2,
					   const vector<MDOUBLE> * weights,
					   MDOUBLE* score) const;
	// best alpha for a fixed distance, evaluated from the sequences (changes sp)
	MDOUBLE optimizeAlphaFixedDist(const sequence& s1,
				       const sequence& s2,
				       stochasticProcess & sp,
				       const MDOUBLE branchL,
				       const vector<MDOUBLE> * weights,
				       MDOUBLE* score=NULL) const;
	// best alpha for a fixed distance, evaluated from a count table (changes sp)
	MDOUBLE optimizeAlphaFixedDist(stochasticProcess & sp,
				       const countTableComponentGam & ctc,
				       const MDOUBLE branchL,
				       const vector<MDOUBLE> * weights,
				       MDOUBLE* score=NULL) const;
};
+
+#endif
diff --git a/libs/phylogeny/pgetopt.h b/libs/phylogeny/pgetopt.h
new file mode 100644
index 0000000..70dadcf
--- /dev/null
+++ b/libs/phylogeny/pgetopt.h
@@ -0,0 +1,180 @@
+/* Declarations for getopt.
+ Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _GETOPT_H
+
+#ifndef __need_getopt
+# define _GETOPT_H 1
+#endif
+
+/* If __GNU_LIBRARY__ is not already defined, either we are being used
+ standalone, or this is the first header included in the source file.
+ If we are being used with glibc, we need to include <features.h>, but
+ that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
+ not defined, include <ctype.h>, which will pull in <features.h> for us
+ if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
+ doesn't flood the namespace with stuff the way some other headers do.) */
+#if !defined __GNU_LIBRARY__
+# include <ctype.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
+#ifndef __need_getopt
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
/* One entry of the LONG_OPTIONS table passed to getopt_long(); the table is
   terminated by an all-zero element (see the description above).  */
struct option
{
# if (defined __STDC__ && __STDC__) || defined __cplusplus
  const char *name;	/* long option name, without leading dashes */
# else
  char *name;
# endif
  /* has_arg can't be an enum because some compilers complain about
     type mismatches in all the code that assumes it is an int.  */
  int has_arg;
  int *flag;	/* if non-NULL, *flag receives `val' when this option is seen */
  int val;	/* value returned (or stored in *flag) for this option */
};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+# define no_argument 0
+# define required_argument 1
+# define optional_argument 2
+#endif /* need getopt */
+
+
+/* Get definitions and prototypes for functions to process the
+ arguments in ARGV (ARGC of them, minus the program name) for
+ options given in OPTS.
+
+ Return the option character from OPTS just read. Return -1 when
+ there are no more options. For unrecognized options, or options
+ missing arguments, `optopt' is set to the option letter, and '?' is
+ returned.
+
+ The OPTS string is a list of characters which are recognized option
+ letters, optionally followed by colons, specifying that that letter
+ takes an argument, to be placed in `optarg'.
+
+ If a letter in OPTS is followed by two colons, its argument is
+ optional. This behavior is specific to the GNU `getopt'.
+
+ The argument `--' causes premature termination of argument
+ scanning, explicitly telling `getopt' that there are no more
+ options.
+
+ If OPTS begins with `--', then non-option arguments are treated as
+ arguments to the option '\0'. This behavior is specific to the GNU
+ `getopt'. */
+
+#if (defined __STDC__ && __STDC__) || defined __cplusplus
+# ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int __argc, char *const *__argv, const char *__shortopts);
+# else /* not __GNU_LIBRARY__ */
+extern int pgetopt ();
+# endif /* __GNU_LIBRARY__ */
+
+# ifndef __need_getopt
+extern int getopt_long (int __argc, char *const *__argv, const char *__shortopts,
+ const struct option *__longopts, int *__longind);
+extern int getopt_long_only (int __argc, char *const *__argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int __argc, char *const *__argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ int __long_only);
+# endif
+#else /* not __STDC__ */
+extern int getopt ();
+# ifndef __need_getopt
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+# endif
+#endif /* __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Make sure we later can get all the definitions and declarations. */
+#undef __need_getopt
+
+#endif /* getopt.h */
diff --git a/libs/phylogeny/phylipFormat.cpp b/libs/phylogeny/phylipFormat.cpp
new file mode 100644
index 0000000..d798426
--- /dev/null
+++ b/libs/phylogeny/phylipFormat.cpp
@@ -0,0 +1,138 @@
+// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "phylipFormat.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+#include "logFile.h"
+
+sequenceContainer phylipFormat::read(istream &infile, const alphabet* alph){
+ sequenceContainer mySeqData = readUnAligned(infile, alph);
+ mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
+ return mySeqData;
+}
// Parses interleaved PHYLIP from a stream without verifying that sequences
// are aligned (length checking is done by read()).
// Header line: <number of sequences> <sequence length>.
// The first f_numSeq non-empty lines carry "name sequence"; every later
// line is sequence data appended, round-robin, to sequence (localid % f_numSeq).
sequenceContainer phylipFormat::readUnAligned(istream &infile, const alphabet* alph){
	sequenceContainer mySeqData;

	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile,seqFileData);

	vector<string>::const_iterator currentLinePosition = seqFileData.begin();
	string::const_iterator itStr = seqFileData.begin()->begin();
	string::const_iterator itStrEnd = seqFileData.begin()->end();

	int f_numSeq;
	bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
	if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format");
	int f_seqLength;
	bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
	if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format");
	currentLinePosition++; // we read the first line.

	int localid=0;
	for (; currentLinePosition != seqFileData.end() ; ) {
		if (currentLinePosition->empty()) {++currentLinePosition;continue;} // skip empty lines
		string remark;
		string name;
		sequence seq(alph);



		if (mySeqData.numberOfSeqs() < f_numSeq ) {//get from the line a name and a sequence;
			// first interleaved block: characters up to the first space are
			// the name; the rest of the line, spaces stripped, is sequence data
			string name1;
			string stringSeq1;
			string::const_iterator it2 = (currentLinePosition)->begin();
			for (; it2 != (currentLinePosition)->end();++it2) {
				if ((*it2)==' ') break;
				else name1+=(*it2);
			}
			for (; it2 != (currentLinePosition)->end();++it2) {
				if ((*it2)==' ') continue;
				else stringSeq1+=(*it2);
			}
			mySeqData.add(sequence(stringSeq1,name1,remark,localid,alph));
			currentLinePosition++;
			localid++;
		}
		else { // later blocks: append to sequence (localid % f_numSeq)
			string stringSeq1;
			string::const_iterator it2 = (currentLinePosition)->begin();
			int sequenceId=localid%f_numSeq;
			// collect characters (spaces stripped) until the target
			// sequence reaches the declared length f_seqLength
			for (; it2 != (currentLinePosition)->end() &&
				     mySeqData[sequenceId].seqLen() <f_seqLength;++it2) {
				if ((*it2)==' ') continue;
				else stringSeq1+=(*it2);

			}
			sequence tmp(stringSeq1,"","",sequenceId,alph);
			mySeqData[sequenceId].operator += (tmp);
			currentLinePosition++;
			localid++;
		}
	}
	return mySeqData;
}
+
// Writes sd to out in interleaved PHYLIP format.
// numOfPositionInLine: sequence positions per interleaved block
// spaceEvery:          insert a blank every this many positions
// Names longer than 10 characters are truncated to the PHYLIP field width
// (a warning is logged; truncation may make two names identical).
void phylipFormat::write(ostream &out, const sequenceContainer& sd,
		   const int numOfPositionInLine,
		   const int spaceEvery) {
	// warn once if any name exceeds the 10-character PHYLIP limit
	sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();
	for (;it5!=sd.constTaxaEnd();++it5) {
		if (it5->name().size() > 10) break;
	}
	if (it5 != sd.constTaxaEnd()) {
		LOG(1,<<"you asked to print in phylip format\n");
		LOG(1,<<"however, the names in phylip format\n");
		LOG(1,<<"must be no more than 10 characters.\n");
		LOG(1,<<"Names are hence trancated to ten \n");
		LOG(1,<<"characters. Notice, that this might\n");
		LOG(1,<<"result in a two or more sequences \n");
		LOG(1,<<"having the same name \n");
	}

	//	vector<const sequenceContainer::sequenceDatum*> vec;
	//	sd.getSequenceDatumPtrVector(vec);
	out<<sd.numberOfSeqs()<<" "<<sd.seqLen();
	if (sd.constTaxaBegin()==sd.constTaxaEnd()) return;	// no sequences: header only

	int maxLengthOfSeqName =0;
	maxLengthOfSeqName=10; // fixed PHYLIP name-field width

	int currentPosition = 0;
	while (currentPosition < sd.seqLen() ) {
		out<<endl;
		out.flush();
		// for (vector<const sequenceContainer::sequenceDatum*>::const_iterator it5= vec.begin(); it5!=vec.end(); ++ it5) {
		for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
			// name column: printed only in the first interleaved block
			// (later blocks get spaces), always padded to maxLengthOfSeqName
			for (int iName = 0 ;iName<maxLengthOfSeqName; ++iName) {
				if (iName<it5->name().size()) {
					if (currentPosition<numOfPositionInLine) {
						out<<it5->name()[iName];
					}
					else out<<" ";
					out.flush();
				}
				else out<<" ";
			}
			out.flush();
			out<<" ";

			if (it5->seqLen()<numOfPositionInLine)
				out<<it5->toString()<<endl;	// short sequence fits on one line
			else {
				// this block's positions, with a blank every spaceEvery
				// positions (but not at the end of the block)
				for (int k=currentPosition; k < currentPosition+numOfPositionInLine; ++k) {
					if (k>=it5->seqLen()) break;
					out<<it5->toString(k);
					if (((k+1)%spaceEvery==0) && (((k+1)%numOfPositionInLine!=0))) out<<" ";
				}
				out<<endl;
			}
		}
		currentPosition +=numOfPositionInLine;

	}
	return;
}
+
+
diff --git a/libs/phylogeny/phylipFormat.h b/libs/phylogeny/phylipFormat.h
new file mode 100644
index 0000000..7db526f
--- /dev/null
+++ b/libs/phylogeny/phylipFormat.h
@@ -0,0 +1,47 @@
+// $Id: phylipFormat.h 1812 2007-03-01 09:29:12Z adist $
+
+#ifndef ___PHYLIP_FORMAT
+#define ___PHYLIP_FORMAT
+
+#include "definitions.h"
+#include "sequenceContainer.h"
+
// Static reader/writer for the interleaved PHYLIP sequence format
// (see the example at the bottom of this file).
class phylipFormat {
public:
	// read: parses and verifies all sequences have equal length
	static sequenceContainer read(istream &infile, const alphabet* alph);
	// numOfPositionInLine: positions per interleaved block;
	// spaceEvery: insert a blank every N positions
	static void write(ostream &out, const sequenceContainer& sd,
		const int numOfPositionInLine = 50,
		const int spaceEvery = 10);
	//readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
	static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
};
+
+#endif
+
+/* EXAMPLE OF PHYLIP FORMAT (interleaved):
+
+6 128
+Langur KIFERCELAR TLKKLGLDGY KGVSLANWVC LAKWESGYNT EATNYNPGDE
+Baboon KIFERCELAR TLKRLGLDGY RGISLANWVC LAKWESDYNT QATNYNPGDQ
+Human KVFERCELAR TLKRLGMDGY RGISLANWMC LAKWESGYNT RATNYNAGDR
+Rat KTYERCEFAR TLKRNGMSGY YGVSLADWVC LAQHESNYNT QARNYDPGDQ
+Cow KVFERCELAR TLKKLGLDGY KGVSLANWLC LTKWESSYNT KATNYNPSSE
+Horse KVFSKCELAH KLKAQEMDGF GGYSLANWVC MAEYESNFNT RAFNGKNANG
+
+ STDYGIFQIN SRYWCNNGKP GAVDACHISC SALLQNNIAD AVACAKRVVS
+ STDYGIFQIN SHYWCNDGKP GAVNACHISC NALLQDNITD AVACAKRVVS
+ STDYGIFQIN SRYWCNDGKP GAVNACHLSC SALLQDNIAD AVACAKRVVR
+ STDYGIFQIN SRYWCNDGKP RAKNACGIPC SALLQDDITQ AIQCAKRVVR
+ STDYGIFQIN SKWWCNDGKP NAVDGCHVSC SELMENDIAK AVACAKKIVS
+ SSDYGLFQLN NKWWCKDNKR SSSNACNIMC SKLLDENIDD DISCAKRVVR
+
+ DQGIRAWVAW RNHCQNKDVS QYVKGCGV
+ DQGIRAWVAW RNHCQNRDVS QYVQGCGV
+ DQGIRAWVAW RNRCQNRDVR QYVQGCGV
+ DQGIRAWVAW QRHCKNRDLS GYIRNCGV
+ EQGITAWVAW KSHCRDHDVS SYVEGCTL
+ DKGMSAWKAW VKHCKDKDLS EYLASCNL
+
+
+*/
+
diff --git a/libs/phylogeny/phylipSequentialFormat.cpp b/libs/phylogeny/phylipSequentialFormat.cpp
new file mode 100644
index 0000000..935d922
--- /dev/null
+++ b/libs/phylogeny/phylipSequentialFormat.cpp
@@ -0,0 +1,130 @@
+// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "phylipSequentialFormat.h"
+#include "someUtil.h"
+#include "errorMsg.h"
+#include "logFile.h"
+
+// read: parse a sequential-PHYLIP stream.  The input must be a true
+// alignment; makeSureAllSeqAreSameLengthAndGetLen() reports an error if
+// sequence lengths differ.
+sequenceContainer phylipSequentialFormat::read(istream &infile, const alphabet* alph){
+ sequenceContainer mySeqData = readUnAligned(infile, alph);
+ mySeqData.makeSureAllSeqAreSameLengthAndGetLen();
+ return mySeqData;
+}
+// readUnAligned: parse a sequential-PHYLIP stream without requiring all
+// sequences to be the same length.
+// Expected layout: a header line "<numSeq> <seqLen>", then, per sequence,
+// a line starting with the sequence name followed by sequence characters,
+// optionally continued on lines that begin with a space.
+// Fixes over the previous revision:
+//  * a truncated file no longer dereferences the end iterator while a
+//    sequence is still shorter than the declared length;
+//  * an empty continuation line no longer dereferences begin() of an
+//    empty string;
+//  * the length comparison no longer mixes signed and unsigned types.
+sequenceContainer phylipSequentialFormat::readUnAligned(istream &infile, const alphabet* alph){
+ sequenceContainer mySeqData;
+
+ vector<string> seqFileData;
+ putFileIntoVectorStringArray(infile,seqFileData);
+ if (seqFileData.empty())
+  errorMsg::reportError("Empty file while reading PHYLIP sequence format");
+
+ vector<string>::const_iterator currentLinePosition = seqFileData.begin();
+ string::const_iterator itStr = seqFileData.begin()->begin();
+ string::const_iterator itStrEnd = seqFileData.begin()->end();
+
+ int f_numSeq;
+ bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
+ if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format");
+ int f_seqLength;
+ bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
+ if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format");
+ currentLinePosition++; // the header line has been consumed
+
+ int localid=0;
+ while (currentLinePosition != seqFileData.end()) {
+  if (currentLinePosition->empty()) {++currentLinePosition;continue;} // skip blank lines between records
+  string stringSeq1;
+  string name1;
+  // accumulate lines until the declared sequence length is reached
+  while (stringSeq1.length() < static_cast<string::size_type>(f_seqLength)) {
+   if (currentLinePosition == seqFileData.end())
+    errorMsg::reportError("Unexpected end of file: sequence shorter than declared length while reading PHYLIP sequence format");
+   if (currentLinePosition->empty()) {++currentLinePosition;continue;} // skip blank continuation lines
+   string::const_iterator it2 = currentLinePosition->begin();
+   if ((*it2)==' ') { // continuation line: sequence characters only
+    for (; it2 != currentLinePosition->end();++it2) {
+     if ((*it2)==' ') continue;
+     else stringSeq1+=(*it2);
+    }
+   }
+   else { // first line of a record: read the name, then the sequence
+    for (; it2 != currentLinePosition->end();++it2) {
+     if ((*it2)==' ') break;
+     else name1+=(*it2);
+    }
+    for (; it2 != currentLinePosition->end();++it2) {
+     if ((*it2)==' ') continue;
+     else stringSeq1+=(*it2);
+    }
+   }
+   currentLinePosition++;
+  }
+  mySeqData.add(sequence(stringSeq1,name1,"",localid,alph));
+  localid++;
+ }
+ return mySeqData;
+}
+
+// write: print the container in sequential PHYLIP format.  Each record
+// starts with the sequence name padded (or truncated) to 10 characters,
+// then the sequence in chunks of numOfPositionInLine characters per line
+// with a space inserted every spaceEvery characters; continuation lines
+// are indented to align with the first sequence line.
+void phylipSequentialFormat::write(ostream &out, const sequenceContainer& sd,
+ const int numOfPositionInLine,
+ const int spaceEvery) {
+ // warn (once) if any name exceeds the 10-character PHYLIP field
+ sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();
+ for (;it5!=sd.constTaxaEnd();++it5) {
+ if (it5->name().size() > 10) break;
+ }
+ if (it5 != sd.constTaxaEnd()) {
+ LOG(1,<<"you asked to print in phylip format\n");
+ LOG(1,<<"however, the names in phylip format\n");
+ LOG(1,<<"must be no more than 10 characters.\n");
+ LOG(1,<<"Names are hence trancated to ten \n");
+ LOG(1,<<"characters. Notice, that this might\n");
+ LOG(1,<<"result in a two or more sequences \n");
+ LOG(1,<<"having the same name \n");
+ }
+
+ // vector<const sequenceContainer::sequenceDatum*> vec;
+ // sd.getSequenceDatumPtrVector(vec);
+ // header line: number of sequences and alignment length
+ out<<sd.numberOfSeqs()<<" "<<sd.seqLen();
+ if (sd.constTaxaBegin()==sd.constTaxaEnd()) return;
+
+ int maxLengthOfSeqName =0;
+ maxLengthOfSeqName=10; // PHYLIP fixed-width name field
+
+
+ // NOTE: this it5 intentionally shadows the iterator used for the
+ // long-name check above.
+ for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
+ int currentPosition = 0;
+ out<<endl;
+ out.flush();
+ // first - print name of sequence
+ for (int iName = 0 ;iName<maxLengthOfSeqName; ++iName) {
+ if (iName<it5->name().size()) {
+ if (currentPosition<numOfPositionInLine) {
+ out<<it5->name()[iName];
+ }
+ else out<<" ";
+ out.flush();
+ }
+ else out<<" "; // pad short names to the full field width
+ }
+ out.flush();
+ out<<" ";
+ // next - print sequence itself
+ while (currentPosition < sd.seqLen() ) {
+ if (it5->seqLen()<numOfPositionInLine)
+ out<<it5->toString()<<endl; // short sequence: fits on one line
+ else {
+ for (int k=currentPosition; k < currentPosition+numOfPositionInLine; ++k) {
+ if (k>=it5->seqLen()) break;
+ out<<it5->toString(k);
+ if (((k+1)%spaceEvery==0) && (((k+1)%numOfPositionInLine!=0))) out<<" ";
+ }
+ out<<endl;
+ if (currentPosition+numOfPositionInLine < sd.seqLen()) {
+ for (int i = 0; i < spaceEvery +1; i++) // creates spaces to align properly
+ out << " ";
+ }
+ }
+ currentPosition +=numOfPositionInLine;
+ }
+
+ }
+
+}
+
+
diff --git a/libs/phylogeny/phylipSequentialFormat.h b/libs/phylogeny/phylipSequentialFormat.h
new file mode 100644
index 0000000..22fe69b
--- /dev/null
+++ b/libs/phylogeny/phylipSequentialFormat.h
@@ -0,0 +1,35 @@
+// $Id: phylipFormat.h 962 2006-11-07 15:13:34Z privmane $
+
+// Include guard fixed: it previously read ___PHYLIP_INTERLEAVED_FORMAT,
+// which belongs to the interleaved-format header; reusing it here would
+// silently empty one of the two headers in any translation unit that
+// includes both.
+#ifndef ___PHYLIP_SEQUENTIAL_FORMAT
+#define ___PHYLIP_SEQUENTIAL_FORMAT
+
+#include "definitions.h"
+#include "sequenceContainer.h"
+
+// phylipSequentialFormat: reader/writer for sequential PHYLIP alignments
+// (each sequence printed in full before the next begins; see the example
+// at the bottom of this file).  Stateless; all members are static.
+class phylipSequentialFormat {
+public:
+ static sequenceContainer read(istream &infile, const alphabet* alph);
+ static void write(ostream &out, const sequenceContainer& sd,
+ const int numOfPositionInLine = 50,
+ const int spaceEvery = 10);
+ //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
+ static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
+};
+
+#endif
+
+/* EXAMPLE OF PHYLIP FORMAT (sequential):
+
+6 128
+Langur KIFERCELAR TLKKLGLDGY KGVSLANWVC LAKWESGYNT EATNYNPGDE
+ STDYGIFQIN SRYWCNNGKP GAVDACHISC SALLQNNIAD AVACAKRVVS
+ DQGIRAWVAW RNHCQNKDVS QYVKGCGV
+Baboon KIFERCELAR TLKRLGLDGY RGISLANWVC LAKWESDYNT QATNYNPGDQ
+ STDYGIFQIN SHYWCNDGKP GAVNACHISC NALLQDNITD AVACAKRVVS
+ DQGIRAWVAW RNHCQNRDVS QYVQGCGV
+Human KVFERCELAR TLKRLGMDGY RGISLANWMC LAKWESGYNT RATNYNAGDR
+ STDYGIFQIN SRYWCNDGKP GAVNACHLSC SALLQDNIAD AVACAKRVVR
+ DQGIRAWVAW RNRCQNRDVR QYVQGCGV
+
+*/
+
diff --git a/libs/phylogeny/pijAccelerator.cpp b/libs/phylogeny/pijAccelerator.cpp
new file mode 100644
index 0000000..8d2d7ed
--- /dev/null
+++ b/libs/phylogeny/pijAccelerator.cpp
@@ -0,0 +1,9 @@
+// $Id: pijAccelerator.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "pijAccelerator.h"
+
+pijAccelerator::~pijAccelerator(){}
+// The destructor is declared pure virtual in the header but still needs a
+// definition here, because derived-class destructors call it.  See
+// Effective C++, Item 14 (constructors, destructors, assignment).
+
+
diff --git a/libs/phylogeny/pijAccelerator.h b/libs/phylogeny/pijAccelerator.h
new file mode 100644
index 0000000..0e64dec
--- /dev/null
+++ b/libs/phylogeny/pijAccelerator.h
@@ -0,0 +1,26 @@
+// $Id: pijAccelerator.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___PIJ_ACCELERATOR
+#define ___PIJ_ACCELERATOR
+
+#include "definitions.h"
+#include "replacementModel.h"
+
+// pijAccelerator: abstract interface for computing (possibly cached or
+// otherwise accelerated) transition probabilities P(j|i,t) and their time
+// derivatives on top of a replacementModel.
+class pijAccelerator {
+public:
+ virtual pijAccelerator* clone() const = 0;
+ virtual ~pijAccelerator() = 0; // pure, but defined in the .cpp
+ // probability of state i changing to state j along a branch of length t
+ virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const = 0;
+ virtual const MDOUBLE freq(const int i) const = 0; // P(i)
+ virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const =0;
+ virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const =0;
+ virtual replacementModel* getReplacementModel() const =0; // @@@@ this const is a lie !!!
+ virtual const int alphabetSize() const =0;
+};
+
+
+
+
+
+#endif
+
diff --git a/libs/phylogeny/posteriorDistance.cpp b/libs/phylogeny/posteriorDistance.cpp
new file mode 100644
index 0000000..79a60b3
--- /dev/null
+++ b/libs/phylogeny/posteriorDistance.cpp
@@ -0,0 +1,420 @@
+// $Id: posteriorDistance.cpp 5883 2009-02-06 10:42:11Z privmane $
+
+#include "posteriorDistance.h"
+#include "numRec.h"
+#include "countTableComponent.h"
+#include "likeDist.h"
+#include "uniDistribution.h"
+#include "someUtil.h"
+#include "jcDistance.h"
+#include <cmath>
+
+
+// Functor: derivative (d/dist) of minus the log-likelihood of the pairwise
+// distance between _s1 and _s2, with each position's rate weighted by a
+// precomputed posterior probability (_posteriorProb[pos][rateCategory]).
+// Used as the derivative argument to dbrent in
+// posteriorDistance::giveDistance.
+class C_eval_gammaMLDistancesPosterior_d{
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const Vdouble* _weights; // optional per-position weights (NULL = all 1.0)
+ const VVdoubleRep& _posteriorProb; // pos, rate
+public:
+ C_eval_gammaMLDistancesPosterior_d(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const VVdoubleRep& posteriorProb,
+ const Vdouble * weights)
+ : _sp(sp),
+ _s1(s1),
+ _s2(s2),
+ _weights(weights),
+ _posteriorProb(posteriorProb)
+ {};
+
+
+ // Returns -d(logL)/d(dist) so the caller can minimise.
+ MDOUBLE operator() (MDOUBLE dist) {
+ MDOUBLE sumL=0.0;
+ doubleRep posLikelihood = 0.0;
+ MDOUBLE posLikelihood_d = 0.0;
+ for (int pos=0; pos < _s1.seqLen(); ++pos){
+ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
+ posLikelihood = 0.0;
+ posLikelihood_d = 0.0;
+ if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
+ // this is the more complicated case, where s1 = ?, s2 = specific
+ posLikelihood = _sp.freq(_s2[pos]);
+ posLikelihood_d =0.0; // constant w.r.t. dist, so derivative is zero
+ }
+ else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
+ posLikelihood = _sp.freq(_s1[pos]);
+ posLikelihood_d =0.0;
+ } else {
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+ MDOUBLE pij= 0.0;
+ MDOUBLE dpij=0.0;
+ if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j
+ pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
+ dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate;
+ doubleRep tmp = _sp.freq(_s1[pos])*_posteriorProb[pos][rateCategor];
+ posLikelihood += pij *tmp;
+ posLikelihood_d += dpij*convert(tmp);
+ }
+ else {// this is the most complicated case, when you have combinations of letters,
+ // for example B in one sequence and ? in the other.
+ // Sum over every concrete state pair compatible with the
+ // (ambiguous) observed characters.
+ for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
+ for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
+ if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
+ (_s2.getAlphabet()->relations(_s2[pos],iS2))) {
+ doubleRep exp = _sp.freq(iS1)*_posteriorProb[pos][rateCategor];;
+ posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
+ posLikelihood_d += convert(exp) * _sp.dPij_dt(iS1,iS2,dist*rate)*rate;
+ }
+ }
+ }
+ }
+ }// end of for rate categories
+ }
+ assert(posLikelihood!=0.0);
+ // d(log p)/d(dist) = p' / p, optionally weighted per position
+ sumL += posLikelihood_d/convert(posLikelihood)*(_weights ? (*_weights)[pos]:1.0);
+ }
+ return -sumL;
+ };
+};
+
+// Functor: minus the log-likelihood of the pairwise distance between _s1
+// and _s2, with each position's rate weighted by a precomputed posterior
+// probability.  The value counterpart of
+// C_eval_gammaMLDistancesPosterior_d; both are passed to dbrent.
+class C_eval_gammaMLDistancesPosterior{
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const Vdouble* _weights; // optional per-position weights (NULL = all 1)
+ const VVdoubleRep& _posteriorProb; // pos, rate
+public:
+ C_eval_gammaMLDistancesPosterior(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const VVdoubleRep& posteriorProb,
+ const Vdouble * weights): _sp(sp),
+ _s1(s1),
+ _s2(s2),
+ _weights(weights),
+ _posteriorProb(posteriorProb)
+ {};
+
+
+ // Returns -sum_pos w(pos) * log(likelihood(pos)) at the given distance.
+ MDOUBLE operator() (MDOUBLE dist) {
+ /*DEBUG LOG(9,<<"C_eval_gammaMLDistancesPosterior::operator():"); LOGDO(9,printTime(myLog::LogFile())); LOG(9,<<": dist = "<<dist<<endl); DEBUG*/
+ MDOUBLE sumL=0.0;
+ doubleRep posLikelihood = 0.0;
+
+ for (int pos=0; pos < _s1.seqLen(); ++pos){
+ /*DEBUG LOG(9,<<"C_eval_gammaMLDistancesPosterior::operator():"); LOGDO(9,printTime(myLog::LogFile())); LOG(9,<<": pos = "<<pos<<endl); DEBUG*/
+ if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
+ /*DEBUG LOG(9,<<"_posteriorProb ="<<_posteriorProb[pos]<<endl); DEBUG*/
+ posLikelihood = 0.0;
+ /*DEBUG LOG(9,<<"posLikelihood = "<<posLikelihood<<endl); DEBUG*/
+ if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) {
+ // this is the more complicated case, where s1 = ?, s2 = specific
+ posLikelihood = _sp.freq(_s2[pos]);
+ }
+ else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
+ posLikelihood = _sp.freq(_s1[pos]);
+ } else {
+ for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
+ MDOUBLE rate = _sp.rates(rateCategor);
+ /*DEBUG LOG(9,<<"rate = "<<rate<<endl); DEBUG*/
+ MDOUBLE pij= 0.0;
+ if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {//simple case, where AA i is changing to AA j
+ /*DEBUG LOG(9,<<"Both are specific"<<endl); DEBUG*/
+ pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
+ doubleRep exp = _sp.freq(_s1[pos])*_posteriorProb[pos][rateCategor];
+ /*DEBUG LOG(9,<<"exp = "<<exp<<endl); DEBUG*/
+ posLikelihood += pij *exp;
+ /*DEBUG LOG(9,<<"posLikelihood = "<<posLikelihood<<endl); DEBUG*/
+ }
+ else {// this is the most complicated case, when you have combinations of letters,
+ // for example B in one sequence and ? in the other.
+ /*DEBUG LOG(9,<<"One or both are non-specific"<<endl); DEBUG*/
+ // Sum over every concrete state pair compatible with the
+ // (ambiguous) observed characters.
+ for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
+ for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
+ if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
+ (_s2.getAlphabet()->relations(_s2[pos],iS2))) {
+ doubleRep exp = _sp.freq(iS1)*_posteriorProb[pos][rateCategor];
+ posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
+ }
+ }
+ }
+ /*DEBUG LOG(9,<<"posLikelihood = "<<posLikelihood<<endl); DEBUG*/
+ }
+ }// end of for rate categories
+ }
+ assert(posLikelihood!=0.0);
+ sumL += log(posLikelihood)*(_weights ? (*_weights)[pos]:1);
+ }
+ /*DEBUG LOG(9,<<"C_eval_gammaMLDistancesPosterior::operator():"); LOGDO(9,printTime(myLog::LogFile())); LOG(9,<<": returning "<<(-sumL)<<endl); DEBUG*/
+ return -sumL;
+ };
+};
+
+// Constructors: the first pair receives the per-position/per-rate
+// posterior probability table up front; the second pair starts with an
+// empty table which must be supplied later via setPosterior().
+posteriorDistance::posteriorDistance(const stochasticProcess & sp,
+ const VVdoubleRep & posteriorProb,
+ const MDOUBLE toll,
+ const MDOUBLE maxPairwiseDistance)
+ :
+ likeDist(sp,toll,maxPairwiseDistance),_posteriorProb(posteriorProb)
+{}
+
+// Non-const overload (likeDist distinguishes const/non-const processes).
+posteriorDistance::posteriorDistance(stochasticProcess & sp,
+ const VVdoubleRep & posteriorProb,
+ const MDOUBLE toll,
+ const MDOUBLE maxPairwiseDistance)
+ :
+ likeDist(sp,toll,maxPairwiseDistance),_posteriorProb(posteriorProb)
+{}
+
+// Posterior table left empty; call setPosterior() before giveDistance().
+posteriorDistance::posteriorDistance(const stochasticProcess & sp,
+ const MDOUBLE toll,
+ const MDOUBLE maxPairwiseDistance)
+ :
+ likeDist(sp,toll,maxPairwiseDistance),_posteriorProb(0)
+{}
+
+
+posteriorDistance::posteriorDistance(stochasticProcess & sp,
+ const MDOUBLE toll,
+ const MDOUBLE maxPairwiseDistance)
+ :
+ likeDist(sp,toll,maxPairwiseDistance),_posteriorProb(0)
+{}
+
+// Copy constructor.  Bind the base as a const reference:
+// static_cast<likeDist>(other) created a sliced temporary and an extra
+// copy for no benefit.
+posteriorDistance::posteriorDistance(const posteriorDistance& other):
+ likeDist(static_cast<const likeDist&>(other)), _posteriorProb(other._posteriorProb)
+{}
+
+
+
+// giveDistance: ML pairwise distance where per-position rates are
+// weighted by the stored posterior probabilities.  Minimised with dbrent
+// using the value and derivative functors defined above; the optional
+// score out-parameter receives the (negated-back) optimum likelihood.
+const MDOUBLE posteriorDistance::giveDistance(const sequence& s1,
+ const sequence& s2,
+ const Vdouble * weights,
+ MDOUBLE* score) const
+{
+ /*DEBUG LOG(9,<<"posteriorDistance::giveDistance - start"<<endl); LOGDO(9,printTime(myLog::LogFile())); DEBUG*/
+ const MDOUBLE ax=0, cx=_maxPairwiseDistance;
+ // Jukes-Cantor distance as the bracketing midpoint for dbrent
+ MDOUBLE bx=_jcDist.giveDistance(s1,s2,weights,score)/*=1.0*/;
+ if (!(bx==bx)) bx = 1.0; // NaN guard (JC undefined for very diverged pairs)
+ if (!(bx>0.0)) bx = 0.000001; // keep the midpoint strictly inside (ax,cx)
+ MDOUBLE dist=-1.0;
+ MDOUBLE resL = -dbrent(ax,bx,cx,
+ C_eval_gammaMLDistancesPosterior(_sp,s1,s2,_posteriorProb,weights),
+ C_eval_gammaMLDistancesPosterior_d(_sp,s1,s2,_posteriorProb,weights),
+ _toll,
+ &dist);
+ if (score) *score = resL;
+ return dist;
+}
+
+// =============================
+// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
+// Functor over alpha: sets the gamma-shape parameter on the (shared)
+// stochastic process and evaluates the likelihood of a fixed branch
+// length from a precomputed count table.  Passed to brent by
+// optimizeAlphaFixedDist below.
+class C_evalAlphaForPairOfSeq{
+private:
+ const countTableComponentGam& _ctc;
+ stochasticProcess& _sp; // mutated: setAlpha is called on its distribution
+ const MDOUBLE _branchL;
+public:
+ C_evalAlphaForPairOfSeq(const countTableComponentGam& ctc,
+ const MDOUBLE branchL,
+ stochasticProcess& sp):_ctc(ctc), _sp(sp), _branchL(branchL) {};
+
+ MDOUBLE operator() (MDOUBLE alpha) {
+ // cast needed: the process stores its distribution as the generic base
+ (static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
+ C_evalLikeDist cev(_ctc,_sp);
+ MDOUBLE L=cev(_branchL);
+ LOG(10,<<"check alpha="<<alpha<<", bl="<<_branchL<<" gives "<<L<<endl);
+ return L;
+ };
+};
+
+// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
+// returns the best alpha.
+// Optimise the gamma-shape parameter alpha for a fixed branch length,
+// using Brent's method over a precomputed count table.  Mutates sp (its
+// distribution ends up with the best alpha) and returns that alpha; the
+// optional score out-parameter receives the optimum value.
+MDOUBLE optimizeAlphaFixedDist(stochasticProcess & sp,
+ const countTableComponentGam & ctc,
+ const MDOUBLE branchL,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL){ // changes sp.
+ MDOUBLE bestA=0.0;
+ MDOUBLE bestQ=0.0;
+ const MDOUBLE upperBoundOnAlpha = 15.0;
+ const MDOUBLE epsilonAlphaOptimization = 0.01;
+ const MDOUBLE cx=upperBoundOnAlpha;// left, middle, right bracket on alpha
+ const MDOUBLE bx=cx*0.3;
+ const MDOUBLE ax=0.0;
+
+
+ bestQ = -brent(ax,bx,cx,
+ C_evalAlphaForPairOfSeq(ctc,branchL,sp),
+ epsilonAlphaOptimization,
+ &bestA);
+ (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
+ if (score) *score = bestQ;
+ return bestA;
+}
+
+
+
+// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
+// Functor over alpha: sets the gamma-shape parameter and returns minus
+// the pairwise likelihood at a fixed distance; minimised by brent in the
+// sequence-pair overload of optimizeAlphaFixedDist.
+class C_eval_gammaMLAlpha{
+private:
+ const stochasticProcess& _sp;
+ const sequence& _s1;
+ const sequence& _s2;
+ const MDOUBLE _distance;
+ const Vdouble* _weights; // optional per-position weights (NULL = all 1)
+ // const VVdoubleRep& _posteriorProb; // pos, rate
+public:
+ C_eval_gammaMLAlpha(const stochasticProcess& sp,
+ const sequence& s1,
+ const sequence& s2,
+ const MDOUBLE distance,
+ // const VVdoubleRep& posteriorProb,
+ const Vdouble * weights): _sp(sp),
+ _s1(s1),
+ _s2(s2),
+ _distance(distance),
+ _weights(weights)
+ // _posteriorProb(posteriorProb)
+ {};
+
+ // this cast is required as the distribution within the
+ // stochasticProcess is kept as the parent "distribution" class that
+ // knows nothing of Alpha
+ void setAlpha(MDOUBLE alpha) {
+ (static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
+ }
+
+
+ // Returns -logL so that brent (a minimiser) maximises the likelihood.
+ MDOUBLE operator() (MDOUBLE alpha) {
+ setAlpha(alpha);
+ MDOUBLE likelihood = likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,_distance,_weights);
+ LOG(11,<<"check alpha="<<alpha<<", bl="<<_distance<<" gives "<<likelihood<<endl);
+ return -likelihood;
+ };
+} ;
+
+
+// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
+// returns the best alpha.
+// Overload working directly on a sequence pair (no count table): optimise
+// alpha for a fixed branch length with Brent's method.  Mutates sp and
+// returns the best alpha; score (optional) receives the optimum value.
+MDOUBLE optimizeAlphaFixedDist( const sequence& s1,
+ const sequence& s2,
+ stochasticProcess & sp,
+ const MDOUBLE branchL,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL){ // changes sp.
+ MDOUBLE bestA=0.0;
+ MDOUBLE bestQ=0.0;
+ const MDOUBLE upperBoundOnAlpha = 15.0;
+ const MDOUBLE epsilonAlphaOptimization = 0.01;
+ const MDOUBLE cx=upperBoundOnAlpha;// left, middle, right bracket on alpha
+ const MDOUBLE bx=cx*0.3;
+ const MDOUBLE ax=0.0;
+
+
+ bestQ = -brent(ax,bx,cx,
+ C_eval_gammaMLAlpha(sp,s1,s2,branchL,weights),
+ epsilonAlphaOptimization,
+ &bestA);
+ (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
+ if (score) *score = bestQ;
+ return bestA;
+}
+
+
+
+// Initial guess for the iterative alpha/distance optimisation: the ML
+// distance under a rate-homogeneous model (uniDistribution) that shares
+// this object's pij accelerator — cheap, and good enough as a seed.
+MDOUBLE posteriorDistance::giveInitialGuessOfDistance(
+ const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score) const {
+ uniDistribution ud;
+ stochasticProcess uniSp(&ud,_sp.getPijAccelerator());
+ likeDist ld(uniSp);
+ return (ld.giveDistance(s1,s2,weights,score));
+}
+
+// OBSOLETE? What's the difference between this function and giveDistanceOptAlphaForPairOfSequences???
+// Iteratively alternate between (a) optimising the gamma-shape alpha for
+// the current distance and (b) re-estimating the distance under the new
+// alpha, until the distance converges (tolerance `toll`) or maxIter
+// rounds pass.  Outputs: best distance (returned), plus optional score
+// and alpha through the out-parameters.
+MDOUBLE posteriorDistance::giveDistanceOptAlphaForEachPairOfSequences( const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score,
+ MDOUBLE* alpha) const {
+
+ MDOUBLE toll = 0.0001; // convergence tolerance on the distance
+
+ MDOUBLE resL = 0.0;
+ MDOUBLE resQ = 0.0;
+ MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL);
+
+ countTableComponentGam ctc; // from technical reasons.
+ ctc.countTableComponentAllocatePlace(_sp.alphabetSize(),_sp.categories());
+
+ stochasticProcess tmpSp(_sp); // local copy: alpha optimisation mutates it
+ // NOTE(review): when weights are supplied, the rate-category prior
+ // ratesProb(j) is *replaced* by the weight rather than multiplied by
+ // it — looks suspicious; confirm this is intended.
+ for (int z=0; z<s1.seqLen(); ++z) {
+ for (int j=0; j < tmpSp.categories(); ++j) {
+ ctc.addToCounts(s1[z],s2[z],j,weights?(*weights)[z]:tmpSp.ratesProb(j));
+ }
+ }
+ const int maxIter = 30;
+ MDOUBLE newDist = 0.0;
+ MDOUBLE lastBestAlpha = 0.0;
+ for (int i=0; i < maxIter; ++i) {
+ lastBestAlpha = optimizeAlphaFixedDist(tmpSp,ctc,currentDistance,weights,&resL); // changes sp.
+ (static_cast<gammaDistribution*>(tmpSp.distr()))->setAlpha(lastBestAlpha);
+ LOG(8,<<"lastBestAlpha="<<lastBestAlpha<<"("<<(static_cast<gammaDistribution*>(tmpSp.distr()))->getAlpha()<<")"<<"\t L="<<resL<<"\t");
+ likeDist tmpld(tmpSp); // we must create a new ld, that will include the stochastic process with the new alpha
+ newDist = tmpld.giveDistance(ctc,resQ);
+ LOG(8,<<"dist="<<newDist<<endl);
+ if (fabs(newDist-currentDistance)<toll) break;
+ currentDistance = newDist;
+ }
+ if (score) *score = resL;
+ if (alpha) *alpha = lastBestAlpha;
+ assert (newDist >=0);
+ return newDist;
+
+}
+
+
+
+// OBSOLETE: this function was moved to pairwiseGammaDistance.cpp
+// Iteratively alternate between optimising alpha for the current fixed
+// distance and re-estimating the distance under the new alpha, until the
+// distance converges (tolerance `toll`) or maxIter rounds pass.
+// Cleanups over the previous revision: removed a countTableComponentGam
+// local that was declared but never filled or read, and removed a stray
+// unbalanced "(" left in the LOG format string.
+MDOUBLE posteriorDistance::giveDistanceOptAlphaForPairOfSequences( const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score,
+ MDOUBLE* alpha) const {
+
+ MDOUBLE toll = 0.0001; // convergence tolerance on the distance
+
+ MDOUBLE resL = 0.0;
+ MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL);
+
+ stochasticProcess tmpSp(_sp); // local copy: alpha optimisation mutates it
+
+ const int maxIter = 30;
+ MDOUBLE newDist = 0.0;
+ MDOUBLE lastBestAlpha = 0.0;
+ for (int i=0; i < maxIter; ++i) {
+  lastBestAlpha = optimizeAlphaFixedDist(s1, s2, tmpSp, currentDistance, weights, &resL); // changes sp.
+  LOG(8,<<"lastBestAlpha="<<lastBestAlpha<<"\t L="<<resL<<"\t");
+  likeDist tmpld(tmpSp); // new likeDist embedding the process with the new alpha
+  newDist = tmpld.giveDistance(s1, s2, weights, &resL);
+  LOG(8,<<"dist="<<newDist<<"(L="<<resL<<")"<<endl);
+  if (fabs(newDist-currentDistance)<toll) break;
+  currentDistance = newDist;
+ }
+ if (score) *score = resL;
+ if (alpha) *alpha = lastBestAlpha;
+ assert (newDist >=0);
+ return newDist;
+
+}
diff --git a/libs/phylogeny/posteriorDistance.h b/libs/phylogeny/posteriorDistance.h
new file mode 100644
index 0000000..4b2a05c
--- /dev/null
+++ b/libs/phylogeny/posteriorDistance.h
@@ -0,0 +1,72 @@
+// $Id: posteriorDistance.h 1752 2007-02-26 14:01:09Z itaymay $
+
+
+#ifndef POSTERIOR_DISTANCE_H
+#define POSTERIOR_DISTANCE_H
+
+#include "likeDist.h"
+#include "stochasticProcess.h"
+#include "definitions.h"
+#include "sequence.h"
+#include "gammaDistribution.h"
+#include "logFile.h"
+
+#include <cmath>
+using namespace std;
+
+// posteriorDistance: likeDist variant whose pairwise ML distance weights
+// each position's rate categories by precomputed posterior probabilities
+// (_posteriorProb[pos][rate]).
+class posteriorDistance : public likeDist {
+public:
+ explicit posteriorDistance(const stochasticProcess & sp,
+ const VVdoubleRep & posteriorProb, // pos * rate
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0);
+
+ explicit posteriorDistance(stochasticProcess & sp,
+ const VVdoubleRep & posteriorProb, // pos * rate
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0);
+
+ // These two leave the posterior table empty; call setPosterior() before
+ // computing distances.
+ explicit posteriorDistance(const stochasticProcess & sp,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0);
+
+ explicit posteriorDistance(stochasticProcess & sp,
+ const MDOUBLE toll =0.0001,
+ const MDOUBLE maxPairwiseDistance = 5.0);
+ posteriorDistance(const posteriorDistance& other);
+ virtual posteriorDistance* clone() const {return new posteriorDistance(*this);}
+
+ // distance is computed based on the posterior probability
+ const MDOUBLE giveDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL) const;
+
+ // Jointly optimise alpha and distance (count-table variant).
+ MDOUBLE giveDistanceOptAlphaForEachPairOfSequences(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score=NULL,
+ MDOUBLE* alpha=NULL) const;
+
+ // Jointly optimise alpha and distance (direct sequence-pair variant).
+ MDOUBLE giveDistanceOptAlphaForPairOfSequences(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score,
+ MDOUBLE* alpha) const;
+
+ void setPosterior(VVdoubleRep posteriorProb) {_posteriorProb = posteriorProb;}
+ // cast needed: the process stores its distribution as the generic base
+ void setAlpha(MDOUBLE alpha) {
+ (static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
+ }
+
+private:
+ VVdoubleRep _posteriorProb; // pos * rate posterior probability table
+ // seed distance from a rate-homogeneous model; see the .cpp
+ MDOUBLE giveInitialGuessOfDistance(const sequence& s1,
+ const sequence& s2,
+ const vector<MDOUBLE> * weights,
+ MDOUBLE* score) const;
+};
+
+
+
+#endif
diff --git a/libs/phylogeny/readDatMatrix.cpp b/libs/phylogeny/readDatMatrix.cpp
new file mode 100644
index 0000000..8aebb1a
--- /dev/null
+++ b/libs/phylogeny/readDatMatrix.cpp
@@ -0,0 +1,284 @@
+// $Id: readDatMatrix.cpp 5805 2009-01-20 09:19:26Z adido $
+//#ifndef unix
+//#define SSTREAM_KNOWN
+//#endif
+
+//#ifdef SSTREAM_KNOWN
+#include <sstream>
+//#else
+//#include <strstream> //oldVersion
+//#endif
+
+
+#include <cassert>
+#include "readDatMatrix.h"
+#include "errorMsg.h"
+#include "logFile.h"
+
+//#define VERBOS
+
+// Normalise the rate matrix Q in place so that the expected number of
+// substitutions per unit time, -sum_i freq[i]*q[i][i], equals 1.
+// Loop indices are size_t to match q.size() (the previous int indices
+// produced signed/unsigned comparison warnings).
+void normalizeQ(VVdouble& q, const Vdouble& freq) {
+ MDOUBLE sum =0;
+ size_t i=0,j=0;
+ for (i=0; i < q.size(); ++i) {
+  sum += q[i][i]*freq[i];
+ }
+ assert(sum!=0); // an all-zero diagonal would mean a broken matrix
+ MDOUBLE oneDividedBySum = -1.0/sum; // to avoid many divisions.
+
+ for (i=0; i < q.size(); ++i) {
+  for (j=0; j < q.size(); ++j) {
+   q[i][j] = q[i][j]*oneDividedBySum;
+  }
+ }
+}
+
+// Read a PAML-style ".dat" replacement matrix file: the lower triangle of
+// the symmetric exchangeability matrix, row by row, followed by the
+// equilibrium frequencies.  Fills subMatrix (symmetrised) and freq.
+// Cleanup: removed a stray debug print to stdout
+// ("****readDatMatrixFromFile******") left in library code.
+void readDatMatrixFromFile(const string & matrixFileName,
+ VVdouble & subMatrix,
+ Vdouble & freq) {
+ int i=0,j=0; //indices
+ ifstream in(matrixFileName.c_str());
+ if (!in) {
+  errorMsg::reportError("unable to open matrix data file");
+ }
+
+ // NOTE(review): this exact-name test fails if the codon matrix is given
+ // with a directory path; confirm callers always pass the bare file name.
+ int alphaSize;
+ if (matrixFileName == "adrianCodon.dat.q")
+  alphaSize = 61; // codon matrix
+ else
+  alphaSize = 20; // amino-acid matrix
+ subMatrix.resize(alphaSize);
+ for ( i=0; i < alphaSize; ++i) subMatrix[i].resize(alphaSize,0.0);
+ freq.resize(alphaSize,0.0);
+
+ // lower triangle, row by row; mirror each entry to the upper triangle
+ for (i=1; i < alphaSize; ++i) {
+  for (j=0; j <i;++j) {
+   in>>subMatrix[i][j];
+   subMatrix[j][i] = subMatrix[i][j];
+  }
+ }
+ for (i=0; i < alphaSize; ++i) {
+  in>>freq[i];
+ }
+ in.close();
+}
+
+// Same as readDatMatrixFromFile but parses an in-memory matrix string
+// (used for matrices compiled into the executable); the alphabet size is
+// passed explicitly (20 for amino acids, 61 for codons).
+void readDatMatrixFromString(const string & matrixFileString,
+ VVdouble & subMatrix,
+ Vdouble & freq, int alphaSize) {
+ int i=0,j=0; //indices
+ //#ifdef SSTREAM_KNOWN
+ stringstream in(matrixFileString.c_str());
+// #else
+// istrstream in(matrixFileString.c_str()); // OLD VERSION
+//#endif
+ if (!in) {
+ errorMsg::reportError("unable to open matrix data buffer");
+ }
+
+
+ subMatrix.resize(alphaSize);
+ for ( i=0; i < alphaSize; ++i) subMatrix[i].resize(alphaSize,0.0);
+ freq.resize(alphaSize,0.0);
+
+ // lower triangle, row by row; mirror into the upper triangle
+ for (i=1; i < alphaSize; ++i) {
+ for (j=0; j <i;++j) {
+ in>>subMatrix[i][j];
+ subMatrix[j][i] = subMatrix[i][j];
+ }
+ }
+ for (i=0; i < alphaSize; ++i) {
+ in>>freq[i];
+ }
+}
+
+
+#include "fromQtoPt.h"
+#include "definitions.h"
+
+#include <iostream>
+using namespace std;
+
+// Load a replacement matrix from a .dat file, build the normalised rate
+// matrix Q, and precompute its eigen-decomposition (used by Pij_t etc.).
+void pupAll::fillMatricesFromFile(const string & dataFileString) {
+ VVdouble sMatrix;
+ readDatMatrixFromFile(dataFileString,sMatrix,_freq);
+ // readDatMatrixFromString(dataFileString,sMatrix,_freq);
+ VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq);
+
+ q2pt q2pt1;
+ q2pt1.fillFromRateMatrix(_freq,qMatrix);
+ _leftEigen = q2pt1.getLeftEigen();
+ _rightEigen = q2pt1.getRightEigen();
+ _eigenVector = q2pt1.getEigenVec();
+}
+// Same as above but the caller supplies the equilibrium frequencies,
+// overriding the ones read from the file.  Note: the file's frequencies
+// are read into _freq first and then overwritten by `freq` before Q is
+// built, so only the exchangeabilities come from the file.
+void pupAll::fillMatricesFromFile(const string & dataFileString, const Vdouble & freq) {
+#ifdef VERBOS
+ LOG(5,<<"dataFileString = "<<dataFileString<<endl);
+#endif
+
+ VVdouble sMatrix;
+ readDatMatrixFromFile(dataFileString,sMatrix,_freq);
+ _freq=freq; // replace the file's frequencies with the supplied ones
+ VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq);
+
+ q2pt q2pt1;
+ q2pt1.fillFromRateMatrix(_freq,qMatrix);
+ _leftEigen = q2pt1.getLeftEigen();
+ _rightEigen = q2pt1.getRightEigen();
+ _eigenVector = q2pt1.getEigenVec();
+}
+
+// Load a replacement matrix from an in-memory string (matrices compiled
+// into the executable) and precompute the eigen-decomposition of Q.
+// alphaSize is 20 for amino-acid matrices and 61 for codon matrices.
+void pupAll::fillMatrices(const string & dataFileString,int alphaSize) {
+ VVdouble sMatrix;
+ readDatMatrixFromString(dataFileString,sMatrix,_freq,alphaSize);
+ // readDatMatrixFromString(dataFileString,sMatrix,_freq);
+ VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq);
+
+ q2pt q2pt1;
+ q2pt1.fillFromRateMatrix(_freq,qMatrix);
+ _leftEigen = q2pt1.getLeftEigen();
+ _rightEigen = q2pt1.getRightEigen();
+ _eigenVector = q2pt1.getEigenVec();
+}
+// In-memory variant with caller-supplied equilibrium frequencies (the
+// string's own frequencies are read into _freq and then overwritten).
+// Uses the default alphabet size of readDatMatrixFromString (20).
+void pupAll::fillMatrices(const string & dataFileString, const Vdouble & freq) {
+ VVdouble sMatrix;
+ readDatMatrixFromString(dataFileString,sMatrix,_freq);
+ _freq=freq; // replace the parsed frequencies with the supplied ones
+ VVdouble qMatrix = fromWagSandFreqToQ(sMatrix,_freq);
+
+ q2pt q2pt1;
+ q2pt1.fillFromRateMatrix(_freq,qMatrix);
+ _leftEigen = q2pt1.getLeftEigen();
+ _rightEigen = q2pt1.getRightEigen();
+ _eigenVector = q2pt1.getEigenVec();
+}
+
+// Transition probability P(j|i,t) via the spectral decomposition:
+// sum_k L[i][k] * R[k][j] * exp(lambda_k * t).  Small numerical
+// over/undershoots of [0,1] are clamped by currectFloatingPointProblems;
+// anything worse is reported as an error.
+const MDOUBLE pupAll::Pij_t(const int i, const int j, const MDOUBLE t) const {
+ if (t<0) {
+ LOG(5,<<"negative length in routine Pij_t "<<endl);
+ LOG(5,<<" t = " <<t<<endl);
+ errorMsg::reportError("negative length in routine Pij_t");
+ }
+// if ((_freq[i] == 0.0) || (_freq[j] == 0.0)) return 0.0;
+ MDOUBLE sum=0;
+ int alphaSize = _freq.size(); // 20 (amino) or 61 (codon)
+ for (int k=0 ; k<alphaSize ; ++k) {
+ sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t) );
+ }
+ if (currectFloatingPointProblems(sum)) return sum;
+// LOG(1,<<"err Pij_t i="<<i<<" j= "<<j<<" dis= "<<t<<" res= "<<sum<<endl);//sum is not in [0,1]
+ errorMsg::reportError("error in function pijt... ");return 0;
+}
+
+// First derivative of Pij_t with respect to time, via the spectral
+// decomposition: sum_k L[i][k] * R[k][j] * lambda_k * exp(lambda_k * t).
+const MDOUBLE pupAll::dPij_dt(const int i,const int j, const MDOUBLE t) const {
+ const int nStates = _freq.size(); // 20 (amino) or 61 (codon)
+ MDOUBLE total = 0.0;
+ for (int idx = 0; idx < nStates; ++idx) {
+  const MDOUBLE lambda = _eigenVector[idx];
+  total += _leftEigen[i][idx] * _rightEigen[idx][j] * lambda * exp(lambda * t);
+ }
+ return total;
+}
+
+
+// Second derivative of Pij_t with respect to time: each spectral term
+// gains a factor of lambda_k squared.
+const MDOUBLE pupAll::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
+// if ((_freq[i] == 0.0) || (_freq[j] == 0.0)) return 0.0;
+ MDOUBLE sum=0; // (stray ";;" empty statement removed)
+ int alphaSize = _freq.size();
+ for (int k=0 ; k<alphaSize ; ++k) {
+  sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t)*_eigenVector[k]*_eigenVector[k]);
+ }
+ return sum;
+}
+// this gives the likelihood of j given i at distance t and gamma
+// parameter alpha. The result presented here is the integral over the
+// rates (according to the gamma distribution with parameter alpha). see Yang's (93) paper.
+// Fix: the sum now runs over _freq.size() states instead of a hard-coded
+// 20, so the 61-state codon matrices this class also supports (see
+// alphabetSize()) are handled consistently with Pij_t.
+const MDOUBLE pupAll::Pij_tAlpha(const int i, const int j, const MDOUBLE t, const MDOUBLE alpha) const {
+ if (t<0) {
+  LOG(5,<<"negative length in routine Pij_tAlpha "<<endl);
+  LOG(5,<<" t = " <<t<<endl);
+  errorMsg::reportError("negative length in routine Pij_tAlpha");
+ }
+ MDOUBLE sum=0;
+ const int alphaSize = _freq.size();
+ for (int k=0 ; k<alphaSize ; ++k) {
+  sum+=( _leftEigen[i][k]*_rightEigen[k][j]*pow(1-_eigenVector[k]*t/alpha,-alpha));
+ }
+ if (currectFloatingPointProblems(sum)) return sum;
+ errorMsg::reportError("error in function pijtAlpha... ");return 0;
+}
+
+
+// First derivative of Pij_tAlpha with respect to t.
+// Fix: iterate over _freq.size() states instead of a hard-coded 20, so
+// the 61-state codon matrices are handled like in Pij_t.
+const MDOUBLE pupAll::Pij_tAlpha_dt(const int i, const int j, const MDOUBLE t, const MDOUBLE alpha) const {
+ if (t<0) {
+  LOG(5,<<"negative length in routine Pij_tAlpha_dt "<<endl);
+  LOG(5,<<" t = " <<t<<endl);
+  errorMsg::reportError("negative length in routine Pij_tAlpha_dt");
+ }
+ MDOUBLE sum=0;
+ const int alphaSize = _freq.size();
+ for (int k=0 ; k<alphaSize ; ++k) {
+  sum+=( _leftEigen[i][k]*_rightEigen[k][j]* _eigenVector[k]* pow(1-_eigenVector[k]*t/alpha,-alpha-1));
+ }
+ return sum;
+}
+// Second derivative of Pij_tAlpha with respect to t.
+// Fix: iterate over _freq.size() states instead of a hard-coded 20, so
+// the 61-state codon matrices are handled like in Pij_t.
+const MDOUBLE pupAll::Pij_tAlpha_dt2(const int i, const int j, const MDOUBLE t, const MDOUBLE alpha) const {
+ if (t<0) {
+  LOG(5,<<"negative length in routine Pij_tAlpha_dt2 "<<endl);
+  LOG(5,<<" t = " <<t<<endl);
+  errorMsg::reportError("negative length in routine Pij_tAlpha_dt2");
+ }
+ MDOUBLE sum=0;
+ const int alphaSize = _freq.size();
+ for (int k=0 ; k<alphaSize ; ++k) {
+  sum+=( _leftEigen[i][k]*_rightEigen[k][j]* (1+1/alpha) *_eigenVector[k]*_eigenVector[k]* pow(1-_eigenVector[k]*t/alpha,-alpha-2));
+ }
+ return sum;
+}
+
+// Clamp tiny numerical over/undershoots of a probability into [0,1]:
+// a value within err_allow_for_pijt_function below 0 becomes 0, and one
+// within that tolerance above 1 becomes 1.  Returns false only when the
+// value is genuinely outside [0,1].  (Name kept for API compatibility;
+// "currect" is a historical typo for "correct".)
+bool pupAll::currectFloatingPointProblems(MDOUBLE& sum) const {
+ if ((sum * (sum+err_allow_for_pijt_function))<0) sum=0;
+ if (((sum-1) * (sum-1.0-err_allow_for_pijt_function))<0) sum=1;
+ if ((sum>1) || (sum<0)) return false;
+ return true;
+}
+
+// Build a normalised rate matrix Q from a symmetric exchangeability
+// matrix S and equilibrium frequencies: off-diagonal q[i][j] = s[i][j] *
+// freq[j]; the diagonal is set so each row sums to zero; finally Q is
+// scaled (normalizeQ) to one expected substitution per unit time.
+VVdouble fromWagSandFreqToQ(const VVdouble & s,const Vdouble& freq){
+ VVdouble q(s.size());
+ for (int z=0; z < q.size(); ++z) q[z].resize(s.size(),0.0);
+ int i,j;
+ MDOUBLE sum;
+ for ( i=0; i < s.size(); ++i) {
+ sum =0;
+ for (j=0; j < s.size(); ++j) {
+ if (i!=j) q[i][j] = s[i][j]* freq[j];
+ sum += q[i][j];
+ }
+ q[i][i] = -sum; // rows of a rate matrix must sum to zero
+ }
+
+ // normalizing q:
+ normalizeQ(q,freq);
+
+
+ // check:
+ //sum =0;
+ //for (i=0; i < s.size(); ++i){
+ // sum += q[i][i]*freq[i];
+ //}
+ //LOG(5,<<" SUM OF DIAGOPNAL Q IS (should be -1) "<<sum<<endl);
+ return q;
+
+}
+
diff --git a/libs/phylogeny/readDatMatrix.h b/libs/phylogeny/readDatMatrix.h
new file mode 100644
index 0000000..cff291a
--- /dev/null
+++ b/libs/phylogeny/readDatMatrix.h
@@ -0,0 +1,68 @@
+// $Id: readDatMatrix.h 5805 2009-01-20 09:19:26Z adido $
+
+#ifndef ___READ_DAT_MATRIX
+#define ___READ_DAT_MATRIX
+
+#include "definitions.h"
+#include <string>
+#include <iostream>
+#include <fstream>
+#include "datMatrixHolder.h"
+
+using namespace std;
+
+void normalizeQ(VVdouble& q, const Vdouble& freq); // presumably rescales Q in place so the mean rate is 1 (defined elsewhere) — TODO confirm
+
+void readDatMatrixFromFile(const string & matrixFileName,
+ VVdouble & subMatrix,
+ Vdouble & freq); // parse a PAML-style .dat file into exchangeabilities + frequencies
+void readDatMatrixFromString(const string & matrixFileString,
+ VVdouble & subMatrix,
+ Vdouble & freq, int alphaSize = 20); // same, from an in-memory string; alphaSize is 20 (amino) or 61 (codon)
+
+VVdouble fromWagSandFreqToQ(const VVdouble & s,const Vdouble& freq); // build the normalized rate matrix Q from s and freq
+
+#include "replacementModel.h"
+#include "definitions.h"
+#include "errorMsg.h"
+
+class pupAll : public replacementModel { // replacement model whose rate matrix is read from a .dat file or a compiled-in matrix string
+public:
+ // get matrix from file:
+ explicit pupAll(const string& matrixFileString) : err_allow_for_pijt_function(1e-4) {fillMatricesFromFile(matrixFileString);}
+ explicit pupAll(const string& matrixFileString, const vector<MDOUBLE>& freq) : err_allow_for_pijt_function(1e-4) {fillMatricesFromFile(matrixFileString,freq);} // caller-supplied stationary frequencies
+
+ // get matrix from within the .exe
+ explicit pupAll(const datMatrixString& matrixFileString,int alphaSize = 20) : err_allow_for_pijt_function(1e-4) {fillMatrices(matrixFileString.Val,alphaSize); } // alphaSize: 20 (amino) or 61 (codon)
+ explicit pupAll(const datMatrixString& matrixFileString, const vector<MDOUBLE>& freq) : err_allow_for_pijt_function(1e-4) {fillMatrices(matrixFileString.Val,freq);}
+
+
+ const int alphabetSize() const {return _freq.size();}//20 or 61
+ const MDOUBLE err_allow_for_pijt_function; //1e-4 tolerance used to clamp P(i,j,t) back into [0,1]
+ virtual replacementModel* clone() const { return new pupAll(*this); }
+
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE t) const; // transition probability i->j over time t
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE t) const; // first derivative w.r.t. t
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE t) const; // second derivative w.r.t. t
+ const MDOUBLE freq(const int i) const {return _freq[i];} // stationary frequency of character i
+
+ // gamma-rate variants: transition probability (and its t-derivatives) averaged over a gamma rate distribution with shape alpha
+ const MDOUBLE Pij_tAlpha (const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const;
+ const MDOUBLE Pij_tAlpha_dt (const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const;
+ const MDOUBLE Pij_tAlpha_dt2(const int i,const int j, const MDOUBLE t, const MDOUBLE alpha) const;
+
+private:
+ void fillMatrices(const string & matrixName,const vector<MDOUBLE>& freq); // parse matrix text and diagonalize
+ void fillMatrices(const string & matrixName,int alphaSize);
+ void fillMatricesFromFile(const string & dataFileString,const vector<MDOUBLE>& freq);
+ void fillMatricesFromFile(const string & dataFileString);
+
+
+ bool currectFloatingPointProblems(MDOUBLE& sum) const; // sic: "currect" = "correct"; clamps sum into [0,1] within tolerance
+
+ VVdouble _leftEigen; // left eigenvectors of the rate matrix
+ VVdouble _rightEigen; // right eigenvectors
+ Vdouble _eigenVector; // eigenvalues
+ Vdouble _freq; // stationary frequencies
+};
+
+#endif
diff --git a/libs/phylogeny/readTree.cpp b/libs/phylogeny/readTree.cpp
new file mode 100644
index 0000000..78ffb4f
--- /dev/null
+++ b/libs/phylogeny/readTree.cpp
@@ -0,0 +1,178 @@
+// $Id: readTree.cpp 5525 2008-12-19 20:17:05Z itaymay $
+
+#include "definitions.h"
+#include "errorMsg.h"
+#include "someUtil.h"
+#include "readTree.h"
+#include <iostream>
+using namespace std;
+
+
+
+
+
+// forward declarations
+
+//----------------------------------------------------------------------------------------------
+// about reading tree topology from files:
+// usually a tree topology is represented by a line like this
+// (((Langur:0.8,Baboon:0.55):0.3,Human:0.44):0.5,Rat:0.02,(Cow:0.2,Horse:0.04):0.03);
+// the syntax of such a line is (part, part, part, part)
+// where part is either (part,part, part, ...):distance or name:distance
+// or without the distance!
+// note that the tree is unrooted.
+// looking at the above file format, one can notice that the number of commas (",") is
+// always one less than the number of leaves (synonyms for leaves are OTUs and external nodes)
+// the function GetNumberOfLeaves counts the number of commas and returns the number of leaves.
+// in the example above there are 6 leaves.
+
+//*******************************************************************************
+// constructors
+//*******************************************************************************
+
+
+
+
+
+vector<char> PutTreeFileIntoVector(istream &in) { // read the tree stream into a vector of non-whitespace chars, stopping at the first ';'
+ vector<char> tree_contents;
+ bool endWithDotComa = false; // did we see the terminating ';'?
+ char chTemp;
+ while (( !in.eof()) && (tree_contents.size() < MAX_FILE_SIZE))
+ {
+ in.get(chTemp); // NOTE(review): if get() fails right at EOF, chTemp keeps its previous value — relies on the eof() test above
+#ifdef WIN32
+ if (chTemp == -52) return tree_contents; //tal addition. (-52 == 0xCC, presumably the MSVC debug fill byte — TODO confirm)
+#endif
+ if ( !isspace( chTemp ) )
+ tree_contents.push_back(chTemp);
+ if (chTemp == ';') {
+ endWithDotComa = true;
+ break;
+ }
+ }
+
+ if (tree_contents.size() >= MAX_FILE_SIZE) { // refuse over-long input
+ vector<string> err;
+ err.push_back("Error reading tree file. The tree file is too large");
+ errorMsg::reportError(err,1); // also quit the program
+ }
+ if (endWithDotComa == false) tree_contents.clear(); // no terminating ';' found: discard everything (treat as junk)
+ return tree_contents;
+}
+
+
+
+
+int GetNumberOfLeaves(const vector<char> &tree_contents) { // #leaves in a Newick string = #commas + 1
+ int iCommasCounter = 0;
+ vector<char>::const_iterator itCurrent = tree_contents.begin();
+ for ( ; itCurrent != tree_contents.end(); ++itCurrent ) {
+ if (*itCurrent==COMMA)
+ ++iCommasCounter;
+ }
+ return ++iCommasCounter; //#leaves is always one more than the number of commas
+}
+
+int GetNumberOfInternalNodes(const vector<char> &tree_contents) { // #internal nodes = count of closing braces ')' and '}'
+ int iCloseCounter = 0;
+ vector<char>::const_iterator itCurrent = tree_contents.begin();
+ for ( ; itCurrent != tree_contents.end(); ++itCurrent ) {
+ if (*itCurrent==CLOSING_BRACE) ++iCloseCounter;
+ if (*itCurrent==CLOSING_BRACE2) ++iCloseCounter; // '}' variant of the syntax
+ }
+ return iCloseCounter; //number of HTUs is always the number of ")"
+}
+
+
+bool verifyChar(vector<char>::const_iterator &p_itCurrent, const char p_cCharToFind) { // true iff the current char equals p_cCharToFind; does not advance the iterator
+ if ( (*p_itCurrent)==p_cCharToFind ) return true;
+ return false;
+}
+
+
+
+
+// IsAtomicPart decides whether we will now read a taxon name (return true),
+// or read an OPENING_BRACE, which tells us that we are about to read a compound structure.
+bool IsAtomicPart(const vector<char>::const_iterator p_itCurrent) { // true -> next token is a taxon name; false -> '(' or '{' opens a subtree
+ if ( (*p_itCurrent)==OPENING_BRACE ) return false;
+ else if ( (*p_itCurrent)==OPENING_BRACE2 ) return false;
+ return true;
+}
+
+//-----------------------------------------------------------------------------
+// there are 2 options for the tree format.
+// either (name1:0.43, name2: 0.45 , (name3 : 2 , name 4: 5) : 3.332)
+// or without the distances (name1, name2 , (name3 , name4) )
+// here we return true if the tree file contains distances, or false if it
+// has no distances.
+// if distances exist: after the name there will always be a colon
+// if a distance exists, also advance the iterator to the beginning of the number
+//-----------------------------------------------------------------------------
+bool DistanceExists(vector<char>::const_iterator& p_itCurrent) { // if at a ':', consume it and return true (a branch length follows)
+
+ if ((*p_itCurrent)==COLON ) {
+ ++p_itCurrent; // step past the ':' so the caller is positioned at the number
+ return true;
+ }
+ return false;
+}
+
+void clearPosibleComment(vector<char>::const_iterator& p_itCurrent) { // skip a bracketed [...] comment, if present, leaving the iterator past ']'
+ if ((*p_itCurrent)=='[' ) {
+ while (*(++p_itCurrent) != ']'); // NOTE(review): no bounds check — an unterminated '[' runs off the end of the buffer
+ ++p_itCurrent; // move over "]"
+ }
+}
+
+string readPosibleComment(vector<char>::const_iterator& p_itCurrent) { // read a [...] comment; returns its text only for NHX-style [&&NHX...] comments, else "" (plain [...] is skipped)
+ string comment = "";
+
+ if ((*p_itCurrent)=='[' )
+ {
+ vector<char>::const_iterator tmp= (p_itCurrent+1); // probe the 5 chars after '[' without committing
+ if ((*tmp++)=='&' &&
+ (*tmp++)=='&' &&
+ (*tmp++)=='N' &&
+ (*tmp++)=='H' &&
+ (*tmp++)=='X') // see http://www.genetics.wustl.edu/eddy/forester/NHX.pdf
+ // [&&NHX...]
+ {
+ p_itCurrent += 5; // now at 'X'; the pre-increment below steps past it
+ while (*(++p_itCurrent) != ']') // NOTE(review): no bounds check on an unterminated comment
+ {
+ comment += *(p_itCurrent); // accumulate the NHX payload (text between "&&NHX" and ']')
+ }
+ ++p_itCurrent; // move over "]"
+ }
+ else // [...]
+ {
+ // Skip over the text in []
+ ++p_itCurrent;
+ while (*(p_itCurrent) != ']')
+ ++p_itCurrent;
+ ++p_itCurrent; // move over "]"
+
+ }
+ }
+ if (comment.size())
+ LOG(10,<<"comment ="<<comment<<endl);
+
+ return comment;
+}
+
+
+
+MDOUBLE getDistance(vector<char>::const_iterator &p_itCurrent) { // parse a branch length (digits, '.', 'E'/'e', '+', '-'), advancing the iterator past it
+ string sTempNumber;
+ for ( ; isdigit(*p_itCurrent) || (*p_itCurrent)==PERIOD || (*p_itCurrent)=='E'|| (*p_itCurrent)=='e'|| (*p_itCurrent)=='-' || (*p_itCurrent)=='+'; ++p_itCurrent)
+ sTempNumber += (*p_itCurrent); // collect the numeric token (scientific notation allowed)
+ MDOUBLE dDistance = string2double(sTempNumber);
+ return dDistance;
+}
+
+
+
+
+
diff --git a/libs/phylogeny/readTree.h b/libs/phylogeny/readTree.h
new file mode 100644
index 0000000..efe9744
--- /dev/null
+++ b/libs/phylogeny/readTree.h
@@ -0,0 +1,40 @@
+// $Id: readTree.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___READ_TREE
+#define ___READ_TREE
+#include "definitions.h"
+#include <iostream>
+using namespace std;
+
+#define REMARK ';'
+#define MAX_LENGTH_OF_NAME 20
+#define MAX_FILE_SIZE 1000000 // cap (in chars) on a tree file accepted by PutTreeFileIntoVector
+#define FATHER 0
+#define LEFT 1
+#define RIGHT 2
+// single-character tokens of the Newick-like tree syntax:
+#define OPENING_BRACE '('
+#define CLOSING_BRACE ')'
+#define OPENING_BRACE2 '{'
+#define CLOSING_BRACE2 '}'
+#define COMMA ','
+#define COLON ':'
+#define SEMI_COLLON ';' // sic: misspelling of SEMI_COLON kept for source compatibility
+#define PERIOD '.'
+
+
+
+bool DistanceExists(vector<char>::const_iterator& p_itCurrent); // consume a ':' and report whether a branch length follows
+bool verifyChar(vector<char>::const_iterator &p_itCurrent, const char p_cCharToFind); // current char == p_cCharToFind?
+int GetNumberOfLeaves(const vector<char>& tree_contents); // #commas + 1
+int GetNumberOfInternalNodes(const vector<char>& tree_contents); // #closing braces
+bool IsAtomicPart(const vector<char>::const_iterator p_itCurrent); // taxon name next (true) or subtree (false)?
+vector<char> PutTreeFileIntoVector(istream &in); // slurp tree text up to ';'
+
+MDOUBLE getDistance(vector<char>::const_iterator &p_itCurrent); // parse a branch length
+bool DistanceExists(vector<char>::const_iterator& p_itCurrent); // NOTE(review): duplicate of the declaration above — harmless
+
+void clearPosibleComment(vector<char>::const_iterator& p_itCurrent); // skip a [...] comment
+string readPosibleComment(vector<char>::const_iterator& p_itCurrent); // read an NHX [...] comment, skip any other
+#endif
+
diff --git a/libs/phylogeny/recognizeFormat.cpp b/libs/phylogeny/recognizeFormat.cpp
new file mode 100644
index 0000000..83a191f
--- /dev/null
+++ b/libs/phylogeny/recognizeFormat.cpp
@@ -0,0 +1,86 @@
+// $Id: recognizeFormat.cpp 1813 2007-03-01 09:29:48Z adist $
+
+#include "recognizeFormat.h"
+#include "maseFormat.h"
+#include "sequenceContainer.h"
+#include "molphyFormat.h"
+#include "phylipFormat.h"
+#include "nexusFormat.h"
+#include "fastaFormat.h"
+#include "clustalFormat.h"
+#include "nexusFormat.h"
+#include "phylipSequentialFormat.h"
+
+
+sequenceContainer recognizeFormat::read(istream &infile, const alphabet* alph) { // read an alignment: auto-detect format, then verify all sequences share one length
+ sequenceContainer mySeqData = readUnAligned(infile, alph);
+ mySeqData.makeSureAllSeqAreSameLengthAndGetLen(); // throws/reports if sequences differ in length
+ return mySeqData;
+}
+
+sequenceContainer recognizeFormat::readUnAligned(istream &infile, const alphabet* alph) {
+ // recognize a format and returns the sequence container of it.
+ sequenceContainer sc;
+ if (!infile){ // stream failed to open / is in an error state
+ string tmp = "error unable to open sequence input file ";
+ errorMsg::reportError(tmp);
+ }
+
+ // this part eats spaces, tabs and such.
+ char check = infile.peek(); // NOTE(review): peek() returns int; the EOF sentinel is lost in the char — confirm an empty stream cannot reach here
+ while ((check==' ') || (check == '\n') || (check == '\t')) {
+ infile.get();
+ check = infile.peek();
+ }
+
+ switch (check){ // dispatch on the first non-blank character of the file
+ case '#': // nexus files start with "#NEXUS"
+ sc=nexusFormat::readUnAligned(infile,alph);
+ break;
+ case '>': // fasta header
+ sc=fastaFormat::readUnAligned(infile,alph);
+ break;
+ case 'C': // clustal files start with "CLUSTAL"
+ sc=clustalFormat::readUnAligned(infile,alph);
+ break;
+ case ';': // mase comment line
+ sc=maseFormat::readUnAligned(infile,alph);
+ break;
+
+ default:
+ if (isdigit(check)){
+ // here it can be either MOLPHY format or one of the PHYLIP type formats (interleaved, sequential)
+ // in PHYLIP format there are lines that are not empty, but the first 10 characters
+ // are space.
+ string s;
+ getline(infile,s, '\n' ); // read the first line which are numbers in both formats
+ getline(infile,s, '\n' ); // read the second line
+ bool phylipFormat = false;
+ int r = s.find_first_of(' '); // if there is a space somewhere - this is phylip format (npos truncated to int compares as -1 — fragile but works)
+ if ((r==(s.size()-1)) || (r==-1)) phylipFormat = false; // no space, or only a trailing space -> molphy
+ else phylipFormat = true;
+
+
+ if (phylipFormat == false) {
+ infile.seekg(0, ios::beg); // file return to the beginning
+ sc=molphyFormat::readUnAligned(infile,alph);
+ } else {
+ getline(infile,s, '\n' ); // read the third line: interleaved will begin with a space, sequential not
+ if (s[0] == ' ') // NOTE(review): the comment above says a leading space means interleaved, yet this branch calls the sequential reader — confirm which is right
+ sc = phylipSequentialFormat::readUnAligned(infile, alph);
+ else
+ sc = phylipFormat::readUnAligned(infile,alph);
+ }
+ }
+ else{ // unknown leading character: report the offending first line
+ string line;
+ getline(infile, line, '\n');
+ string tmp2 = "The program can't recognise your format!";
+ tmp2+="\nThis is the first line in your format:\n";
+ tmp2+=line;
+ errorMsg::reportError(tmp2);
+ }
+ break;
+ }
+ return sc;
+}
diff --git a/libs/phylogeny/recognizeFormat.h b/libs/phylogeny/recognizeFormat.h
new file mode 100644
index 0000000..cde704d
--- /dev/null
+++ b/libs/phylogeny/recognizeFormat.h
@@ -0,0 +1,19 @@
+// $Id: recognizeFormat.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___RECOGNIZE_FORMAT
+#define ___RECOGNIZE_FORMAT
+
+#include "sequenceContainer.h"
+
+class recognizeFormat{ // auto-detects a sequence-file format (nexus/fasta/clustal/mase/molphy/phylip) and delegates to the matching reader
+public:
+ static sequenceContainer read(istream &infile, const alphabet* alph); // read and verify all sequences have the same length
+ static void write(ostream &out, const sequenceContainer& sd);
+ //readUnAligned: the input sequences do not need to be aligned (not all sequences are the same length).
+ static sequenceContainer readUnAligned(istream &infile, const alphabet* alph);
+};
+
+#endif
+
+
+
diff --git a/libs/phylogeny/replacementMatrixSource/HIVBetween.dat b/libs/phylogeny/replacementMatrixSource/HIVBetween.dat
new file mode 100644
index 0000000..6f532a8
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/HIVBetween.dat
@@ -0,0 +1,46 @@
+ACDEFGHIKLMNPQRSTVWY
+
+{{ 0, 0.065662251, 0.77200021, 0.7859595, 0.0074953058, 1.1329574, 0.044971782, 0.0026528488, 0.0026528488, 0.11420832, 0.009902713, 0.0026528488, 1.1259592, 0.029241185, 0.16315391, 1.3085601, 8.4457685, 4.0399067, 0.0026528488, 0.0026528488}
+{ 0.065662251, 0, 0.0026528488, 0.0026528488, 4.9333171, 0.47638319, 0.12737547, 0.0026528488, 0.0026528488, 0.068855751, 0.0026528488, 0.045663061, 0.0026528488, 0.0026528488, 0.18661252, 2.4900381, 0.39260517, 0.22285362, 1.3968681, 4.0213579}
+{ 0.77200021, 0.0026528488, 0, 5.6172481, 0.0026528488, 1.5057888, 1.0170492, 0.0093800488, 0.0026528488, 0.0046480457, 0.0026528488, 9.3704985, 0.018180397, 0.0026528488, 0.0026528488, 0.28026286, 0.14576024, 0.55599996, 0.0026528488, 0.35795048}
+{ 0.7859595, 0.0026528488, 5.6172481, 0, 0.0026528488, 2.0839453, 0.063530422, 0.0032315889, 2.4484839, 0.0026528488, 0.093268326, 0.042054709, 0.0063788279, 1.3583647, 0.039751241, 0.0026528488, 0.15374532, 0.54567507, 0.0026528488, 0.042054709}
+{ 0.0074953058, 4.9333171, 0.0026528488, 0.0026528488, 0, 0.15469345, 0.077228672, 1.803067, 0.018180397, 4.5230222, 0.099760378, 0.0026528488, 0.0026528488, 0.0026528488, 0.0026528488, 0.50747511, 0.0074953058, 0.38374731, 0.44002431, 8.13894}
+{ 1.1329574, 0.47638319, 1.5057888, 2.0839453, 0.15469345, 0, 0.0026528488, 0.0026528488, 0.27680089, 0.0026528488, 0.0026528488, 0.17158679, 0.0026528488, 0.032849536, 1.9384101, 2.324113, 0.19610654, 0.50571521, 0.64556544, 0.0026528488}
+{ 0.044971782, 0.12737547, 1.0170492, 0.063530422, 0.077228672, 0.0026528488, 0, 0.054707578, 0.0026528488, 0.92409864, 0.0026528488, 4.0566567, 1.3015831, 3.7434084, 4.796584, 0.20307398, 0.37755025, 0.0026528488, 0.036884095, 9.9186301}
+{ 0.0026528488, 0.0026528488, 0.0093800488, 0.0032315889, 1.803067, 0.0026528488, 0.054707578, 0, 0.17101271, 3.1615537, 5.9458299, 0.3610872, 0.021784823, 0.0026528488, 0.35934906, 0.64624988, 4.5693569, 9.4117238, 0.0026528488, 0.078613459}
+{ 0.0026528488, 0.0026528488, 0.0026528488, 2.4484839, 0.018180397, 0.27680089, 0.0026528488, 0.17101271, 0, 0.04324117, 0.68043448, 4.1938515, 0.016652568, 3.4738365, 10.850151, 0.26746605, 2.4785142, 0.14104083, 0.0026528488, 0.0026528488}
+{ 0.11420832, 0.068855751, 0.0046480457, 0.0026528488, 4.5230222, 0.0026528488, 0.92409864, 3.1615537, 0.04324117, 0, 2.8224242, 0.0026528488, 1.1022958, 0.79296833, 0.37215595, 0.49218621, 0.023221606, 0.74829436, 0.39731344, 0.059416384}
+{ 0.009902713, 0.0026528488, 0.0026528488, 0.093268326, 0.099760378, 0.0026528488, 0.0026528488, 5.9458299, 0.68043448, 2.8224242, 0, 0.0026528488, 0.0026528488, 0.1611213, 1.3338205, 0.0026528488, 2.6211525, 3.6361006, 0.047262092, 0.0026528488}
+{ 0.0026528488, 0.045663061, 9.3704985, 0.042054709, 0.0026528488, 0.17158679, 4.0566567, 0.3610872, 4.1938515, 0.0026528488, 0.0026528488, 0, 0.0039239772, 0.35657046, 0.15680618, 6.9741802, 3.6538588, 0.014142867, 0.0026528488, 0.93601524}
+{ 1.1259592, 0.0026528488, 0.018180397, 0.0063788279, 0.0026528488, 0.0026528488, 1.3015831, 0.021784823, 0.016652568, 1.1022958, 0.0026528488, 0.0039239772, 0, 2.3727663, 0.68101281, 2.8532025, 1.0686577, 0.0026528488, 0.023584144, 0.016149535}
+{ 0.029241185, 0.0026528488, 0.0026528488, 1.3583647, 0.0026528488, 0.032849536, 3.7434084, 0.0026528488, 3.4738365, 0.79296833, 0.1611213, 0.35657046, 2.3727663, 0, 1.8153444, 0.061711098, 0.12924096, 0.011097026, 0.014142867, 0.059971891}
+{ 0.16315391, 0.18661252, 0.0026528488, 0.039751241, 0.0026528488, 1.9384101, 4.796584, 0.35934906, 10.850151, 0.37215595, 1.3338205, 0.15680618, 0.68101281, 1.8153444, 0, 1.8459052, 1.5220348, 0.043106352, 0.52597396, 0.0052623288}
+{ 1.3085601, 2.4900381, 0.28026286, 0.0026528488, 0.50747511, 2.324113, 0.20307398, 0.64624988, 0.26746605, 0.49218621, 0.0026528488, 6.9741802, 2.8532025, 0.061711098, 1.8459052, 0, 4.7385556, 0.039751241, 0.013196755, 0.34382193}
+{ 8.4457685, 0.39260517, 0.14576024, 0.15374532, 0.0074953058, 0.19610654, 0.37755025, 4.5693569, 2.4785142, 0.023221606, 2.6211525, 3.6538588, 1.0686577, 0.12924096, 1.5220348, 4.7385556, 0, 0.37629386, 0.0026528488, 0.056055755}
+{ 4.0399067, 0.22285362, 0.55599996, 0.54567507, 0.38374731, 0.50571521, 0.0026528488, 9.4117238, 0.14104083, 0.74829436, 3.6361006, 0.014142867, 0.0026528488, 0.011097026, 0.043106352, 0.039751241, 0.37629386, 0, 0.0026528488, 0.021784823}
+{ 0.0026528488, 1.3968681, 0.0026528488, 0.0026528488, 0.44002431, 0.64556544, 0.036884095, 0.0026528488, 0.0026528488, 0.39731344, 0.047262092, 0.0026528488, 0.023584144, 0.014142867, 0.52597396, 0.013196755, 0.0026528488, 0.0026528488, 0, 0.67924601}
+{ 0.0026528488, 4.0213579, 0.35795048, 0.042054709, 8.13894, 0.0026528488, 9.9186301, 0.078613459, 0.0026528488, 0.059416384, 0.0026528488, 0.93601524, 0.016149535, 0.059971891, 0.0052623288, 0.34382193, 0.056055755, 0.021784823, 0.67924601, 0}
+}
+
+
+{{ 0.060490222}
+{ 0.020075899}
+{ 0.042109048}
+{ 0.071567447}
+{ 0.028809447}
+{ 0.072308239}
+{ 0.022293943}
+{ 0.069730629}
+{ 0.056968211}
+{ 0.098851122}
+{ 0.019768318}
+{ 0.044127815}
+{ 0.046025282}
+{ 0.053606488}
+{ 0.066039665}
+{ 0.05060433}
+{ 0.053636813}
+{ 0.061625237}
+{ 0.033011601}
+{ 0.028350243}
+}
diff --git a/libs/phylogeny/replacementMatrixSource/HIVWithin.dat b/libs/phylogeny/replacementMatrixSource/HIVWithin.dat
new file mode 100644
index 0000000..8c891fb
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/HIVWithin.dat
@@ -0,0 +1,46 @@
+ACDEFGHIKLMNPQRSTVWY
+
+{{ 0, 0.049094712, 1.2987859, 1.6291158, 0.17509295, 0.54716271, 0.0014641764, 0.0014641764, 0.17358807, 0.046923924, 0.0014641764, 0.18082842, 0.29570799, 0.0014641764, 0.021810606, 2.5166849, 7.0696878, 7.2650675, 0.0014641764, 0.0014641764}
+{ 0.049094712, 0, 0.0014641764, 0.0014641764, 0.1062872, 0.014343013, 0.0014641764, 0.0014641764, 0.0014641764, 0.0014641764, 0.0014641764, 0.017714543, 0.0014641764, 0.0014641764, 0.83857481, 0.32854654, 0.0014641764, 0.0014641764, 1.6102836, 2.4446914}
+{ 1.2987859, 0.0014641764, 0, 3.5501299, 0.0014641764, 3.0445791, 0.67873067, 0.042497426, 0.26188639, 0.0014641764, 0.0014641764, 8.6119047, 0.0014641764, 0.0014641764, 0.019752881, 0.12529865, 0.18460189, 0.85445233, 0.0014641764, 0.66811539}
+{ 1.6291158, 0.0014641764, 3.5501299, 0, 0.0014641764, 4.3281346, 0.0014641764, 0.011435569, 7.0170946, 0.038021439, 0.0014641764, 0.059013922, 0.0014641764, 0.93899388, 0.0073686726, 0.0014641764, 0.13433613, 0.64409704, 0.0014641764, 0.0014641764}
+{ 0.17509295, 0.1062872, 0.0014641764, 0.0014641764, 0, 0.0014641764, 0.0014641764, 0.43423957, 0.0014641764, 2.1926949, 0.0014641764, 0.0014641764, 0.010022346, 0.0014641764, 0.0014641764, 1.2531563, 0.033533153, 0.66766443, 0.0014641764, 1.2086132}
+{ 0.54716271, 0.014343013, 3.0445791, 4.3281346, 0.0014641764, 0, 0.0014641764, 0.0014641764, 0.081825497, 0.014343013, 0.014343013, 0.017714543, 0.0014641764, 0.017714543, 3.9350911, 1.838906, 0.014343013, 0.81883185, 0.82749392, 0.0014641764}
+{ 0.0014641764, 0.0014641764, 0.67873067, 0.0014641764, 0.0014641764, 0.0014641764, 0, 0.0014641764, 0.065612672, 0.51650871, 0.0014641764, 2.5180202, 4.0834122, 5.4310694, 2.0041793, 0.21235155, 0.28099302, 0.24231504, 0.0014641764, 13.906425}
+{ 0.0014641764, 0.0014641764, 0.042497426, 0.011435569, 0.43423957, 0.0014641764, 0.0014641764, 0, 0.23938727, 2.6655214, 5.0679244, 0.28903662, 0.0014641764, 0.010022346, 0.39260132, 0.21672475, 2.7419485, 7.2690793, 0.0014641764, 0.033533153}
+{ 0.17358807, 0.0014641764, 0.26188639, 7.0170946, 0.0014641764, 0.081825497, 0.065612672, 0.23938727, 0, 0.0014641764, 1.1993479, 3.1232346, 0.032776467, 3.8275035, 11.681111, 0.0014641764, 1.185403, 0.037501949, 0.0014641764, 0.0014641764}
+{ 0.046923924, 0.0014641764, 0.0014641764, 0.038021439, 2.1926949, 0.014343013, 0.51650871, 2.6655214, 0.0014641764, 0, 3.3336075, 0.0014641764, 2.8788489, 0.8464345, 0.17182315, 1.7991682, 0.0014641764, 0.86487141, 0.40127511, 0.0014641764}
+{ 0.0014641764, 0.0014641764, 0.0014641764, 0.0014641764, 0.0014641764, 0.014343013, 0.0014641764, 5.0679244, 1.1993479, 3.3336075, 0, 0.059013922, 0.0014641764, 0.0014641764, 0.96240899, 0.11495981, 2.170826, 4.3246792, 0.0014641764, 0.16960961}
+{ 0.18082842, 0.017714543, 8.6119047, 0.059013922, 0.0014641764, 0.017714543, 2.5180202, 0.28903662, 3.1232346, 0.0014641764, 0.059013922, 0, 0.10098366, 0.10016958, 0.046923924, 4.2665807, 1.3300754, 0.021810606, 0.0014641764, 1.4831375}
+{ 0.29570799, 0.0014641764, 0.0014641764, 0.0014641764, 0.010022346, 0.0014641764, 4.0834122, 0.0014641764, 0.032776467, 2.8788489, 0.0014641764, 0.10098366, 0, 0.89168927, 0.11851717, 4.1726098, 1.2700295, 0.0014641764, 0.0014641764, 0.0014641764}
+{ 0.0014641764, 0.0014641764, 0.0014641764, 0.93899388, 0.0014641764, 0.017714543, 5.4310694, 0.010022346, 3.8275035, 0.8464345, 0.0014641764, 0.10016958, 0.89168927, 0, 3.1258994, 0.046923924, 0.059472209, 0.0014641764, 0.012981329, 0.0014641764}
+{ 0.021810606, 0.83857481, 0.019752881, 0.0073686726, 0.0014641764, 3.9350911, 2.0041793, 0.39260132, 11.681111, 0.17182315, 0.96240899, 0.046923924, 0.11851717, 3.1258994, 0, 2.4452448, 0.27181058, 0.081825497, 1.7469498, 0.0014641764}
+{ 2.5166849, 0.32854654, 0.12529865, 0.0014641764, 1.2531563, 1.838906, 0.21235155, 0.21672475, 0.0014641764, 1.7991682, 0.11495981, 4.2665807, 4.1726098, 0.046923924, 2.4452448, 0, 1.856807, 0.25261054, 0.32257563, 0.27325689}
+{ 7.0696878, 0.0014641764, 0.18460189, 0.13433613, 0.033533153, 0.014343013, 0.28099302, 2.7419485, 1.185403, 0.0014641764, 2.170826, 1.3300754, 1.2700295, 0.059472209, 0.27181058, 1.856807, 0, 0.0014641764, 0.0014641764, 0.14366733}
+{ 7.2650675, 0.0014641764, 0.85445233, 0.64409704, 0.66766443, 0.81883185, 0.24231504, 7.2690793, 0.037501949, 0.86487141, 4.3246792, 0.021810606, 0.0014641764, 0.0014641764, 0.081825497, 0.25261054, 0.0014641764, 0, 0.0014641764, 0.39673909}
+{ 0.0014641764, 1.6102836, 0.0014641764, 0.0014641764, 0.0014641764, 0.82749392, 0.0014641764, 0.0014641764, 0.0014641764, 0.40127511, 0.0014641764, 0.0014641764, 0.0014641764, 0.012981329, 1.7469498, 0.32257563, 0.0014641764, 0.0014641764, 0, 0.0014641764}
+{ 0.0014641764, 2.4446914, 0.66811539, 0.0014641764, 1.2086132, 0.0014641764, 13.906425, 0.033533153, 0.0014641764, 0.0014641764, 0.16960961, 1.4831375, 0.0014641764, 0.0014641764, 0.0014641764, 0.27325689, 0.14366733, 0.39673909, 0.0014641764, 0}
+}
+
+
+{{ 0.0377494}
+{ 0.0240105}
+{ 0.0342034}
+{ 0.0618606}
+{ 0.0422741}
+{ 0.0838496}
+{ 0.0156076}
+{ 0.0983641}
+{ 0.0641682}
+{ 0.0577867}
+{ 0.0158419}
+{ 0.0891129}
+{ 0.0458601}
+{ 0.0437824}
+{ 0.057321}
+{ 0.0550846}
+{ 0.0813774}
+{ 0.0515639}
+{ 0.019597}
+{ 0.0205847}
+}
diff --git a/libs/phylogeny/replacementMatrixSource/cpREV45.dat b/libs/phylogeny/replacementMatrixSource/cpREV45.dat
new file mode 100644
index 0000000..46d785a
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/cpREV45.dat
@@ -0,0 +1,24 @@
+ 105
+ 227 357
+ 175 43 4435
+ 669 823 538 10
+ 157 1745 768 400 10
+ 499 152 1055 3691 10 3122
+ 665 243 653 431 303 133 379
+ 66 715 1405 331 441 1269 162 19
+ 145 136 168 10 280 92 148 40 29
+ 197 203 113 10 396 286 82 20 66 1745
+ 236 4482 2430 412 48 3313 2629 263 305 345 218
+ 185 125 61 47 159 202 113 21 10 1772 1351 193
+ 68 53 97 22 726 10 145 25 127 454 1268 72 327
+ 490 87 173 170 285 323 185 28 152 117 219 302 100 43
+ 2440 385 2085 590 2331 396 568 691 303 216 516 868 93 487 1202
+ 1340 314 1393 266 576 241 369 92 32 1040 156 918 645 148 260 2151
+ 14 230 40 18 435 53 63 82 69 42 159 10 86 468 49 73 29
+ 56 323 754 281 1466 391 142 10 1971 89 189 247 215 2370 97 522 71 346
+ 968 92 83 75 592 54 200 91 25 4797 865 249 475 317 122 167 760 10 119
+
+0.076 0.062 0.041 0.037 0.009 0.038 0.049 0.084 0.025 0.081
+0.101 0.050 0.022 0.051 0.043 0.062 0.054 0.018 0.031 0.066
+
+cpREV45 model
diff --git a/libs/phylogeny/replacementMatrixSource/dayhoff.dat b/libs/phylogeny/replacementMatrixSource/dayhoff.dat
new file mode 100644
index 0000000..fe90d46
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/dayhoff.dat
@@ -0,0 +1,93 @@
+ 27
+ 98 32
+120 0 905
+ 36 23 0 0
+ 89 246 103 134 0
+198 1 148 1153 0 716
+240 9 139 125 11 28 81
+ 23 240 535 86 28 606 43 10
+ 65 64 77 24 44 18 61 0 7
+ 41 15 34 0 0 73 11 7 44 257
+ 26 464 318 71 0 153 83 27 26 46 18
+ 72 90 1 0 0 114 30 17 0 336 527 243
+ 18 14 14 0 0 0 0 15 48 196 157 0 92
+250 103 42 13 19 153 51 34 94 12 32 33 17 11
+409 154 495 95 161 56 79 234 35 24 17 96 62 46 245
+371 26 229 66 16 53 34 30 22 192 33 136 104 13 78 550
+ 0 201 23 0 0 0 0 0 27 0 46 0 0 76 0 75 0
+ 24 8 95 0 96 0 22 0 127 37 28 13 0 698 0 34 42 61
+208 24 15 18 49 35 37 54 44 889 175 10 258 12 48 30 157 0 28
+
+0.087127 0.040904 0.040432 0.046872 0.033474 0.038255 0.049530
+0.088612 0.033618 0.036886 0.085357 0.080482 0.014753 0.039772
+0.050680 0.069577 0.058542 0.010494 0.029916 0.064718
+
+Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
+
+S_ij = S_ji and PI_i for the Dayhoff model, with the rate Q_ij=S_ij*PI_j
+The rest of the file is not used.
+Prepared by Z. Yang, March 1995.
+
+
+See the following reference for notation used here:
+
+Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and
+applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611.
+
+
+-----------------------------------------------------------------------
+
+
+ 30
+109 17
+154 0 532
+ 33 10 0 0
+ 93 120 50 76 0
+266 0 94 831 0 422
+579 10 156 162 10 30 112
+ 21 103 226 43 10 243 23 10
+ 66 30 36 13 17 8 35 0 3
+ 95 17 37 0 0 75 15 17 40 253
+ 57 477 322 85 0 147 104 60 23 43 39
+ 29 17 0 0 0 20 7 7 0 57 207 90
+ 20 7 7 0 0 0 0 17 20 90 167 0 17
+345 67 27 10 10 93 40 49 50 7 43 43 4 7
+772 137 432 98 117 47 86 450 26 20 32 168 20 40 269
+590 20 169 57 10 37 31 50 14 129 52 200 28 10 73 696
+ 0 27 3 0 0 0 0 0 3 0 13 0 0 10 0 17 0
+ 20 3 36 0 30 0 10 0 40 13 23 10 0 260 0 22 23 6
+365 20 13 17 33 27 37 97 30 661 303 17 77 10 50 43 186 0 17
+ A R N D C Q E G H I L K M F P S T W Y V
+Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
+
+Accepted point mutations (x10) Figure 80 (Dayhoff 1978)
+-------------------------------------------------------
+
+A 100 /* Ala */ A 0.087 /* Ala */
+R 65 /* Arg */ R 0.041 /* Arg */
+N 134 /* Asn */ N 0.040 /* Asn */
+D 106 /* Asp */ D 0.047 /* Asp */
+C 20 /* Cys */ C 0.033 /* Cys */
+Q 93 /* Gln */ Q 0.038 /* Gln */
+E 102 /* Glu */ E 0.050 /* Glu */
+G 49 /* Gly */ G 0.089 /* Gly */
+H 66 /* His */ H 0.034 /* His */
+I 96 /* Ile */ I 0.037 /* Ile */
+L 40 /* Leu */ L 0.085 /* Leu */
+K 56 /* Lys */ K 0.081 /* Lys */
+M 94 /* Met */ M 0.015 /* Met */
+F 41 /* Phe */ F 0.040 /* Phe */
+P 56 /* Pro */ P 0.051 /* Pro */
+S 120 /* Ser */ S 0.070 /* Ser */
+T 97 /* Thr */ T 0.058 /* Thr */
+W 18 /* Trp */ W 0.010 /* Trp */
+Y 41 /* Tyr */ Y 0.030 /* Tyr */
+V 74 /* Val */ V 0.065 /* Val */
+
+scale factor = SUM_OF_PRODUCT = 75.246
+
+
+Relative Mutability The equilibrium freqs.
+(Table 21) Table 22
+(Dayhoff 1978) Dayhoff (1978)
+----------------------------------------------------------------
diff --git a/libs/phylogeny/replacementMatrixSource/jones.dat b/libs/phylogeny/replacementMatrixSource/jones.dat
new file mode 100644
index 0000000..05d7206
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/jones.dat
@@ -0,0 +1,150 @@
+ 58
+ 54 45
+ 81 16 528
+ 56 113 34 10
+ 57 310 86 49 9
+105 29 58 767 5 323
+179 137 81 130 59 26 119
+ 27 328 391 112 69 597 26 23
+ 36 22 47 11 17 9 12 6 16
+ 30 38 12 7 23 72 9 6 56 229
+ 35 646 263 26 7 292 181 27 45 21 14
+ 54 44 30 15 31 43 18 14 33 479 388 65
+ 15 5 10 4 78 4 5 5 40 89 248 4 43
+194 74 15 15 14 164 18 24 115 10 102 21 16 17
+378 101 503 59 223 53 30 201 73 40 59 47 29 92 285
+475 64 232 38 42 51 32 33 46 245 25 103 226 12 118 477
+ 9 126 8 4 115 18 10 55 8 9 52 10 24 53 6 35 12
+ 11 20 70 46 209 24 7 8 573 32 24 8 18 536 10 63 21 71
+298 17 16 31 62 20 45 47 11 961 180 14 323 62 23 38 112 25 16
+
+0.076748 0.051691 0.042645 0.051544 0.019803 0.040752 0.061830
+0.073152 0.022944 0.053761 0.091904 0.058676 0.023826 0.040126
+0.050901 0.068765 0.058565 0.014261 0.032102 0.066005
+
+Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
+
+S_ij = S_ji and PI_i for the Jones model based on the SWISSPROT
+Version 22 data.
+Rate Q_ij=S_ij*PI_j.
+The rest of the file is not used.
+Prepared by Z. Yang, March 1995.
+
+See the following reference for notation:
+
+Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and
+applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611.
+
+-----------------------------------------------------------------------
+
+ 426
+ 333 185
+ 596 80 2134
+ 159 214 54 20
+ 332 1203 277 192 14
+ 920 176 286 4497 11 1497
+ 1853 954 470 907 158 144 999
+ 88 716 704 244 58 1027 69 71
+ 286 114 198 59 34 37 72 44 37
+ 394 332 88 62 79 497 101 80 217 2086
+ 294 3606 1209 148 15 1289 1210 215 115 121 140
+ 185 100 56 34 27 78 50 47 33 1129 1567 167
+ 84 21 33 16 115 14 23 28 69 354 1690 17 76
+ 1395 360 64 74 27 629 106 171 249 54 882 117 36 66
+ 3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839
+ 3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527
+ 19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18
+ 49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60
+ 2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63
+
+ A R N D C Q E G H I L K M F P S T W Y V
+ Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
+
+Accepted point mutations (x10), similar to Figure 80 of Dayhoff et
+al. (1978). SwissProt version 22 data.
+------------------------------------------------------------------------------
+
+ 256458 426 333 596 159 332 920 1853 88 286 394 294 185 84 1395 3664 3920 19 49 2771
+ 426 182302 185 80 214 1203 176 954 716 114 332 3606 100 21 360 661 360 171 62 111
+ 333 185 150772 2134 54 277 286 470 704 198 88 1209 56 33 64 2706 1069 9 178 86
+ 596 80 2134 178390 20 192 4497 907 244 59 62 148 34 16 74 390 216 5 142 195
+ 159 214 54 20 68120 14 11 158 58 34 79 15 27 115 27 559 91 60 246 150
+ 332 1203 277 192 14 139546 1497 144 1027 37 497 1289 78 14 629 278 227 20 59 100
+ 920 176 286 4497 11 1497 218432 999 69 72 101 1210 50 23 106 236 217 17 26 336
+ 1853 954 470 907 158 144 999 255274 71 44 80 215 47 28 171 1861 266 106 34 420
+ 88 716 704 244 58 1027 69 71 77124 37 217 115 33 69 249 214 116 5 777 32
+ 286 114 198 59 34 37 72 44 37 191018 2086 121 1129 354 54 274 1420 13 102 6260
+ 394 332 88 62 79 497 101 80 217 2086 319504 140 1567 1690 882 691 256 127 131 2020
+ 294 3606 1209 148 15 1289 1210 215 115 121 140 206568 167 17 117 351 653 16 30 99
+ 185 100 56 34 27 78 50 47 33 1129 1567 167 84670 76 36 89 579 15 25 937
+ 84 21 33 16 115 14 23 28 69 354 1690 17 76 143088 66 468 54 56 1276 307
+ 1395 360 64 74 27 629 106 171 249 54 882 117 36 66 175488 1839 653 8 32 142
+ 3664 661 2706 390 559 278 236 1861 214 274 691 351 89 468 1839 234536 3527 64 259 320
+ 3920 360 1069 216 91 227 217 266 116 1420 256 653 579 54 653 3527 203636 18 73 805
+ 19 171 9 5 60 20 17 106 5 13 127 16 15 56 8 64 18 50486 60 44
+ 49 62 178 142 246 59 26 34 777 102 131 30 25 1276 32 259 73 60 114728 63
+ 2771 111 86 195 150 100 336 420 32 6260 2020 99 937 307 142 320 805 44 63 223724
+
+Observed difference counts from pairwise comparisons, with ancestral sequences
+constructed by parsimony. F(t) = PI*P(t).
+Based on the SwissProt 22 data, kindly provided by D. Jones (Jones et al. 1992)
+-------------------------------------------------------------------------------
+
+
+Ala 0.98754 0.00030 0.00023 0.00042 0.00011 0.00023 0.00065 0.00130 0.00006 0.00020 0.00028 0.00021 0.00013 0.00006 0.00098 0.00257 0.00275 0.00001 0.00003 0.00194
+Arg 0.00044 0.98974 0.00019 0.00008 0.00022 0.00125 0.00018 0.00099 0.00075 0.00012 0.00035 0.00376 0.00010 0.00002 0.00037 0.00069 0.00037 0.00018 0.00006 0.00012
+Asn 0.00042 0.00023 0.98720 0.00269 0.00007 0.00035 0.00036 0.00059 0.00089 0.00025 0.00011 0.00153 0.00007 0.00004 0.00008 0.00342 0.00135 0.00001 0.00022 0.00011
+Asp 0.00062 0.00008 0.00223 0.98954 0.00002 0.00020 0.00470 0.00095 0.00025 0.00006 0.00006 0.00015 0.00004 0.00002 0.00008 0.00041 0.00023 0.00001 0.00015 0.00020
+Cys 0.00043 0.00058 0.00015 0.00005 0.99432 0.00004 0.00003 0.00043 0.00016 0.00009 0.00021 0.00004 0.00007 0.00031 0.00007 0.00152 0.00025 0.00016 0.00067 0.00041
+Gln 0.00044 0.00159 0.00037 0.00025 0.00002 0.98955 0.00198 0.00019 0.00136 0.00005 0.00066 0.00170 0.00010 0.00002 0.00083 0.00037 0.00030 0.00003 0.00008 0.00013
+Glu 0.00080 0.00015 0.00025 0.00392 0.00001 0.00130 0.99055 0.00087 0.00006 0.00006 0.00009 0.00105 0.00004 0.00002 0.00009 0.00021 0.00019 0.00001 0.00002 0.00029
+Gly 0.00136 0.00070 0.00035 0.00067 0.00012 0.00011 0.00074 0.99350 0.00005 0.00003 0.00006 0.00016 0.00003 0.00002 0.00013 0.00137 0.00020 0.00008 0.00003 0.00031
+His 0.00021 0.00168 0.00165 0.00057 0.00014 0.00241 0.00016 0.00017 0.98864 0.00009 0.00051 0.00027 0.00008 0.00016 0.00058 0.00050 0.00027 0.00001 0.00182 0.00008
+Ile 0.00029 0.00011 0.00020 0.00006 0.00003 0.00004 0.00007 0.00004 0.00004 0.98729 0.00209 0.00012 0.00113 0.00035 0.00005 0.00027 0.00142 0.00001 0.00010 0.00627
+Leu 0.00023 0.00019 0.00005 0.00004 0.00005 0.00029 0.00006 0.00005 0.00013 0.00122 0.99330 0.00008 0.00092 0.00099 0.00052 0.00040 0.00015 0.00007 0.00008 0.00118
+Lys 0.00027 0.00331 0.00111 0.00014 0.00001 0.00118 0.00111 0.00020 0.00011 0.00011 0.00013 0.99100 0.00015 0.00002 0.00011 0.00032 0.00060 0.00001 0.00003 0.00009
+Met 0.00042 0.00023 0.00013 0.00008 0.00006 0.00018 0.00011 0.00011 0.00007 0.00255 0.00354 0.00038 0.98818 0.00017 0.00008 0.00020 0.00131 0.00003 0.00006 0.00212
+Phe 0.00011 0.00003 0.00004 0.00002 0.00015 0.00002 0.00003 0.00004 0.00009 0.00047 0.00227 0.00002 0.00010 0.99360 0.00009 0.00063 0.00007 0.00008 0.00171 0.00041
+Pro 0.00148 0.00038 0.00007 0.00008 0.00003 0.00067 0.00011 0.00018 0.00026 0.00006 0.00093 0.00012 0.00004 0.00007 0.99270 0.00194 0.00069 0.00001 0.00003 0.00015
+Ser 0.00287 0.00052 0.00212 0.00031 0.00044 0.00022 0.00018 0.00146 0.00017 0.00021 0.00054 0.00027 0.00007 0.00037 0.00144 0.98556 0.00276 0.00005 0.00020 0.00025
+Thr 0.00360 0.00033 0.00098 0.00020 0.00008 0.00021 0.00020 0.00024 0.00011 0.00131 0.00024 0.00060 0.00053 0.00005 0.00060 0.00324 0.98665 0.00002 0.00007 0.00074
+Trp 0.00007 0.00065 0.00003 0.00002 0.00023 0.00008 0.00006 0.00040 0.00002 0.00005 0.00048 0.00006 0.00006 0.00021 0.00003 0.00024 0.00007 0.99686 0.00023 0.00017
+Tyr 0.00008 0.00010 0.00030 0.00024 0.00041 0.00010 0.00004 0.00006 0.00130 0.00017 0.00022 0.00005 0.00004 0.00214 0.00005 0.00043 0.00012 0.00010 0.99392 0.00011
+Val 0.00226 0.00009 0.00007 0.00016 0.00012 0.00008 0.00027 0.00034 0.00003 0.00511 0.00165 0.00008 0.00076 0.00025 0.00012 0.00026 0.00066 0.00004 0.00005 0.98761
+
+P(0.01), amino acid exchange data generated from SWISSPROT Release 22.0
+Ref. Jones D.T., Taylor W.R. and Thornton J.M. (1992) CABIOS 8:275-282
+
+
+Usable sequences: 23824
+Final alignments: 5437
+Accepted point mutations: 92883
+
+A R N D C Q E G H I L K M F P S T W Y V
+
+
+ 0.0767477 100
+ 0.0516907 82.3263
+ 0.0426448 102.697
+ 0.0515445 83.8924
+ 0.0198027 45.6097
+ 0.0407523 83.8825
+ 0.0618296 75.7914
+ 0.0731516 52.1273
+ 0.0229438 91.1374
+ 0.0537609 101.99
+ 0.0919042 53.7672
+ 0.0586762 72.2308
+ 0.0238262 94.8144
+ 0.0401265 51.3146
+ 0.0509007 58.5874
+ 0.0687652 115.899
+ 0.0585647 107.092
+ 0.0142613 25.2297
+ 0.0321015 48.7629
+ 0.0660051 99.4571
+
+Normalized Relative
+frequency mutabilities
+(SUM m*f) = 80.240436
+-------------------------------------------
diff --git a/libs/phylogeny/replacementMatrixSource/mitochondriaAscidian.code b/libs/phylogeny/replacementMatrixSource/mitochondriaAscidian.code
new file mode 100644
index 0000000..7f04ddc
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/mitochondriaAscidian.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG AGA AGG "
+"H CAT CAC "
+"I ATT ATC "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAA AAG "
+"M ATG ATA "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC "
+"T ACT ACC ACA ACG "
+"W TGG TGA "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TAG "
+"i ATG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
diff --git a/libs/phylogeny/replacementMatrixSource/mitochondriaEchinoderm.code b/libs/phylogeny/replacementMatrixSource/mitochondriaEchinoderm.code
new file mode 100644
index 0000000..6f1c221
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/mitochondriaEchinoderm.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG "
+"N AAT AAC AAA "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC ATA "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAG "
+"M ATG "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC AGA AGG "
+"T ACT ACC ACA ACG "
+"W TGG TGA "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TAG "
+"i ATG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
diff --git a/libs/phylogeny/replacementMatrixSource/mitochondriaFlatworm.code b/libs/phylogeny/replacementMatrixSource/mitochondriaFlatworm.code
new file mode 100644
index 0000000..56ccfa6
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/mitochondriaFlatworm.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG "
+"N AAT AAC AAA "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC ATA "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAG "
+"M ATG "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC AGA AGG "
+"T ACT ACC ACA ACG "
+"W TGG TGA "
+"Y TAT TAC TAA "
+"V GTT GTC GTA GTG "
+"* TAG "
+"i ATG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
diff --git a/libs/phylogeny/replacementMatrixSource/mitochondriaInvertebrate.code b/libs/phylogeny/replacementMatrixSource/mitochondriaInvertebrate.code
new file mode 100644
index 0000000..acfb861
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/mitochondriaInvertebrate.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAA AAG "
+"M ATG ATA "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC AGA AGG "
+"T ACT ACC ACA ACG "
+"W TGG TGA "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TAG "
+"i ATG GTG ATA ATC ATT TTG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
\ No newline at end of file
diff --git a/libs/phylogeny/replacementMatrixSource/mitochondriaProtozoan.code b/libs/phylogeny/replacementMatrixSource/mitochondriaProtozoan.code
new file mode 100644
index 0000000..838e19a
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/mitochondriaProtozoan.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG AGA AGG "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC ATA "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAA AAG "
+"M ATG "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC "
+"T ACT ACC ACA ACG "
+"W TGG TGA "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TAG "
+"i ATG GTG ATA ATC ATT CTG TTG TTA "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
\ No newline at end of file
diff --git a/libs/phylogeny/replacementMatrixSource/mitochondriaVertebrate.code b/libs/phylogeny/replacementMatrixSource/mitochondriaVertebrate.code
new file mode 100644
index 0000000..0067b30
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/mitochondriaVertebrate.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAA AAG "
+"M ATG ATA "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC "
+"T ACT ACC ACA ACG "
+"W TGG TGA "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TAG AGG AGA "
+"i ATG GTG ATA ATC ATT "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
\ No newline at end of file
diff --git a/libs/phylogeny/replacementMatrixSource/mitochondriaYeast.code b/libs/phylogeny/replacementMatrixSource/mitochondriaYeast.code
new file mode 100644
index 0000000..b62db0a
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/mitochondriaYeast.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG AGG AGA "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC "
+"L TTA TTG "
+"K AAA AAG "
+"M ATG ATA "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC "
+"T ACT ACC ACA ACG CTT CTC CTA CTG "
+"W TGG TGA "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TAG "
+"i ATG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
\ No newline at end of file
diff --git a/libs/phylogeny/replacementMatrixSource/mtREV24.dat b/libs/phylogeny/replacementMatrixSource/mtREV24.dat
new file mode 100644
index 0000000..948c9ff
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/mtREV24.dat
@@ -0,0 +1,41 @@
+
+ 23.18
+ 26.95 13.24
+ 17.67 1.90 794.38
+ 59.93 103.33 58.94 1.90
+ 1.90 220.99 173.56 55.28 75.24
+ 9.77 1.90 63.05 583.55 1.90 313.56
+ 120.71 23.03 53.30 56.77 30.71 6.75 28.28
+ 13.90 165.23 496.13 113.99 141.49 582.40 49.12 1.90
+ 96.49 1.90 27.10 4.34 62.73 8.34 3.31 5.98 12.26
+ 25.46 15.58 15.16 1.90 25.65 39.70 1.90 2.41 11.49 329.09
+ 8.36 141.40 608.70 2.31 1.90 465.58 313.86 22.73 127.67 19.57 14.88
+ 141.88 1.90 65.41 1.90 6.18 47.37 1.90 1.90 11.97 517.98 537.53 91.37
+ 6.37 4.69 15.20 4.98 70.80 19.11 2.67 1.90 48.16 84.67 216.06 6.44 90.82
+ 54.31 23.64 73.31 13.43 31.26 137.29 12.83 1.90 60.97 20.63 40.10 50.10 18.84 17.31
+ 387.86 6.04 494.39 69.02 277.05 54.11 54.71 125.93 77.46 47.70 73.61 105.79 111.16 64.29 169.90
+ 480.72 2.08 238.46 28.01 179.97 94.93 14.82 11.17 44.78 368.43 126.40 136.33 528.17 33.85 128.22 597.21
+ 1.90 21.95 10.68 19.86 33.60 1.90 1.90 10.92 7.08 1.90 32.44 24.00 21.71 7.84 4.21 38.58 9.99
+ 6.48 1.90 191.36 21.21 254.77 38.82 13.12 3.21 670.14 25.01 44.15 51.17 39.96 465.58 16.21 64.92 38.73 26.25
+ 195.06 7.64 1.90 1.90 1.90 19.00 21.14 2.53 1.90 1222.94 91.67 1.90 387.54 6.35 8.23 1.90 204.54 5.37 1.90
+
+
+0.072 0.019 0.039 0.019 0.006 0.025 0.024 0.056 0.028 0.088 0.169
+0.023 0.054 0.061 0.054 0.072 0.086 0.029 0.033 0.043
+
+Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
+
+
+S_ij = S_ji and PI_i for the mtREV24 model (Adachi and Hasegawa 1996).
+The PI's used to sum to 0.999 and I changed one of the freq from 0.168
+into 0.169 so that the sum is 1. Prepared by Z. Yang according to
+data sent by Dr M. Hasegawa. This matrix was obtained from the 12
+mitochondrial proteins encoded by the same strand of the DNA from a
+diverse range of species including bird, fish, frog, lamprey, as well
+as mammals (see Adachi and Hasegawa 1996 for details). The other
+matrix (mtmam.dat) included in the package is based on the same
+proteins from mammals only.
+
+Adachi, J. and Hasegawa, M. (1996) MOLPHY version 2.3: programs for
+molecular phylogenetics based on maximum likelihood. Computer Science
+Monographs of Institute of Statistical Mathematics 28:1-150.
diff --git a/libs/phylogeny/replacementMatrixSource/nuclearBlepharisma.code b/libs/phylogeny/replacementMatrixSource/nuclearBlepharisma.code
new file mode 100644
index 0000000..edce8de
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/nuclearBlepharisma.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG AGA AGG "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG TAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC ATA "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAA AAG "
+"M ATG "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC "
+"T ACT ACC ACA ACG "
+"W TGG "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TGA "
+"i ATG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
\ No newline at end of file
diff --git a/libs/phylogeny/replacementMatrixSource/nuclearCiliate.code b/libs/phylogeny/replacementMatrixSource/nuclearCiliate.code
new file mode 100644
index 0000000..5f039e9
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/nuclearCiliate.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG AGA AGG "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG TAA TAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC ATA "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAA AAG "
+"M ATG "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC "
+"T ACT ACC ACA ACG "
+"W TGG "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TGA "
+"i ATG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
\ No newline at end of file
diff --git a/libs/phylogeny/replacementMatrixSource/nuclearEuplotid.code b/libs/phylogeny/replacementMatrixSource/nuclearEuplotid.code
new file mode 100644
index 0000000..02f823e
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/nuclearEuplotid.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG AGA AGG "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC TGA "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC ATA "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAA AAG "
+"M ATG "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC "
+"T ACT ACC ACA ACG "
+"W TGG "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TAG "
+"i ATG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
\ No newline at end of file
diff --git a/libs/phylogeny/replacementMatrixSource/nuclearStandard.code b/libs/phylogeny/replacementMatrixSource/nuclearStandard.code
new file mode 100644
index 0000000..257bcba
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/nuclearStandard.code
@@ -0,0 +1,24 @@
+"A GCT GCC GCA GCG "
+"R CGT CGC CGA CGG AGA AGG "
+"N AAT AAC "
+"D GAT GAC "
+"C TGT TGC "
+"Q CAA CAG "
+"E GAA GAG "
+"G GGT GGC GGA GGG "
+"H CAT CAC "
+"I ATT ATC ATA "
+"L CTT CTC CTA CTG TTA TTG "
+"K AAA AAG "
+"M ATG "
+"F TTT TTC "
+"P CCT CCC CCA CCG "
+"S TCT TCC TCA TCG AGT AGC "
+"T ACT ACC ACA ACG "
+"W TGG "
+"Y TAT TAC "
+"V GTT GTC GTA GTG "
+"* TAA TAG TGA "
+"i ATG CTG TTG "
+"### taken from http://bioinformatics.org/JaMBW/2/3/TranslationTables.html "
+"### NOTE: initiation codons must appear after all codon lines "
diff --git a/libs/phylogeny/replacementMatrixSource/wag.dat b/libs/phylogeny/replacementMatrixSource/wag.dat
new file mode 100644
index 0000000..0b4ac87
--- /dev/null
+++ b/libs/phylogeny/replacementMatrixSource/wag.dat
@@ -0,0 +1,47 @@
+
+0.551571
+0.509848 0.635346
+0.738998 0.147304 5.429420
+1.027040 0.528191 0.265256 0.0302949
+0.908598 3.035500 1.543640 0.616783 0.0988179
+1.582850 0.439157 0.947198 6.174160 0.021352 5.469470
+1.416720 0.584665 1.125560 0.865584 0.306674 0.330052 0.567717
+0.316954 2.137150 3.956290 0.930676 0.248972 4.294110 0.570025 0.249410
+0.193335 0.186979 0.554236 0.039437 0.170135 0.113917 0.127395 0.0304501 0.138190
+0.397915 0.497671 0.131528 0.0848047 0.384287 0.869489 0.154263 0.0613037 0.499462 3.170970
+0.906265 5.351420 3.012010 0.479855 0.0740339 3.894900 2.584430 0.373558 0.890432 0.323832 0.257555
+0.893496 0.683162 0.198221 0.103754 0.390482 1.545260 0.315124 0.174100 0.404141 4.257460 4.854020 0.934276
+0.210494 0.102711 0.0961621 0.0467304 0.398020 0.0999208 0.0811339 0.049931 0.679371 1.059470 2.115170 0.088836 1.190630
+1.438550 0.679489 0.195081 0.423984 0.109404 0.933372 0.682355 0.243570 0.696198 0.0999288 0.415844 0.556896 0.171329 0.161444
+3.370790 1.224190 3.974230 1.071760 1.407660 1.028870 0.704939 1.341820 0.740169 0.319440 0.344739 0.967130 0.493905 0.545931 1.613280
+2.121110 0.554413 2.030060 0.374866 0.512984 0.857928 0.822765 0.225833 0.473307 1.458160 0.326622 1.386980 1.516120 0.171903 0.795384 4.378020
+0.113133 1.163920 0.0719167 0.129767 0.717070 0.215737 0.156557 0.336983 0.262569 0.212483 0.665309 0.137505 0.515706 1.529640 0.139405 0.523742 0.110864
+0.240735 0.381533 1.086000 0.325711 0.543833 0.227710 0.196303 0.103604 3.873440 0.420170 0.398618 0.133264 0.428437 6.454280 0.216046 0.786993 0.291148 2.485390
+2.006010 0.251849 0.196246 0.152335 1.002140 0.301281 0.588731 0.187247 0.118358 7.821300 1.800340 0.305434 2.058450 0.649892 0.314887 0.232739 1.388230 0.365369 0.314730
+
+0.0866279 0.043972 0.0390894 0.0570451 0.0193078 0.0367281 0.0580589 0.0832518 0.0244313 0.048466 0.086209 0.0620286 0.0195027 0.0384319 0.0457631 0.0695179 0.0610127 0.0143859 0.0352742 0.0708956
+
+ A R N D C Q E G H I L K M F P S T W Y V
+Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
+
+ Symmetrical part of the rate matrix and aa frequencies,
+estimated from 3905 globular protein amino acid sequences forming 182
+protein families.
+ The first part above indicates the symmetric 'exchangeability'
+parameters, where s_ij = s_ji. The s_ij above are not scaled, but the
+PAML package will perform this scaling.
+ The second part gives the amino acid frequencies (pi_i)
+estimated from the 3905 sequences. The net replacement rate from i to
+j is Q_ij = s_ij*pi_j.
+ Prepared by Simon Whelan and Nick Goldman, September 2000.
+
+Citation:
+Whelan, S. and N. Goldman. In press. A general empirical model of
+ protein evolution derived from multiple protein families using
+ a maximum likelihood approach. Molecular Biology and
+ Evolution.
+
+See the following reference for notation used here:
+
+Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and
+applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611.
diff --git a/libs/phylogeny/replacementModel.cpp b/libs/phylogeny/replacementModel.cpp
new file mode 100644
index 0000000..6bd0237
--- /dev/null
+++ b/libs/phylogeny/replacementModel.cpp
@@ -0,0 +1,9 @@
+// $Id: replacementModel.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "replacementModel.h"
+
+// Out-of-line body for the pure virtual destructor declared in
+// replacementModel.h. Even a pure virtual destructor needs a definition,
+// because every derived-class destructor implicitly invokes it.
+replacementModel::~replacementModel(){}
+// this must be here. see Effective c++ page 63 (item 14, constructors, destructors,
+// assignment)
+
+
diff --git a/libs/phylogeny/replacementModel.h b/libs/phylogeny/replacementModel.h
new file mode 100644
index 0000000..aafe5d2
--- /dev/null
+++ b/libs/phylogeny/replacementModel.h
@@ -0,0 +1,26 @@
+// $Id: replacementModel.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___REPLACEMENT_MODEL
+#define ___REPLACEMENT_MODEL
+
+#include "definitions.h"
+
+// Abstract interface for a character replacement (substitution) model:
+// transition probabilities Pij(t), their first and second time derivatives,
+// and the stationary character frequencies.
+class replacementModel{
+public:
+	// Probability of observing character j given character i after branch length t.
+	virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const = 0;
+	// Stationary frequency of character i.
+	virtual const MDOUBLE freq(const int i) const = 0;
+	// First derivative of Pij_t with respect to t (used by likelihood/branch-length optimization).
+	virtual const MDOUBLE dPij_dt(const int i, const int j, const MDOUBLE t) const =0;
+	// Second derivative of Pij_t with respect to t.
+	virtual const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const =0;
+	// Polymorphic deep copy.
+	virtual replacementModel* clone() const = 0;
+	virtual ~replacementModel()=0;
+	// Number of states in the model's alphabet.
+	virtual const int alphabetSize() const =0;
+
+	//virtual const MDOUBLE Q(const int i, const int j, const MDOUBLE r = 1.0) const = 0;
+	//note that we ask that sigma over i sigma over j!=i of p(i)Qij = 1;
+	//this is beacuse we ask the [sigma over i sigma over j!=i p(i)*pij(d)]/d approaches
+	//1 as d -> 0. (and l'hopital from here).
+};
+
+
+#endif
+
diff --git a/libs/phylogeny/replacementModelSSRV.cpp b/libs/phylogeny/replacementModelSSRV.cpp
new file mode 100644
index 0000000..7022156
--- /dev/null
+++ b/libs/phylogeny/replacementModelSSRV.cpp
@@ -0,0 +1,198 @@
+// $Id: replacementModelSSRV.cpp 4165 2008-06-04 09:19:48Z osnatz $
+
+#include "replacementModelSSRV.h"
+#include "logFile.h"
+#include <iomanip>
+#include <iostream>
+
+
+// Builds an SSRV (site-specific rate variation) model over the expanded state
+// space (base alphabet) x (rate categories). Takes ownership of clones of the
+// given rate distribution and base replacement model; rateOfRate controls the
+// rate of switching between rate categories.
+replacementModelSSRV::replacementModelSSRV(const distribution* dist, const replacementModel* baseRM, MDOUBLE rateOfRate /*= 1 */) :
+_dist(dist->clone()),
+_baseRM(baseRM->clone()),
+_rateOfRate(rateOfRate)
+{
+	if (_dist->categories() == 0)
+		errorMsg::reportError("replacementModelSSRV::replacementModelSSRV : number of categories == 0");
+
+	// Precompute stationary frequencies, the expanded rate matrix, and its decomposition.
+	updateFreq();
+	updateQ();
+
+
+}
+
+//// similar to goldmanYangModel.cpp
+//replacementModelSSRV::replacementModelSSRV(const replacementModelSSRV& other) :
+//_dist(other._dist->clone()),
+//_baseRM(other._baseRM->clone()),
+//_rateOfRate(other._rateOfRate)
+//{
+// int size = alphabetSize();
+// _Q.resize(size);
+// for (int z=0; z < _Q.size();++z)
+// _Q[z].resize(size,0);
+// updateFreq();
+// updateQ();
+//}
+
+// Copy constructor.
+// Instead of calling updateQ here, like in goldmanYangModel.cpp,
+// this method uses the copy constructor of q2pt and also copies _freq and _Q,
+// avoiding a redundant (and expensive) rebuild of the rate matrix and its
+// eigen-decomposition.
+replacementModelSSRV::replacementModelSSRV(const replacementModelSSRV& other) :
+_dist(other._dist->clone()),
+_baseRM(other._baseRM->clone()),
+_rateOfRate(other._rateOfRate),
+_q2pt(other._q2pt),
+_freq(other._freq),
+_Q(other._Q)
+{
+}
+
+// Releases the cloned distribution and base model owned by this object.
+replacementModelSSRV::~replacementModelSSRV()
+{
+	if (_dist) delete (_dist);
+	if (_baseRM) delete (_baseRM);
+}
+
+
+// Copy assignment: deletes the owned clones, then deep-copies all state from
+// 'other'.
+// NOTE(review): there is no self-assignment guard — `x = x` would delete
+// _dist/_baseRM before cloning them; confirm callers never self-assign.
+replacementModelSSRV& replacementModelSSRV::operator=(const replacementModelSSRV &other)
+{
+	if (_dist) delete (_dist);
+	if (_baseRM) delete (_baseRM);
+
+	_dist = other._dist->clone();
+	_baseRM = other._baseRM->clone();
+	_rateOfRate = other._rateOfRate;
+	_q2pt = other._q2pt; //@@@@ why doesn't this work ? explicit ?
+//	_q2pt.fillFromRateMatrix(other._freq,other._Q);
+	_freq = other._freq;
+	_Q = other._Q;
+
+	return (*this);
+}
+
+// Size of the expanded SSRV state space: each base-alphabet character is
+// replicated once per rate category.
+const int replacementModelSSRV::alphabetSize() const
+{
+	return (_baseRM->alphabetSize() * _dist->categories());
+}
+
+
+
+// The freq of each mulCharacter is its freq in the _baseRM * the freq of the rate-category.
+// State layout used throughout this class:
+//   expanded index = categoryNumber * baseAlphabetSize + baseCharacterId
+void replacementModelSSRV::updateFreq()
+{
+	_freq.clear();
+	int size = alphabetSize();
+	int numCategories = _dist->categories();
+	_freq.resize(size);
+	int idInCategory;
+
+	for(idInCategory=0; idInCategory < _baseRM->alphabetSize() ; ++idInCategory)
+	{
+		for (int categoryNumber=0; categoryNumber < numCategories; ++categoryNumber)
+			_freq[categoryNumber*_baseRM->alphabetSize() + idInCategory] =
+			_baseRM->freq(idInCategory) * _dist->ratesProb(categoryNumber);
+	}
+}
+
+
+// Rebuilds the expanded SSRV rate matrix _Q and refreshes the q2pt
+// decomposition used by Pij_t and its derivatives. Off-diagonal structure:
+//  - same rate category, different base character: category rate * base-model
+//    instantaneous rate (obtained as dPij_dt at t = 0)
+//  - same base character, different rate category: _rateOfRate * probability of
+//    the target category
+//  - different in both base character and category: 0 (left at the resize
+//    default — simultaneous switches are not modeled)
+void replacementModelSSRV::updateQ()
+{
+	if (_rateOfRate < EPSILON) _rateOfRate = EPSILON; // Temporary - to overcome a bug in QL algorithm, when _rateOfRate == 0
+
+	_Q.clear();
+	int size = alphabetSize();
+	_Q.resize(size);
+	for (int z=0; z < _Q.size();++z)
+		_Q[z].resize(size,0.0);
+
+	// fill Q
+	int _BaseRM_alphabetSize = _baseRM->alphabetSize();
+	int numCategories = _dist->categories();
+	// i,j : go over all the base-alphabet.
+	// z,w : go over all the categories.
+	for (int i=0; i < _BaseRM_alphabetSize; ++i)
+	{
+		for (int j=0; j < _BaseRM_alphabetSize; ++j)
+		{
+			for (int z=0; z < numCategories; ++z)
+			{
+				for (int w=0; w < numCategories; ++w)
+				{
+					if (i!=j)
+					{
+						// different alphabet, same rate category
+						if (z==w)
+							_Q[z*_BaseRM_alphabetSize + i][z*_BaseRM_alphabetSize+j]
+							= _dist->rates(z) * _baseRM->dPij_dt(i,j,0);
+					}
+					else
+					{
+						// same alphabet, different rate category
+						if (z!=w)
+						{
+							_Q[z*_BaseRM_alphabetSize+i][w*_BaseRM_alphabetSize+i] = _rateOfRate * _dist->ratesProb(w);
+						}
+						// same alphabet, same rate category
+						// (diagonal entry: base-model diagonal scaled by the
+						// category rate, minus the total category-switch rate)
+						else
+							_Q[z*_BaseRM_alphabetSize+i][z*_BaseRM_alphabetSize+i] =
+							_dist->rates(z) * _baseRM->dPij_dt(i,j,0)
+							- ( _rateOfRate * (1.0 - _dist->ratesProb(z)));
+					}
+
+				}
+			}
+		}
+	}
+
+//	// check OZ
+//	LOG(4, <<"THE Q MATRIX IS: "<<endl ) ;
+//	VVdouble::iterator itr1 = _Q.begin();
+//	Vdouble::iterator itr2;
+//	for (; itr1 != _Q.end(); ++itr1)
+//	{
+//		for (itr2 = itr1->begin(); itr2 != itr1->end(); ++itr2)
+//			LOG(4,<< setprecision(3) << setw(5) << *itr2 <<'\t');
+//		LOG(4,<<endl);
+//	}
+//	LOG (4,<<endl);
+////	end of check
+
+	// Refresh the eigen-decomposition so Pij_t/dPij_dt reflect the new Q.
+	_q2pt.fillFromRateMatrix(_freq,_Q);
+
+}
+
+// Replaces the owned rate distribution (taking a clone) and rebuilds Q.
+// NOTE(review): updateFreq() is NOT called here even though _freq depends on
+// _dist->ratesProb (see updateFreq) — confirm callers refresh frequencies
+// separately, or that the new distribution has identical category probabilities.
+void replacementModelSSRV::setDistribution(const distribution* dist)
+ {
+	 if (dist->categories() == 0)
+		 errorMsg::reportError("replacementModelSSRV::setDistribution : number of categories == 0");
+	 if (_dist) delete (_dist);
+	 _dist=dist->clone();
+	 updateQ();
+ }
+
+// Expected substitution rate at stationarity: sum over states of
+// freq[i] * (-Q[i][i]). Used as a normalization (scaling) factor.
+MDOUBLE replacementModelSSRV::sumPijQij() const{
+	MDOUBLE sum=0.0;
+	for (int i=0; i < _Q.size(); ++i) {
+		sum -= _Q[i][i]*_freq[i];
+	}
+	return sum;
+}
+
+
+//void replacementModelSSRV::norm(MDOUBLE scale){
+//
+// for (int i=0; i < _Q.size(); ++i) {
+// for (int j=0; j < _Q.size(); ++j) {
+// _Q[i][j]*=scale;
+// }
+// }
+//
+// _q2pt.fillFromRateMatrix(_freq,_Q);
+//}
+
+
+
+
+
+
+
+
diff --git a/libs/phylogeny/replacementModelSSRV.h b/libs/phylogeny/replacementModelSSRV.h
new file mode 100644
index 0000000..27a9b0b
--- /dev/null
+++ b/libs/phylogeny/replacementModelSSRV.h
@@ -0,0 +1,73 @@
+// $Id: replacementModelSSRV.h 1914 2007-04-04 08:40:35Z osnatz $
+#ifndef ___REPLACEMENT_MODEL_SSRV
+#define ___REPLACEMENT_MODEL_SSRV
+
+#include <cmath>
+#include "replacementModel.h"
+#include "distribution.h"
+#include "fromQtoPt.h"
+#include "errorMsg.h"
+#include "definitions.h"
+
+// Replacement model with site-specific rate variation (SSRV): wraps a base
+// replacement model and a rate distribution, expanding the state space to
+// (base alphabet) x (rate categories) and allowing switches between rate
+// categories at rate _rateOfRate. Pij_t and its derivatives are served from
+// the q2pt eigen-decomposition of the expanded rate matrix _Q.
+class replacementModelSSRV : public replacementModel
+{
+public:
+	explicit replacementModelSSRV(const distribution* dist, const replacementModel* baseRM, MDOUBLE rateOfRate = 1);
+	explicit replacementModelSSRV(const replacementModelSSRV& other);
+	~replacementModelSSRV();
+	replacementModelSSRV& operator=(const replacementModelSSRV &other);
+	// Expanded alphabet size: base alphabet size * number of rate categories.
+	const int alphabetSize() const;
+	virtual replacementModel* clone() const {return new replacementModelSSRV(*this);}
+	// Transition probability over branch length d, via the cached decomposition.
+	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
+		return _q2pt.Pij_t(i,j,d);
+	}
+	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+		return _q2pt.dPij_dt(i,j,d);
+	}
+	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+		return _q2pt.d2Pij_dt2(i,j,d);
+	}
+
+	// Stationary frequency of expanded state i (filled by updateFreq).
+	const MDOUBLE freq(const int i) const {return _freq[i];}
+
+	distribution* getDistribution() const { return _dist;} // @@@@ this const is a lie !!!
+	void setDistribution(const distribution* dist); // it's important to call updateQ after changing the distribution parameters
+
+	replacementModel* getBaseRM() const { return _baseRM;} // @@@@ this const is a lie (for the same reason as getDistribution()
+
+	MDOUBLE getRateOfRate() const { return _rateOfRate;}
+	// Setting the rate-of-rate rebuilds Q and its decomposition.
+	void setRateOfRate(MDOUBLE rateOfRate) { _rateOfRate=rateOfRate; updateQ();}
+
+	// NOTE: these return by value (full copies of the matrix / vector).
+	VVdouble getQ() const { return _Q;}
+	Vdouble getFreqs() const {return _freq;}
+
+	MDOUBLE sumPijQij() const;
+
+	void updateQ();
+	void updateFreq();
+	q2pt getQ2pt() const {return _q2pt;} // used for debug only
+	//void norm(MDOUBLE scale);
+
+private:
+	distribution* _dist;          // owned clone of the rate distribution
+	replacementModel* _baseRM;    // owned clone of the base replacement model
+	MDOUBLE _rateOfRate;          // rate of switching between rate categories
+	q2pt _q2pt;                   // eigen-decomposition of _Q for fast Pij_t
+	Vdouble _freq;                // stationary frequencies of the expanded states
+	VVdouble _Q;                  // expanded rate matrix
+
+};
+
+#endif
+
+/* @@@@ When we want to optimize alpha, we usually get the distibution from the stochastic process and then
+convert it using static_cast, for example to gammaDistribution and use its method setAlpha.
+For this reason, the method distr() in replacmentModel and the method getDistribution here are both const, although
+they actually allow changing the distribution.
+A good solution for this is to add a setDistribution in the stochasticProcess.
+This will check if the distributions are of the same type and if so, will just update the alpha.
+*/
+
+// @@@@ Idea - maybe there is no need of replacementModelSSRV. This can be stochasticProcessSSRV - not good. the SP also has an accelerator.
+
+
diff --git a/libs/phylogeny/samplingSequences.cpp b/libs/phylogeny/samplingSequences.cpp
new file mode 100644
index 0000000..b6015b6
--- /dev/null
+++ b/libs/phylogeny/samplingSequences.cpp
@@ -0,0 +1,193 @@
+#include "samplingSequences.h"
+#include "logFile.h"
+#include "talRandom.h"
+
+
+// Keep a private copy of the container the sampling methods will draw from.
+sampleSequences::sampleSequences(sequenceContainer &sc)
+	: _sc(sc) {
+}
+
+// Returns a new container holding only the sequences that contain no gap and
+// no unknown character and whose length equals the expected alignment length.
+// Kept sequences are re-numbered with consecutive ids 0..n-1.
+// Fixes vs. the original: the length test was re-evaluated at every position
+// (it is position-independent), a local 'sequence sc' shadowed the container
+// parameter, and a zero-length sequence slipped through because the inner
+// loop never ran. NOTE(review): 297 is a dataset-specific magic number -
+// consider promoting it to a parameter.
+sequenceContainer sampleSequences::removeSequences(sequenceContainer &sc){
+	int noOfSeq = sc.numberOfSeqs();
+	int gap = sc.getAlphabet()->gap();
+	int unknown = sc.getAlphabet()->unknown();
+	const int expectedLen = 297; // dataset-specific expected sequence length
+	int n = 0;
+	sequenceContainer newSc;
+	for (int i=0;i<noOfSeq;i++){
+		bool seqToAdd = (sc[i].seqLen() == expectedLen); // length check hoisted out of the position loop
+		for (int j=0; seqToAdd && j<sc[i].seqLen(); j++){
+			if ((sc[i][j]== gap) || (sc[i][j]== unknown ))
+				seqToAdd = false;
+		}
+		if (seqToAdd == true) {
+			sequence newSeq(sc[i]); // copy; do not shadow the parameter name
+			newSeq.setID(n);
+			n++;
+			newSc.add(newSeq);
+		}
+	}
+	return newSc;
+}
+
+
+// Debug helper: dump the triangular distance matrix to stdout, one row per line.
+void sampleSequences::printDistances(){
+	for (int row = 0; row < _distances.size(); ++row){
+		for (int col = 0; col < _distances[row].size(); ++col)
+			cout << _distances[row][col] << " ";
+		cout << endl;
+	}
+}
+
+// Store a pairwise distance in the triangular matrix. Row i was sized
+// numberOfSeq-i, so the unordered pair (i,j) maps to _distances[min][max-min].
+void sampleSequences::setDistance(int i,int j,MDOUBLE dist){
+ (i<j ? _distances[i][j-i] :_distances[j][i-j]) = dist;
+}
+
+// Read a pairwise distance using the same triangular indexing as setDistance.
+MDOUBLE sampleSequences::getDistance(int i,int j){
+ return (i<j ? _distances[i][j-i] :_distances[j][i-j]);
+}
+
+
+// Greedy "farthest point" sampling: seed with the first sequence, then
+// repeatedly add the sequence whose minimal distance to the already-chosen
+// set is maximal, until n sequences are collected. Identical sequences are
+// removed first, so distances are computed on distinct sequences only.
+sequenceContainer sampleSequences::sampleFarthestSequences(int n, distanceMethod *dm){
+ _sc.removeIdenticalSequences();
+ if (n >= _sc.numberOfSeqs()){
+ cerr<<"Number of sequences to sample is bigger than the origin number of sequences so the all sequences were chosen in sampleSequences::sampleFarthestSequences"<<endl;
+ return _sc;
+ }
+
+ // Build the triangular pairwise-distance matrix (row i holds i..n-1).
+ int numberOfSeq = _sc.numberOfSeqs();
+ _distances.resize(numberOfSeq);
+ int i;
+ for (i=0;i<numberOfSeq;i++)
+ _distances[i].resize(numberOfSeq-i);
+
+ for (i=0;i<numberOfSeq;i++){
+ for(int j=i;j<numberOfSeq;j++){
+ int id1 = _sc.placeToId(i);
+ int id2 = _sc.placeToId(j);
+
+ setDistance(i,j,dm->giveDistance(_sc[id1],_sc[id2],NULL));
+ }
+ }
+
+ // Seed the sample with the sequence at place 0.
+ sequenceContainer newSc;
+ vector<int> sampled;
+ sampled.push_back(0);//to change
+ int id = 0;
+ int p = _sc.placeToId(0);
+ sequence sc(_sc[p]);
+ sc.setID(id++);
+ newSc.add(sc);
+ // Repeatedly pull in the farthest remaining sequence, renumbering ids.
+ while (newSc.numberOfSeqs()<n){
+ int i = findNextSeq(sampled);
+ p = _sc.placeToId(i);
+ sequence sc(_sc[p]);
+ sc.setID(id);
+ newSc.add(sc);
+ id++;
+ sampled.push_back(i);
+ }
+ return newSc;
+}
+
+// Returns the place-index of the sequence that is farthest from the sampled
+// set, i.e. the one maximizing its minimal distance to any already-sampled
+// sequence (the selection step of farthest-point sampling).
+// Fix vs. original: the sanity check used 'seqi > numberOfSeqs()' which could
+// never fire for the real out-of-range value (off-by-one); '>=' is correct.
+int sampleSequences::findNextSeq(vector<int> &sampled){
+	MDOUBLE max = 0,min;
+	int seqi = -1;
+	for(int i=0;i< _sc.numberOfSeqs();i++){
+		min=10000; // sentinel assumed larger than any real distance //to update
+		for (int j=0;j<sampled.size();j++){
+			if (getDistance(i,sampled[j])<min)
+				min = getDistance(i,sampled[j]);
+		}
+		if (max<min){ // best max-min candidate so far
+			max=min;
+			seqi = i;
+		}
+	}
+
+	// seqi must be a valid place index in [0, numberOfSeqs).
+	if (seqi>=_sc.numberOfSeqs() ||seqi<0){
+		errorMsg::reportError("Error in sampleSequences::findNextSeq");
+	}
+	return seqi;
+}
+
+//sequenceContainer sampleSequences::sampleRandomSequences(int seqNum)
+//{
+// if (seqNum > _sc.numberOfSeqs())
+// errorMsg::reportError("sampleSequences::sampleRandomSequences(): the number of requested seqeuences is larger than the number of sequences in the MSA");
+// sequenceContainer newSc(_sc);
+// while (newSc.numberOfSeqs() > seqNum)
+// {
+// int seqPlaceToRemove = talRandom::giveIntRandomNumberBetweenZeroAndEntry(newSc.numberOfSeqs());
+// newSc.remove(newSc.placeToId(seqPlaceToRemove));
+// }
+// return newSc;
+//}
+
+
+// Randomly selects seqNum distinct places (rejection sampling on a 0/1 mask)
+// and copies the corresponding sequences into a new container; _sc itself is
+// left untouched.
+sequenceContainer sampleSequences::sampleRandomSequences(int seqNum)
+{
+ if (seqNum > _sc.numberOfSeqs())
+ errorMsg::reportError("sampleSequences::sampleRandomSequences(): the number of requested seqeuences is larger than the number of sequences in the MSA");
+ sequenceContainer newSc;
+ Vint vec2Add(_sc.numberOfSeqs(),0); // 1 = chosen for the sample
+ int n = 0;
+ // Draw until seqNum distinct places are marked (re-draws on collisions).
+ while (n < seqNum)
+ {
+ int seqPlaceToAdd = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.numberOfSeqs());
+ if (vec2Add[seqPlaceToAdd] == 0){
+ vec2Add[seqPlaceToAdd] = 1;
+ n++;
+ }
+
+ }
+ for (int i = 0; i<vec2Add.size();i++){
+ if (vec2Add[i] == 1)
+ newSc.add(_sc[i]); // NOTE(review): indexes _sc by place here, while sibling methods map place->id via placeToId() first - confirm operator[] semantics
+ }
+ return newSc;
+}
+//sequenceContainer sampleSequences::sampleRandomCharacters(int seqLen)
+//{
+// if (seqLen > _sc.seqLen())
+// errorMsg::reportError("sampleSequences::sampleRandomCharacters(): the requested sequence length is larger than the number of characters in the MSA");
+// Vint posToRemove(_sc.seqLen(),1);
+// //first create a vector with seqLen positions to be sampled in the begining of the vector
+// for (int i = 0; i < seqLen; ++i)
+// posToRemove[i] = 0;
+// //then randomly swap the positions in posToRemove.
+// //The end result is a random vector with the positions to remove marked with '1'
+// int swapNum = _sc.seqLen() * 10;
+// for (int x = 0; x < swapNum; ++x)
+// {
+// int pos1 = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.seqLen());
+// int pos2 = talRandom::giveIntRandomNumberBetweenZeroAndEntry(_sc.seqLen());
+// int tmp = posToRemove[pos1];
+// posToRemove[pos1] = posToRemove[pos2];
+// posToRemove[pos2] = tmp;
+// }
+//
+// sequenceContainer newSc(_sc);
+// newSc.removePositions(posToRemove);
+// return newSc;
+//}
+
+
+// Randomly removes alignment columns one at a time until only seqLen columns
+// remain; returns the reduced copy (original container untouched).
+// NOTE(review): removing one column per pass rebuilds the sequences each
+// iteration (O(L^2) overall); the commented-out variant above removed all
+// columns in a single pass.
+sequenceContainer sampleSequences::sampleRandomCharacters(int seqLen)
+{
+ if (seqLen > _sc.seqLen())
+ errorMsg::reportError("sampleSequences::sampleRandomCharacters(): the requested sequence length is larger than the number of characters in the MSA");
+ sequenceContainer newSc(_sc);
+
+ while (newSc.seqLen() > seqLen)
+ {
+ Vint posToRemove(newSc.seqLen(),0); // mask: 1 marks the column to drop
+ int seqPlaceToRemove = talRandom::giveIntRandomNumberBetweenZeroAndEntry(newSc.seqLen());
+ posToRemove[seqPlaceToRemove] = 1;
+ newSc.removePositions(posToRemove);
+ }
+ return newSc;
+}
diff --git a/libs/phylogeny/samplingSequences.h b/libs/phylogeny/samplingSequences.h
new file mode 100644
index 0000000..51ea8a8
--- /dev/null
+++ b/libs/phylogeny/samplingSequences.h
@@ -0,0 +1,33 @@
+#ifndef SAMPLE_SEQUENCES_H
+#define SAMPLE_SEQUENCES_H
+
+#include "definitions.h"
+#include "distanceMethod.h"
+#include "sequenceContainer.h"
+#include "pDistance.h"
+
+
+// Utility for sub-sampling a multiple sequence alignment: by diversity
+// (farthest-point sampling on a pairwise distance matrix), by random choice
+// of sequences, or by random choice of alignment columns.
+class sampleSequences{
+public:
+ explicit sampleSequences(sequenceContainer &sc);
+ virtual ~sampleSequences() {};
+
+ // Farthest-point sampling of n sequences using distance method dm.
+ sequenceContainer sampleFarthestSequences(int n, distanceMethod *dm);
+ //sampleRandomSequences: samples seqNum sequences from the sequence container
+ sequenceContainer sampleRandomSequences(int seqNum);
+ //sampleRandomCharacters: samples seqLen characters from the sequenceContainer
+ sequenceContainer sampleRandomCharacters(int seqLen);
+
+
+private:
+ int findNextSeq(vector<int> &sampled); // next farthest sequence (max-min distance)
+ void setDistance(int i,int j,MDOUBLE dist); // triangular-matrix write
+ MDOUBLE getDistance(int i,int j); // triangular-matrix read
+ void removeSequenceWithGap();
+ sequenceContainer removeSequences(sequenceContainer &sc);
+ void printDistances(); // debug dump
+private:
+ VVdouble _distances; // triangular pairwise distance matrix (row i: i..n-1)
+ sequenceContainer _sc; // private copy of the alignment being sampled
+};
+#endif
diff --git a/libs/phylogeny/searchStatus.cpp b/libs/phylogeny/searchStatus.cpp
new file mode 100644
index 0000000..f7a27a6
--- /dev/null
+++ b/libs/phylogeny/searchStatus.cpp
@@ -0,0 +1,9 @@
+// $Id: searchStatus.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "searchStatus.h"
+
+// Initialize the annealing-style status with a starting temperature and the
+// multiplicative factor applied by tmpUp1()/tmpDown1().
+searchStatus::searchStatus(const MDOUBLE startingTmp,const MDOUBLE factor ):
+ _currentTmp(startingTmp),
+ _factor(factor) {}
+
+
diff --git a/libs/phylogeny/searchStatus.h b/libs/phylogeny/searchStatus.h
new file mode 100644
index 0000000..fea5ba9
--- /dev/null
+++ b/libs/phylogeny/searchStatus.h
@@ -0,0 +1,30 @@
+// $Id: searchStatus.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___SEARCH_STATUS
+#define ___SEARCH_STATUS
+
+#include "definitions.h"
+
+// Tracks a "temperature" value that is scaled up/down by a constant factor
+// (simulated-annealing style bookkeeping for tree-search heuristics).
+class searchStatus {
+public:
+	explicit searchStatus(const MDOUBLE startingTmp,const MDOUBLE factor);
+	// Fix vs. original: the default constructor left both members
+	// uninitialized, so reading them before setParameters() was undefined.
+	// Use neutral defaults instead (temperature 0, factor 1 = no-op scaling).
+	explicit searchStatus(): _currentTmp(0.0), _factor(1.0) {};
+	// Set both parameters at once.
+	void setParameters(const MDOUBLE tmp, const MDOUBLE factor) {
+		_currentTmp=tmp;
+		_factor=factor;
+	}
+
+	void tmpUp1(){_currentTmp *= _factor;}   // one scaling step up
+	void tmpDown1(){_currentTmp /= _factor;} // one scaling step down
+	const MDOUBLE getTmp() const {return _currentTmp;}
+	void setTmp(const MDOUBLE newTmp) {_currentTmp=newTmp;}
+	virtual ~searchStatus(){}
+
+private:
+	MDOUBLE _currentTmp; // current "temperature"
+	MDOUBLE _factor;     // multiplicative step factor
+};
+
+#endif
+
+
diff --git a/libs/phylogeny/seqContainerTreeMap.cpp b/libs/phylogeny/seqContainerTreeMap.cpp
new file mode 100644
index 0000000..6e9c70c
--- /dev/null
+++ b/libs/phylogeny/seqContainerTreeMap.cpp
@@ -0,0 +1,63 @@
+// $Id: seqContainerTreeMap.cpp 5106 2008-10-31 02:17:49Z itaymay $
+
+#include "seqContainerTreeMap.h"
+#include "logFile.h"
+
+
+//if bLeavesOnly == true then checks only leaves, otherwise the sequence container includes also internal nodes (as may be the result of simulations)
+// Verifies a 1:1 name correspondence in both directions: every (leaf) tree
+// node must appear in the sequence container, and every sequence name must
+// appear in the tree. Reports an error (which aborts) on the first mismatch.
+void checkThatNamesInTreeAreSameAsNamesInSequenceContainer(const tree& et,const sequenceContainer & sc, bool bLeavesOnly){
+ treeIterDownTopConst tIt(et);
+ //cout<<"tree names:"<<endl;
+ // Direction 1: tree -> sequence container.
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ bool bFound = false;
+ if (bLeavesOnly) {
+ if (mynode->isInternal())
+ continue;
+ }
+ sequenceContainer::constTaxaIterator it=sc.constTaxaBegin();
+ for (;it != sc.constTaxaEnd(); ++it)
+ {
+ string scName = it->name(); // NOTE(review): debug leftovers, unused
+ string treeNodeName = mynode->name();
+
+ if (it->name() == mynode->name())
+ {
+ bFound = true;
+ break;
+ }
+ }
+ if (bFound == false)
+ {
+ string errMsg = "The sequence name: ";
+ errMsg += mynode->name();
+ errMsg += " was found in the tree file but not found in the sequence file.\n";
+ LOG(4,<<errMsg<<endl);
+ errorMsg::reportError(errMsg);
+ }
+ }
+
+ // Direction 2: sequence container -> tree.
+ sequenceContainer::constTaxaIterator it=sc.constTaxaBegin();
+ for (;it != sc.constTaxaEnd(); ++it){
+ bool bFound = false;
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ if (bLeavesOnly)
+ {
+ if (mynode->isInternal())
+ continue;
+ }
+ if (it->name() == mynode->name())
+ {
+ bFound = true;
+ break;
+ }
+ }
+ if (bFound == false)
+ {
+ string errMsg = "The sequence name: ";
+ errMsg += it->name();
+ errMsg += " was found in the sequence file but not found in the tree file.\n";
+ errorMsg::reportError(errMsg);
+ }
+ }
+}
+
diff --git a/libs/phylogeny/seqContainerTreeMap.h b/libs/phylogeny/seqContainerTreeMap.h
new file mode 100644
index 0000000..2c98a41
--- /dev/null
+++ b/libs/phylogeny/seqContainerTreeMap.h
@@ -0,0 +1,36 @@
+// $Id: seqContainerTreeMap.h 5106 2008-10-31 02:17:49Z itaymay $
+
+#ifndef ___SEQUENCE_CONTAINER_TREE_MAP
+#define ___SEQUENCE_CONTAINER_TREE_MAP
+#include "definitions.h"
+#include "tree.h"
+#include "treeIt.h"
+#include "sequenceContainer.h"
+
+void checkThatNamesInTreeAreSameAsNamesInSequenceContainer(const tree& et,const sequenceContainer & sc, bool bLeavesOnly = true);
+
+
+// Maps tree-node ids to sequence ids: after construction, seqIdOfNodeI(nodeID)
+// returns the id of the sequence whose name matches that (leaf) node, or -1
+// for internal nodes. The constructor first validates that leaf names and
+// sequence names correspond 1:1 (aborts via errorMsg otherwise).
+class seqContainerTreeMap {
+public:
+ explicit seqContainerTreeMap(const sequenceContainer& sc,
+ const tree& et) {
+ checkThatNamesInTreeAreSameAsNamesInSequenceContainer(et,sc);
+ _V.resize(et.getNodesNum());
+ treeIterTopDownConst tit(et);
+ for (tree::nodeP myN = tit.first();myN!=tit.end(); myN = tit.next()) {
+ if (myN->isInternal()) {
+ _V[myN->id()] = -1; // internal nodes have no sequence
+ } else {
+ _V[myN->id()] = sc.getId(myN->name(),false);
+ }
+ }
+ }
+ int seqIdOfNodeI(const int nodeID) {
+ return _V[nodeID];
+ }
+
+private:
+ vector<int> _V;// _V[i] is the sequenceId of node I.
+};
+
+#endif
diff --git a/libs/phylogeny/seqeuncesFilter.cpp b/libs/phylogeny/seqeuncesFilter.cpp
new file mode 100644
index 0000000..8d09078
--- /dev/null
+++ b/libs/phylogeny/seqeuncesFilter.cpp
@@ -0,0 +1,233 @@
+#include "seqeuncesFilter.h"
+#include "nucleotide.h"
+
+// The class only exposes static filters and holds no state; nothing to release.
+seqeuncesFilter::~seqeuncesFilter()
+{}
+
+// Removes every sequence that contains a stop codon anywhere except the last
+// position (a trailing stop codon is legitimate for a coding sequence).
+void seqeuncesFilter::removeSequencesWithStop(sequenceContainer & sc, codon & alpha)
+{
+
+ //going over al seqeunces
+ for (int i = 0; i < sc.numberOfSeqs();++i) {
+ int id = sc.placeToId(i);
+ //going over all sequence len
+ for (int j = 0; j < sc.seqLen();++j) {
+ //remove seqeunces with stop data not in the middle
+ if ((j != sc.seqLen()-1) && (alpha.isStopCodon(sc[id][j])))
+ {
+ LOG(4, <<"removing sequence = "<<sc.name(id)<<" : STOP codon in the middle of the reading frame!"<<endl);
+ sc.remove(id);
+ i--; // compensate: the next sequence shifted into place i
+ break;
+ }
+ }
+ }
+}
+
+// Removes every sequence that contains at least one 'unknown' character.
+// Fix vs. original: placeToId(i) was re-evaluated inside the per-position
+// loop although it is loop-invariant for a given sequence; hoisted out.
+void seqeuncesFilter::removeSequencesWithMissingData(sequenceContainer & sc)
+{
+	//going over al seqeunces
+	for (int i = 0; i < sc.numberOfSeqs(); ++i)
+	{
+		int id = sc.placeToId(i); // invariant over the inner position scan
+		//going over all sequence len
+		for (int j = 0; j < sc.seqLen(); ++j)
+		{
+			//remove seqeunces with unkonwn data
+			if (sc[id][j] == sc.getAlphabet()->unknown())
+			{
+				sc.remove(id);
+				i--; // compensate: the next sequence shifted into place i
+				break;
+			}
+		}
+	}
+}
+
+// Removes every sequence that has a stop codon or missing data anywhere
+// except the last position.
+// NOTE(review): unlike removeSequencesWithMissingData, the j != seqLen-1
+// guard here also exempts *missing data* at the last position - confirm that
+// is intended and not just a copy of the stop-codon condition.
+void seqeuncesFilter::removeSequencesWithMissingDataAndStop(sequenceContainer & sc, codon & alpha)
+{
+
+ //going over al seqeunces
+ for (int i = 0; i < sc.numberOfSeqs(); ++i) {
+ int id = sc.placeToId(i);
+ //going over all sequence len
+ for (int j = 0; j < sc.seqLen();++j) {
+ //remove seqeunces with stop data not in the middle or missing data
+ if ((j != sc.seqLen()-1) && (sc[id][j] == sc.getAlphabet()->unknown() || alpha.isStopCodon(sc[id][j])))
+ {
+
+ sc.remove(id);
+ i--; // compensate: the next sequence shifted into place i
+ break;
+ }
+ }
+ }
+
+}
+
+
+// Removes every sequence whose first codon does not translate to methionine
+// (i.e. does not start with ATG).
+void seqeuncesFilter::removeSequencesNotStartWithATG(sequenceContainer & sc, codon & alpha)
+{
+ amino aa;
+ //going over al seqeunces
+ for (int i = 0; i < sc.numberOfSeqs();++i) {
+ int id = sc.placeToId(i);
+ int in_first = codonUtility::aaOf(sc[id][0], alpha); // amino acid of the first codon
+ if (in_first != aa.fromChar('M'))
+ {
+ LOG(4, <<"removing sequence = "<<sc.name(id)<<" : not starting with ATG!"<<endl);
+ sc.remove(id);
+ i--; // compensate: the next sequence shifted into place i
+ }
+ }
+}
+
+// Removes every sequence whose first codon is not one of the alphabet's
+// initiation codons.
+void seqeuncesFilter::removeSequencesNotStartWithInitiationCodons(sequenceContainer & sc,codon & alpha)
+{
+	for (int place = 0; place < sc.numberOfSeqs(); ++place) {
+		int id = sc.placeToId(place);
+		int firstCodon = sc[id][0];
+		if (alpha.isInitiationCodon(firstCodon))
+			continue; // keep this sequence
+		LOG(4, <<"removing sequence = "<<sc.name(id)<<" : not starting with initiation codon!"<<endl);
+		sc.remove(id);
+		--place; // compensate: the next sequence shifted into this place
+	}
+}
+
+
+// For every alignment position where the reference sequence (refName) has a
+// gap: if more than 'precent' percent of all sequences also have a gap there,
+// remove every sequence that has a character (an insert) at that position.
+// Fix vs. original: three stray debug 'cout' statements polluted stdout on
+// every reference-gap column; they are removed.
+void seqeuncesFilter::removeSequencesWithGapsAccordingRef(sequenceContainer & sc,int precent,string refName)
+{
+	int refID = sc.getId(refName);
+	Vint seqToRemove;
+	//going over all position in reference seqeunce
+	for (int pos = 0; pos < sc[refID].seqLen(); pos++)
+	{
+		//check if the pos is gap
+		if (sc[refID][pos] == sc.getAlphabet()->gap())
+		//going over all other seqeunces to compute the precents of gaps
+		{
+			seqToRemove.clear(); // ids of sequences WITH a character here
+			MDOUBLE numOfSeqWithOutGap = 0;
+			for (int i = 0; i < sc.numberOfSeqs(); i++)
+			{
+				int id = sc.placeToId(i);
+				if (sc[id][pos] != sc.getAlphabet()->gap())
+				{
+					numOfSeqWithOutGap++;
+					seqToRemove.push_back(id);
+				}
+			}
+			// percentage of sequences that DO have a gap at this position
+			if ((100 * ((sc.numberOfSeqs() - numOfSeqWithOutGap)/sc.numberOfSeqs())) > precent)
+			{
+				for (int j = 0; j < seqToRemove.size(); j++){
+					sc.remove(seqToRemove[j]);
+				}
+			}
+		}
+	}
+}
+
+//removes all sequences that are shorter than lowerBound and longer than upperBound
+// Length is measured with seqLenSpecific(), i.e. counting only "specific"
+// characters (not gaps, unknowns, or ambiguities).
+// Fix vs. original: the local 'pAlph' (sc.getAlphabet()) was never used.
+void seqeuncesFilter::removeShortAndLongSequences(sequenceContainer & sc, int lowerBound, int upperBound)
+{
+	//going over al seqeunces
+	for (int seq = 0; seq < sc.numberOfSeqs(); ++seq)
+	{
+		int id = sc.placeToId(seq);
+		//checking sequence length
+		int seqLen = sc[id].seqLenSpecific();
+		if ((seqLen < lowerBound) || (seqLen > upperBound))
+		{
+			cerr<<"removing sequence: "<<sc.name(id)<<" sequence Length = "<<seqLen<<endl;
+			sc.remove(id);
+			--seq; // compensate: the next sequence shifted into this place
+		}
+	}
+}
+
+//removes all sequences that have inserts in which most other sequences (> percent) have gaps.
+//in case refName is given: check only positions in which the reference sequence has gaps.
+//The remained sequences are stored in newSc.
+// Removed sequence names are appended to outFileName (default:
+// "removedSequences<percent>.txt"). sc itself is not modified.
+void seqeuncesFilter::removeSequencesWithInserts(sequenceContainer & newSc,const sequenceContainer & sc,int percent, const string& refName, string outFileName)
+{
+ if (outFileName.empty())
+ outFileName = "removedSequences" + double2string(percent) + ".txt";
+ ofstream outF(outFileName.c_str());
+ int refID;
+ if (!refName.empty())
+ refID = sc.getId(refName); // only read below when refName is non-empty
+ Vint seqToAdd(sc.numberOfSeqs(), 1);//1== add the sequence to newSc. 0 = don't add.
+ //going over all position (in reference seqeunce if given)
+ for (int pos = 0; pos < sc.seqLen(); ++pos)
+ {
+
+ if (!refName.empty())
+ { //don't remove this position if it isn't gap in the refSeqeunce
+ if (sc[refID][pos] != sc.getAlphabet()->gap())
+ continue;
+ }
+ Vint seqToRemove; //holds the ids of sequences without gaps in the current positions
+ //going over all seqeunces to compute the percent of gaps
+ MDOUBLE numOfSeqWithGap = 0;
+ for (int i = 0; i < sc.numberOfSeqs(); i++)
+ {
+ int id = sc.placeToId(i);
+ if (sc[id][pos] != sc.getAlphabet()->gap())
+ {
+ seqToRemove.push_back(id);
+ }
+ else
+ numOfSeqWithGap++;
+ }
+ //outF<<"POS "<<pos<<" seqWithGaps = "<<numOfSeqWithGap<<" seqWithoutGaps = "<<sc.numberOfSeqs() - numOfSeqWithGap<<endl;
+ //in case most sequences have gaps in that position: remove the sequences that have inserts at that position
+ MDOUBLE percentGapsinPos = 100.0 * (numOfSeqWithGap / sc.numberOfSeqs());
+ if (percentGapsinPos > percent)
+ {
+ //outF<<"removing sequences: ";
+ for (int j = 0; j < seqToRemove.size(); j++)
+ {
+ int x = seqToRemove[j];
+ seqToAdd[seqToRemove[j]] = 0; // mark for exclusion (id used as index)
+ outF<<sc.name(sc.placeToId(x))<<endl;
+ }
+ outF<<endl;
+ }
+ }
+
+ // Copy every sequence that survived all positions into newSc.
+ for (int i=0; i<seqToAdd.size(); i++)
+ {
+ if (seqToAdd[i] == 1)
+ {
+ int id = sc.placeToId(i);
+ newSc.add(sc[id]);
+ }
+ }
+ outF.close();
+}
+
+// Removes nucleotide sequences whose length is not a multiple of 3, i.e. that
+// cannot encode a whole number of codons.
+// Fix vs. original: the local 'nucleotide nucAlph' was constructed but never
+// used; it is removed.
+void seqeuncesFilter::removeSequencesNotDivisableBy3(sequenceContainer & sc)
+{
+	for (int i = 0; i < sc.numberOfSeqs();++i)
+	{
+		int id = sc.placeToId(i);
+		int seqL = sc[id].seqLen();
+		if ((seqL % 3) != 0)
+		{
+			LOG(4, <<"removing sequence = "<<sc.name(id)<<" : nucleotide sequence length is not divisable by 3!"<<endl);
+			sc.remove(id);
+			--i; // compensate: the next sequence shifted into place i
+		}
+	}
+}
diff --git a/libs/phylogeny/seqeuncesFilter.h b/libs/phylogeny/seqeuncesFilter.h
new file mode 100644
index 0000000..9ef1a77
--- /dev/null
+++ b/libs/phylogeny/seqeuncesFilter.h
@@ -0,0 +1,35 @@
+#ifndef __SEQEUNCES_FILTER
+#define __SEQEUNCES_FILTER
+
+
+#include "definitions.h"
+#include "sequenceContainer.h"
+#include "codon.h"
+#include "amino.h"
+#include <string>
+#include <fstream>
+#include "fastaFormat.h"
+
+
+using namespace std;
+
+// Collection of static filters over a sequenceContainer: each method removes
+// (in place, unless documented otherwise) sequences failing some criterion.
+// NOTE(review): the class/file name carries the historical "seqeunces" typo;
+// renaming would break includers, so it is kept.
+class seqeuncesFilter{
+
+public:
+ static void removeSequencesWithStop(sequenceContainer & sc,codon & alpha);
+ static void removeSequencesWithMissingData(sequenceContainer & sc);
+ //applied only to coding nucleotide seqeunces: remove sequence that are not divisable by 3.
+ static void removeSequencesNotDivisableBy3(sequenceContainer & sc);
+ static void removeSequencesWithMissingDataAndStop(sequenceContainer & sc,codon & alpha);
+ static void removeSequencesNotStartWithATG(sequenceContainer & sc,codon & alpha);
+ static void removeSequencesNotStartWithInitiationCodons(sequenceContainer & sc,codon & alpha);
+ static void removeSequencesWithGapsAccordingRef(sequenceContainer & sc,int precent, string refName);
+ // Copies the surviving sequences into newSc; sc is left unmodified.
+ static void removeSequencesWithInserts(sequenceContainer & newSc, const sequenceContainer & sc, int percent, const string& refName = "", string outFileName = "");
+
+
+ //removes all sequences that are shorter than lowerBound and longer than upperBound
+ static void removeShortAndLongSequences(sequenceContainer & sc, int lowerBound, int upperBound);
+ virtual ~seqeuncesFilter();
+
+};
+#endif
diff --git a/libs/phylogeny/sequence.cpp b/libs/phylogeny/sequence.cpp
new file mode 100644
index 0000000..47659d7
--- /dev/null
+++ b/libs/phylogeny/sequence.cpp
@@ -0,0 +1,178 @@
+// $Id: sequence.cpp 3668 2008-03-05 15:15:24Z itaymay $
+
+#include "sequence.h"
+
+#include <algorithm>
+using namespace std;
+
+
+// Build a sequence by parsing 'str' with the given alphabet: one symbol every
+// stringSize() characters. Aborts via errorMsg on an unparsable symbol
+// (fromChar's error sentinel is -99). The alphabet is cloned and owned.
+sequence::sequence(const string& str,
+ const string& name,
+ const string& remark,
+ const int id,
+ const alphabet* inAlph)
+: _alphabet(inAlph->clone()), _remark(remark), _name(name),_id(id)
+{
+ for (int k=0; k < str.size() ;k += _alphabet->stringSize()) {
+ int charId = inAlph->fromChar(str, k);
+ if (charId == -99) { // fromChar's parse-failure sentinel
+ string textToPrint = "unable to read sequence: " + name;
+ errorMsg::reportError(textToPrint);
+ }
+
+ _vec.push_back(charId);
+ }
+}
+
+
+// Copy constructor: deep-copies the character vector and clones the alphabet
+// (each sequence owns its own alphabet instance).
+sequence::sequence(const sequence& other)
+: _vec(other._vec), _alphabet(other._alphabet->clone()),
+ _remark(other._remark), _name(other._name),_id(other._id)
+{
+
+}
+// convert the other sequence to the alphabet inAlph.
+// Three conversions are supported, distinguished (crudely) by alphabet sizes:
+//  1. amino(20)/nucleotide(4) -> indel(2): gap maps to '-', all else to 'X';
+//  2. amino/nucleotide -> a mulAlphabet whose size is a multiple of the base;
+//  3. mulAlphabet -> its base alphabet (amino or nucleotide).
+// Any other combination aborts via errorMsg.
+sequence::sequence(const sequence& other,const alphabet* inAlph)
+: _alphabet(inAlph->clone()), _remark(other._remark), _name(other._name), _id(other._id)
+{
+ const mulAlphabet* pMulAlphabet;
+ // if the other.alphabet is amino or nucleotide and the inAlph is indel
+
+ if ( (other._alphabet->size() == 20 && inAlph->size() == 2)
+ || (other._alphabet->size() == 4 && inAlph->size() == 2) )
+ {
+ for (int k=0; k < other.seqLen() ;k += other._alphabet->stringSize())
+ {
+ int charId = other._vec[k];
+
+ if (charId == other._alphabet->gap())
+ _vec.push_back(inAlph->fromChar("-",0));
+ else
+ _vec.push_back(inAlph->fromChar("X",0)); //also converts "." (charId==-3) to "X"
+ // unknown amino/nucleotide is converted to "X" and not to "?"
+ }
+ }
+
+ // if the other.alphabet is amino or nucleotide and the inAlph is mulAlphabet
+ else if ( (other._alphabet->size() == 20 && inAlph->size()%20 == 0)
+ || (other._alphabet->size() == 4 && inAlph->size()%4 == 0) )
+ {
+ for (int k=0; k < other.seqLen() ;++k)
+ {
+ // round-trip through the textual representation to re-encode
+ int charId = other._vec[k];
+ string ch = other._alphabet->fromInt(charId);
+ int mulCharId = _alphabet->fromChar(ch,0);
+ _vec.push_back(mulCharId);
+ }
+ // debug OZ
+ //cout << "other sequence: " << other << endl;
+ //cout << "mul sequence " << (*this) << endl;
+ // end of debug
+ }
+ // if the other.alphabet is mulAlphabet and the inAlph is it's baseAlphabet
+ // (for example, if other.alphabet is a multiplied-amino and inAlph is amino, then the converted sequence
+ // will have alphabet amino)
+ else if ( ((inAlph->size() == 20) && (other._alphabet->size()%20 == 0))
+ || (inAlph->size() == 4) && (other._alphabet->size()%4 == 0))
+ {
+ // NOTE(review): C-style cast silently drops const on other._alphabet;
+ // convertToBasedAlphaInt is presumably non-mutating - confirm.
+ pMulAlphabet=(mulAlphabet*)(other._alphabet);
+ for (int k=0; k < other.seqLen() ;++k)
+ {
+ int mulCharId = other._vec[k];
+ int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId);
+ _vec.push_back(baseId);
+ }
+ }
+
+ // I tried to implement it using dynamic_cast but it doesn't work...
+ /*else if
+ (
+ (pMulAlphabet = dynamic_cast<const mulAlphabet*>(other._alphabet)) != NULL
+ )
+ {
+ if (pMulAlphabet->getBaseAlphabet()->size() == inAlph->size())
+ {
+ for (int k=0; k < other.seqLen() ;++k)
+ {
+ int mulCharId = other._vec[k];
+ int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId);
+ _vec.push_back(baseId);
+ }
+ }
+ }*/
+
+ // (currently, there is no implimentions for other converts)
+ else
+ {
+ string error = "unable to convert this kind of alphabet";
+ errorMsg::reportError(error);
+ }
+}
+
+// Release the owned alphabet clone (deleting a null pointer is a no-op).
+sequence::~sequence()
+{
+	delete _alphabet;
+}
+
+// Resize the character vector to k cells; new cells are filled with *val when
+// a fill value is supplied, otherwise with the alphabet's 'unknown' code.
+void sequence::resize(const int k, const int* val) {
+	const int fillValue = (val == NULL) ? _alphabet->unknown() : *val;
+	_vec.resize(k, fillValue);
+}
+
+// Render the whole sequence as text by concatenating each position's symbol.
+string sequence::toString() const{
+	string result;
+	for (int pos = 0; pos < _vec.size(); ++pos)
+		result += _alphabet->fromInt(_vec[pos]);
+	return result;
+}
+
+// Render a single position as its textual symbol.
+string sequence::toString(const int pos) const{
+ return _alphabet->fromInt(_vec[pos]);
+}
+
+// Append symbols parsed from 'str': one symbol every stringSize() characters.
+void sequence::addFromString(const string& str) {
+	const int step = _alphabet->stringSize();
+	for (int offset = 0; offset < str.size(); offset += step)
+		_vec.push_back(_alphabet->fromChar(str, offset));
+}
+
+// Unary predicate for remove_if: matches cells holding the sentinel value
+// -1000, which marks positions scheduled for deletion.
+class particip {
+public:
+ explicit particip() {}
+ bool operator()(int i) {
+ return (i==-1000);
+ }
+};
+
+//removePositions: the poitions to be removed are marked as '1' in posToRemoveVec
+//all othehr positions are '0'
+// Two-phase removal: first overwrite doomed cells with the -1000 sentinel,
+// then compact the vector with the erase-remove idiom.
+void sequence::removePositions(const vector<int> & posToRemoveVec)
+{
+ if(posToRemoveVec.size() != seqLen())
+ errorMsg::reportError("the input vector must be same size as sequence length. in sequence::removePositions");
+ for (int k=0; k < posToRemoveVec.size(); ++k) {
+ if (posToRemoveVec[k] == 1)
+ _vec[k] = -1000; // sentinel recognized by the particip predicate
+ }
+ vector<int>::iterator vec_iter;
+ vec_iter = remove_if(_vec.begin(),_vec.end(),particip());
+ _vec.erase(vec_iter,_vec.end()); // pg 1170, primer.
+}
+
+//return the number of sites that are specific = not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
+int sequence::seqLenSpecific() const
+{
+	int specificCount = 0;
+	for (int pos = 0; pos < seqLen(); ++pos)
+		if (isSpecific(pos))
+			++specificCount;
+	return specificCount;
+}
diff --git a/libs/phylogeny/sequence.h b/libs/phylogeny/sequence.h
new file mode 100644
index 0000000..7939b73
--- /dev/null
+++ b/libs/phylogeny/sequence.h
@@ -0,0 +1,141 @@
+// $Id: sequence.h 3668 2008-03-05 15:15:24Z itaymay $
+
+#ifndef ___SEQUENCE
+#define ___SEQUENCE
+#include "definitions.h"
+#include "errorMsg.h"
+#include "alphabet.h"
+#include "mulAlphabet.h"
+#include <iostream>
+using namespace std;
+
+// A single aligned sequence: a vector of integer-coded characters plus its
+// name, id, free-text remark, and an owned clone of the alphabet used to
+// encode it. Provides parsing from text, alphabet conversion, per-position
+// access, and (const)Iterator helpers over the character vector.
+class sequence {
+
+
+public:
+ class Iterator;
+ friend class Iterator;
+ class constIterator;
+ friend class constIterator;
+
+ // constructors
+ explicit sequence(const string& str,
+ const string& name,
+ const string& remark,
+ const int id,
+ const alphabet* inAlph);
+
+ sequence(const sequence& other);
+ sequence(const sequence& other,const alphabet* inAlph); // convert the other sequence to the alphabet inAlph.
+ // Empty sequence over the given (non-null) alphabet; the alphabet is cloned.
+ explicit sequence(const alphabet* inAlph) {
+ if (inAlph == NULL) {
+ errorMsg::reportError("must give a non Null alphabet when constructing sequences");
+ }
+ _alphabet = inAlph->clone();
+ }
+ virtual ~sequence();
+
+ int seqLen() const {return _vec.size();}
+ int seqLenSpecific() const; //return the number of sites that are isSpecific()
+ const string& name() const {return _name;}
+ void setName(const string & inName) { _name =inName ;}
+ const int id() const {return _id;}
+ void setID(const int inID) { _id =inID ;}
+ const string& remark() const {return _remark;}
+ void setRemarks(const string & inRemarks) { _remark =inRemarks ;}
+ string toString() const;
+ string toString(const int pos) const;
+
+ void addFromString(const string& str);
+ //push_back: add a single characer to the sequence
+ void push_back(int p) {_vec.push_back(p);}
+ void resize(const int k, const int* val = NULL);
+ void removePositions(const vector<int> & parCol);
+
+ // Replace the owned alphabet with a clone of inA.
+ void setAlphabet(const alphabet* inA) {if (_alphabet) delete _alphabet;
+ _alphabet=inA->clone();
+ }
+ const alphabet* getAlphabet() const {return _alphabet;}
+
+ inline sequence& operator=(const sequence& other);
+ inline sequence& operator+=(const sequence& other);
+ int& operator[](const int i) {return _vec[i];}
+ const int& operator[](const int pos) const {return _vec[pos];}
+ bool isUnknown(const int pos) const {return _vec[pos] == _alphabet->unknown();}
+
+ // "specific" here is not unknown, nor ambiguity, nor gap (for example, for nucleotides it will true for A,C,G, or T).
+ bool isSpecific(const int pos) const {return _alphabet->isSpecific(_vec[pos]);}
+
+private:
+ vector<int> _vec; // integer-coded characters
+ const alphabet* _alphabet; // owned clone; released in the destructor
+ string _remark;
+ string _name;
+ int _id;
+
+
+public:
+ // Mutable forward/backward iterator over the character vector.
+ class Iterator {
+ public:
+ explicit Iterator(){};
+ ~Iterator(){};
+ void begin(sequence& seq){_pointer = seq._vec.begin();}
+ void end(sequence& seq){_pointer = seq._vec.end();}
+ int& operator* (){return *_pointer;}
+ int const &operator* () const {return *_pointer;}
+ void operator ++() {++_pointer;}
+ void operator --() { --_pointer; }
+ bool operator != (const Iterator& rhs){return (_pointer != rhs._pointer);}
+ bool operator == (const Iterator& rhs){return (_pointer == rhs._pointer);}
+ private:
+ vector<int>::iterator _pointer;
+ };
+
+ // Read-only counterpart of Iterator.
+ class constIterator {
+ public:
+ explicit constIterator(){};
+ ~constIterator(){};
+ void begin(const sequence& seq){_pointer = seq._vec.begin();}
+ void end(const sequence& seq){_pointer = seq._vec.end();}
+ int const &operator* () const {return *_pointer;}
+ void operator ++(){++_pointer;}
+ void operator --(){--_pointer;}
+ bool operator != (const constIterator& rhs) {
+ return (_pointer != rhs._pointer);
+ }
+ bool operator == (const constIterator& rhs) {
+ return (_pointer == rhs._pointer);
+ }
+ private:
+ vector<int>::const_iterator _pointer;
+ };
+
+
+} ;
+
+// Copy assignment. Fix vs. original: the previous implementation cloned the
+// other sequence's alphabet without releasing the current one, leaking an
+// alphabet object on every assignment, and had no self-assignment guard.
+// Clone-before-delete keeps the object intact if clone() throws.
+inline sequence& sequence::operator=(const sequence& other) {
+	if (this != &other) {
+		const alphabet* newAlphabet = other._alphabet->clone();
+		delete _alphabet; // release the previously owned clone (null-safe)
+		_alphabet = newAlphabet;
+		_vec = other._vec;
+		_name=other.name();
+		_id=other.id();
+		_remark=other.remark();
+	}
+	return *this;
+}
+
+// Append the other sequence's characters to this one (no alphabet check,
+// matching the original behavior).
+inline sequence& sequence::operator+=(const sequence& other) {
+	_vec.insert(_vec.end(), other._vec.begin(), other._vec.end());
+	return *this;
+}
+
+
+// Stream a sequence as its full textual representation.
+inline ostream & operator<<(ostream & out, const sequence &Seq){
+ out<< Seq.toString();
+ return out;
+}
+
+
+#endif
+
diff --git a/libs/phylogeny/sequenceContainer.cpp b/libs/phylogeny/sequenceContainer.cpp
new file mode 100644
index 0000000..a0ae474
--- /dev/null
+++ b/libs/phylogeny/sequenceContainer.cpp
@@ -0,0 +1,389 @@
+// $Id: sequenceContainer.cpp 5244 2008-11-16 17:21:57Z cohenofi $
+#include "sequenceContainer.h"
+#include "logFile.h"
+#include "someUtil.h"
+
+sequenceContainer::sequenceContainer(const sequenceContainer& other,const alphabet *inAlph) :
+_generalRemarks(other._generalRemarks),
+_id2place(other._id2place)
+{
+ for (int i=0; i < other._seqDataVec.size(); ++i)
+ _seqDataVec.push_back(sequence(other._seqDataVec[i],inAlph));
+}
+
+
+//if bAugumentShorterSeqs=true then add gap characters at the end of short seqeunces
+const int sequenceContainer::makeSureAllSeqAreSameLengthAndGetLen(bool bAugumentShorterSeqs) {
+ bAugumentShorterSeqs = true;
+ if (_seqDataVec.size() == 0) return 0;
+ const int len = _seqDataVec[0].seqLen();
+ for (int i=1; i < _seqDataVec.size(); ++i) {
+ if (_seqDataVec[i].seqLen()!=len) {
+ if (bAugumentShorterSeqs) {
+ for (int pos = _seqDataVec[i].seqLen(); pos < len; ++pos)
+ _seqDataVec[i].push_back(getAlphabet()->gap());
+ }
+ else {
+ cerr<<_seqDataVec[i].name()<<" "<<_seqDataVec[i].seqLen()<<" "<<len<<endl;
+ //errorMsg::reportError("not all sequences are of the same lengths");
+ }
+ }
+ }
+
+ return len;
+}
+
+//void sequenceContainer::addFromsequenceContainer(sequenceContainer& seqToAdd){
+// if (_seqDataVec.empty()) { // first sequence to add
+// sequenceContainer::taxaIterator tit;
+// sequenceContainer::taxaIterator titEND;
+// tit.begin(seqToAdd);
+// titEND.end(seqToAdd);
+// while (tit!=titEND) {
+// _seqDataVec.push_back(*tit);
+//
+// }
+// }
+// else {// now we are adding sequences to sequences that are already there.
+// sequenceContainer::taxaIterator tit;
+// sequenceContainer::taxaIterator titEND;
+// tit.begin(seqToAdd);
+// titEND.end(seqToAdd);
+// while (tit!=titEND) {
+// for (int i=0; i < _seqDataVec.size(); ++i) {
+// if (tit->name() == _seqDataVec[i].name()) {
+// _seqDataVec[i]+=(*tit);
+// break;
+// }
+// }
+// ++tit;
+// }
+// }
+//}
+
+void sequenceContainer::changeGaps2MissingData() {
+
+ for (int i = 0; i < seqLen();++i) {//going over al positions
+ for (int j = 0; j < _seqDataVec.size();++j) {
+ if (_seqDataVec[j][i] == -1){
+ _seqDataVec[j][i]=getAlphabet()->unknown(); // missing data
+ }
+ }
+ }
+}
+
+const int sequenceContainer::getId(const string &seqName, bool issueWarningIfNotFound) const {
+ int k;
+ for (k=0 ; k < _seqDataVec.size() ; ++k) {
+ if (_seqDataVec[k].name() == seqName) return (_seqDataVec[k].id());
+ }
+ if (k == _seqDataVec.size() && issueWarningIfNotFound) {
+ // debuggin
+ LOG(5,<<"seqName = "<<seqName<<endl);
+ for (k=0 ; k < _seqDataVec.size() ; ++k) {
+ LOG(5,<<"_seqDataVec["<<k<<"].name() ="<<_seqDataVec[k].name()<<endl);
+ }
+ //end dubug
+ LOG(0,<<seqName<<endl);
+ vector<string> err;
+ err.push_back("Could not find a sequence that matches the sequence name ");
+ err.push_back(seqName);
+ err.push_back("in function sequenceContainer::getSeqPtr ");
+ err.push_back(" make sure that names in tree file match name in sequence file ");
+ errorMsg::reportError(err); // also quit the program
+ }
+ return -1;
+}
+
+const Vstring sequenceContainer::names() const {
+ vector<string> res;
+ for (int i=0; i < _seqDataVec.size(); ++i) {
+ res.push_back(_seqDataVec[i].name());
+ }
+ return res;
+}
+
// Default constructor: pre-size the id->place map; -1 marks "id not in use".
sequenceContainer::sequenceContainer() {
    _id2place.resize(100,-1);
}
+
+sequenceContainer::~sequenceContainer(){}
+
// Append a sequence and register its id in the _id2place map.
// The id is taken from the sequence itself; adding two sequences with the
// same id is a fatal error (reportError quits the program).
void sequenceContainer::add(const sequence& inSeq) {
    _seqDataVec.push_back(inSeq);
    // grow the id->place map on demand, with ~100 entries of slack
    if (_id2place.size() < inSeq.id()+1) {
        _id2place.resize(inSeq.id()+100,-1);
    }
    if (_id2place[inSeq.id()] != -1) {
        string err = "Two sequences with the same id - error in function sequenceContainer::add";
        err+= "\nThe id of the sequence you are trying to add = ";
        err += int2string(inSeq.id());
        errorMsg::reportError(err);
    }
    // record where this id now lives in _seqDataVec
    _id2place[inSeq.id()] = _seqDataVec.size()-1;
}
+
+
+//given a sequence id the sequence is removed from the sequence container
+//and the vector _id2place is updated.
+void sequenceContainer::remove(const int idSeq) {
+ if (idSeq > _id2place.size()-1 || idSeq<0)
+ errorMsg::reportError("the id of sequence is not mapped by id2place in function sequenceContainer::remove");
+ int place = _id2place[idSeq];
+
+ if (place < 0)
+ errorMsg::reportError("cannot find place of the id in the sequence container in function sequenceContainer::remove");
+ _seqDataVec.erase(_seqDataVec.begin()+place);
+
+ _id2place[idSeq] = -1;
+ for (int i=place;i<_seqDataVec.size();i++) {
+ int id = _seqDataVec[i].id();
+ _id2place[id]--;
+ }
+}
+
+
+//removes identical sequences in the sequence container.
+void sequenceContainer::removeIdenticalSequences(){
+ bool exist;
+ for (int i=1;i<_seqDataVec.size();i++){
+ sequence sq1 = _seqDataVec[i];
+ for (int j=0;j<i;j++){
+ sequence sq2 = _seqDataVec[j];
+ exist = true;
+ if (sq1.seqLen() != sq2.seqLen()) continue;
+ for (int pos=0;pos<sq1.seqLen();pos++){
+ if (sq1[pos] != sq2[pos]){
+ exist = false;
+ break;
+ }
+ }
+ if (exist) {
+ remove(sq1.id());
+ i--;
+ break;
+
+ }
+
+ }
+
+ }
+
+}
+
+void sequenceContainer::removeGapPositions(){
+ vector<int> posToRemove(seqLen(),0);
+ bool gapCol;
+ int i,j;
+ for (i = 0; i < seqLen();++i) {//going over al positions
+ gapCol = false;
+ for (j = 0; j < _seqDataVec.size();++j) {
+ if (_seqDataVec[j][i] == -1) posToRemove[i] = 1;
+ }
+ }
+ removePositions(posToRemove);
+}
+void sequenceContainer::removeGapPositionsAllSeqs(){
+ vector<int> posToRemove(seqLen(),1);
+ bool gapCol;
+ int i,j;
+ for (i = 0; i < seqLen();++i) {//going over al positions
+ gapCol = false;
+ for (j = 0; j < _seqDataVec.size();++j) {
+ if (_seqDataVec[j][i] != -1) posToRemove[i] = 0;
+ }
+ }
+ removePositions(posToRemove);
+}
+void sequenceContainer::removeGapPositionsAccordingToAReferenceSeq(const string & seqName){
+ int idOfRefSeq = getId(seqName,true);
+ vector<int> posToRemove(seqLen(),0);
+ int i;
+ for (i = 0; i < seqLen();++i) {//going over al positions
+ if (_seqDataVec[idOfRefSeq][i] == -1) posToRemove[i] = 1;
+ }
+ removePositions(posToRemove);
+}
+
+void sequenceContainer::removeUnknownPositionsAccordingToAReferenceSeq(const string & seqName){
+ int idOfRefSeq = getId(seqName,true);
+ vector<int> posToRemove(seqLen(),0);
+ int i;
+ for (i = 0; i < seqLen();++i) {//going over al positions
+ if (_seqDataVec[idOfRefSeq][i] == getAlphabet()->unknown()) posToRemove[i] = 1;
+ }
+ removePositions(posToRemove);
+}
+
+//removePositions: the positions to be removed are marked as '1' in posToRemoveVec
+//all othehr positions are '0'
+void sequenceContainer::removePositions(const Vint & posToRemoveVec) {
+ for (int z = 0; z < _seqDataVec.size();++z) {
+ _seqDataVec[z].removePositions(posToRemoveVec);
+ }
+}
+
// Replace '.' characters in every non-first sequence by the character the
// first (master) sequence holds at that column. The first sequence itself
// must not contain dots (fatal error).
// NOTE(review): -3 is presumably the alphabet code for '.' — confirm against
// the alphabet implementation.
void sequenceContainer::changeDotsToGoodCharacters() {
    for (int i = 0; i < seqLen();++i) {//going over al positions
        int charInFirstSeq = _seqDataVec[0][i];
        if (charInFirstSeq == -3) {
            LOG(5,<<" position is "<<i<<endl);
            errorMsg::reportError(" the first line contains dots ");
        }
        for (int j = 1; j < _seqDataVec.size();++j) {
            if ((_seqDataVec[j][i] == -3)) {
                _seqDataVec[j][i] = charInFirstSeq; // copy from the master row
            }
        }
    }
}
+
+int sequenceContainer::numberOfSequencesWithoutGaps (const int pos) const {
+ int numOfNonCharPos = numberOfSeqs();
+ for (int i=0; i < numberOfSeqs(); ++i) {
+ if ((*this)[i][pos] <0) --numOfNonCharPos;
+ }
+ return numOfNonCharPos;
+}
+
+int sequenceContainer::numberOfSequencesWithoutUnknowns (const int pos) const {
+ int numOfNonCharPos = numberOfSeqs();
+ int unknown = getAlphabet()->unknown();
+ for (int i=0; i < numberOfSeqs(); ++i) {
+ if ((*this)[i][pos] == unknown )
+ --numOfNonCharPos;
+ }
+ return numOfNonCharPos;
+}
+
+bool sequenceContainer::isInvariable(const int pos) const {
+ int charFound = getAlphabet()->unknown();
+ for (int i=0; i < numberOfSeqs(); ++i) {
+ if ((*this)[i][pos] >= 0) {
+ if (charFound == getAlphabet()->unknown())
+ charFound = (*this)[i][pos];
+ else if (charFound != (*this)[i][pos])
+ return false;
+ }
+ }
+ return true;
+}
+
+int sequenceContainer::getInvariablePosNum() const {
+ int sum = 0;
+ for (int pos = 0; pos < seqLen(); ++pos) {
+ if (isInvariable(pos))
+ ++sum;
+ }
+ return sum;
+}
+
+// new func for gainLoss project
+void sequenceContainer::startZeroSequenceContainerGL(const sequenceContainer &sc, const gainLossAlphabet& alph, const int minNumOfOnes)
+{
+ //if(minNumOfOnes == 1){
+ // string str = "0";
+ // string remark;
+ // int localid =0;
+ // for(int i=0; i<sc.numberOfSeqs();i++){
+ // this->add(sequence(str,sc.name(i),remark,localid,&alph));
+ // ++localid;
+ // }
+ //}
+ string str0 = "0";
+ string str1 = "1";
+ vector<string> strV;
+ strV.resize(sc.numberOfSeqs());
+ string remark ="";
+ switch (minNumOfOnes) {
+ case (1) :
+ for(int i=0; i<sc.numberOfSeqs();i++){
+ // add patterns of 0 ones
+ strV[i] = str0;
+ }
+ break;
+ case (2) :
+ for(int i=0; i<sc.numberOfSeqs();i++){
+ // add patterns of 0 ones
+ strV[i] = str0;
+ }
+ for(int i=0; i<sc.numberOfSeqs();i++){
+ // add patterns of only 1 ones
+ for(int j=0; j<sc.numberOfSeqs(); j++){
+ if(j==i){
+ strV[i]+=str1;
+ }
+ else{
+ strV[i]+=str0;
+ }
+ }
+ }
+ break;
+ case (3) :
+ for(int i=0; i<sc.numberOfSeqs();i++){
+ // add patterns of 0 ones
+ strV[i] = str0;
+ }
+ for(int i=0; i<sc.numberOfSeqs();i++){
+ // add patterns of only 1 ones
+ for(int j=0; j<sc.numberOfSeqs(); j++){
+ if(j==i){
+ strV[i]+=str1;
+ }
+ else{
+ strV[i]+=str0;
+ }
+ }
+ }
+ // add patterns of only 2 ones
+ for(int onePosition1=0; onePosition1<sc.numberOfSeqs(); onePosition1++){
+ for(int onePosition2=0; onePosition2<sc.numberOfSeqs(); onePosition2++){
+ if(onePosition2<=onePosition1)
+ continue;
+ for(int i=0; i<sc.numberOfSeqs();i++){
+ if(i==onePosition1 || i==onePosition2){
+ strV[i]+=str1;
+ }
+ else{
+ strV[i]+=str0;
+ }
+ }
+ }
+ }
+ break;
+ }
+ for(int i=0; i<sc.numberOfSeqs();i++){
+ //cout<<strV[i]<<endl;
+ this->add(sequence(strV[i],sc.name(i),remark,i,&alph));
+ }
+}
+
+
+
+//concatenate two sequecneContainers.
+//The sequence names must be identical in the two containers.
+//returns false if: (1) A sequence_name in one of the containers does not match any sequence_name in the other container.
+bool sequenceContainer::concatenate(const sequenceContainer& other) {
+ if (other.numberOfSeqs() != numberOfSeqs())
+ return false;
+ for(int i = 0; i < numberOfSeqs(); ++i)
+ {
+ bool bFound = false;
+ for (int j = 0; j < other.numberOfSeqs(); ++j)
+ {
+ if((*this)[i].name() == other[j].name())
+ {
+ (*this)[i] += other[i];
+ bFound = true;
+ break;
+ }
+ }
+ if (bFound == false)
+ {
+ string msg = string("Can't find sequence name in the second MSA: ") + other[i].name();
+ errorMsg::reportError(msg);
+ return false;
+ }
+ }
+ return true;
+}
diff --git a/libs/phylogeny/sequenceContainer.h b/libs/phylogeny/sequenceContainer.h
new file mode 100644
index 0000000..13c7438
--- /dev/null
+++ b/libs/phylogeny/sequenceContainer.h
@@ -0,0 +1,169 @@
+// $Id: sequenceContainer.h 5244 2008-11-16 17:21:57Z cohenofi $
+
+#ifndef ___SEQUENCE_CONTAINER
+#define ___SEQUENCE_CONTAINER
+#include "definitions.h"
+#include "sequence.h"
+#include "gainLossAlphabet.h"
+
// A container of aligned sequences (an MSA): holds the sequences, general
// remarks, and a map (_id2place) from a sequence's id to its position in the
// internal vector. Indexing via operator[] is BY ID, not by position.
class sequenceContainer {
public:

    class taxaIterator;
    friend class taxaIterator;
    class constTaxaIterator;
    friend class constTaxaIterator;

//------------------------------------------------------------
//constructors:
    explicit sequenceContainer();
    sequenceContainer(const sequenceContainer& other,const alphabet *inAlph);
    virtual ~sequenceContainer();

    //questions only:
    // alignment length = length of the first sequence (0 when empty)
    const int seqLen() const {return _seqDataVec.empty()? 0 : _seqDataVec[0].seqLen();}
    const int numberOfSeqs() const {return _seqDataVec.size();}
    const int alphabetSize() const {return _seqDataVec.empty()? 0 : _seqDataVec[0].getAlphabet()->size();}
    const vector<string>& getGeneralRemarks() const {return _generalRemarks;}
    const int makeSureAllSeqAreSameLengthAndGetLen(bool bAugumentShorterSeqs = false); //if bAugumentShorterSeqs=true then add gap characters at the end of short seqeunces
    const int getId(const string &seqName, bool issueWarninInNotFound=true) const;//return -1 if not found...
    // id-based access: the id is translated to a vector position via _id2place
    sequence& operator[](const int id) {return _seqDataVec[_id2place[id]];} // get the ID of the sequence. Return the sequence itself.
    const sequence& operator[](const int id) const {return _seqDataVec[_id2place[id]];}
    const Vstring names() const; // return a vector<string> of the names of all the sequences.
    const string& name(const int id) const {return _seqDataVec[_id2place[id]].name();};
    // NOTE(review): assumes the container is non-empty — _seqDataVec[0] is
    // out of bounds on an empty container.
    const alphabet* getAlphabet() const {return _seqDataVec[0].getAlphabet();}
    //returns the number of positions that are invariable (all seqs are identical
    int getInvariablePosNum() const;
    bool isInvariable(const int pos) const;
    // computed the number of sequences without gaps at a specific position
    // for example, if the multiple sequence alignment is
    // AT-
    // AG-
    // A-M
    // numberOfSequencesWithoutGaps(0) = 3
    // numberOfSequencesWithoutGaps(1) = 2
    // numberOfSequencesWithoutGaps(2) = 1
    int numberOfSequencesWithoutGaps(const int pos) const;
    int numberOfSequencesWithoutUnknowns(const int pos) const;


//make changes:
    // resize to t copies of an empty sequence over 'inAlph'
    void resize(int t,const alphabet* inAlph) {
        if (inAlph == NULL) {
            errorMsg::reportError("cannot resize when the alphabet is unknown");
        }
        sequence s(inAlph);
        _seqDataVec.resize(t,s);
    }
    void add(const sequence& inSeq);
    void remove(const int idSeq);
    void removeIdenticalSequences();
    int placeToId(const int place) const {return _seqDataVec[place].id();}; //get place in the vector and return the id of the sequence
    void addGeneralRemark(const string& inRemark) {_generalRemarks.push_back(inRemark);}
    void changeGaps2MissingData();
    //removePositions: the positions to be removed are marked as '1' in posToRemoveVec
    //all othehr positions are '0'
    void removePositions(const Vint & posToRemoveVec);
    void removeGapPositions();
    void removeGapPositionsAllSeqs();
    void removeGapPositionsAccordingToAReferenceSeq(const string & seqName);
    void changeDotsToGoodCharacters();
    void removeUnknownPositionsAccordingToAReferenceSeq(const string & seqName);
    bool concatenate(const sequenceContainer& other);
    void startZeroSequenceContainerGL(const sequenceContainer &sc, const gainLossAlphabet& alph, const int minNumOfOnes=1);


public:
    // per-sequence position iterators
    // NOTE(review): 'id' here indexes _seqDataVec directly (no _id2place
    // translation, unlike operator[]) — confirm that is intended.
    sequence::Iterator begin(const int id){//iterface to sequence iterator
        sequence::Iterator temp;
        temp.begin(_seqDataVec[id]);
        return temp;
    }
    sequence::Iterator end(const int id){//iterface to sequence iterator
        sequence::Iterator temp;
        temp.end(_seqDataVec[id]);
        return temp;
    }

    // iterator over the sequences (taxa) held by a container
    class taxaIterator {
    public:
        explicit taxaIterator(){};
        ~taxaIterator(){};
        void begin(sequenceContainer & inSeqCont){
            _pointer = inSeqCont._seqDataVec.begin();
        }
        void end(sequenceContainer & inSeqCont){
            _pointer = inSeqCont._seqDataVec.end();
        }
        sequence& operator* () {return *_pointer;}
        sequence const & operator* () const {return *_pointer;}
        sequence * operator-> () {return &*_pointer;} //MATAN- CHECK!!!
        sequence const * operator-> () const {return &* _pointer;} // MATAN - CHECK!!!

        void operator ++() {++_pointer;}
        void operator --() { --_pointer; }
        bool operator != (const taxaIterator& rhs){return (_pointer != rhs._pointer);}
        bool operator == (const taxaIterator& rhs){return (_pointer == rhs._pointer);}
    private:
        vector<sequence>::iterator _pointer;
    };//end if class taxaIterator


    // read-only counterpart of taxaIterator
    class constTaxaIterator {
    public:
        explicit constTaxaIterator(){};
        ~constTaxaIterator(){};
        void begin(const sequenceContainer & inSeqCont){
            _pointer = inSeqCont._seqDataVec.begin();
        }
        void end(const sequenceContainer & inSeqCont){
            _pointer = inSeqCont._seqDataVec.end();
        }
        sequence const & operator*() const {return *_pointer;}
        sequence const * operator->() const {return &*_pointer;}// MATAN - CHECK!!!

        void operator ++() {++_pointer;}
        void operator --() { --_pointer; }
        bool operator != (const constTaxaIterator& rhs) {
            return (_pointer != rhs._pointer);
        }

        bool operator == (const constTaxaIterator& rhs) {
            return (_pointer == rhs._pointer);
        }
    private:
        vector<sequence>::const_iterator _pointer;
    };

    public: // interfaces to iterators
    // NOTE(review): the 'id' parameter of taxaBegin is unused.
    taxaIterator taxaBegin(const int id=0){// interface to taxaIterator
        taxaIterator temp;
        temp.begin(*this);
        return temp;
    }

    taxaIterator taxaEnd(){// interface to taxaIterator
        taxaIterator temp;
        temp.end(*this);
        return temp;
    }

    constTaxaIterator constTaxaBegin() const{ //interface to const taxaIter
        constTaxaIterator temp;
        temp.begin(*this);
        return temp;
    }
    constTaxaIterator constTaxaEnd() const{
        constTaxaIterator temp;
        temp.end(*this);
        return temp;
    }

    private:
    vector<sequence> _seqDataVec;       // the sequences, in insertion order
    vector<string> _generalRemarks;     // free-text remarks about the container
    vector<int> _id2place;              // sequence id -> index in _seqDataVec (-1 = unused)
};
+
+#endif
+
diff --git a/libs/phylogeny/simulateCodonsJumps.cpp b/libs/phylogeny/simulateCodonsJumps.cpp
new file mode 100644
index 0000000..33a7c5d
--- /dev/null
+++ b/libs/phylogeny/simulateCodonsJumps.cpp
@@ -0,0 +1,210 @@
+#include "simulateCodonsJumps.h"
+#include "talRandom.h"
+#include "someUtil.h"
+#include "codon.h"
+#include <algorithm>
+
+
// Construction is fully delegated to the simulateJumpsAbstract base.
simulateCodonsJumps::simulateCodonsJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize)
: simulateJumpsAbstract(inTree,sp,alphabetSize)
{
}
+
// Nothing to release beyond what the base class handles.
simulateCodonsJumps::~simulateCodonsJumps()
{
}
// Precompute per-state waiting-time rates and jump probabilities from the
// stochastic process, sort the branches by length, and zero the per-branch
// accumulators of (synonymous, non-synonymous) jump counts.
void simulateCodonsJumps::init()
{
    //init the vector of waiting times.
    _waitingTimeParams.clear();
    _waitingTimeParams.resize(_alphabetSize);
    int i, j;
    for (i = 0; i < _alphabetSize; ++i)
    {
        // dPij_dt at t=0 gives the rate matrix Q; -Q[i][i] is the
        // exponential rate of leaving state i
        _waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0);

    }

    //init _jumpProbs: _jumpProbs[i][j] = Q[i][j] / -Q[i][i]
    _jumpProbs.clear();
    _jumpProbs.resize(_alphabetSize);
    for (i = 0; i < _alphabetSize; ++i)
    {
        MDOUBLE sum = 0.0;
        _jumpProbs[i].resize(_alphabetSize);
        for (j = 0; j < _alphabetSize; ++j)
        {
            if (i == j)
                _jumpProbs[i][j] = 0.0;
            else
            {
                _jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i];
            }
            sum += _jumpProbs[i][j];
        }
        // every row must form a probability distribution
        if (! DEQUAL(sum, 1.0)){
            string err = "error in simulateJumps::init(): sum probabilities is not 1 and equal to ";
            err+=double2string(sum);
            errorMsg::reportError(err);
        }
    }

    //init _orderNodesVec: a vector in which the branch lengths are ordered in ascending order
    _tree.getAllNodes(_orderNodesVec, _tree.getRoot());
    sort(_orderNodesVec.begin(), _orderNodesVec.end(), simulateJumpsAbstract::compareDist);

    _nodes2JumpsExp.clear();
    _nodes2JumpsProb.clear();
    // one zeroed (synonymous, non-synonymous) pair per combined terminal state
    vector<pair<MDOUBLE,MDOUBLE> > zeroCombinedStates2jumps;
    for(i = 0;i < getCombinedAlphabetSize();++i){
        pair<MDOUBLE,MDOUBLE> syn_and_nonSyn_jumps(0.0,0.0);
        zeroCombinedStates2jumps.push_back(syn_and_nonSyn_jumps);
    }
    Vdouble zeroVector(getCombinedAlphabetSize(),0.0);
    for (i = 0; i < _orderNodesVec.size(); ++i)
    {
        string nodeName = _orderNodesVec[i]->name();
        _nodes2JumpsExp[nodeName] = zeroCombinedStates2jumps;
        _nodes2JumpsProb[nodeName] = zeroCombinedStates2jumps;
        for (j=0; j<getCombinedAlphabetSize();++j)
            _totalTerminals[nodeName]=zeroVector;
    }
}
+
+
//simulate jumps starting from startState. The simulation continue until the maxTime is reached. In each step:
//1. Draw a new waiting time.
//2. Go over all branches shorter than nextJumpTime and update their jumpsNum between the states that were switched
//   (these branches will not be affected by the current jump):
//   however they might have been affected by the previous jump
//3. Draw a new state
void simulateCodonsJumps::runOneIter(int startState)
{
    // NOTE(review): 'substitutionType' holds the classification of the MOST
    // RECENT jump and is applied to EVERY jump accumulated in jumpsSoFar; a
    // per-jump classification (codonReplacement of jumpsSoFar[j]) may have
    // been intended — confirm. It is never read uninitialized, because both
    // inner loops below are no-ops while jumpsSoFar is still empty.
    int substitutionType;
    MDOUBLE maxTime = _orderNodesVec[_orderNodesVec.size()-1]->dis2father();
    MDOUBLE totalTimeTillJump = 0.0;
    int curState = startState;
    int smallestBranchNotUpdatedSofar = 0;
    vector<pair<int, int> > jumpsSoFar(0);
    while (totalTimeTillJump < maxTime)
    {
        // waiting time in curState is exponential with rate _waitingTimeParams[curState]
        MDOUBLE avgWaitingTime = 1 / _waitingTimeParams[curState];
        MDOUBLE nextJumpTime = totalTimeTillJump + talRandom::rand_exp(avgWaitingTime);
        //go over all branches that "finished" their simulation (shorter than nextJumpTime) and update with their _nodes2JumpsExp
        //with the jumps that occured between the terminal Ids: startState-->curState
        for (int b = smallestBranchNotUpdatedSofar; b < _orderNodesVec.size(); ++b)
        {
            if (_orderNodesVec[b]->dis2father() > nextJumpTime)
            {
                smallestBranchNotUpdatedSofar = b;
                break;
            }
            string nodeName = _orderNodesVec[b]->name();
            //update all the jumps that occured along the branch
            int terminalState = getCombinedState(startState, curState);
            _totalTerminals[nodeName][terminalState]++;
            //update all longer branches with all jumps that occurred till now
            vector<bool> jumpsSoFarBool(getCombinedAlphabetSize(),false);
            for (int j = 0; j < jumpsSoFar.size(); ++j)
            {
                int combinedJumpState = getCombinedState(jumpsSoFar[j].first, jumpsSoFar[j].second);
                jumpsSoFarBool[combinedJumpState]=true;
                // .first accumulates synonymous (type 1), .second non-synonymous (type 2)
                if(substitutionType == 1)
                    _nodes2JumpsExp[nodeName][terminalState].first += 1;
                else if(substitutionType == 2)
                    _nodes2JumpsExp[nodeName][terminalState].second += 1;
            }
            for (int combined=0;combined<jumpsSoFarBool.size();++combined)
            {
                if (jumpsSoFarBool[combined]){
                    if(substitutionType == 1)
                        _nodes2JumpsProb[nodeName][terminalState].first += 1;
                    else if(substitutionType == 2)
                        _nodes2JumpsProb[nodeName][terminalState].second += 1;
                }
            }
        }
        totalTimeTillJump = nextJumpTime;
        // draw the next state and classify the jump (1=synonymous, 2=non-synonymous)
        int nextState = giveRandomState(_alphabetSize,curState,_jumpProbs);
        substitutionType = codonUtility::codonReplacement(curState,nextState);
        jumpsSoFar.push_back(pair<int,int>(curState, nextState));
        curState = nextState;
    }
}
+
+
// Normalize the raw per-branch jump counts into expectations/probabilities by
// dividing by the number of simulations that realized each terminal state.
void simulateCodonsJumps::computeExpectationsAndPosterior(){
    //scale _nodes2JumpsExp so it will represent expectations
    map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator iterExp = _nodes2JumpsExp.begin();
    for (; iterExp != _nodes2JumpsExp.end(); ++iterExp)
    {//each node
        string nodeName = iterExp->first;
        for (int termState = 0; termState < getCombinedAlphabetSize(); ++termState)
        {
            map<string, Vdouble>::iterator iterTerm = _totalTerminals.find(nodeName);
            map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator iterProb = _nodes2JumpsProb.find(nodeName);
            if ((iterTerm==_totalTerminals.end()) || (iterProb==_nodes2JumpsProb.end()))
            {
                errorMsg::reportError("error in simulateJumps::runSimulation, unknown reason: cannot find nodeName in map");
            }

            if (iterTerm->second[termState]==0){ //never reached these terminal states
                // all four accumulators must then be zero, else the counts are inconsistent
                if((iterExp->second[termState].first == 0)&&(iterExp->second[termState].second == 0)&&
                   ((iterProb->second[termState].first == 0)&&(iterProb->second[termState].second == 0))) continue;
                else
                    errorMsg::reportError("error in simulateCodonJumps::runSimulation, 0 times reached termState but non-zero for jumpCount");
            }
            // divide the (synonymous, non-synonymous) counts by the visit count
            (iterExp->second[termState].first) /= iterTerm->second[termState];
            (iterExp->second[termState].second) /= iterTerm->second[termState];
            (iterProb->second[termState].first) /= iterTerm->second[termState];
            (iterProb->second[termState].second) /= iterTerm->second[termState];
        }
    }
}
+
+
+MDOUBLE simulateCodonsJumps::getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId)
+{
+ //map <string, VVdouble>::iterator pos;//Old
+ map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
+ if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end())
+ {
+ string err="error in simulateCodonJumps::getExpectation: cannot find node "+nodeName;
+ errorMsg::reportError(err);
+ }
+ int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
+ //Old
+ //int combinedJumpState = getCombinedState(fromId, toId);
+ //return (pos->second[combinedTerminalState][combinedJumpState]);
+
+ MDOUBLE expectation;
+ if(codonUtility::codonReplacement(fromId,toId) == 1)
+ expectation = pos->second[combinedTerminalState].first;
+ else if(codonUtility::codonReplacement(fromId,toId) == 2)
+ expectation = pos->second[combinedTerminalState].second;
+ return (expectation);
+}
+
+
+MDOUBLE simulateCodonsJumps::getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId){
+ //map <string, VVdouble>::iterator pos;
+ map<string, vector<pair<MDOUBLE,MDOUBLE> > >::iterator pos;
+ if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end())
+ {
+ string err="error in simulateCodonJumps::getProb: cannot find node "+nodeName;
+ errorMsg::reportError(err);
+ }
+ int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
+ //Old
+ //int combinedJumpState = getCombinedState(fromId, toId);
+ //return (pos->second[combinedTerminalState][combinedJumpState]);
+
+ MDOUBLE expectation;
+ if(codonUtility::codonReplacement(fromId,toId) == 1)
+ expectation = pos->second[combinedTerminalState].first;
+ else if(codonUtility::codonReplacement(fromId,toId) == 2)
+ expectation = pos->second[combinedTerminalState].second;
+ return (expectation);
+}
\ No newline at end of file
diff --git a/libs/phylogeny/simulateCodonsJumps.h b/libs/phylogeny/simulateCodonsJumps.h
new file mode 100644
index 0000000..2077553
--- /dev/null
+++ b/libs/phylogeny/simulateCodonsJumps.h
@@ -0,0 +1,49 @@
+#ifndef ___SIMULATE_CODONS_JUMPS__
+#define ___SIMULATE_CODONS_JUMPS__
+
+#include "simulateJumpsAbstract.h"
+using namespace std;
+
+/******************************************************************
+This class implements simulateJumpsAbstract for small alphabets: (tested so far up to 3)
+*******************************************************************/
+
// Codon-alphabet specialization of simulateJumpsAbstract: jump counts are
// split into synonymous/non-synonymous pairs rather than kept per jump state.
class simulateCodonsJumps:public simulateJumpsAbstract {
public:
    simulateCodonsJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize);
    virtual ~simulateCodonsJumps();

    //for a branch length specified by a nodeName:
    //give the expected number of jumps (changes) from fromId to toId that occured along the specified branh length,
    //in which the starting character is terminalStart and the terminal character is terminalEnd
    MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
    //same as above, except here we return the probability of a jump from fromId to toId given
    //terminal states terminalStart, terminalEnd in this branch
    MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);

private:
    // hooks of the abstract simulation driver (see simulateJumpsAbstract)
    void init();
    void runOneIter(int state);
    void computeExpectationsAndPosterior();


private:

    //_node2Jumps: maps a node name (which specify a branch length) to
    //the expected number of synonymous and nonsynonymous jumps between any two characters along the branch leading from the father to this node
    //given the terminal characters of this branch.
    //We use a "combined alphabet" to make access easier. see getCombinedState() for details
    //The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively.

    map<string, vector<pair<MDOUBLE,MDOUBLE> > > _nodes2JumpsExp;

    //_node2JumpsProb: maps a node name (which specify a branch length) to
    //the probability of a synonymous and non-synonymous jump between any two characters along the branch leading from the father to this node
    //given the terminal characters of this branch.
    //We use a "combined alphabet" to make access easier. see getCombinedState() for details
    //The dimension of the vector is the combined terminal state and the pair elements are: synonymous and non-synonymous jumps, respectively
    map<string, vector<pair<MDOUBLE,MDOUBLE> > > _nodes2JumpsProb;

};
+
+#endif
diff --git a/libs/phylogeny/simulateJumps.cpp b/libs/phylogeny/simulateJumps.cpp
new file mode 100644
index 0000000..1c602cc
--- /dev/null
+++ b/libs/phylogeny/simulateJumps.cpp
@@ -0,0 +1,188 @@
+#include "simulateJumps.h"
+#include "talRandom.h"
+#include "someUtil.h"
+#include <algorithm>
+
+
// Construction is fully delegated to the simulateJumpsAbstract base.
simulateJumps::simulateJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize)
: simulateJumpsAbstract(inTree,sp,alphabetSize)
{
}
+
// Nothing to release beyond what the base class handles.
simulateJumps::~simulateJumps()
{
}
+
+void simulateJumps::init()
+{
+ //init the vector of waiting times.
+ _waitingTimeParams.clear();
+ _waitingTimeParams.resize(_alphabetSize);
+ int i, j;
+ for (i = 0; i < _alphabetSize; ++i)
+ {
+ _waitingTimeParams[i] = -_sp.dPij_dt(i, i, 0.0);
+
+ }
+
+ //init _jumpProbs.
+ //_jumpProbs[i][j] = Q[i][j] / -Q[i][i]
+ _jumpProbs.clear();
+ _jumpProbs.resize(_alphabetSize);
+ for (i = 0; i < _alphabetSize; ++i)
+ {
+ MDOUBLE sum = 0.0;
+ _jumpProbs[i].resize(_alphabetSize);
+ for (j = 0; j < _alphabetSize; ++j)
+ {
+ if (i == j)
+ _jumpProbs[i][j] = 0.0;
+ else
+ {
+ _jumpProbs[i][j] = _sp.dPij_dt(i, j, 0.0) / _waitingTimeParams[i];
+ }
+ sum += _jumpProbs[i][j];
+ }
+ if (! DEQUAL(sum, 1.0)){
+ string err = "error in simulateJumps::init(): sum probabilities is not 1 and equal to ";
+ err+=double2string(sum);
+ errorMsg::reportError(err);
+ }
+ }
+
+ //init _orderNodesVec: a vector in which the branch lengths are ordered in ascending order
+ _tree.getAllNodes(_orderNodesVec, _tree.getRoot());
+ sort(_orderNodesVec.begin(), _orderNodesVec.end(), simulateJumpsAbstract::compareDist);
+
+ _nodes2JumpsExp.clear();
+ _nodes2JumpsProb.clear();
+ VVdouble zeroMatrix(getCombinedAlphabetSize());
+ for (i = 0; i < getCombinedAlphabetSize(); ++i)
+ zeroMatrix[i].resize(getCombinedAlphabetSize(), 0.0);
+ Vdouble zeroVector(getCombinedAlphabetSize(),0.0);
+ for (i = 0; i < _orderNodesVec.size(); ++i)
+ {
+ string nodeName = _orderNodesVec[i]->name();
+ _nodes2JumpsExp[nodeName] = zeroMatrix;
+ _nodes2JumpsProb[nodeName] = zeroMatrix;
+ for (j=0; j<getCombinedAlphabetSize();++j)
+ _totalTerminals[nodeName]=zeroVector;
+ }
+
+}
+
+
+//simulate jumps starting from startState. The simulation continue until the maxTime is reached. In each step:
+//1. Draw a new waiting time.
+//2. Go over all branches shorter than nextJumpTime and update their jumpsNum between the states that were switched
+// (these branches will not be affected by the current jump):
+// however they might have been affected by the previous jump
+//3. Draw a new state
+void simulateJumps::runOneIter(int startState)
+{
+ MDOUBLE maxTime = _orderNodesVec[_orderNodesVec.size()-1]->dis2father();
+ MDOUBLE totalTimeTillJump = 0.0;
+ int jumpsNum = 0;
+ int curState = startState;
+ int smallestBranchNotUpdatedSofar = 0;
+ vector<pair<int, int> > jumpsSoFar(0);
+ while (totalTimeTillJump < maxTime)
+ {
+ MDOUBLE avgWaitingTime = 1 / _waitingTimeParams[curState];
+ MDOUBLE nextJumpTime = totalTimeTillJump + talRandom::rand_exp(avgWaitingTime);
+ //go over all branches that "finished" their simulation (shorter than nextJumpTime) and update with their _nodes2JumpsExp
+ //with the jumps that occured between the terminal Ids: startState-->curState
+ for (int b = smallestBranchNotUpdatedSofar; b < _orderNodesVec.size(); ++b)
+ {
+ if (_orderNodesVec[b]->dis2father() > nextJumpTime)
+ {
+ smallestBranchNotUpdatedSofar = b;
+ break;
+ }
+ string nodeName = _orderNodesVec[b]->name();
+ //update all the jumps that occured along the branch
+ int terminalState = getCombinedState(startState, curState);
+ _totalTerminals[nodeName][terminalState]++;
+ //update all longer branches with all jumps that occurred till now
+ vector<bool> jumpsSoFarBool(getCombinedAlphabetSize(),false);
+ for (int j = 0; j < jumpsSoFar.size(); ++j)
+ {
+ int combinedJumpState = getCombinedState(jumpsSoFar[j].first, jumpsSoFar[j].second);
+ jumpsSoFarBool[combinedJumpState]=true;
+ _nodes2JumpsExp[nodeName][terminalState][combinedJumpState] += 1;
+ }
+ for (int combined=0;combined<jumpsSoFarBool.size();++combined)
+ {
+ if (jumpsSoFarBool[combined])
+ _nodes2JumpsProb[nodeName][terminalState][combined]+=1;
+ }
+ }
+ totalTimeTillJump = nextJumpTime;
+ int nextState = giveRandomState(_alphabetSize,curState, _jumpProbs);
+ jumpsSoFar.push_back(pair<int,int>(curState, nextState));
+ curState = nextState;
+ ++jumpsNum;
+ }
+}
+
+
// Normalize the raw per-branch jump counts into expectations/probabilities by
// dividing by the number of simulations that realized each terminal state.
void simulateJumps::computeExpectationsAndPosterior(){
    //scale _nodes2JumpsExp so it will represent expectations
    map<string, VVdouble>::iterator iterExp = _nodes2JumpsExp.begin();
    for (; iterExp != _nodes2JumpsExp.end(); ++iterExp)
    {
        string nodeName = iterExp->first;
        for (int termState = 0; termState < getCombinedAlphabetSize(); ++termState)
        {
            for (int jumpState = 0; jumpState < getCombinedAlphabetSize(); ++jumpState)
            {

                //(iter->second[termState][jumpState]) /= static_cast<MDOUBLE>(iterNum);
                map<string, Vdouble>::iterator iterTerm = _totalTerminals.find(nodeName);
                map<string, VVdouble>::iterator iterProb = _nodes2JumpsProb.find(nodeName);
                if ((iterTerm==_totalTerminals.end()) || (iterProb==_nodes2JumpsProb.end()))
                {
                    errorMsg::reportError("error in simulateJumps::runSimulation, unknown reason: cannot find nodeName in map");
                }
                if ((iterTerm->second[termState]==0)){ //never reached these terminal states
                    // accumulators must then be zero too, else the counts are inconsistent
                    if ((iterExp->second[termState][jumpState]==0) && (iterProb->second[termState][jumpState]==0))
                        continue;//leave the value of _nodes2JumpsExp and _nodes2JumpsProb as zero
                    else {
                        errorMsg::reportError("error in simulateJumps::runSimulation, 0 times reached termState but non-zero for jumpCount");
                    }
                }
                // divide each count by the number of visits to this terminal state
                (iterExp->second[termState][jumpState]) /= iterTerm->second[termState];

                (iterProb->second[termState][jumpState]) /= iterTerm->second[termState];

            }
        }
    }
}
+
+
+MDOUBLE simulateJumps::getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId)
+{
+ map <string, VVdouble>::iterator pos;
+ if ((pos = _nodes2JumpsExp.find(nodeName)) == _nodes2JumpsExp.end())
+ {
+ string err="error in simulateJumps::getExpectation: cannot find node "+nodeName;
+ errorMsg::reportError(err);
+ }
+ int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
+ int combinedJumpState = getCombinedState(fromId, toId);
+ return (pos->second[combinedTerminalState][combinedJumpState]);
+}
+
+
+MDOUBLE simulateJumps::getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId){
+ map <string, VVdouble>::iterator pos;
+ if ((pos = _nodes2JumpsProb.find(nodeName)) == _nodes2JumpsProb.end())
+ {
+ string err="error in simulateJumps::getProb: cannot find node "+nodeName;
+ errorMsg::reportError(err);
+ }
+ int combinedTerminalState = getCombinedState(terminalStart, terminalEnd);
+ int combinedJumpState = getCombinedState(fromId, toId);
+ return (pos->second[combinedTerminalState][combinedJumpState]);
+}
\ No newline at end of file
diff --git a/libs/phylogeny/simulateJumps.h b/libs/phylogeny/simulateJumps.h
new file mode 100644
index 0000000..c03f7e0
--- /dev/null
+++ b/libs/phylogeny/simulateJumps.h
@@ -0,0 +1,48 @@
+#ifndef ___SIMULATE_JUMPS__
+#define ___SIMULATE_JUMPS__
+
+#include "simulateJumpsAbstract.h"
+using namespace std;
+
+/******************************************************************
+This class implements simulateJumpsAbstract for small alphabets: (tested so far up to 3)
+*******************************************************************/
+
+class simulateJumps:public simulateJumpsAbstract {
+public:
+	simulateJumps(const tree& inTree, const stochasticProcess& sp, const int alphabetSize);
+	virtual ~simulateJumps();
+
+	//for a branch length specified by a nodeName:
+	//give the expected number of jumps (changes) from fromId to toId that occurred along the specified branch,
+	//in which the starting character is terminalStart and the terminal character is terminalEnd
+	MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
+	//same as above, except here we return the probability of a jump from fromId to toId given
+	//terminal states terminalStart, terminalEnd in this branch
+	MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId);
+
+private:
+	//implementation of the simulateJumpsAbstract hooks (see base class)
+	void init();
+	void runOneIter(int state);
+	void computeExpectationsAndPosterior();
+
+
+private:
+
+	//_nodes2JumpsExp: maps a node name (which specifies a branch length) to
+	//the expected number of jumps between any two characters along the branch leading from the father to this node
+	//given the terminal characters of this branch.
+	//The matrix is 2D and not 4D because we use a "combined alphabet" to make access easier. see getCombinedState() for details
+	//The first dimension is the combined terminal state and the second dimension is the combined jump state
+	map<string, VVdouble> _nodes2JumpsExp;
+
+	//_nodes2JumpsProb: maps a node name (which specifies a branch length) to
+	//the probability of a jump between any two characters along the branch leading from the father to this node
+	//given the terminal characters of this branch.
+	//The matrix is 2D and not 4D because we use a "combined alphabet" to make access easier. see getCombinedState() for details
+	//The first dimension is the combined terminal state and the second dimension is the combined jump state
+	map<string, VVdouble> _nodes2JumpsProb;
+
+};
+
+#endif
diff --git a/libs/phylogeny/simulateJumpsAbstract.cpp b/libs/phylogeny/simulateJumpsAbstract.cpp
new file mode 100644
index 0000000..3ddf2b4
--- /dev/null
+++ b/libs/phylogeny/simulateJumpsAbstract.cpp
@@ -0,0 +1,44 @@
+#include "simulateJumpsAbstract.h"
+
+
+// Store copies of the tree and stochastic process used for the simulation
+// (both members are by-value; see the header), and the single-character
+// alphabet size from which the combined alphabet (size^2) is derived.
+simulateJumpsAbstract::simulateJumpsAbstract(const tree& inTree, const stochasticProcess& sp, const int alphabetSize)
+: _tree(inTree), _sp(sp), _alphabetSize(alphabetSize)
+{
+}
+
+
+
+//runSimulation: do the actual simulation. iterNum specifies the number of iterations starting from each state
+//runSimulation: perform the full Monte-Carlo simulation. iterNum iterations
+//are started from each of the _alphabetSize character states, after which the
+//accumulated counts are normalized into expectations/probabilities.
+void simulateJumpsAbstract::runSimulation(int iterNum)
+{
+	init();
+	for (int startState = 0; startState < _alphabetSize; ++startState)
+	{
+		int iter = 0;
+		while (iter < iterNum)
+		{
+			runOneIter(startState);
+			++iter;
+		}
+	}
+
+	computeExpectationsAndPosterior();
+}
+
+//////////////////////////////////////////////////////////
+//combined two characters into a combined state.
+//For example. if the alphabet is {0,1,2} then the combined alphabet will be {0,1...8}.
+//The states (terminalStart, terminalEnd) = (0,2) then combinedId = 2.
+//The states (terminalStart, terminalEnd) = (1,2) then combinedId = 5. etc.
+//////////////////////////////////////////////////////////
+//Map a character pair onto a single index in the combined alphabet
+//(row-major, size _alphabetSize^2). With alphabet {0,1,2}:
+//(0,2) -> 2 and (1,2) -> 5, matching getStartId/getEndId below.
+int simulateJumpsAbstract::getCombinedState(int terminalStart, int terminalEnd) const
+{
+	return terminalEnd + terminalStart * _alphabetSize;
+}
+//extract the starting character encoded in a combined state (inverse of getCombinedState)
+int simulateJumpsAbstract::getStartId(int combinedState) const
+{
+	return combinedState / _alphabetSize;
+}
+//extract the terminal character encoded in a combined state (inverse of getCombinedState)
+int simulateJumpsAbstract::getEndId(int combinedState) const
+{
+	return combinedState % _alphabetSize;
+}
+//////////////////////////////////////////////////////////
+
diff --git a/libs/phylogeny/simulateJumpsAbstract.h b/libs/phylogeny/simulateJumpsAbstract.h
new file mode 100644
index 0000000..9e43390
--- /dev/null
+++ b/libs/phylogeny/simulateJumpsAbstract.h
@@ -0,0 +1,73 @@
+#ifndef ___SIMULATE_JUMPS_ABSTRACT_
+#define ___SIMULATE_JUMPS_ABSTRACT_
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "alphabet.h"
+
+#include <map>
+#include <vector>
+using namespace std;
+
+/******************************************************************
+This is an abstract base class for the various implementations of simulateJumps.
+It is the parent class of the generic (original) implementation,
+class simulateJumps (working on alphabets of either {0,1,2} or {0,1}),
+and of class simulateCodonsJumps, a variant of simulateJumps that can handle
+the 61-letter codon alphabet without memory limitations.
+
+The simulateJumps algorithm simulates jumps (events) along differing branch lengths (according to a
+given tree), with the aim of giving the expectation of the number of jumps
+from state a to state b given that the terminal states at the end of the branch are
+x and y.
+*******************************************************************/
+
+class simulateJumpsAbstract {
+public:
+	simulateJumpsAbstract(const tree& inTree, const stochasticProcess& sp, const int alphabetSize);
+	virtual ~simulateJumpsAbstract(){}
+	//run iterNum Monte-Carlo iterations from every starting character state,
+	//then compute the expectations/posteriors from the accumulated counts
+	virtual void runSimulation(int iterNum = 10000);
+
+	//for a branch length specified by a nodeName:
+	//give the expected number of jumps (changes) from fromId to toId that occurred along the specified branch,
+	//in which the starting character is terminalStart and the terminal character is terminalEnd
+	virtual MDOUBLE getExpectation(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) = 0;
+	//same as above, except here we return the probability of a jump from fromId to toId given
+	//terminal states terminalStart, terminalEnd in this branch
+	virtual MDOUBLE getProb(const string& nodeName, int terminalStart, int terminalEnd, int fromId, int toId) = 0;
+
+protected:
+	//map a (start,end) character pair onto one index of the combined alphabet (size alphabetSize^2)
+	virtual int getCombinedState(int terminalStart, int terminalEnd) const;
+	virtual int getCombinedAlphabetSize() const {return _alphabetSize*_alphabetSize;}
+	virtual int getStartId(int combinedState) const;
+	virtual int getEndId(int combinedState) const;
+
+	//hooks implemented by the concrete simulators
+	virtual void init() = 0;
+	virtual void runOneIter(int state) = 0;
+	virtual void computeExpectationsAndPosterior() = 0;
+
+	// a comparison function to be used in sort init
+	static bool compareDist(tree::nodeP node1, tree::nodeP node2){ return (node1->dis2father() < node2->dis2father());}
+
+
+protected:
+	tree _tree;
+	stochasticProcess _sp;
+	const int _alphabetSize;
+
+	Vdouble _waitingTimeParams;//each entry is the lambda parameter of the exponential distribution modeling the waiting time for "getting out" of state i
+
+	//_jumpProbs[i][j] is the probability of jumping from state i to state j (given that a change has occurred).
+	VVdouble _jumpProbs;
+
+	//the number of times we reached a certain combination of terminal states for each branch length
+	//e.g. the number of times we observed 0,1 at terminal states given branch length 0.03
+	//this is used to afterwards normalize (i.e. compute the expectation) the _nodes2JumpsExp values
+	map<string, Vdouble> _totalTerminals;
+
+	vector<tree::nodeP> _orderNodesVec; //internal use: the branches, sorted in ascending order of length
+
+};
+
+#endif
diff --git a/libs/phylogeny/simulateTree.cpp b/libs/phylogeny/simulateTree.cpp
new file mode 100644
index 0000000..7f0ab03
--- /dev/null
+++ b/libs/phylogeny/simulateTree.cpp
@@ -0,0 +1,225 @@
+// $Id: simulateTree.cpp 3574 2008-02-27 10:44:30Z itaymay $
+
+#include "definitions.h"
+#include "treeUtil.h"
+#include "simulateTree.h"
+#include "talRandom.h"
+#include "gammaDistribution.h"
+#include "codon.h"
+
+// Construct a simulator over tree _inEt using stochastic process sp;
+// alph is the alphabet of the generated sequences (pointer stored, not owned).
+// NOTE: the stray ';' after the constructor body was removed - an extra
+// semicolon at namespace scope is ill-formed before C++11 (-pedantic error).
+simulateTree::simulateTree(const tree& _inEt,
+						   const stochasticProcess& sp,
+						   const alphabet* alph) :
+	_et(_inEt), _sp(sp),_alph(alph) {}
+
+simulateTree::~simulateTree() {}
+
+
+// Simulate sequences of length seqLength for every node of the tree:
+// draw a rate category per site, generate a random root sequence from the
+// process frequencies, then evolve it down every subtree of the root.
+void simulateTree::generate_seq(int seqLength) {
+	sequence emptySeq(_alph);
+	_simulatedSequences.resize(_et.getNodesNum(), emptySeq);
+	for (int seqIdx = 0; seqIdx < _simulatedSequences.size(); ++seqIdx) {
+		_simulatedSequences[seqIdx].resize(seqLength);
+	}
+	generateRootSeq(seqLength);
+
+	// one rate per site, drawn from the discrete rate categories
+	vector<MDOUBLE> siteRates(seqLength);
+	for (int site = 0; site < seqLength; ++site) {
+		siteRates[site] = _sp.rates(getRandCategory(site));
+	}
+
+	tree::nodeP root = _et.getRoot();
+	for (int son = 0; son < root->getNumberOfSons(); ++son) {
+		recursiveGenerateSpecificSeq(siteRates, seqLength, root->getSon(son));
+	}
+}
+
+// Sample one rate per site from the continuous Gamma(alpha) distribution.
+// NOTE(review): 'rates' is taken BY VALUE, so the vector filled below is a
+// local copy that is discarded on return - the caller never sees the result.
+// This looks like a bug (probably should be Vdouble&); confirm against callers
+// before relying on this function.
+void simulateTree::generate_rates_continuous_gamma(const int seqLength,const MDOUBLE alpha, Vdouble rates)
+{
+	rates.clear();
+	rates.resize(seqLength);
+	for (int h = 0; h < seqLength; h++) {
+		rates[h] = talRandom::SampleGamma(alpha);
+	}
+}
+
+// Same as generate_seq(), but per-site rates are drawn from the continuous
+// Gamma distribution (alpha taken from the process distribution, which must
+// be a gammaDistribution) instead of the discrete rate categories.
+void simulateTree::generate_seq_continuous_gamma(int seqLength) {
+	sequence emptySeq(_alph);
+	_simulatedSequences.resize(_et.getNodesNum(), emptySeq);
+	for (int seqIdx = 0; seqIdx < _simulatedSequences.size(); ++seqIdx) {
+		_simulatedSequences[seqIdx].resize(seqLength);
+	}
+	generateRootSeq(seqLength);
+
+	const MDOUBLE alpha = (static_cast<gammaDistribution*>(_sp.distr()))->getAlpha();
+	vector<MDOUBLE> siteRates(seqLength);
+	for (int site = 0; site < seqLength; ++site) {
+		siteRates[site] = talRandom::SampleGamma(alpha);
+	}
+
+	tree::nodeP root = _et.getRoot();
+	for (int son = 0; son < root->getNumberOfSons(); ++son) {
+		recursiveGenerateSpecificSeq(siteRates, seqLength, root->getSon(son));
+	}
+}
+
+// Like generate_seqWithRateVector, but rejects draws that would place a stop
+// codon in any simulated sequence. Requires a NUCLEOTIDE process (alphabet
+// size 4) and seqLength divisible by 3: positions are generated one codon
+// (3 sites) at a time and redrawn until no sequence has a stop codon there.
+void simulateTree::generate_seqWithRateVectorNoStopCodon(const Vdouble& simRates, int seqLength)
+{
+	if (_alph->size() != 4)
+		errorMsg::reportError("generate_seqWithRateVectorNoStopCodon is applicable only for nucleotide process");
+	if (seqLength %3 != 0)
+		errorMsg::reportError("generate_seqWithRateVectorNoStopCodon: seqLenth should be a multiplicative of 3");
+	if (simRates.size() != seqLength)
+		errorMsg::reportError("generate_seqWithRateVectorNoStopCodon: the size of simRates should be identical to seqLenth");
+
+//	sequence justAseq(_alph);
+//	vector<sequence> simulatedSequences(_et.getNodesNum(),justAseq);
+	vector<sequence> simulatedSequences; //accumulates the accepted codons for every node
+	//generate three nucleotide positions at a time. Repeat each position if the generated sequences contain stop codon
+	Vdouble rateVec(3);
+	bool bStopCodonFound = false;
+	codon codonAlph;
+	for (int p = 0; p < seqLength; p+=3)
+	{
+		rateVec[0] = simRates[p];
+		rateVec[1] = simRates[p+1];
+		rateVec[2] = simRates[p+2];
+		//generate 3 nucleotide positions with no stop codon
+		//rejection sampling, capped at 1000 attempts per codon
+		for (int loop = 0; loop < 1000; ++loop)
+		{
+			bStopCodonFound = false;
+			generate_seqWithRateVector(rateVec, 3); //overwrites _simulatedSequences with a fresh 3-site simulation
+			for (int s = 0; s < _simulatedSequences.size(); ++s)
+			{
+				string codonStr = _simulatedSequences[s].toString();
+				if (codonAlph.isStopCodon(codonStr))
+				{
+					bStopCodonFound = true;
+					break;
+				}
+			}
+			if (!bStopCodonFound)
+				break;
+		}
+		if (bStopCodonFound)
+			errorMsg::reportError("Could not generate a position without stop codon");
+		//append positions to the positions generated so far
+		if (p == 0)
+			simulatedSequences = _simulatedSequences; //this will copy also the names of the sequences
+		else
+		{
+			for (int i = 0; i < simulatedSequences.size(); ++i)
+				simulatedSequences[i] += _simulatedSequences[i];
+		}
+	}
+	_simulatedSequences = simulatedSequences;
+}
+
+
+
+// Simulate sequences where the relative rate of each site is supplied by the
+// caller: rateVec[i] scales every branch length at site i (see the recursion).
+void simulateTree::generate_seqWithRateVector(const Vdouble& rateVec, const int seqLength) {
+	sequence emptySeq(_alph);
+	_simulatedSequences.resize(_et.getNodesNum(), emptySeq);
+	for (int seqIdx = 0; seqIdx < _simulatedSequences.size(); ++seqIdx) {
+		_simulatedSequences[seqIdx].resize(seqLength);
+	}
+	generateRootSeq(seqLength);
+
+	tree::nodeP root = _et.getRoot();
+	for (int son = 0; son < root->getNumberOfSons(); ++son) {
+		recursiveGenerateSpecificSeq(rateVec, seqLength, root->getSon(son));
+	}
+}
+
+// Fill the root's sequence with characters drawn from the stationary
+// frequencies of the process, and stamp it with the root's name/id/alphabet.
+void simulateTree::generateRootSeq(int seqLength) {
+	const int rootId = _et.getRoot()->id();
+	sequence& rootSeq = _simulatedSequences[rootId];
+	for (int site = 0; site < seqLength; ++site) {
+		rootSeq[site] = giveRandomChar();
+	}
+	rootSeq.setAlphabet(_alph);
+	rootSeq.setName(_et.getRoot()->name());
+	rootSeq.setID(rootId);
+}
+
+
+// Evolve myNode's sequence from its father's: at site y the effective branch
+// length is dis2father() * rateVec[y], and the child character is drawn from
+// the process transition probabilities. Then recurse into myNode's sons.
+void simulateTree::recursiveGenerateSpecificSeq(
+							const vector<MDOUBLE> &rateVec,
+							const int seqLength,
+							tree::nodeP myNode) {
+
+	for (int y = 0; y < seqLength; y++) {
+		MDOUBLE lenFromFather=myNode->dis2father()*rateVec[y];
+		int aaInFather = _simulatedSequences[myNode->father()->id()][y];
+		// draw the child character from Pij_t(father's char -> j, effective length)
+		int newChar = giveRandomChar(aaInFather,lenFromFather,y);
+		_simulatedSequences[myNode->id()][y] = newChar;
+	}
+	_simulatedSequences[myNode->id()].setAlphabet(_alph);
+	_simulatedSequences[myNode->id()].setName(myNode->name());
+	_simulatedSequences[myNode->id()].setID(myNode->id());
+	for (int x =0 ; x < myNode->getNumberOfSons(); ++x) {
+		recursiveGenerateSpecificSeq(rateVec, seqLength, myNode->getSon(x));
+	}
+}
+
+// Draw one character from the stationary frequencies of the process by
+// inverse-transform sampling; retries guard against frequencies that do not
+// sum to one (reports an error after 100000 failed attempts).
+int simulateTree::giveRandomChar() const {
+	for (int attempt = 0; attempt < 100000; ++attempt) {
+		const MDOUBLE r = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
+		MDOUBLE cumulative = 0.0;
+		for (int c = 0; c < _sp.alphabetSize(); ++c) {
+			cumulative += _sp.freq(c);
+			if (r < cumulative)
+				return c;
+		}
+	}
+	errorMsg::reportError("Could not give random character. The reason is probably that the P_i do not sum to one.");
+	return 1;
+}
+
+// Draw the character at the end of a branch of the given length, starting
+// from letterInFatherNode, via inverse-transform sampling of Pij_t.
+// 'pos' is unused here (kept for interface compatibility).
+int simulateTree::giveRandomChar(const int letterInFatherNode,
+								 const MDOUBLE length,
+								 const int pos) const {
+	assert(letterInFatherNode>=0);
+	assert(letterInFatherNode<_sp.alphabetSize());
+	for (int attempt = 0; attempt < 100000; ++attempt) {
+		const MDOUBLE r = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
+		MDOUBLE cumulative = 0.0;
+		for (int c = 0; c < _sp.alphabetSize(); ++c) {
+			cumulative += _sp.Pij_t(letterInFatherNode,c, length);
+			if (r < cumulative)
+				return c;
+		}
+	}
+	errorMsg::reportError("Could not give random character. The reason is probably that the Pij_t do not sum to one.");
+	return 1;
+}
+
+
+// Draw a rate-category index according to the category probabilities
+// (inverse-transform sampling); reports an error - which quits the program -
+// if the probabilities do not sum to one.
+int simulateTree::getRandCategory(const int pos) const {
+	const MDOUBLE r = talRandom::giveRandomNumberBetweenZeroAndEntry(1);
+	MDOUBLE cumulative = 0.0;
+	for (int cat = 0; cat < _sp.categories(); ++cat) {
+		cumulative += _sp.ratesProb(cat);
+		if (r < cumulative)
+			return cat;
+	}
+	errorMsg::reportError(" error in function simulateTree::getRandCategory() ");
+	return -1;
+}
+
+// Pack every simulated sequence (internal nodes included) into a container.
+sequenceContainer simulateTree::toSeqData() {
+	sequenceContainer container;
+	for (int i = 0; i < _simulatedSequences.size(); ++i) {
+		container.add(_simulatedSequences[i]);
+	}
+	return container;
+}
+
+// Pack only the leaf (terminal) sequences into a container; sequences are
+// matched to tree nodes by name and internal-node sequences are skipped.
+sequenceContainer simulateTree::toSeqDataWithoutInternalNodes() {
+	sequenceContainer container;
+	for (int i = 0; i < _simulatedSequences.size(); ++i) {
+		tree::nodeP node = _et.findNodeByName(_simulatedSequences[i].name());
+		if (node == NULL)
+			errorMsg::reportError("could not find the specified name: " + _simulatedSequences[i].name());
+		if (node->isInternal())
+			continue;
+		container.add(_simulatedSequences[i]);
+	}
+	return container;
+}
diff --git a/libs/phylogeny/simulateTree.h b/libs/phylogeny/simulateTree.h
new file mode 100644
index 0000000..a476a03
--- /dev/null
+++ b/libs/phylogeny/simulateTree.h
@@ -0,0 +1,49 @@
+// $Id: simulateTree.h 3574 2008-02-27 10:44:30Z itaymay $
+
+#ifndef ___SIMULATE_TREE
+#define ___SIMULATE_TREE
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+
+//class sequenceData; // to be able to go to simulate data.
+
+//Simulates sequences down a given tree under a given stochastic process.
+//Sequences are stored per node; use toSeqData()/toSeqDataWithoutInternalNodes()
+//to extract them after one of the generate_* calls.
+class simulateTree {
+public:
+	explicit simulateTree(const tree& _inEt,const stochasticProcess& sp,
+		const alphabet* alph);
+	void generate_seq(int seqLength);
+
+	// This function generates the sequences not using the discrete gamma, but rather,
+	// the rates are sampled from the continuous distribution.
+	// It assumes the Gamma distribution has mean 1 (alpha = beta).
+	void generate_seq_continuous_gamma(int seqLength);
+
+	void generate_seqWithRateVector(const Vdouble& simRates, const int seqLength);
+	//these function do the same simulation as above but check that no stop codon is created.
+	//applicable only when the stochasticProcess is based on nucleotides
+	void generate_seqWithRateVectorNoStopCodon(const Vdouble& simRates, int seqLength);
+
+	tree gettree() {return _et;}
+	virtual ~simulateTree();
+	sequenceContainer toSeqData();
+	sequenceContainer toSeqDataWithoutInternalNodes();
+	// NOTE(review): 'rates' is passed by value, so the sampled rates never
+	// reach the caller - this looks like it should be Vdouble&; confirm usage.
+	void generate_rates_continuous_gamma(const int seqLength,const MDOUBLE alpha,Vdouble rates);
+
+private:
+	void generateRootSeq(int seqLength);
+	void recursiveGenerateSpecificSeq(const Vdouble& rateVec, int seqLength, tree::nodeP myNode);
+	int giveRandomChar() const;
+	int giveRandomChar(const int letterInFatherNode, const MDOUBLE length,const int pos) const;
+	int getRandCategory(const int pos) const;
+
+	vector<sequence> _simulatedSequences; // the sequences (nodes * seqLen)
+	tree _et;
+	const stochasticProcess& _sp;
+	const alphabet* _alph;  // not owned
+};
+
+#endif
+
diff --git a/libs/phylogeny/siteSpecificRate.cpp b/libs/phylogeny/siteSpecificRate.cpp
new file mode 100644
index 0000000..fbd2907
--- /dev/null
+++ b/libs/phylogeny/siteSpecificRate.cpp
@@ -0,0 +1,334 @@
+// $Id: siteSpecificRate.cpp 5059 2008-10-19 15:57:14Z cohenofi $
+
+#include "siteSpecificRate.h"
+#include "numRec.h"
+#include "checkcovFanctors.h"
+#include "definitions.h"
+
+
+/********************************************************************************************
+ML - full data (1)
+*********************************************************************************************/
+// ML rate per site with one tree and one stochastic process for all positions.
+// Fills ratesV and likelihoodsV (plain likelihood, not log - see the
+// per-position overload) and returns the total log-likelihood over all sites.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+				   Vdouble & likelihoodsV,
+				   const sequenceContainer& sc,
+				   const stochasticProcess& sp,
+				   const tree& et,
+				   const MDOUBLE maxRate,//20.0f
+				   const MDOUBLE tol){//=0.0001f;
+
+	const int nSites = sc.seqLen();
+	ratesV.resize(nSites);
+	likelihoodsV.resize(nSites);
+	MDOUBLE sumLogL = 0.0;
+	for (int pos = 0; pos < nSites; ++pos) {
+		computeML_siteSpecificRate(pos,sc,sp,et,ratesV[pos],likelihoodsV[pos],maxRate,tol);
+		assert(likelihoodsV[pos]>0.0);
+		sumLogL += log(likelihoodsV[pos]);
+		LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
+	}
+	LOG(5,<<" number of sites: "<<nSites<<endl);
+	return sumLogL;
+}
+/********************************************************************************************
+ML - per Pos (1.1)
+*********************************************************************************************/
+// note that this places the likelihood, rather then the *log*likelihood into posL
+void computeML_siteSpecificRate(int pos,
+				   const sequenceContainer& sc,
+				   const stochasticProcess& sp,
+				   const tree &et,
+				   MDOUBLE& bestRate,
+				   MDOUBLE& posL,
+				   const MDOUBLE maxRate,
+				   const MDOUBLE tol) {
+	LOG(5,<<".");
+	// Bracket the ML rate in [ax, cx] and let brent() locate the optimum.
+	// posL receives the (non-log) likelihood at the best rate, bestRate the argmax.
+	// The negation suggests Cevaluate_L_given_r returns -L(pos|r) so that
+	// brent's minimization maximizes the likelihood - confirm in checkcovFanctors.h.
+	MDOUBLE ax=0.00001f,bx=maxRate*0.25,cx=maxRate; // MN
+	posL=-brent(ax,bx,cx,Cevaluate_L_given_r(sc,et,sp,pos),tol,&bestRate);
+}
+
+
+/********************************************************************************************
+ML - full data AttributesVecs (1)
+*********************************************************************************************/
+// ML rate per site where each position may use its own tree and/or stochastic
+// process, selected via 1-based attribute vectors (position pos uses
+// etVec[treeAttributesVec[pos]-1] and spVec[spAttributesVec[pos]-1]).
+// Returns the total log-likelihood over all sites.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+				   Vdouble & likelihoodsV,
+				   const Vint& spAttributesVec,
+				   const Vint& treeAttributesVec,
+				   const vector<tree> & etVec,
+				   const vector<const stochasticProcess *> & spVec,
+				   const sequenceContainer& sc,
+				   const MDOUBLE maxRate,
+				   const MDOUBLE tol){
+	MDOUBLE Lsum = 0.0;
+	ratesV.resize(sc.seqLen()); // the rates themselves
+	likelihoodsV.resize(sc.seqLen()); // the likelihood of each position (not log; Lsum below takes the log)
+
+	for (int pos=0; pos < sc.seqLen(); ++pos) {
+		LOG(5,<<".");
+		MDOUBLE bestR=-1.0; // tree1
+		// MDOUBLE LmaxR1=0;
+
+		// getting the right tree for the specific position:
+		const tree* treeForThisPosition=NULL;
+		if ((etVec.size() >0 ) && (treeAttributesVec[pos]>0)) {
+			treeForThisPosition = & etVec[ treeAttributesVec[pos] -1];
+		} else {
+			errorMsg::reportError("tree vector is empty, or treeAttribute is empty, or treeAttribute[pos] is zero (it should be one)");
+		}
+
+		// getting the right stochastic process for the specific position:
+
+		const stochasticProcess* spForThisPosition=NULL;
+
+		if ((spVec.size() >0 ) && (spAttributesVec[pos]>0)) {
+			spForThisPosition = spVec[ spAttributesVec[pos] -1];
+		} else {
+			errorMsg::reportError("stochastic process vector is empty, or spAttributesVec is empty, or spAttribute[pos] is zero (it should be one)");
+		}
+
+		computeML_siteSpecificRate(pos,sc,*spForThisPosition,*treeForThisPosition,bestR,likelihoodsV[pos],maxRate,tol);
+		ratesV[pos] = bestR;
+		assert(likelihoodsV[pos]>0.0);
+		Lsum += log(likelihoodsV[pos]);
+		LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
+	}
+	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
+	return Lsum;
+}
+/********************************************************************************************
+ML - AttributesVecs (1.1)
+*********************************************************************************************/
+// Convenience overload: many trees, a single stochastic process. Delegates to
+// the general attributes-vector version with a trivial (all-ones) sp mapping.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+				   Vdouble & likelihoodsV,
+				   const Vint& treeAttributesVec,
+				   const vector<tree> & etVec,
+				   const stochasticProcess& sp,
+				   const sequenceContainer& sc,
+				   const MDOUBLE maxRate,
+				   const MDOUBLE tol) {
+	vector<const stochasticProcess* > spVec(1, &sp);
+	Vint spAttributesVec(sc.seqLen(), 1);
+	return computeML_siteSpecificRate(ratesV, likelihoodsV, spAttributesVec,
+		treeAttributesVec, etVec, spVec, sc, maxRate, tol);
+}
+/********************************************************************************************
+ML - AttributesVecs (1.1)
+*********************************************************************************************/
+// Convenience overload: one tree, many stochastic processes. Delegates to the
+// general attributes-vector version with a trivial (all-ones) tree mapping.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+				   Vdouble & likelihoodsV,
+				   const Vint& spAttributesVec,
+				   const tree & et,
+				   const vector<const stochasticProcess* > & spVec,
+				   const sequenceContainer& sc,
+				   const MDOUBLE maxRate,
+				   const MDOUBLE tol){
+	vector<tree> etVec(1, et);
+	Vint treeAttributesVec(sc.seqLen(), 1);
+	return computeML_siteSpecificRate(ratesV, likelihoodsV, spAttributesVec,
+		treeAttributesVec, etVec, spVec, sc, maxRate, tol);
+}
+
+
+
+// THE BAYESIAN EB_EXP PART OF RATE ESTIMATION. //
+/********************************************************************************************
+EB_EXP - full data (1)
+*********************************************************************************************/
+// Empirical-Bayes (posterior-expectation) rate per site, with std and
+// confidence bounds, for a single tree and stochastic process.
+// LpostPerCat, if non-NULL, is filled as (*LpostPerCat)[cat][pos];
+// unObservableData_p, if non-NULL, corrects likelihoods for unobservable data.
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+					Vdouble & stdV,
+					Vdouble & lowerBoundV,
+					Vdouble & upperBoundV,
+					const sequenceContainer& sc,
+					const stochasticProcess& sp,
+					const tree& et,
+					const MDOUBLE alphaConf,
+					VVdouble* LpostPerCat, //2 fill (*LpostPerCat)[cat][pos]
+					unObservableData* unObservableData_p)
+{
+	ratesV.resize(sc.seqLen());
+	stdV.resize(sc.seqLen());
+	lowerBoundV.resize(sc.seqLen());
+	upperBoundV.resize(sc.seqLen());
+
+	// Pij matrices are computed once for the (tree, process) pair and reused for every site
+	computePijGam cpg;
+	cpg.fillPij(et,sp);
+	for (int pos=0; pos < sc.seqLen(); ++pos) {
+		computeEB_EXP_siteSpecificRate(pos,sc,sp,cpg, et,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf,LpostPerCat,unObservableData_p);
+		LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
+	}
+	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
+}
+
+
+/********************************************************************************************
+EB_EXP - per Pos (1.1)
+*********************************************************************************************/
+// Empirical-Bayes rate estimate for a single position: computes the posterior
+// P(r|data) over the discrete rate categories, then its mean (bestRate),
+// standard deviation (stdRate), and a two-sided alphaConf confidence interval
+// [lowerConf, upperConf] over the category rates.
+// LpostPerCat, if non-NULL, receives the posterior per category at this pos;
+// unObservableData_p, if non-NULL, rescales likelihoods by the total
+// probability of observable data (the "ver2" correction).
+void computeEB_EXP_siteSpecificRate(int pos,
+				   const sequenceContainer& sc,
+				   const stochasticProcess& sp,
+				   const computePijGam& cpg,
+				   const tree &et,
+				   MDOUBLE& bestRate,
+				   MDOUBLE & stdRate,
+				   MDOUBLE & lowerConf,
+				   MDOUBLE & upperConf,
+				   const MDOUBLE alphaConf, // alpha of 0.05 is considered 0.025 for each side.
+				   VVdouble* LpostPerCat, //2 fill (*LpostPerCat)[cat][pos]
+				   unObservableData* unObservableData_p)
+{
+	// here we compute P(r | data)
+	VdoubleRep pGivenR(sp.categories(),0.0);
+	doubleRep sum=0;
+	doubleRep LofPos_givenRateCat;
+	LOG(8,<<pos+1<<"\t"); //DEBUG
+	for (int cat=0; cat < sp.categories(); ++cat) {
+		LofPos_givenRateCat = likelihoodComputation::getLofPos(pos,et,sc,cpg[cat],sp);
+		// condition each category's likelihood on the data being observable
+		if(unObservableData_p){
+			LofPos_givenRateCat = LofPos_givenRateCat/(1- exp(unObservableData_p->getlogLforMissingData()));
+		}
+		pGivenR[cat] = LofPos_givenRateCat * sp.ratesProb(cat);
+		LOG(8,<<cat<<"\t"<<LofPos_givenRateCat<<"\t"); //DEBUG
+		sum+=pGivenR[cat];
+	}
+	LOG(8,<<"\n"); //DEBUG
+	assert(sum!=0);
+
+	// here we compute sigma r * P(r | data)
+	doubleRep sumOfSquares(0.0);
+	doubleRep bestRate_dblRep(0.0);
+
+	LOG(6,<<"Pos "<<pos<<" content = "<<sc[0][pos]<<" ,total likelihood = "<<sum<<endl); //DEBUG
+
+	for (int j=0; j < sp.categories(); ++j) {
+		pGivenR[j]/=sum; // So that pGivenR is probability.
+				 // From here on we can convert it back
+				 // to MDOUBLE because it's not a very
+				 // small likelihood any more
+		if (LpostPerCat){
+			(*LpostPerCat)[j][pos]= convert(pGivenR[j]);
+		}
+		doubleRep tmp = pGivenR[j]*sp.rates(j);
+		bestRate_dblRep += tmp;
+		sumOfSquares += (tmp*sp.rates(j));
+	}
+
+	bestRate = convert(bestRate_dblRep);
+	MDOUBLE varRate = convert(sumOfSquares) - convert(bestRate*bestRate);
+	MDOUBLE tolerance = 0.0001; // tolerance for variance is not very exact, and also exact computation not very important
+	if (varRate<-tolerance)
+		LOGnOUT(3,<<"Error in computeEB_EXP_siteSpecificRate pos="<<pos<<", varRate="<<varRate<<" (< 0) \n");
+	if ((varRate<0) && (varRate>=-tolerance))
+		varRate = 0;
+	stdRate = sqrt(varRate);
+
+	// detecting the confidence intervals.
+	MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail.
+	doubleRep cdf = 0.0; // cumulative density function.
+	int k=0;
+	while (k < sp.categories()){
+		cdf += convert(pGivenR[k]);
+		if (cdf >oneSideConfAlpha) {
+			lowerConf = sp.rates(k);
+			break;
+		}
+		k++;
+	}
+	while (k < sp.categories()) {
+		if (cdf >(1.0-oneSideConfAlpha)) {
+			upperConf = sp.rates(k);
+			break;
+		}
+		++k;
+		// BUGFIX: guard the accumulation. The original incremented k and then
+		// read pGivenR[k] unconditionally, indexing one past the end of the
+		// vector whenever the upper bound was not found before the last category.
+		if (k < sp.categories())
+			cdf += convert(pGivenR[k]);
+	}
+	if (k==sp.categories()) upperConf = sp.rates(k-1);
+}
+
+/********************************************************************************************
+EB_EXP - full data AttributesVecs (1)
+*********************************************************************************************/
+// EB_EXP rate per site where each position may use its own tree and/or
+// stochastic process (1-based attribute vectors, as in the ML variant).
+// The Pij matrices are filled once per (tree, sp) pair; positions not
+// assigned to the current pair are skipped in the inner loop.
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+					Vdouble & stdV,
+					Vdouble & lowerBoundV,
+					Vdouble & upperBoundV,
+					const Vint& spAttributesVec,
+					const Vint& treeAttributesVec,
+					const sequenceContainer& sc,
+					const vector<tree> & etVec,
+					const vector<const stochasticProcess *> & spVec,
+					const MDOUBLE alphaConf){
+	ratesV.resize(sc.seqLen());
+	stdV.resize(sc.seqLen());
+	lowerBoundV.resize(sc.seqLen());
+	upperBoundV.resize(sc.seqLen());
+	for (int treeNum=0; treeNum<etVec.size(); ++treeNum) {
+		for (int spNum = 0; spNum<spVec.size(); ++spNum) {
+			computePijGam cpg;
+			cpg.fillPij(etVec[treeNum],*(spVec[spNum]));
+			for (int pos=0; pos < sc.seqLen(); ++pos) {
+				// only handle positions assigned to this (tree, sp) pair
+				if (((spAttributesVec[pos]-1)!=spNum ) || ((treeAttributesVec[pos]-1)!=treeNum )) continue;
+				const tree* treeForThisPosition=NULL;
+				assert ((etVec.size() >0 ) && (treeAttributesVec[pos]>0));
+				treeForThisPosition = & etVec[ treeAttributesVec[pos] -1];
+				const stochasticProcess* spForThisPosition=NULL;
+				assert ((spVec.size() >0 ) && (spAttributesVec[pos]>0));
+				spForThisPosition = spVec[ spAttributesVec[pos] -1];
+				computeEB_EXP_siteSpecificRate(pos,sc,*spForThisPosition,cpg, *treeForThisPosition,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf);
+				LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
+			}
+		}
+	}
+	LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
+}
+
+/********************************************************************************************
+EB_EXP - AttributesVecs - one tree many sps
+*********************************************************************************************/
+// EB_EXP convenience overload: one tree, many stochastic processes.
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+					Vdouble & stdV,
+					Vdouble & lowerBoundV,
+					Vdouble & upperBoundV,
+					const Vint& spAttributesVec,
+					const sequenceContainer& sc,
+					const tree & et,
+					const vector<const stochasticProcess *> & spVec,
+					const MDOUBLE alphaConf){
+	vector<tree> etVec(1, et);
+	Vint etAttributesVec(sc.seqLen(), 1);
+	computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,etAttributesVec,sc,etVec,spVec,alphaConf);
+}
+
+/********************************************************************************************
+EB_EXP - AttributesVecs - one sp many trees
+*********************************************************************************************/
+// EB_EXP convenience overload: one stochastic process, many trees.
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+					Vdouble & stdV,
+					Vdouble & lowerBoundV,
+					Vdouble & upperBoundV,
+					const Vint& treeAttributesVec,
+					const sequenceContainer& sc,
+					const vector<tree> & etVec,
+					const stochasticProcess & sp,
+					const MDOUBLE alphaConf){
+	vector<const stochasticProcess* > spVec(1, &sp);
+	Vint spAttributesVec(sc.seqLen(), 1);
+	computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,treeAttributesVec,sc,etVec,spVec,alphaConf);
+}
+
diff --git a/libs/phylogeny/siteSpecificRate.h b/libs/phylogeny/siteSpecificRate.h
new file mode 100644
index 0000000..1a44622
--- /dev/null
+++ b/libs/phylogeny/siteSpecificRate.h
@@ -0,0 +1,138 @@
+// $Id: siteSpecificRate.h 4742 2008-08-19 17:40:56Z cohenofi $
+
+#ifndef ___SITE_SPECIFIC_RATE
+#define ___SITE_SPECIFIC_RATE
+
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "computePijComponent.h"
+#include "unObservableData.h"
+
+
+// the function returns the total log-likelihood of the rates.
+// it is used for computing the rates, when there is one tree common to
+// all positions and 1 stochastic process common to all position.
+
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const sequenceContainer& sd,
+ const stochasticProcess& sp,
+ const tree& et,
+ const MDOUBLE maxRate=20.0f,
+ const MDOUBLE tol=0.0001f);
+
+// this function is the same as the one above, but here, each site can have its
+//own tree, or its own stochastic process.
+//etVec: a vector of possible trees.
+//spVec: a vector of possible stochastic processes.
+//treeAttributesVec: defines which tree is assigned to a specific position.
+//NOTE: the possible attributes are 1,2..., so that the tree for position i
+//is etVec[treeAttributesVec[i]-1]
+//The same is true for the stochastic process atributes vector.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& spAttributesVec,
+ const Vint& treeAttributesVec,
+ const vector<tree> & etVec,
+ const vector<const stochasticProcess *> & spVec,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol);
+
+// this function is the same as the one above, but here,
+// there are only tree attributes.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& treeAttributesVec,
+ const vector<tree> & etVec,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol);
+
+// this function is the same as the one above, but here,
+// there are only stochastic process attributes.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& spAttributesVec,
+ const tree & et,
+ const vector<const stochasticProcess* > & spVec,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol);
+
+void computeML_siteSpecificRate(int pos,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const tree &et,
+ MDOUBLE& bestRate,
+ MDOUBLE& posL,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol);
+
+// BAYESIAN PART
+
+// 1 sequence container, 1 tree, 1 position
+void computeEB_EXP_siteSpecificRate(int pos,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const computePijGam& cpg,
+ const tree &et,
+ MDOUBLE& bestRate,
+ MDOUBLE & stdRate,
+ MDOUBLE & lowerConf,
+ MDOUBLE & upperConf,
+ const MDOUBLE alphaConf,
+ VVdouble* LpostPerCat=NULL,
+ unObservableData* unObservableData_p=NULL);
+
+// 1 stochastic process, 1 tree, all positions
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const tree& et,
+ const MDOUBLE alphaConf,
+ VVdouble* LpostPerCat=NULL,
+ unObservableData* unObservableData_p=NULL);
+
+
+// many stochastic process, many tree, all positions
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& spAttributesVec,
+ const Vint& treeAttributesVec,
+ const sequenceContainer& sc,
+ const vector<tree> & etVec,
+ const vector<const stochasticProcess *> & spVec,
+ const MDOUBLE alphaConf);
+
+// many stochastic process, 1 tree, all positions
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& spAttributesVec,
+ const sequenceContainer& sc,
+ const tree & et,
+ const vector<const stochasticProcess *> & spVec,
+ const MDOUBLE alphaConf);
+
+// 1 stochastic process, many tree, all positions
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& treeAttributesVec,
+ const sequenceContainer& sc,
+ const vector<tree> & etVec,
+ const stochasticProcess & sp,
+ const MDOUBLE alphaConf);
+#endif
+
diff --git a/libs/phylogeny/siteSpecificRateGL.cpp b/libs/phylogeny/siteSpecificRateGL.cpp
new file mode 100644
index 0000000..86a9f9e
--- /dev/null
+++ b/libs/phylogeny/siteSpecificRateGL.cpp
@@ -0,0 +1,299 @@
+// $Id: siteSpecificRate.cpp 3658 2008-03-05 09:25:46Z cohenofi $
+
+#include "siteSpecificRateGL.h"
+#include "numRec.h"
+#include "checkcovFanctors.h"
+#include "definitions.h"
+
+using namespace siteSpecificRateGL;
+
+// ---------------------------------------------------------------------------
+// ML rate inference, one tree + one stochastic process shared by all sites.
+// Fills ratesV[pos] with the ML rate and likelihoodsV[pos] with the (plain,
+// not log) likelihood of each position; returns the total log-likelihood.
+// ---------------------------------------------------------------------------
+MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const tree& et,
+ const MDOUBLE maxRate,//20.0f
+ const MDOUBLE tol){//=0.0001f;
+
+ ratesV.resize(sc.seqLen());
+ likelihoodsV.resize(sc.seqLen());
+ MDOUBLE Lsum = 0.0;
+
+ for (int pos=0; pos < sc.seqLen(); ++pos) {
+ siteSpecificRateGL::computeML_siteSpecificRate(pos,sc,sp,et,ratesV[pos],likelihoodsV[pos],maxRate,tol);
+ // the per-position overload stores a plain likelihood; it must be
+ // strictly positive before taking its log.
+ assert(likelihoodsV[pos]>0.0);
+ Lsum += log(likelihoodsV[pos]);
+ LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
+ }
+ LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
+ return Lsum;
+}
+
+// ---------------------------------------------------------------------------
+// ML rate inference when every site may use its own tree and/or stochastic
+// process.  Attributes are 1-based: site pos uses
+// etVec[treeAttributesVec[pos]-1] and spVec[spAttributesVec[pos]-1].
+// Returns the total log-likelihood over all positions.
+// ---------------------------------------------------------------------------
+MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& spAttributesVec,
+ const Vint& treeAttributesVec,
+ const vector<tree> & etVec,
+ const vector<const stochasticProcess *> & spVec,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol){
+ MDOUBLE Lsum = 0.0;
+ ratesV.resize(sc.seqLen()); // the rates themselves
+ likelihoodsV.resize(sc.seqLen()); // the log likelihood of each position
+
+ for (int pos=0; pos < sc.seqLen(); ++pos) {
+ LOG(5,<<".");
+ MDOUBLE bestR=-1.0; // tree1
+ // MDOUBLE LmaxR1=0;
+
+ // getting the right tree for the specific position:
+ const tree* treeForThisPosition=NULL;
+ if ((etVec.size() >0 ) && (treeAttributesVec[pos]>0)) {
+ treeForThisPosition = & etVec[ treeAttributesVec[pos] -1];
+ } else {
+ errorMsg::reportError("tree vector is empty, or treeAttribute is empty, or treeAttribute[pos] is zero (it should be one)");
+ }
+
+ // getting the right stochastic process for the specific position:
+
+ const stochasticProcess* spForThisPosition=NULL;
+
+ if ((spVec.size() >0 ) && (spAttributesVec[pos]>0)) {
+ spForThisPosition = spVec[ spAttributesVec[pos] -1];
+ } else {
+ errorMsg::reportError("stochastic process vector is empty, or spAttributesVec is empty, or spAttribute[pos] is zero (it should be one)");
+ }
+
+ siteSpecificRateGL::computeML_siteSpecificRate(pos,sc,*spForThisPosition,*treeForThisPosition,bestR,likelihoodsV[pos],maxRate,tol);
+ ratesV[pos] = bestR;
+ assert(likelihoodsV[pos]>0.0);
+ Lsum += log(likelihoodsV[pos]);
+ LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
+ }
+ LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
+ return Lsum;
+}
+
+// note that this places the likelihood, rather then the *log*likelihood into posL
+// Single-position ML rate search over r in (0, maxRate]: brent() minimizes the
+// Cevaluate_L_given_r functor; the negation turns that minimum back into the
+// (plain) likelihood at the best rate.
+void siteSpecificRateGL::computeML_siteSpecificRate(int pos,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const tree &et,
+ MDOUBLE& bestRate,
+ MDOUBLE& posL,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol) {
+ LOG(5,<<".");
+ MDOUBLE ax=0.00001f,bx=maxRate*0.25,cx=maxRate; // MN
+ posL=-brent(ax,bx,cx,Cevaluate_L_given_r(sc,et,sp,pos),tol,&bestRate);
+}
+
+MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& treeAttributesVec,
+ const vector<tree> & etVec,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol) {
+ Vint spAttributesVec(sc.seqLen(),1);
+ vector<const stochasticProcess* > spVec;
+ spVec.push_back(&sp);
+ return computeML_siteSpecificRate(ratesV,likelihoodsV,
+ spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol);
+}
+
+MDOUBLE siteSpecificRateGL::computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& spAttributesVec,
+ const tree & et,
+ const vector<const stochasticProcess* > & spVec,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol){
+ Vint treeAttributesVec(sc.seqLen(),1);
+ vector<tree> etVec;
+ etVec.push_back(et);
+ return siteSpecificRateGL::computeML_siteSpecificRate(ratesV,likelihoodsV,
+ spAttributesVec,treeAttributesVec,etVec,spVec,sc,maxRate,tol);
+}
+
+
+// THE BAYESIAN EB_EXP PART OF RATE ESTIMATION. //
+
+void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(int pos,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const computePijGam& cpg,
+ const tree &et,
+ MDOUBLE& bestRate,
+ MDOUBLE & stdRate,
+ MDOUBLE & lowerConf,
+ MDOUBLE & upperConf,
+ const MDOUBLE alphaConf, // alpha of 0.05 is considered 0.025 for each side.
+ VVdouble* LpostPerCat,
+ Vdouble* pLforMissingDataPerCat)
+{
+ // here we compute P(r | data)
+ VdoubleRep pGivenR(sp.categories(),0.0);
+ doubleRep sum=0;
+ MDOUBLE LofPos_givenRateCat;
+ LOG(8,<<pos+1<<"\t"); //DEBUG
+ for (int cat=0; cat < sp.categories(); ++cat) {
+ LofPos_givenRateCat = convert(likelihoodComputation::getLofPos(pos,et,sc,cpg[cat],sp));
+ if(pLforMissingDataPerCat){
+ LofPos_givenRateCat = LofPos_givenRateCat/(1- (*pLforMissingDataPerCat)[cat]);
+ }
+ pGivenR[cat] = LofPos_givenRateCat*sp.ratesProb(cat);
+ LOG(8,<<cat<<"\t"<<LofPos_givenRateCat<<"\t"); //DEBUG
+ sum+=pGivenR[cat];
+ }
+ LOG(8,<<"\n"); //DEBUG
+ assert(sum!=0);
+
+ // here we compute sigma r * P(r | data)
+
+ doubleRep sumOfSquares(0.0);
+ doubleRep bestRate_dblRep(0.0);
+
+ LOG(5,<<"Pos "<<pos<<" content = "<<sc[0][pos]<<" ,total likelihood = "<<sum<<endl); //DEBUG
+
+ for (int j=0; j < sp.categories(); ++j) {
+ pGivenR[j]/=sum; // So that pGivenR is probability.
+ // From here on we can convert it back
+ // to MDOUBLE because it's not a very
+ // small likelihood any more
+ if (LpostPerCat){
+ (*LpostPerCat)[j][pos]= convert(pGivenR[j]);
+ }
+ doubleRep tmp = pGivenR[j]*sp.rates(j);
+ bestRate_dblRep += tmp;
+ sumOfSquares += (tmp*sp.rates(j));
+
+ }
+
+ bestRate = convert(bestRate_dblRep);
+ MDOUBLE varRate = convert(sumOfSquares) - convert(bestRate*bestRate);
+ MDOUBLE tolerance = 0.0001; // tolerance for variance is not very exact, and also exact computation not very important
+ if (varRate<-tolerance)
+ errorMsg::reportError("Error in computeEB_EXP_siteSpecificRate, varRate < 0");
+ if ((varRate<0) && (varRate>=-tolerance))
+ varRate = 0;
+ stdRate = sqrt(varRate);
+
+ // detecting the confidence intervals.
+ MDOUBLE oneSideConfAlpha = alphaConf/2.0; // because we are computing the two tail.
+ doubleRep cdf = 0.0; // cumulative density function.
+ int k=0;
+ while (k < sp.categories()){
+ cdf += convert(pGivenR[k]);
+ if (cdf >oneSideConfAlpha) {
+ lowerConf = sp.rates(k);
+ break;
+ }
+ k++;
+ }
+ while (k < sp.categories()) {
+ if (cdf >(1.0-oneSideConfAlpha)) {
+ upperConf = sp.rates(k);
+ break;
+ }
+ ++k;
+ cdf += convert(pGivenR[k]);
+ }
+ if (k==sp.categories()) upperConf = sp.rates(k-1);
+}
+
+// EB_EXP rates for all sites with one shared stochastic process and one
+// shared tree.  The Pij table is filled once and reused for every position;
+// all four output vectors are resized to sc.seqLen().
+void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const tree& et,
+ const MDOUBLE alphaConf,
+ VVdouble* LpostPerCat,
+ Vdouble* pLforMissingDataPerCat)
+{
+ ratesV.resize(sc.seqLen());
+ stdV.resize(sc.seqLen());
+ lowerBoundV.resize(sc.seqLen());
+ upperBoundV.resize(sc.seqLen());
+
+ computePijGam cpg;
+ cpg.fillPij(et,sp);
+ for (int pos=0; pos < sc.seqLen(); ++pos) {
+ siteSpecificRateGL::computeEB_EXP_siteSpecificRate(pos,sc,sp,cpg, et,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf,LpostPerCat,pLforMissingDataPerCat);
+ LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
+ }
+ LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
+}
+
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& spAttributesVec,
+ const Vint& treeAttributesVec,
+ const sequenceContainer& sc,
+ const vector<tree> & etVec,
+ const vector<const stochasticProcess *> & spVec,
+ const MDOUBLE alphaConf){
+ ratesV.resize(sc.seqLen());
+ stdV.resize(sc.seqLen());
+ lowerBoundV.resize(sc.seqLen());
+ upperBoundV.resize(sc.seqLen());
+ for (int treeNum=0; treeNum<etVec.size(); ++treeNum) {
+ for (int spNum = 0; spNum<spVec.size(); ++spNum) {
+ computePijGam cpg;
+ cpg.fillPij(etVec[treeNum],*(spVec[spNum]));
+ for (int pos=0; pos < sc.seqLen(); ++pos) {
+ if (((spAttributesVec[pos]-1)!=spNum ) || ((treeAttributesVec[pos]-1)!=treeNum )) continue;
+ const tree* treeForThisPosition=NULL;
+ assert ((etVec.size() >0 ) && (treeAttributesVec[pos]>0));
+ treeForThisPosition = & etVec[ treeAttributesVec[pos] -1];
+ const stochasticProcess* spForThisPosition=NULL;
+ assert ((spVec.size() >0 ) && (spAttributesVec[pos]>0));
+ spForThisPosition = spVec[ spAttributesVec[pos] -1];
+ siteSpecificRateGL::computeEB_EXP_siteSpecificRate(pos,sc,*spForThisPosition,cpg, *treeForThisPosition,ratesV[pos],stdV[pos],lowerBoundV[pos],upperBoundV[pos],alphaConf);
+ LOG(5,<<" rate of pos: "<<pos<<" = "<<ratesV[pos]<<endl);
+ }
+ }
+ }
+ LOG(5,<<" number of sites: "<<sc.seqLen()<<endl);
+}
+
+// one tree many sps
+void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& spAttributesVec,
+ const sequenceContainer& sc,
+ const tree & et,
+ const vector<const stochasticProcess *> & spVec,
+ const MDOUBLE alphaConf){
+ Vint etAttributesVec(sc.seqLen(),1);
+ vector<tree> etVec;
+ etVec.push_back(et);
+ siteSpecificRateGL::computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,etAttributesVec,sc,etVec,spVec,alphaConf);
+}
+
+// one sp many trees
+
+void siteSpecificRateGL::computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& treeAttributesVec,
+ const sequenceContainer& sc,
+ const vector<tree> & etVec,
+ const stochasticProcess & sp,
+ const MDOUBLE alphaConf){
+ Vint spAttributesVec(sc.seqLen(),1);
+ vector<const stochasticProcess* > spVec;
+ spVec.push_back(&sp);
+ siteSpecificRateGL::computeEB_EXP_siteSpecificRate(ratesV,stdV,lowerBoundV,upperBoundV,spAttributesVec,treeAttributesVec,sc,etVec,spVec,alphaConf);
+}
+
diff --git a/libs/phylogeny/siteSpecificRateGL.h b/libs/phylogeny/siteSpecificRateGL.h
new file mode 100644
index 0000000..3eb1c65
--- /dev/null
+++ b/libs/phylogeny/siteSpecificRateGL.h
@@ -0,0 +1,141 @@
+// $Id: siteSpecificRate.h 3428 2008-01-30 12:30:46Z cohenofi $
+
+#ifndef ___SITE_SPECIFIC_RATE_GL_
+#define ___SITE_SPECIFIC_RATE_GL_
+
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "computePijComponent.h"
+//#include "likelihoodComputationGL.h"
+
+// the function returns the total log-likelihood of the rates.
+// it is used for computing the rates, when there is one tree common to
+// all positions and 1 stochastic process common to all position.
+
+namespace siteSpecificRateGL {
+
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const sequenceContainer& sd,
+ const stochasticProcess& sp,
+ const tree& et,
+ const MDOUBLE maxRate=20.0f,
+ const MDOUBLE tol=0.0001f);
+
+// this function is the same as the one above, but here, each site can have its
+//own tree, or its own stochastic process.
+//etVec: a vector of possible trees.
+//spVec: a vector of possible stochastic processes.
+//treeAttributesVec: defines which tree is assigned to a specific position.
+//NOTE: the possible attributes are 1,2..., so that the tree for position i
+//is etVec[treeAttributesVec[i]-1]
+//The same is true for the stochastic process atributes vector.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& spAttributesVec,
+ const Vint& treeAttributesVec,
+ const vector<tree> & etVec,
+ const vector<const stochasticProcess *> & spVec,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol);
+
+// this function is the same as the one above, but here,
+// there are only tree attributes.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& treeAttributesVec,
+ const vector<tree> & etVec,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol);
+
+// this function is the same as the one above, but here,
+// there are only stochastic process attributes.
+MDOUBLE computeML_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & likelihoodsV,
+ const Vint& spAttributesVec,
+ const tree & et,
+ const vector<const stochasticProcess* > & spVec,
+ const sequenceContainer& sc,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol);
+
+void computeML_siteSpecificRate(int pos,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const tree &et,
+ MDOUBLE& bestRate,
+ MDOUBLE& posL,
+ const MDOUBLE maxRate,
+ const MDOUBLE tol);
+
+// BAYESIAN PART
+
+// 1 sequence container, 1 tree, 1 position
+void computeEB_EXP_siteSpecificRate(int pos,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const computePijGam& cpg,
+ const tree &et,
+ MDOUBLE& bestRate,
+ MDOUBLE & stdRate,
+ MDOUBLE & lowerConf,
+ MDOUBLE & upperConf,
+ const MDOUBLE alphaConf,
+ VVdouble* LpostPerCat=NULL,
+ Vdouble* pLforMissingDataPerCat=NULL);
+
+// 1 stochastic process, 1 tree, all positions
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const sequenceContainer& sc,
+ const stochasticProcess& sp,
+ const tree& et,
+ const MDOUBLE alphaConf,
+ VVdouble* LpostPerCat=NULL,
+ Vdouble* pLforMissingDataPerCat=NULL);
+
+// many stochastic process, many tree, all positions
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& spAttributesVec,
+ const Vint& treeAttributesVec,
+ const sequenceContainer& sc,
+ const vector<tree> & etVec,
+ const vector<const stochasticProcess *> & spVec,
+ const MDOUBLE alphaConf);
+
+// many stochastic process, 1 tree, all positions
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& spAttributesVec,
+ const sequenceContainer& sc,
+ const tree & et,
+ const vector<const stochasticProcess *> & spVec,
+ const MDOUBLE alphaConf);
+
+// 1 stochastic process, many tree, all positions
+void computeEB_EXP_siteSpecificRate(Vdouble & ratesV,
+ Vdouble & stdV,
+ Vdouble & lowerBoundV,
+ Vdouble & upperBoundV,
+ const Vint& treeAttributesVec,
+ const sequenceContainer& sc,
+ const vector<tree> & etVec,
+ const stochasticProcess & sp,
+ const MDOUBLE alphaConf);
+
+};
+
+#endif
+
diff --git a/libs/phylogeny/someUtil.cpp b/libs/phylogeny/someUtil.cpp
new file mode 100644
index 0000000..3a06380
--- /dev/null
+++ b/libs/phylogeny/someUtil.cpp
@@ -0,0 +1,822 @@
+// $Id: someUtil.cpp 6055 2009-04-03 21:19:38Z rubi $
+
+#include "someUtil.h"
+#include "errorMsg.h"
+#include "talRandom.h"
+#include <cmath>
+#include <ctime>
+#include <iterator>
+#include <algorithm>
+#include <string>
+#include <cctype>
+#include <cassert>
+using namespace std;
+
+// for the _mkdir call
+#if defined(WIN32) || defined(SunOS) || defined(solaris)
+ #include <direct.h>
+#else
+ #include <sys/file.h>
+ #include <dirent.h>
+// #include <io.h>
+#endif
+
+//swap between the 4 variables such that the first becomes the second, second becomes the third and third becomes the fourth.
+//used in functoin mnbrack below.
+// shift3: a <- b, b <- c, c <- d (helper for bracketing routines such as mnbrak).
+void shift3(MDOUBLE &a, MDOUBLE &b, MDOUBLE &c, const MDOUBLE d) {
+ a=b;
+ b=c;
+ c=d;
+}
+
+// Arithmetic mean of an integer vector.
+// NOTE(review): divides by vec.size() -- callers must pass a non-empty vector.
+MDOUBLE computeAverage(const vector<int>& vec) {
+ MDOUBLE sum=0.0;
+ for (int i=0; i < vec.size(); ++i) {
+ sum+=static_cast<MDOUBLE>(vec[i]);
+ }
+ return sum/static_cast<MDOUBLE>(vec.size());
+}
+
+// X ~ Poisson(lamda) --> P(X=k) = ((lamda^k)/k!) * e^(-lamda)
+// It isn't smart to first calculate factorial(k) because the size of long int limits this calculation to k<=13
+// (The function name carries a historical typo -- "copmute" -- kept so the
+// public interface stays compatible with existing callers.)
+MDOUBLE copmutePoissonProbability(const int& k, const long double& lamda)
+{
+ assert(k>=0);
+ long double tmp = pow(lamda,k); // tmp = (lamda^k)/k!
+
+ // divide by k! incrementally instead of computing the factorial up front
+ for (int i=2; i<=k; ++i)
+ tmp/=i;
+
+ return (tmp * exp(-lamda));
+}
+
+
+// Arithmetic mean of a real-valued vector (non-empty input assumed).
+MDOUBLE computeAverage(const vector<MDOUBLE>& vec) {
+ MDOUBLE sum=0.0;
+ for (int i=0; i < vec.size(); ++i) sum+=vec[i];
+ return sum/static_cast<MDOUBLE>(vec.size());
+}
+
+// Sample standard deviation (n-1 denominator) of an integer vector.
+// Requires at least two elements.
+MDOUBLE computeStd(const vector<int>& vec) {// page 60, Sokal and Rohlf
+ MDOUBLE sum=0.0;
+ MDOUBLE sumSqr=0.0;
+ MDOUBLE vecSize = static_cast<MDOUBLE>(vec.size());
+ for (int i=0; i < vec.size(); ++i) {
+ sum+=static_cast<MDOUBLE>(vec[i]);
+ sumSqr+=(static_cast<MDOUBLE>(vec[i])*static_cast<MDOUBLE>(vec[i]));
+ }
+ MDOUBLE res= sumSqr-(sum*sum/vecSize);
+ res /= (vecSize-1.0);
+ res = sqrt(res);
+ return res;
+}
+
+// Sample standard deviation (n-1 denominator) of a real-valued vector.
+// Requires at least two elements.
+MDOUBLE computeStd(const vector<MDOUBLE>& vec) {// page 60, Sokal and Rohlf
+ MDOUBLE sum=0.0;
+ MDOUBLE sumSqr=0.0;
+ MDOUBLE vecSize = static_cast<MDOUBLE>(vec.size());
+ for (int i=0; i < vec.size(); ++i) {
+ sum+=vec[i];
+ sumSqr+=(vec[i]*vec[i]);
+ }
+ MDOUBLE res= sumSqr-(sum*sum/vecSize);
+ res /= (vecSize-1.0);
+ res = sqrt(res);
+ return res;
+}
+
+// Sets freqs[indexNewFreq]=newValFreq and rescales every other entry
+// proportionally so the vector still sums to 1 (checked with DEQUAL;
+// a mismatch aborts via errorMsg::reportError).
+void computeRelativeFreqsFollowingOneChanged(MDOUBLE newValFreq, int indexNewFreq,Vdouble &freqs){
+ MDOUBLE proportionAfterOptimization = 1.0 - newValFreq;
+ MDOUBLE proportionBeforeOptimization = 1.0 - freqs[indexNewFreq];
+ MDOUBLE sum = 0.0;
+ for (int i=0; i<freqs.size(); ++i) {
+ if (i==indexNewFreq){
+ freqs[i] = newValFreq;
+ }
+ else {
+ // keep the remaining probability mass in its original proportions
+ freqs[i] = proportionAfterOptimization*freqs[i]/proportionBeforeOptimization;
+ }
+ sum+=freqs[i];
+ }
+ if (!DEQUAL(sum,1.0)) {
+ errorMsg::reportError("Error in computeRelativeFreqsFollowingOneChanged, sum not equal to 1");
+ }
+}
+
+
+// char-level wrappers around tolower/toupper with a std::transform-friendly signature.
+char mytolower(char in){return tolower(in);}
+char mytoupper(char in){return toupper(in);}
+
// Lower-cases str in place, character by character.
void toLower(string& str) {
	for (string::size_type i = 0; i < str.size(); ++i)
		str[i] = static_cast<char>(tolower(str[i]));
}
// Upper-cases str in place, character by character.
void toUpper(string& str) {
	for (string::size_type i = 0; i < str.size(); ++i)
		str[i] = static_cast<char>(toupper(str[i]));
}
// Returns true iff every character of string2check appears in allowableChars
// (an empty string2check is trivially accepted).
bool allowCharSet(const string& allowableChars, const string& string2check) {
	return string2check.find_first_not_of(allowableChars) == string::npos;
}
+
// True iff charToCheck occurs anywhere in stringToCheck.
bool isCharInString(const string& stringToCheck, const char charToCheck) {
	return stringToCheck.find(charToCheck) != string::npos;
}
+
+// Formats x with up to `lenght` digits after the decimal point (truncated,
+// not rounded); trailing zeros and a then-trailing '.' are stripped.
+// e.g. double2string(3.1400, 4) yields "3.14".
+string double2string(const double x, const int lenght){
+
+ // first getting the integer part:
+ //Itay: fixing bug regarding negative floats
+ double x_abs = fabs(x);
+ int theIntegerPart = static_cast<int>(x_abs);
+ double theRemainingPart = fabs(x_abs-theIntegerPart);
+ int integerRepresentingTheRemainingPart = static_cast<int>(theRemainingPart*pow(10.0,lenght));
+ string part1 = int2string(theIntegerPart);
+ string part2 = int2string(integerRepresentingTheRemainingPart);
+ // left-pad the fraction with zeros, e.g. 0.05 with lenght=2 gives "05"
+ while (part2.length()<lenght){
+ part2.insert(0, "0");
+ }
+
+ string result("");
+ if (x < 0.0)
+ result += "-";
+ result += part1;
+ result += ".";
+ result += part2;
+
+ // removing 0 from the end
+ int i = result.length()-1;
+ while (result[i]!='.' && i>0 && result[i]=='0'){
+ result.erase(i);
+ i--;
+ }
+
+ // removing "." if this is the last character in the string.
+ if (result[result.length()-1]=='.')
+ result.erase(result.length()-1);
+
+ return result;
+}
+
// Portable integer-to-string conversion without sprintf or stringstreams:
// e.g. 56 -> "56", -7 -> "-7", 0 -> "0".
string int2string(const int num) {
	if (num == 0) return "0";
	string digits;
	for (int rest = abs(num); rest != 0; rest /= 10) {
		// prepend the least-significant remaining digit
		digits.insert(digits.begin(), static_cast<char>('0' + rest % 10));
	}
	return (num < 0) ? "-" + digits : digits;
}
+
// Writes the current wall-clock date/time to out as
// "# the date is <ctime string>".
// BUGFIX: the archived source was mangled by HTML escaping -- "time( <ime )"
// is the residue of "time( &ltime )"; restored the address-of calls so the
// function compiles again.
void printTime(ostream& out) {
	time_t ltime;
	time(&ltime);
	out<<"# the date is "<< ctime(&ltime)<<endl;
}
+
+MDOUBLE string2double(const string& inString) {
+
+ if (allowCharSet("0123456789.eE+-",inString) == false) {
+ errorMsg::reportError(" error in function string2double ");
+ }
+
+ // first decide if the format is like 0.00343 (regularFormat) or
+ // if it is in the form of 0.34e-006 for example
+
+ bool regularFormat = true;
+ int i;
+ for (i=0; i < inString.size(); ++i) {
+ if ((inString[i] == 'e' ) || (inString[i] == 'E' )) {
+ regularFormat = false;
+ break;
+ }
+ }
+
+ if (regularFormat) {
+ MDOUBLE dDistance = atof(inString.c_str());
+ return dDistance;
+ }
+ else {
+ string b4TheExp;
+ bool plusAfterTheExp = true;
+ string afterTheExp;
+
+ // b4 the exp
+ for (i=0; i < inString.size(); ++i) {
+ if (inString[i] != 'e' ) {
+ b4TheExp += inString[i];
+ }
+ else break;
+ }
+ ++i; //now standing after the exp;
+ if (inString[i] == '-' ) {
+ plusAfterTheExp = false;
+ ++i;
+ }
+ else if (inString[i] == '+' ) {
+ plusAfterTheExp = true;
+ ++i;
+ }
+ else plusAfterTheExp = true; // the number is like 0.34e43
+
+ for (; i < inString.size(); ++i) {
+ afterTheExp += inString[i];
+ }
+
+ MDOUBLE res = 0.0;
+ MDOUBLE dDistance = atof(b4TheExp.c_str());
+ int exponentialFactor = atoi(afterTheExp.c_str());
+ if (plusAfterTheExp) res = dDistance * pow(10.0,exponentialFactor);
+ else res = dDistance * pow(10.0,-exponentialFactor);
+
+ return res;
+ }
+
+
+}
+
+
// Returns true iff fileName can be opened for reading.
// BUGFIX: replaced "file1==NULL" -- the pre-C++11 iostream idiom relying on
// operator void* -- with the portable !file1, which compiles under every
// C++ standard.
bool checkThatFileExist(const string& fileName) {
	ifstream file1(fileName.c_str());
	if (!file1) return false;
	file1.close();
	return true;
}
+
+void putFileIntoVectorStringArray(istream &infile,vector<string> &inseqFile){
+ inseqFile.clear();
+ string tmp1;
+ while (getline(infile,tmp1, '\n' ) ) {
+ if (tmp1.size() > 15000) {
+ vector<string> err;
+ err.push_back("Unable to read file. It is required that each line is no longer than");
+ err.push_back("15000 characters. ");
+ errorMsg::reportError(err,1);
+ }
+ if (tmp1[tmp1.size()-1]=='\r') {// in case we are reading a dos file
+ tmp1.erase(tmp1.size()-1);
+ }// remove the traling carrige-return
+ inseqFile.push_back(tmp1);
+ }
+}
+
+// Skips leading blanks/tabs, then parses an optionally-signed integer at *it
+// into res and advances `it` past the characters consumed.  Returns false
+// (leaving res untouched) when no integer starts at the current position or
+// the end of the string is reached.
+// NOTE(review): atoi(&*it) assumes the string's storage is contiguous and
+// NUL-terminated past the parsed region -- confirm for the strings passed in.
+// NOTE(review): when the parsed value is 0, the digit-counting loop below
+// does not advance past the '0' character -- confirm callers tolerate this.
+bool fromStringIterToInt(string::const_iterator & it, // ref must be here
+ const string::const_iterator endOfString,
+ int& res) {// the ref is so that we can use the it after the func.
+ while (it != endOfString) {
+ if ((*it == ' ') || (*it == '\t')) ++it;else break; // skeeping white spaces.
+ }
+ if (it != endOfString) {
+ if (isdigit(*it) || (*it == '-')){
+ int k = atoi(&*it);
+ if (*it == '-') ++it;
+ // advance one position per decimal digit of |k|
+ for (int numDig = abs(k); numDig>0; numDig/=10) ++it;
+ res = k;
+ return true;
+ }
+ else return false; //unable to read int From String
+ }
+ return false; //unable to read int From String
+
+}
+
+string* searchStringInFile(const string& string2find,
+ const int index,
+ const string& inFileName) {
+ ifstream f;
+ f.open(inFileName.c_str());
+ if (f==NULL) {
+ string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile";
+ errorMsg::reportError(tmp);
+ }
+
+ string numm = int2string(index);
+ string realString2find = string2find+numm;
+
+ istream_iterator<string> is_string(f);
+ istream_iterator<string> end_of_stream;
+
+ is_string = find(is_string,end_of_stream,realString2find);
+ if(is_string == end_of_stream) {f.close();return NULL;}
+ else {
+ is_string++;
+ if(is_string == end_of_stream) {f.close();return NULL;};
+ string* s = new string(*is_string);
+ f.close();
+ return s;
+ }
+ f.close();
+ return NULL;
+}
+string* searchStringInFile(const string& string2find,
+ const string& inFileName) {// return the string that is AFTER the string to search.
+ ifstream f;
+ f.open(inFileName.c_str());
+ if (f==NULL) {
+ string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile";
+ errorMsg::reportError(tmp);
+ }
+ string realString2find = string2find;
+
+ istream_iterator<string> is_string(f);
+ istream_iterator<string> end_of_stream;
+
+ is_string = find(is_string,end_of_stream,realString2find);
+ if(is_string == end_of_stream) {f.close();return NULL;}
+ else {
+ is_string++;
+ if(is_string == end_of_stream) {f.close();return NULL;};
+ string* s = new string(*is_string);
+ f.close();
+ return s;
+ }
+ f.close();
+ return NULL;
+}
+bool doesWordExistInFile(const string& string2find,const string& inFileName) {
+ ifstream f;
+ f.open(inFileName.c_str());
+ if (f==NULL) {
+ string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile";
+ errorMsg::reportError(tmp);
+ }
+
+ istream_iterator<string> is_string(f);
+ istream_iterator<string> end_of_stream;
+
+ is_string = find(is_string,end_of_stream,string2find);
+ if(is_string == end_of_stream) return false;
+ else return true;
+}
+
// Returns fromString with every occurrence of any character listed in
// charsToTakeOut removed; the relative order of the kept characters is preserved.
string takeCharOutOfString(const string& charsToTakeOut, const string& fromString) {
	string kept;
	for (string::size_type i = 0; i < fromString.size(); ++i) {
		if (charsToTakeOut.find(fromString[i]) == string::npos)
			kept += fromString[i];
	}
	return kept;
}
+
+// Approximate floating-point equality: |x1-x2| < epsilon (absolute tolerance).
+bool DEQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/*1.192092896e-07F*/) {
+ return (fabs(x1-x2)<epsilon);
+}
+
+// x1 >= x2 up to the epsilon tolerance of DEQUAL.
+bool DBIG_EQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/*1.192092896e-07F*/){
+ return ((x1 > x2) || DEQUAL(x1, x2,epsilon));
+}
+
+
+// x1 <= x2 up to the epsilon tolerance of DEQUAL.
+bool DSMALL_EQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/*1.192092896e-07F*/){
+ return ((x1 < x2) || DEQUAL(x1, x2,epsilon));
+}
+
+// Creates directory dirName under curDir (or as given when curDir is empty).
+// An already-existing directory is logged and tolerated; on Windows any other
+// _mkdir failure aborts via errorMsg::reportError.
+// NOTE(review): the non-Windows branch shells out with system("mkdir ...")
+// and ignores the return value; a direct mkdir(2) call with error checking
+// would be safer and immune to special characters in the path.
+void createDir(const string & curDir, const string & dirName){// COPYRIGHT OF ITAY MAYROSE.
+ string newDir;
+ if (curDir == "")
+ newDir = dirName;
+ else
+ newDir = curDir + string("/") + dirName;
+#ifdef WIN32
+ if( _mkdir(newDir.c_str()) == 0 ){
+ LOG(5, << "Directory " <<newDir<<" was successfully created"<<endl);
+ }else{
+ if (errno == EEXIST) {
+ LOG(5,<<"Directory already exist");
+ return;
+ } else {
+ string err = "Problem creating directory " + newDir + " \n";
+ LOG(5, << err << endl);
+ errorMsg::reportError(err);
+ }
+ }
+#else
+ // existence test: opendir succeeds only for an existing directory
+ DIR * directory = opendir(newDir.c_str());
+ if (directory == NULL) {
+ string sysCall = "mkdir " + newDir;
+ system(sysCall.c_str());
+ }
+ else{
+ string err = "Directory " + newDir + " already exists \n";
+ LOG(5, << err << endl);
+ //errorMsg::reportError(err);
+
+ }
+#endif
+}
+
+//scale vecToScale so that its new average is AvgIn. return the scaling factor.
+//scale vecToScale so that its new average is AvgIn. return the scaling factor.
+// NOTE(review): divides by the current average and by vecSize -- assumes a
+// non-empty vector whose mean is non-zero.
+MDOUBLE scaleVec(Vdouble& vecToScale, const MDOUBLE avgIn)
+{
+ int vecSize = vecToScale.size();
+ MDOUBLE sum = 0;
+ for (int x = 0; x<vecSize; ++x)
+ {
+ sum += vecToScale[x];
+ }
+ MDOUBLE avg = sum/vecSize;
+ MDOUBLE scaleFactor = avgIn / avg;
+
+ for (int i = 0; i<vecSize; ++i)
+ {
+ vecToScale[i] *= scaleFactor;
+ }
+
+ // sanity check: after scaling the mean must equal avgIn (within 1e-3)
+ MDOUBLE newAvg = computeAverage(vecToScale);
+ if (fabs(newAvg - avgIn) > 0.001)
+ errorMsg::reportError(" problem - scalled average is not avgIn after scalling!!!");
+ return scaleFactor;
+}
+
+//calculates the mean square error distance between 2 vectors:
+MDOUBLE calcMSEDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
+{
+ MDOUBLE res = 0.0;
+ if (oneRatesVec.size() != otherRatesVec.size())
+ errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
+
+ for (int i=0; i<oneRatesVec.size(); ++i)
+ {
+ MDOUBLE diff = oneRatesVec[i] - otherRatesVec[i];
+ res += diff * diff;
+ }
+
+ res /= oneRatesVec.size();
+ return res;
+}
+
+//calculates the mean absolute deviations distance between 2 vectors:
+MDOUBLE calcMADDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
+{
+ MDOUBLE res = 0.0;
+ if (oneRatesVec.size() != otherRatesVec.size())
+ errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
+
+ for (int i=0; i<oneRatesVec.size(); ++i)
+ {
+ MDOUBLE diff = oneRatesVec[i] - otherRatesVec[i];
+ res += fabs(diff);
+ }
+
+ res /= oneRatesVec.size();
+ return res;
+}
+
+// Mean of |inferred-true|/true over the positions whose true value is at
+// least threshhold (positions below the threshold are skipped entirely).
+// NOTE(review): if every position falls below threshhold, counter stays 0 and
+// the final division is by zero -- confirm callers guarantee a non-empty set.
+MDOUBLE calcRelativeMADDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold/*0.0*/)
+{
+ MDOUBLE res = 0.0;
+ if (inferredValues.size() != trueValues.size())
+ errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
+
+ int counter = 0;
+ for (int i=0; i<inferredValues.size(); ++i)
+ {
+ if (trueValues[i] < threshhold)
+ continue;
+ MDOUBLE diff = fabs(inferredValues[i] - trueValues[i]);
+ res += (diff / trueValues[i]);
+ ++counter;
+ }
+
+ res /= counter;
+ return res;
+}
+
+//calculates the relative mean square error distance between 2 vectors:
+//The difference from a regualar MSE is that for each position the squared difference is devided by the true value
+//if threshhold > 0: if trueValues[i] < threshhold then do not add the rse for this psition to the result
+MDOUBLE calcRelativeMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold/*0.0*/ )
+{
+ MDOUBLE res = 0.0;
+ if (inferredValues.size() != trueValues.size())
+ errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
+
+ int counter = 0;
+ for (int i=0; i<inferredValues.size(); ++i)
+ {
+ if (trueValues[i] < threshhold)
+ continue;
+ MDOUBLE diff = inferredValues[i] - trueValues[i];
+ res += diff * diff / trueValues[i];
+ ++counter;
+ }
+
+ res /= counter;
+ return res;
+}
+
+
//calculates the Spearman rank correlation between the two rate vectors,
//using midranks for ties; s_one/s_two are the tie-correction terms
//sum(f^3 - f) returned by orderVec (see its documentation above).
//NOTE(review): assumes both vectors have the same, non-zero length, and that
//not all values in a vector are tied (en3n==0 or a zero denominator is not
//guarded) - confirm with callers.
MDOUBLE calcRankCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
{
	MDOUBLE res = 0.0;
	Vdouble orderVec1, orderVec2;
	MDOUBLE s_one = orderVec(oneRatesVec, orderVec1);
	MDOUBLE s_two = orderVec(otherRatesVec, orderVec2);
	int seqLength = oneRatesVec.size();
	MDOUBLE diff, sum_diff_sqr = 0;
	for (int i=0; i<seqLength; ++i)
	{
		diff = orderVec1[i] - orderVec2[i]; //rank difference at position i
		sum_diff_sqr += pow(diff, 2);
	}
	MDOUBLE en3n = (seqLength * (pow(static_cast<double>(seqLength), 2.0) -1)); //n^3 -n
	MDOUBLE numerator = 1.0 - ((6/en3n) * (sum_diff_sqr + (s_one + s_two)/12.0));
	MDOUBLE denum = sqrt((1.0 - s_one/en3n) * (1.0 - s_two/en3n));
	res = numerator/ denum;
	return res;
}
+
+ostream &operator<<(ostream &out, const Vdouble &v){
+ for (int j=0;j<v.size();++j)
+ out<< v[j]<<" ";
+ out <<endl;
+ return(out);
+}
+
+ostream &operator<<(ostream &out, const VVdouble &m){
+ for (int i=0;i<m.size();++i)
+ out<<m[i];
+ out <<endl;
+ return(out);
+}
+
+void mult(Vdouble& vec, const MDOUBLE factor){
+ for(int i=0;i<vec.size();++i)
+ vec[i]*=factor;
+}
+
+void mult(VVdouble& vec, const MDOUBLE factor){
+ for(int i=0;i<vec.size();++i)
+ mult(vec[i],factor);
+}
+
+
+
+////orderVec - determine the relative order of vecIn
+////returns orderVecOut[i] is the rank of vecIn[i]
+////note that in case of ties the rank will be the midrank of the tied group
+//Vdouble orderVec(const Vdouble& vecIn)
+//{
+// int vecSize = vecIn.size();
+// Vdouble orderVecOut(vecSize);
+// vector< vecElem<MDOUBLE> > sortVec(vecSize);
+// for (int x =0; x < vecSize ; ++x)
+// {
+// sortVec[x].setValue(vecIn[x]);
+// sortVec[x].setPlace(x);
+// }
+// sort(sortVec.begin(), sortVec.end());
+//
+// //check for ties and correct their rank
+// Vdouble rankVec(vecSize);
+// MDOUBLE rank;
+// for (int i=0; i < vecSize; )
+// {
+// if (sortVec[i].getValue() != sortVec[i+1].getValue())
+// {//no tie
+// rankVec[i] = i;
+// ++i;
+// }
+// else
+// {//tie
+// int to =0;
+// for (to = i+1; (to<=vecSize) && (sortVec[i].getValue() == sortVec[to].getValue());++to)
+// ;//check how far the tie goes
+// to--;
+// rank = 0.5*(to + i);
+// for (int ji = i; ji<= to; ji++)
+// {
+// rankVec[ji] = rank;
+// }
+//
+// i = to+1;
+// }
+// }
+// for (int j =0; j < vecSize; ++j) {
+// assert ((rankVec[j] >= 0) && (rankVec[j] < vecSize));
+// orderVecOut[sortVec[j].getPlace()] = rankVec[j];
+// }
+// return orderVecOut;
+//}
+
+//orderVec - determine the relative order of vecIn
+//orderVecOut[i] is the rank of vecIn[i]
+//note that in case of ties the rank will be the midrank of the tied group
+//return sum of n^3 - n where n is the number of elements in each tied group - see spearman rank correlation
+MDOUBLE orderVec(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut)
+{
+ int vecSize = vecIn.size();
+ orderVecOut.resize(vecSize);
+ vector< vecElem<MDOUBLE> > sortVec(vecSize);
+ for (int x =0; x < vecSize ; ++x)
+ {
+ sortVec[x].setValue(vecIn[x]);
+ sortVec[x].setPlace(x);
+ }
+ sort(sortVec.begin(), sortVec.end());
+ //check for ties and correct their rank
+ Vdouble rankVec(vecSize);
+ MDOUBLE sumRankDif = 0; //sum(Fk^3 - Fk)
+
+ MDOUBLE rank;
+ for (int i=0; i < vecSize; )
+ {
+ if (sortVec[i].getValue() != sortVec[i+1].getValue())
+ {//no tie
+ rankVec[i] = i;
+ ++i;
+ }
+ else
+ {//tie
+ int to =0;
+ for (to = i+1; (to<=vecSize) && (sortVec[i].getValue() == sortVec[to].getValue());++to)
+ ;//check how far the tie goes
+ to--;
+ rank = 0.5*(to + i);
+ for (int ji = i; ji<= to; ji++)
+ {
+ rankVec[ji] = rank;
+ }
+
+ int numTies = to - i +1; //number o fties in this group
+ sumRankDif += numTies*numTies*numTies - numTies;
+ i = to+1;
+ }
+ }
+
+ for (int j =0; j < vecSize; ++j) {
+ assert ((rankVec[j] >= 0) && (rankVec[j] < vecSize));
+ orderVecOut[sortVec[j].getPlace()] = rankVec[j];
+ }
+ return sumRankDif;
+}
+
+void orderVec(const Vdouble& vecIn, vector< vecElem<MDOUBLE> >& orderVecOut)
+{
+ int vecSize = vecIn.size();
+ orderVecOut.resize(vecSize);
+ for (int x =0; x < vecSize ; ++x)
+ {
+ orderVecOut[x].setValue(vecIn[x]);
+ orderVecOut[x].setPlace(x);
+ }
+ sort(orderVecOut.begin(), orderVecOut.end());
+}
+
+
/* Split str into first/second around the first occurrence of seperater.
   When the separator is absent, first is assigned the whole string and second
   the literal text "NULL". When it is found, the two halves are APPENDED to
   first/second (matching the original character-by-character behaviour). */
using namespace std;
void splitString2(string str, string seperater, string &first, string &second)
{
	size_t cut = str.find(seperater);
	if (cut == string::npos)
	{
		first = str;
		second = "NULL"; //separator not found: second marks "no second part"
		return;
	}
	if (str.empty())
		return;
	first += str.substr(0, cut);
	second += str.substr(cut + seperater.length());
}
+
+
//splits str into the tokens separated by any character of delimiter and
//appends them to subStrs; empty tokens are skipped (like perl's split on a
//character class with runs of separators collapsed)
using namespace std;
void splitString(const string& str,vector<string>& subStrs,const string& delimiter)
{
	string::size_type tokenStart = str.find_first_not_of(delimiter, 0);
	string::size_type tokenEnd = str.find_first_of(delimiter, tokenStart);

	while ((tokenStart != string::npos) || (tokenEnd != string::npos))
	{
		subStrs.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
		tokenStart = str.find_first_not_of(delimiter, tokenEnd);
		tokenEnd = str.find_first_of(delimiter, tokenStart);
	}
}
+
+Vint getVintFromStr(const string& inStr)
+{
+ Vint res;
+ vector<string> outStr;
+ splitString(inStr, outStr, ",");
+ for (int i = 0; i < outStr.size(); ++i)
+ {
+ int x = atoi(outStr[i].c_str());
+ res.push_back(x);
+ }
+ return res;
+}
+
+string getStrFromVint(const Vint& inVec)
+{
+ string res("");
+ for (int i = 0; i < inVec.size(); ++i)
+ {
+ if (i > 0)
+ res += ",";
+ res += int2string(inVec[i]);
+ }
+ return res;
+}
+
+
+/********************************************************************************************
+*********************************************************************************************/
+int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories){
+ int gainIndex;
+ if(lossCategories<=gainCategories){
+ gainIndex = (int)floor((double)i/(lossCategories) );
+ }
+ else{
+ gainIndex = i%(gainCategories);
+ }
+ return gainIndex;
+}
+
// Maps a combined (gain,loss) category index back to its loss-category index.
// Mirror of fromIndex2gainIndex: when lossCategories <= gainCategories, loss
// runs fastest (i % lossCategories); otherwise gain runs fastest
// (i / gainCategories).
// Fix: replaced the floor((double)i/...) round-trip through double with plain
// integer division, which is exact for these non-negative indices.
int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories){
	int lossIndex;
	if(lossCategories<=gainCategories){
		lossIndex = i%(lossCategories); //loss runs fastest
	}
	else{
		lossIndex = i / gainCategories; //gain runs fastest
	}
	return lossIndex;
}
+
+int giveRandomState(const int alphabetSize, const int beginningState, const VVdouble &changeProbabilities)
+{
+ for (int loop = 0 ; loop < 100000 ; ++loop)
+ {
+ MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
+ MDOUBLE sum = 0.0;
+ for (int state = 0; state < alphabetSize; ++state)
+ {
+ sum += changeProbabilities[beginningState][state];
+ if (theRandNum < sum) {
+ return state;
+ }
+ }
+ }
+ errorMsg::reportError("giveRandomState: could not give random character. The reason is unknown.");
+ return 1;
+
+}
+
+int giveRandomState(const int alphabetSize, const Vdouble &frequencies) {
+ for (int loop =0 ;loop<100000 ;loop++) {
+ MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
+ MDOUBLE sum = 0.0;
+ for (int j=0; j < alphabetSize;++j) {
+ sum+=frequencies[j];
+ if (theRandNum<sum) return j;
+ }
+ }
+ errorMsg::reportError("giveRandomState: Could not give random character. The reason is probably that the frequencies do not sum to one.");
+ return 1;
+}
+
diff --git a/libs/phylogeny/someUtil.h b/libs/phylogeny/someUtil.h
new file mode 100644
index 0000000..59e7f5d
--- /dev/null
+++ b/libs/phylogeny/someUtil.h
@@ -0,0 +1,161 @@
+// $Id: someUtil.h 6055 2009-04-03 21:19:38Z rubi $
+
+#ifndef ___SOME_UTIL_H
+#define ___SOME_UTIL_H
+
+#include "logFile.h"
+#include "definitions.h"
+#include "alphabet.h"
+#include <string>
+#include <iostream>
+using namespace std;
+
//to be used for orderVec: a (value, original-position) pair that sorts by
//value and breaks ties by the original position, giving a deterministic order.
template <class T>
class vecElem
{
public:
	vecElem();
	virtual ~vecElem() {};
	void setValue(const T val) {m_value = val;}
	T getValue() const {return m_value;} // fix: const-qualified accessor
	void setPlace(const int place) {m_place = place;}
	int getPlace() const {return m_place;} // fix: const-qualified accessor
	inline bool operator< (const vecElem& elemIn) const;
private:
	int m_place; //index of the element in the original vector (-1 = unset)
	T m_value;   //the sort key (-1 = unset)
};


//both members start at -1, marking an uninitialized element
template <class T>
vecElem< T >::vecElem()
{
	m_value = -1;
	m_place = -1;
}

//order primarily by value; equal values are ordered by original position so
//the comparison is a strict weak ordering with no unstable ties
template <class T>
bool vecElem< T >::operator<(const vecElem& elemIn) const
{
	if (m_value == elemIn.m_value)
		return (m_place < elemIn.m_place);
	else
		return (m_value < elemIn.m_value);
}
+
+
+
+// STATISTICAL UTILITIES:
+
+MDOUBLE computeAverage(const vector<int>& vec);
+MDOUBLE computeAverage(const vector<MDOUBLE>& vec);
+MDOUBLE computeStd(const vector<MDOUBLE>& vec);// page 60, Sokal and Rohlf
+MDOUBLE computeStd(const vector<int>& vec);// page 60, Sokal and Rohlf
+MDOUBLE copmutePoissonProbability(const int& k, const long double& lamda);
+// re-computes a vector of frequencies after one value is changed:
+// all other values are set according to their relative value
+void computeRelativeFreqsFollowingOneChanged(MDOUBLE newValFreq, int indexNewFreq,Vdouble &freqs);//freqs is the old vector into which we write the new values
+
+// SIMULATIONS:
+int giveRandomState(const int alphabetSize, const int beginningState, const VVdouble &changeProbabilities);
+int giveRandomState(const int alphabetSize, const Vdouble &frequencies);
+
+// TIME UTILITIES
+void printTime(ostream& out);
+
+// TEXT UTILITIES
+string int2string(const int i);
+string double2string(const double x, int const howManyDigitsAfterTheDot=5);
+MDOUBLE string2double(const string& inString);
+bool allowCharSet(const string& allowableChars, const string& string2check);
+bool isCharInString(const string& stringToCheck, const char charToCheck);
+void putFileIntoVectorStringArray(istream &infile,vector<string> &inseqFile);
+
+bool fromStringIterToInt(string::const_iterator & it,
+ const string::const_iterator endOfString,
+ int& res);
+
+string takeCharOutOfString(const string& charsToTakeOut, const string& fromString);
+void toLower(string& str);
+void toUpper(string& str);
+//splits the string to substr according to the given delimiter (parallel to split in perl)
+void splitString(const string& str,vector<string>& subStrs,const string& delimiter);
+
+//input: a list of INTs seperated by commas ("1,3,5") returns the int in the vector
+Vint getVintFromStr(const string& str);
+//return a list of INTs seperated by commas ("1,3,5")
+string getStrFromVint(const Vint& inVec);
+
+// FILE UTILITIES
+bool checkThatFileExist(const string& fileName);
+string* searchStringInFile(const string& string2find,
+ const int index,
+ const string& inFileName);
+string* searchStringInFile(const string& string2find,
+ const string& inFileName);
+bool doesWordExistInFile(const string& string2find,const string& inFileName);
+void createDir(const string& curDir,const string& dirName);
+
+
+//BIT UTILITIES
+//void nextBit(bitset<64> &cur);
+
+//ARITHMETIC UTILITIES
+//DEQUAL: == UP TO EPSILON
+//DBIG_EQUAL: >= UP TO EPSILON
+//DSMALL_EQUAL: <= UP TO EPSILON
+bool DEQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); // epsilon taken from WINDOW'S FILE FLOAT.H
+bool DBIG_EQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F);
+bool DSMALL_EQUAL(const MDOUBLE x1, const MDOUBLE x2, const MDOUBLE epsilon = 1.192092896e-07F); // {return ((x1 < x2) || DEQUAL(x1, x2));}
+
+//swap between the 4 variables such that the first becomes the second, second becomes the third and third becomes the fourth.
+//used in functoin mnbrack below.
+void shift3(MDOUBLE &a, MDOUBLE &b, MDOUBLE &c, const MDOUBLE d);
+
+
+// print vector and VVdoulbe util
+ostream &operator<<(ostream &out, const Vdouble &v);
+ostream &operator<<(ostream &out, const VVdouble &m);
+void mult(Vdouble& vec, const MDOUBLE factor);
+void mult(VVdouble& vec, const MDOUBLE factor);
+//scale vecToScale so that its new average is AvgIn. return the scaling factor.
+MDOUBLE scaleVec(Vdouble& vecToScale, const MDOUBLE avgIn);
+//determine the relative order of vecIn. The order vector is returned
+//ex: vecIn = [0.1 0.4 0.01 0.9 1.8] orderVecOut = [1 2 0 3 4]
+MDOUBLE orderVec(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut);
+//in this version orderVecOut does not preserv the same order as vecIn.
+//orderVecOut[0] cotains the lowest score and it is stored in orderVecOut[0].getValue()
+//The place in the original vector is stored in orderVecOut[0].getPlace()
+void orderVec(const Vdouble& vecIn, vector< vecElem<MDOUBLE> >& orderVecOut);
+//calculates the spearman rank correlation value
+MDOUBLE calcRankCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
+MDOUBLE calcRelativeMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold = 0.0);
+MDOUBLE calcMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues);
+//MAD = mean absolute deviations distance
+MDOUBLE calcMADDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec);
+MDOUBLE calcRelativeMADDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold = 0.0);
+
+
+/* Will split a string into 2 by the given seperator
+Example for usage:
+ string a, b, c;
+ a.assign("Hello world!");
+ splitString2(a, " ", b, c);
+ cout << "b = " << b << endl << "c = " << c << endl;
+ //b == Hello
+ //c == world!
+*/
+void splitString2(string str, string seperater, string &first, string &second);
+
+int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories);
+int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories);
+
+
+
+#endif
+
diff --git a/libs/phylogeny/split.cpp b/libs/phylogeny/split.cpp
new file mode 100644
index 0000000..c492f7b
--- /dev/null
+++ b/libs/phylogeny/split.cpp
@@ -0,0 +1,84 @@
+// $Id: split.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "split.h"
+#include <cassert>
+#include <algorithm>
+using namespace std;
+
+// there are always two options. Either the active set is _set[0] or _set[1].
+// this depends on the parameter _reverse.
+// The "1" will always be in the active set.
+// so, for example consider the leaves [0,1,2] (_max = 3).
+// The split {}{0,1,2} can be represented by both the empty split {} or the
+// {0,1,2} split. Because the {0,1,2} split contains the "0" - this will be the active split.
+// so we set _set[0] to be empty, and in _set[1] which is the active one (_reverse = true)
+// we insert the leaves.
+split::split (const int max): _max(max), _reverse(true){
+ for(int j=0;j<max;++j) {
+ _set[1].insert(j);
+ }
+}
+
+// isMember searches for the key in the active set.
+bool split::isMember(const int key) const {
+ return(_set[_reverse].find(key)!=_set[_reverse].end());
+}
+
+
+void split::reverseMembership(const int key){
+ assert(key<_max && key >= 0);
+
+ // where is the key now
+ // if the key is member, than in = _reverese;
+ // Otherwise in = !_reverse
+ bool in =(isMember(key))?_reverse:!_reverse;
+
+ _set[in].erase(key);
+ _set[!in].insert(key);
+ if (key==0) // if we add "0", we need to reverse the split
+ reverse();
+};
+
+
+int split::size() const {
+ int tmp = _set[_reverse].size();
+ return (tmp<_max-tmp?tmp:_max-tmp);
+}
+
+void split::print(ostream& sout) const{ // = cout
+ sout <<"size ="<<size()<<" ";
+ set<int>::const_iterator i;
+ for (i=_set[_reverse].begin();i != _set[_reverse].end();++i)
+ sout << *i << " ";
+ sout <<" | ";
+ for (i=_set[!_reverse].begin();i != _set[!_reverse].end();++i)
+ sout << *i << " ";
+ sout << endl;
+}
+
+bool split::lessThen(const split& other) const{
+ return(_set[_reverse]<other._set[other._reverse]);
+}
+
+bool split::compatible(const split& other) const {
+ set<int>::const_iterator i (_set[_reverse].begin());
+ set<int>::const_iterator i_end (_set[_reverse].end());
+ set<int>::const_iterator j (other._set[other._reverse].begin());
+ set<int>::const_iterator j_end (other._set[other._reverse].end());
+ return (includes(i,i_end,j,j_end) || includes(j,j_end,i,i_end));
+}
+
// Flip which of the two internal sets is the active one.
// (Called when leaf "0" switches sides, so that the active set keeps
// containing "0" - see reverseMembership.)
void split::reverse(){ // actualy reverse membership in the set
	_reverse=!_reverse;
	}
+
+bool operator<(const split& a, const split& b) {
+ return(a.lessThen(b));
+}
+
+ostream& operator<< (ostream &sout, const split& split) {
+ split.print(sout);
+ return sout;
+}
+
+
diff --git a/libs/phylogeny/split.h b/libs/phylogeny/split.h
new file mode 100644
index 0000000..bbd3342
--- /dev/null
+++ b/libs/phylogeny/split.h
@@ -0,0 +1,75 @@
+// $Id: split.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___SPLIT
+#define ___SPLIT
+
+#include "definitions.h"
+#include <set>
+#include <vector>
+#include <iostream>
+#include <cassert>
+using namespace std;
+
+
// A split (bipartition) of the leaves [0.._max). Two sets are kept in _set[0]
// and _set[1]; the "active" one is _set[_reverse], and the class maintains the
// invariant that the active set always contains leaf "0" (see the *i==0 /
// key==0 checks), so the two representations of the same bipartition compare
// equal.

class split {
public:
	explicit split (const int max=0); // the trivial split: all leaves on one side

// construct from an iterator range of member ids plus the total leaf count.
// NOTE(review): the iterators are taken by reference and consumed.

template<class Iterator>
split (Iterator& i,
	   Iterator& end,
	   int max):_max(max), _reverse(true){
	// start from the trivial split, then move each listed member across
	for(int j=0;j<max;++j)
		_set[1].insert(j);

	for (;i!=end;++i){
		assert((*i)<_max && (*i) >= 0);
		_set[0].insert(*i);
		_set[1].erase(*i);
		if (*i==0) // leaf "0" changed sides - the active set must follow it
			reverse();
	}
}

	bool isMember(const int key) const;
	int size() const ; // size of the smaller side
	void print(ostream& sout = cout) const;
	bool lessThen(const split& other) const; // ordering for use as a map key
	bool compatible(const split& other) const ;

	// remove the key from the active set to the non-active set or vice versa.
	// for example if the split is {0,1 | 2}
	// reverseMembership(1) will change the split to this one: {0 | 1,2 }
	void reverseMembership(const int key);

	// fill id with the members of the smaller of the two sides
	void getId(vector<int> & id) const {
		id.clear();
		bool small(_set[0].size()>_set[1].size());
		for (set<int>::const_iterator i=_set[small].begin();i!=_set[small].end();++i)
			id.push_back(*i);
	}

private:
	void reverse(); // flip which internal set is the active one


	int _max; // number of leaves; all member ids are assumed to be in [0.._max)
	set<int> _set[2];
	bool _reverse; // index of the active set
};
+
+bool operator<(const split& a,
+ const split& b) ;
+
+
+
+ostream& operator<< (ostream &sout, const split& split) ;
+
+
+
+#endif // ___SPLIT
diff --git a/libs/phylogeny/splitMap.cpp b/libs/phylogeny/splitMap.cpp
new file mode 100644
index 0000000..dd96966
--- /dev/null
+++ b/libs/phylogeny/splitMap.cpp
@@ -0,0 +1,50 @@
+// $Id: splitMap.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "splitMap.h"
+#include <algorithm>
+using namespace std;
+
+int splitMap::add(const split & in) { // add a split and return it's new count.
+ return(_map[in]=_map[in]+1);
+}
+
+class valCmp {
+public:
+ bool operator()(const pair<split,int> & elem1, const pair<split,int> & elem2) {
+ return (elem1.second > elem2.second);
+ }
+};
+
+vector<pair<split,int> > splitMap::sortSplits() const{
+ vector<pair<split,int> > svec(_map.size());
+ partial_sort_copy(_map.begin(),_map.end(),svec.begin(),svec.end(),valCmp());
+ return svec;
+}
+
+int splitMap::counts(const split& in) const {
+ mapSplitInt::const_iterator i(_map.find(in));
+ if (i==_map.end()) return 0;
+ return i->second;
+}
+
+void splitMap::print(ostream& sout) const {// default cout.
+ for (mapSplitInt::const_iterator i = _map.begin(); i != _map.end();++i) {
+ sout << i->second<<"\t"<<i->first;
+ }
+ sout <<endl;
+}
+
+
+ostream& operator<< (ostream &sout, const splitMap& split_map) {
+ split_map.print(sout);
+ return sout;
+}
+
+/*splitMap::reverse_mapSplitInt splitMap::reverse() const
+{
+ reverse_sMap_t rmap;
+ for (sMap_t::const_iterator i=_map.begin(); i!=_map.end();++i)
+ rmap.insert(rMapPair_t(i->second,i->first));
+ return rmap;
+}
+*/
diff --git a/libs/phylogeny/splitMap.h b/libs/phylogeny/splitMap.h
new file mode 100644
index 0000000..6c98476
--- /dev/null
+++ b/libs/phylogeny/splitMap.h
@@ -0,0 +1,37 @@
+// $Id: splitMap.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___SPLITMAP
+#define ___SPLITMAP
+
+#include "definitions.h"
+#include "split.h"
+#include <map>
+using namespace std;
+
// splitMap is a map of split to integers used for counting the occurences of each split.
// Questions we want the class to be able to answer:
// 1. What is the occurence a specific split.
// 2. what is the most common split
// 3. Sort the splits according to their frequency.

class splitMap {
// public:
//	typedef pair<int,const split> rMapPair_t;
//	typedef multimap<const int,const split> reverse_sMap_t;
//	typedef multimap<int,split> reverse_sMap_t;
//	reverse_sMap_t reverse() const ;
public:
	explicit splitMap(){}; // empty constructor
	int add(const split & in); // record one occurrence; return the new frequency.
	int counts(const split& in) const; // occurrences of the split (0 when absent)
	void print(ostream& sout = cout) const;
	vector<pair<split,int> > sortSplits() const; // (split,count) pairs, most frequent first
private:

	typedef map<split,int> mapSplitInt;
	mapSplitInt _map;
};
+
+ostream& operator<< (ostream &sout, const splitMap& split_map);
+#endif
+
diff --git a/libs/phylogeny/splitTreeUtil.cpp b/libs/phylogeny/splitTreeUtil.cpp
new file mode 100644
index 0000000..7b44887
--- /dev/null
+++ b/libs/phylogeny/splitTreeUtil.cpp
@@ -0,0 +1,109 @@
+// $Id: splitTreeUtil.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "splitTreeUtil.h"
+#include "someUtil.h"
+
+static int idFromName(const string name, const map<string, int> & nameIdMap)
+{
+ map<string, int>::const_iterator i=nameIdMap.find(name);
+ if (i==nameIdMap.end()) errorMsg::reportError(" error in splitTreeUtil. Name not found in nameIdMap");
+ return (i->second);
+}
+
// returns true if all the sons of myNode are in the split.
// return false if all the sons of myNode are NOT in the split
// if some of the sons are in and some are not - set foundTheNodeAlready to true.
// and set splitNode to be that node.
static bool findNodeToSplitRecursive( const tree::nodeP myNode,
					const split& mySplit,
					tree::nodeP& splitNode,
					bool & foundTheNodeAlready,
					const map<string, int> & nameIdMap) {
	// a leaf is simply "in" or "out" of the split, decided by its id
	if (myNode->isLeaf()) return (mySplit.isMember(idFromName(myNode->name(),nameIdMap)));
	// the first son's membership is the reference the other sons are compared to
	bool inSplit = findNodeToSplitRecursive(myNode->getSon(0),mySplit,splitNode,foundTheNodeAlready,nameIdMap);
	if (foundTheNodeAlready) return true;
	for (int i=1; i < myNode->getNumberOfSons(); ++i) {
		bool tmp = findNodeToSplitRecursive(myNode->getSon(i),mySplit,splitNode,foundTheNodeAlready,nameIdMap);
		if (foundTheNodeAlready) return true;
		if (tmp != inSplit) {
			// sons disagree: myNode is where the split has to be applied
			foundTheNodeAlready = true;
			splitNode = myNode;
			return true;
		}
	}
	return inSplit; // all sons agree; propagate the common membership upward
}
+
+
+
+tree::nodeP findNodeToSplit(const tree& et,
+ const split& mySplit,
+ const map<string, int> & nameIdMap) {
+ tree::nodeP res;
+ bool foundTheNodeAlready = false;
+ findNodeToSplitRecursive(et.getRoot(),mySplit,res,foundTheNodeAlready,nameIdMap);
+ return res;
+}
+
+void applySplit(tree& et,
+ const split& mySplit,
+ const map<string, int> & nameIdMap) {
+ tree::nodeP node2split = findNodeToSplit(et,mySplit,nameIdMap);
+ et.rootAt(node2split);
+ applySplitToRoot(et,mySplit,nameIdMap);
+}
+
// Detach the listed sons of fatherNode and hang them under a freshly created
// internal node, which becomes a new son of fatherNode - thereby introducing
// the corresponding split into the tree. All nodes in son2split must currently
// be sons of fatherNode.
void splitSonsFromNode(tree & et, tree::nodeP fatherNode, vector<tree::nodeP> & son2split)
{
	// precondition check: every listed node really is a son of fatherNode
	for (int k=0; k < son2split.size(); ++k) {
		if (son2split[k]->father() != fatherNode )
			errorMsg::reportError(" error in function bootstrap::splitSonsFromNode - nodes don't have the same father");
	}
	// if the split allready exists, we do not need to do anything.
	if (son2split.size()==fatherNode->getNumberOfSons() // the branch above us is the required split
		|| son2split.size() <=1 // the branch below us is it
		|| (fatherNode->father()==NULL && son2split.size()==fatherNode->getNumberOfSons()-1)
		// the branch above us is the required split
		)
		return;

	// create the new internal node and move each listed son under it
	tree::nodeP theNewNode = et.createNode(fatherNode,et.getNodesNum());
	theNewNode->setName("N"+int2string(theNewNode->id()));
	for (int i=0; i < son2split.size(); ++i) {
		son2split[i]->setFather(theNewNode);
		theNewNode->setSon(son2split[i]);
		// remove from son list of father node.
		fatherNode->removeSon(son2split[i]);
	}
}
+
+void applySplitToRoot(tree& et,
+ const split& mySplit,
+ const map<string, int> & nameIdMap) {
+ vector<tree::nodeP> sonsThatHaveToBeSplit = findSonsThatHaveToBeSplit(et,mySplit,nameIdMap);
+ splitSonsFromNode(et, et.getRoot(), sonsThatHaveToBeSplit);
+}
+
+vector<tree::nodeP> findSonsThatHaveToBeSplit(const tree& et,
+ const split& mySplit,
+ const map<string, int> & nameIdMap){
+// we assume that split is compatible with the tree and that the split is a subset of the children of the root.
+// i.e., the node that has to be splitted is the root.
+ vector<tree::nodeP> res;
+ for (int i=0; i < et.getRoot()->getNumberOfSons(); ++i) {
+ if (childIsInTheSplit(et.getRoot()->getSon(i),mySplit,nameIdMap)) {
+ res.push_back(et.getRoot()->getSon(i));
+ }
+ }
+ return res;
+}
+
+bool childIsInTheSplit(const tree::nodeP & myNode,
+ const split& mySplit,
+ const map<string, int> & nameIdMap) {
+ if (myNode->isInternal()) return childIsInTheSplit(myNode->getSon(0),mySplit,nameIdMap);
+ else {// we are in a leaf
+ return (mySplit.isMember(idFromName(myNode->name(),nameIdMap)));
+ }
+}
+
diff --git a/libs/phylogeny/splitTreeUtil.h b/libs/phylogeny/splitTreeUtil.h
new file mode 100644
index 0000000..83c79d2
--- /dev/null
+++ b/libs/phylogeny/splitTreeUtil.h
@@ -0,0 +1,25 @@
+// $Id: splitTreeUtil.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___SPLIT_TREE_UTIL
+#define ___SPLIT_TREE_UTIL
+#include "tree.h"
+#include "split.h"
+
+#include <vector>
+#include <map>
+using namespace std;
+
+
+tree::nodeP findNodeToSplit(const tree& et,const split& mySplit,const map<string, int> & nameIdMap);
+void applySplit(tree& et, const split& mySplit,const map<string, int> & nameIdMap);
+void splitSonsFromNode(tree & et, tree::nodeP fatherNode, vector<tree::nodeP> & son2split);
+void applySplitToRoot(tree& et, const split& mySplit,const map<string, int> & nameIdMap);
+vector<tree::nodeP> findSonsThatHaveToBeSplit(const tree& et,const split& mySplit,const map<string, int> & nameIdMap);
+bool childIsInTheSplit(const tree::nodeP & myNode, const split& mySplit,const map<string, int> & nameIdMap);
+
+
+
+#endif
+
+
+
diff --git a/libs/phylogeny/ssrvDistanceSeqs2Tree.cpp b/libs/phylogeny/ssrvDistanceSeqs2Tree.cpp
new file mode 100644
index 0000000..03b7cc9
--- /dev/null
+++ b/libs/phylogeny/ssrvDistanceSeqs2Tree.cpp
@@ -0,0 +1,149 @@
+// $Id: ssrvDistanceSeqs2Tree.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "ssrvDistanceSeqs2Tree.h"
+//#include "bestAlphaAndNu.h"
+#include "bestParamUSSRV.h"
+#include "someUtil.h"
+#include <float.h>
+
+// Iterative reconstruction starting from user-supplied initial alpha and nu.
+// The 'true' flag tells the internal routine that initial side info is given.
+tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _alpha = initAlpha;
+ _newNu = _nu = initNu;
+ _weights = weights;
+ return seqs2TreeIterativeInternal(sc, true);
+}
+
+// Iterative reconstruction with no initial side info; the 'false' flag makes
+// the internal routine generate its own starting values (homogeneous rates
+// for the first iteration, per the header comment).
+tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternal(sc, false);
+}
+
+// Iterative reconstruction starting from a given initial tree (no initial
+// alpha/nu supplied).
+tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights, const tree* constraintTreePtr) {
+ _constraintTreePtr=constraintTreePtr;
+ _weights = weights;
+ return seqs2TreeIterativeInternalInitTreeGiven(sc, initTree);
+}
+
+// Iterative reconstruction from a given tree and initial alpha only.
+// NOTE(review): _nu is intentionally NOT set here — the 'false' argument
+// presumably tells the internal routine that nu was not supplied; compare
+// with the alpha+nu overload below which passes 'true'. Confirm against
+// seqs2TreeIterativeInternalInitTreeGiven.
+tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights, const tree* constraintTreePtr) {
+ _alpha = initAlpha;
+ _weights = weights;
+
+ _constraintTreePtr=constraintTreePtr;
+ return seqs2TreeIterativeInternalInitTreeGiven(sc, false, initTree, initAlpha);
+}
+
+// Iterative reconstruction from a given tree plus initial alpha and nu
+// ('true' = nu was supplied as well).
+tree ssrvDistanceSeqs2Tree::seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble *weights, const tree* constraintTreePtr) {
+ _alpha = initAlpha;
+ _newNu = _nu = initNu;
+ _weights = weights;
+
+ _constraintTreePtr=constraintTreePtr;
+ return seqs2TreeIterativeInternalInitTreeGiven(sc, true, initTree, initAlpha);
+}
+
+// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
+// NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
+// It performs exactly one distance+reconstruction pass with the given alpha/nu
+// and returns the tree stored in _newTree by seqs2TreeOneIterationInternal.
+tree ssrvDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, MDOUBLE nu, const Vdouble *weights, const tree* constraintTreePtr) {
+ _weights = weights;
+ _alpha = alpha;
+ _newNu = _nu = nu;
+ _constraintTreePtr=constraintTreePtr;
+ seqs2TreeOneIterationInternal(sc, true);
+ return _newTree;
+}
+
+// One bootstrap iteration: prime alpha/nu, then delegate to the base-class
+// implementation. The static_cast selects the base-class overload (which
+// would otherwise be hidden by this class's seqs2TreeBootstrap).
+tree ssrvDistanceSeqs2Tree::seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, MDOUBLE nu, const Vdouble *weights, const tree* constraintTreePtr) {
+ _weights = weights;
+ _alpha = alpha;
+ _newNu = _nu = nu;
+ return static_cast<iterativeDistanceSeqs2Tree *>(this)->seqs2TreeBootstrap(sc, weights, constraintTreePtr);
+}
+
+// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
+// NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
+tree ssrvDistanceSeqs2Tree::seqs2Tree(const sequenceContainer &sc, const Vdouble *weights, const tree* constraintTreePtr) {
+ return seqs2TreeIterative(sc,weights,constraintTreePtr);
+}
+
+// Optimizes the SSRV side info (alpha, nu, and branch lengths) on the given
+// tree, storing results in _newAlpha/_newNu and returning the best
+// log-likelihood found. The dynamic_cast probes whether the base replacement
+// model is tamura92; the only difference between the two branches is the
+// third constructor flag (whether tamura92 parameters are also optimized).
+MDOUBLE ssrvDistanceSeqs2Tree::optimizeSideInfo(const sequenceContainer &sc, tree &et)
+{
+ if (!dynamic_cast<tamura92*>(
+ static_cast<replacementModelSSRV*>(_spPtr->getPijAccelerator()->getReplacementModel())
+ ->getBaseRM()
+ )
+ ) {
+ bestParamSSRV optimizer(true,true,false,true); // optimize alpha, nu, NOT tamura92 params, and bbl
+ optimizer(et,sc,*static_cast<stochasticProcessSSRV*>(_spPtr),_weights,
+ 15,15,0.5,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement,
+ _epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5);
+ _newAlpha=optimizer.getBestAlpha();
+ _newNu=optimizer.getBestNu();
+ return(optimizer.getBestL());
+ } else {
+ bestParamSSRV optimizer(true,true,true,true); // optimize alpha, nu, tamura92 params, and bbl
+ optimizer(et,sc,*static_cast<stochasticProcessSSRV*>(_spPtr),_weights,
+ 15,15,0.5,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement,
+ _epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5);
+ _newAlpha=optimizer.getBestAlpha();
+ _newNu=optimizer.getBestNu();
+ return(optimizer.getBestL());
+ }
+}
+
+// Fixes alpha at the given value (writing it into the sp's gamma
+// distribution), then optimizes only the remaining side info: nu, plus
+// tamura92 parameters when the base model is tamura92. No BBL is done here
+// (fourth optimizer flag is false). Returns the best log-likelihood.
+MDOUBLE ssrvDistanceSeqs2Tree::calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha)
+{
+ _newAlpha = alpha;
+ (static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(alpha);
+
+ // optimize only nu (and tamura92 params, if relevant)
+ if (!dynamic_cast<tamura92*>(
+ static_cast<replacementModelSSRV*>(_spPtr->getPijAccelerator()->getReplacementModel())
+ ->getBaseRM()
+ )
+ ) {
+ bestParamSSRV optimizer(false,true,false,false);
+ optimizer(et,sc,*(static_cast<stochasticProcessSSRV*>(_spPtr)),_weights,
+ 15,15,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement,
+ _epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5);
+ _newNu=optimizer.getBestNu();
+ return(optimizer.getBestL());
+ } else {
+ bestParamSSRV optimizer(false,true,true,false);
+ optimizer(et,sc,*(static_cast<stochasticProcessSSRV*>(_spPtr)),_weights,
+ 15,15,_epsilonLikelihoodImprovement4alphaOptimiz,_epsilonLikelihoodImprovement,
+ _epsilonLikelihoodImprovement4BBL,_maxIterationsBBL,5);
+ _newNu=optimizer.getBestNu();
+ return(optimizer.getBestL());
+ }
+}
+
+// Commits the candidate side info produced by the last optimization step.
+void ssrvDistanceSeqs2Tree::acceptSideInfo()
+{
+ _alpha = _newAlpha;
+ _nu = _newNu;
+}
+
+// Pushes the committed alpha and nu into the stochastic process used by the
+// distance method (_distM), so the next distance computation uses them.
+void ssrvDistanceSeqs2Tree::utilizeSideInfo()
+{
+ // set new alpha value in the sp that is used in _distM
+ LOG(10,<<"# utilizing alpha "<<_alpha<<" and nu "<<_nu<<endl);
+ (static_cast<gammaDistribution*>(_spPtr->distr()))->setAlpha(_alpha);
+ (static_cast<stochasticProcessSSRV*>(_spPtr))->setRateOfRate(_nu);
+}
+
+// Writes the current side info (alpha, nu) to the given stream.
+void ssrvDistanceSeqs2Tree::printSideInfo(ostream& out) const
+{
+ out<<"Alpha: "<< _alpha <<" Nu: "<< _nu <<endl;
+}
+
+// non virtual
+// Directly sets the side info without any optimization.
+void ssrvDistanceSeqs2Tree::setSideInfo(const MDOUBLE alpha, MDOUBLE nu)
+{
+ _alpha = alpha;
+ _nu = nu;
+}
+
+// Returns the current side info as an alphaAndNu value pair.
+ssrvDistanceSeqs2Tree::alphaAndNu ssrvDistanceSeqs2Tree::getSideInfo() const
+{
+ return alphaAndNu(_alpha, _nu);
+}
diff --git a/libs/phylogeny/ssrvDistanceSeqs2Tree.h b/libs/phylogeny/ssrvDistanceSeqs2Tree.h
new file mode 100644
index 0000000..10bcea8
--- /dev/null
+++ b/libs/phylogeny/ssrvDistanceSeqs2Tree.h
@@ -0,0 +1,63 @@
+// $Id: ssrvDistanceSeqs2Tree.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___SSRV_DISTANCE_SEQS2TREE
+#define ___SSRV_DISTANCE_SEQS2TREE
+
+#include "distanceBasedSeqs2Tree.h"
+#include "tree.h"
+
+/* class ssrvDistanceSeqs2Tree
+A type of distance-based tree reconstruction method like the iterative
+method commonAlphaDistanceSeqs2Tree, but using a model with SSRV
+(Site-Specific Rate Variation, AKA covarion model). Compared to
+commonAlphaDistanceSeqs2Tree, we change the distance method to use an
+SSRV model, and in the optimizations we estimate ni in addition to
+alpha.
+*/
+class ssrvDistanceSeqs2Tree : public iterativeDistanceSeqs2Tree {
+public:
+ // Given likeDist is assumed to hold a gamma-distribution, SSRV stochasticProcess
+ // Epsilons control convergence of the overall iteration, the parameter
+ // optimization, and branch-length (BBL) optimization respectively.
+ ssrvDistanceSeqs2Tree(likeDist &distM, distances2Tree &dist2et, const Vdouble *weights = NULL,
+ const MDOUBLE epsilonLikelihoodImprovement = 0.001,
+ const MDOUBLE epsilonLikelihoodImprovement4paramOptimiz = 0.001,
+ const MDOUBLE epsilonLikelihoodImprovement4BBL = 0.001,
+ const int maxIterationsBBL = 50)
+ : iterativeDistanceSeqs2Tree(distM, dist2et, weights, epsilonLikelihoodImprovement, epsilonLikelihoodImprovement4paramOptimiz, epsilonLikelihoodImprovement4BBL, maxIterationsBBL) {}
+ virtual ~ssrvDistanceSeqs2Tree () {}
+
+ // Datastruct for handling side info for the SSRV model (used as return value)
+ struct alphaAndNu {
+ MDOUBLE alpha;
+ MDOUBLE nu;
+ alphaAndNu(){}
+ alphaAndNu(MDOUBLE setAlpha, MDOUBLE setNu) : alpha(setAlpha), nu(setNu) {}
+ };
+
+ // NOTE! This version calls ITERATIVE seqs2Tree because side info is not given by the user, so we have to generate and optimize it
+ virtual tree seqs2Tree(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
+ // NOTE! This version is a NON-ITERATIVE version that uses the side info supplied by the user
+ tree seqs2Tree(const sequenceContainer &sc, MDOUBLE alpha, MDOUBLE nu, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
+ // Does one bootstrap iteration
+ tree seqs2TreeBootstrap(const sequenceContainer &sc, const MDOUBLE alpha, MDOUBLE nu, const Vdouble *weights, const tree* constraintTreePtr=NULL);
+ // Explicitly ask for iterations
+ virtual tree seqs2TreeIterative(const sequenceContainer &sc, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL); // homogenous rates will be used for first iteration
+ tree seqs2TreeIterative(const sequenceContainer &sc, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
+ virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
+ virtual tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
+ tree seqs2TreeIterative(const sequenceContainer &sc, const tree &initTree, MDOUBLE initAlpha, MDOUBLE initNu, const Vdouble *weights=NULL, const tree* constraintTreePtr=NULL);
+
+ // handling side info
+ virtual MDOUBLE optimizeSideInfo(const sequenceContainer &sc, tree &et);
+ virtual MDOUBLE calcSideInfoGivenTreeAndAlpha(const sequenceContainer &sc, const tree &et, MDOUBLE alpha);
+ virtual void acceptSideInfo();
+ virtual void utilizeSideInfo();
+ virtual void printSideInfo(ostream& out) const;
+ void setSideInfo(const MDOUBLE alpha, MDOUBLE nu);
+ alphaAndNu getSideInfo() const;
+
+protected:
+ // Committed and candidate values of the SSRV rate-of-rate parameter; _newNu
+ // is written by the optimizers and promoted to _nu by acceptSideInfo().
+ MDOUBLE _nu;
+ MDOUBLE _newNu;
+};
+
+#endif
diff --git a/libs/phylogeny/stochasticProcess.cpp b/libs/phylogeny/stochasticProcess.cpp
new file mode 100644
index 0000000..68248ab
--- /dev/null
+++ b/libs/phylogeny/stochasticProcess.cpp
@@ -0,0 +1,57 @@
+// $Id: stochasticProcess.cpp 4660 2008-08-12 14:31:38Z cohenofi $
+
+#include "stochasticProcess.h"
+#include "errorMsg.h"
+
+// Assignment operator: deep-copies the pij accelerator and the distribution.
+// The clones are created BEFORE the old members are deleted, so *this is
+// left intact if clone() throws (the previous code deleted first despite its
+// own "Create the new one FIRST" comment).
+// Bug fix: _isReversible was previously copied only in the branch where
+// otherStoc._distr was NULL; it is now copied unconditionally, matching the
+// copy constructor.
+stochasticProcess& stochasticProcess::operator=(const stochasticProcess &otherStoc) {
+ if (this != &otherStoc) { // Check for self-assignment
+ pijAccelerator* p2 = otherStoc._pijAccelerator ? otherStoc._pijAccelerator->clone() : NULL;
+ distribution* d2 = otherStoc._distr ? otherStoc._distr->clone() : NULL;
+
+ if (_pijAccelerator) delete _pijAccelerator;
+ _pijAccelerator = p2;
+
+ if (_distr) delete _distr;
+ _distr = d2;
+
+ _isReversible = otherStoc.isReversible();
+ }
+ return *this;
+}
+
+
+// Main constructor: takes deep copies (clones) of the given distribution and
+// accelerator; the caller keeps ownership of the originals.
+stochasticProcess::stochasticProcess(const distribution *in_distr,const pijAccelerator *pijAccelerator, bool isReversible) :
+ _distr(in_distr->clone()), _pijAccelerator(pijAccelerator->clone()), _isReversible(isReversible){
+
+}
+
+// Copy constructor: deep-copies both members (NULL members stay NULL).
+stochasticProcess::stochasticProcess(const stochasticProcess& other):
+ _distr(NULL), _pijAccelerator(NULL){
+ if (other._pijAccelerator != NULL) _pijAccelerator = other._pijAccelerator->clone();
+ if (other._distr != NULL) _distr = other._distr->clone();
+ _isReversible = other.isReversible();
+}
+
+// Destructor: the process owns its clones (delete NULL is a no-op).
+stochasticProcess::~stochasticProcess() {
+ delete _distr;
+ delete _pijAccelerator;
+}
+
+
+// Replaces the owned distribution with a clone of in_distr (or NULL).
+void stochasticProcess::setDistribution(const distribution* in_distr)
+{
+ if (_distr) delete _distr;
+ if (in_distr == NULL) _distr = NULL;
+ else _distr = in_distr->clone();
+}
diff --git a/libs/phylogeny/stochasticProcess.h b/libs/phylogeny/stochasticProcess.h
new file mode 100644
index 0000000..1362a11
--- /dev/null
+++ b/libs/phylogeny/stochasticProcess.h
@@ -0,0 +1,58 @@
+// $Id: stochasticProcess.h 2511 2007-11-04 12:08:50Z cohenofi $
+
+#ifndef ___STOCHASTIC_PROCESS
+#define ___STOCHASTIC_PROCESS
+
+#include "pijAccelerator.h"
+#include "distribution.h"
+#include <cassert>
+
+// A stochastic process is composed of two owned parts: a distribution of
+// rates (_distr) and a Pij accelerator (_pijAccelerator). Both are stored as
+// clones and released in the destructor.
+class stochasticProcess{
+public:
+ explicit stochasticProcess(const distribution *in_distr,const pijAccelerator *pijAccelerator, bool isReversible = true);
+ explicit stochasticProcess() {
+ _distr=NULL; _pijAccelerator=NULL; _isReversible=true;
+ }
+ stochasticProcess(const stochasticProcess& other);
+ virtual stochasticProcess* clone() const {return new stochasticProcess(*this);}
+
+ const int alphabetSize() const {return _pijAccelerator->alphabetSize();} // The alphabet size is the same as the matrix Pij size
+
+ // Rate-category accessors, all delegated to the distribution.
+ virtual const int categories() const {return _distr->categories();}
+ virtual const MDOUBLE rates(const int i) const {return _distr->rates(i);}
+ virtual const MDOUBLE ratesProb(const int i) const {return _distr->ratesProb(i);}
+
+
+ // Transition probability i->j in time t; t==0 short-circuits to the
+ // identity matrix without consulting the accelerator.
+ virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const {
+ if (t!=0) return _pijAccelerator->Pij_t(i,j,t);
+ return (i==j)? 1 : 0;
+ }
+
+ const MDOUBLE freq(const int i) const {assert(i>=0);return _pijAccelerator->freq(i);} // P(i)
+ const MDOUBLE dPij_dt(const int i,const int j,const MDOUBLE t) const { return _pijAccelerator->dPij_dt(i,j,t);}
+ const MDOUBLE d2Pij_dt2(const int i, const int j, const MDOUBLE t) const { return _pijAccelerator->d2Pij_dt2(i,j,t);}
+
+
+ virtual distribution* distr() const {return _distr;} // @@@@ this const is a lie !!!
+ virtual const pijAccelerator* getPijAccelerator() const {return _pijAccelerator;}
+ virtual void setDistribution(const distribution* in_distr);
+
+ stochasticProcess& operator=(const stochasticProcess &otherStoc);
+ virtual ~stochasticProcess();
+ virtual void setGlobalRate(const MDOUBLE x) {_distr->setGlobalRate(x);}
+ virtual MDOUBLE getGlobalRate() const {return _distr->getGlobalRate();}
+ const bool isReversible() const {return _isReversible;}
+
+
+protected:
+ distribution *_distr; // owned clone; may be NULL (e.g. SSRV subclass)
+ pijAccelerator *_pijAccelerator; // owned clone
+ bool _isReversible;
+};
+
+
+
+#endif
+
+
+// Stochastic process is composed of two objects: a distribution of rates and a Pij accelerator.
diff --git a/libs/phylogeny/stochasticProcessSSRV.cpp b/libs/phylogeny/stochasticProcessSSRV.cpp
new file mode 100644
index 0000000..268549a
--- /dev/null
+++ b/libs/phylogeny/stochasticProcessSSRV.cpp
@@ -0,0 +1,19 @@
+// $Id: stochasticProcessSSRV.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "stochasticProcessSSRV.h"
+#include "replacementModelSSRV.h"
+
+// it's important to call static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())->updateQ(), after changing
+// this returned pointer. (when changing alpha)
+// it's important to call static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())->updateQ(), after changing
+// this returned pointer. (when changing alpha)
+// Unlike the base class, the distribution lives inside the SSRV replacement
+// model (the inherited _distr stays NULL), so we fetch it from there.
+distribution* stochasticProcessSSRV::distr() const
+{
+ return ( static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())->getDistribution() );
+}
+
+
+// Forwards the new distribution to the SSRV replacement model, which is
+// where this subclass keeps it (see distr() above).
+void stochasticProcessSSRV::setDistribution(const distribution* in_distr)
+{
+ static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())->setDistribution(in_distr);
+}
+
+
diff --git a/libs/phylogeny/stochasticProcessSSRV.h b/libs/phylogeny/stochasticProcessSSRV.h
new file mode 100644
index 0000000..3292a72
--- /dev/null
+++ b/libs/phylogeny/stochasticProcessSSRV.h
@@ -0,0 +1,48 @@
+// $Id: stochasticProcessSSRV.h 1923 2007-04-04 16:38:14Z privmane $
+
+
+#ifndef ___STOCHASTIC_PROCESS_SSRV
+#define ___STOCHASTIC_PROCESS_SSRV
+
+#include "stochasticProcess.h"
+#include "replacementModelSSRV.h"
+
+// This is a Stochastic process that its distribution is located inside its accelerator.
+// _dist should be NULL all the time.
+// The number of categories is always 1.
+// _pijAccelerator must contain a replacementModelSSRV* as a member.
+// The distribution is located inside the replacement model which is a member of _pijAccelerator.
+
+class stochasticProcessSSRV : public stochasticProcess{
+public:
+ // Clones the accelerator only; _distr deliberately stays NULL (the
+ // distribution lives inside the accelerator's replacement model).
+ explicit stochasticProcessSSRV(const pijAccelerator *pijAccelerator) :
+ stochasticProcess() { _pijAccelerator = pijAccelerator->clone();}
+ explicit stochasticProcessSSRV() : stochasticProcess() {}
+ stochasticProcessSSRV(const stochasticProcessSSRV& other) : stochasticProcess(other) {}
+ stochasticProcessSSRV& operator=(const stochasticProcessSSRV &other) {stochasticProcess::operator=(other); return (*this);}
+ virtual stochasticProcess* clone() const {return new stochasticProcessSSRV(*this);}
+
+ virtual ~stochasticProcessSSRV() {}
+
+ // SSRV exposes a single external rate category; rate variation is modeled
+ // inside the replacement model instead.
+ virtual const int categories() const { return 1; }
+ virtual const MDOUBLE rates(const int i) const {return 1.0;}
+ virtual const MDOUBLE ratesProb(const int i) const {return 1.0;}
+
+ virtual const MDOUBLE Pij_t(const int i, const int j, const MDOUBLE t) const {
+ // as opposed to normal stochastic-process. even when t=0 and i!=j the result might be > 0
+ return _pijAccelerator->Pij_t(i,j,t);
+ }
+
+ virtual distribution* distr() const; // @@@@ this const is a lie !!!
+ virtual void setDistribution(const distribution* in_distr);
+
+ virtual void setGlobalRate(const MDOUBLE x) {distr()->setGlobalRate(x);} // @@@@ should this also call updateQ of the RM ??? Doesn't really metter when using gamma distribution
+ virtual MDOUBLE getGlobalRate() const {return distr()->getGlobalRate();}
+
+ // Sets the SSRV rate-of-rate parameter (nu) on the replacement model.
+ void setRateOfRate(MDOUBLE rateOfRate) {
+ static_cast<replacementModelSSRV*>(_pijAccelerator->getReplacementModel())
+ ->setRateOfRate(rateOfRate);
+ }
+};
+
+#endif
diff --git a/libs/phylogeny/suffStatComponent.cpp b/libs/phylogeny/suffStatComponent.cpp
new file mode 100644
index 0000000..fa47278
--- /dev/null
+++ b/libs/phylogeny/suffStatComponent.cpp
@@ -0,0 +1,6 @@
+// $Id: suffStatComponent.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "suffStatComponent.h"
+
+
+
diff --git a/libs/phylogeny/suffStatComponent.h b/libs/phylogeny/suffStatComponent.h
new file mode 100644
index 0000000..1a152c5
--- /dev/null
+++ b/libs/phylogeny/suffStatComponent.h
@@ -0,0 +1,204 @@
+// $Id: suffStatComponent.h 3045 2007-12-18 15:55:08Z itaymay $
+
+#ifndef ___SUFF_STAT_COMPONENT
+#define ___SUFF_STAT_COMPONENT
+
+#include "definitions.h"
+#include <vector>
+using namespace std;
+
+// spec = for a specific node. global = for all the nodes
+// hom = no rate variation. gam = with rate variation
+// pos = for one position
+//-------------------------------------------------------------
+// Per-letter values for one specific node at one position, no rate
+// variation: a thin wrapper over vector<doubleRep> indexed by letter.
+class suffStatSpecHomPos{ // this is for a specific node.
+ public:
+ void set(const int letter,const doubleRep& val) {
+ _V[letter]=val;
+ }
+
+ // Returns the value by copy (doubleRep is an extended-range number type).
+ doubleRep get(const int letter) const {
+ doubleRep tmp=_V[letter];
+// cout << "tmp =";
+// tmp.outputn(cout);
+
+ return tmp;
+ }
+
+ void allocatePlace(const int alphabetSize) {
+ _V.resize(alphabetSize);
+ }
+ bool isEmpty (){return (_V.empty());};
+ int size() const {return _V.size();}
+
+ private:
+ vector<doubleRep> _V;//size = letter
+};
+//-------------------------------------------------------------
+/*
+class suffStatSpecGamPos{// this is for a specific node with rates
+ public:
+ void set(const int rateCategor,
+ const int letter,const MDOUBLE val) {
+ _V[rateCategor].set(letter,val);
+ }
+
+ MDOUBLE get(const int rateCategor,
+ const int letter) const {
+ return _V[rateCategor].get(letter);
+ }
+ void allocatePlace(const int numberOfrateCategories,const int alphabetSize) {
+ _V.resize(numberOfrateCategories);
+ for (int i=0; i < numberOfrateCategories; ++i) {
+ _V[i].allocatePlace(alphabetSize);
+ }
+ }
+ bool isEmpty (){return (_V.empty());};
+ private:
+ vector<suffStatSpecHomPos> _V;//rateCategor,letter
+};
+*/
+//-------------------------------------------------------------
+/*
+class suffStatSpecGam{// this is for a specific node with rates
+ public:
+ void set(const int pos,const int rateCategor,
+ const int letter,const MDOUBLE val) {
+ _V[pos].set(rateCategor,letter,val);
+ }
+
+ MDOUBLE get(const int pos,const int rateCategor,
+ const int letter) const {
+ return _V[pos].get(rateCategor,letter);
+ }
+
+ void allocatePlace(const int pos,const int numberOfrateCategories,const int alphabetSize) {
+ _V.resize(pos);
+ for (int i=0;i<pos;++i) _V[i].allocatePlace(numberOfrateCategories,alphabetSize);
+ }
+ bool isEmpty (){return (_V.empty());};
+ suffStatSpecGamPos& operator[] (int index) {return _V[index];}
+ const suffStatSpecGamPos& operator[] (int index) const {return _V[index];}
+ private:
+ vector<suffStatSpecGamPos> _V;//pos,rateCategor,letter
+};
+*/
+//-------------------------------------------------------------
+/*
+class suffStatGlobalGam {
+public:
+ MDOUBLE get(const int nodeId, const int pos,const int rateCategor,
+ const int letter) const {
+ return _V[nodeId].get(pos,rateCategor,letter);
+ }
+ void allocatePlace(const int numOfNodes,
+ const int pos,
+ const int numberOfrateCategories,
+ const int alphabetSize) {
+ _V.resize(numOfNodes);
+ for (int i=0;i<numOfNodes;++i) _V[i].allocatePlace(pos,numberOfrateCategories,alphabetSize);
+ }
+ int size() const {return _V.size();}
+ suffStatSpecGam& operator[] (int index) {return _V[index];}
+ const suffStatSpecGam& operator[] (int index) const {return _V[index];}
+
+private:
+ vector<suffStatSpecGam> _V;
+};
+*/
+//-------------------------------------------------------------
+// Per-node (outer) x per-letter (inner) values for one position, no rate
+// variation.
+class suffStatGlobalHomPos{ // this is for all nodes
+ public:
+ void set(const int nodeId,const int letter,const doubleRep val) {
+ _V[nodeId].set(letter,val);
+ }
+
+ doubleRep get(const int nodeId,const int letter) const {
+ doubleRep tmp(_V[nodeId].get(letter));
+// tmp;
+
+// cout << "tmp2=";
+// tmp.outputn(cout);
+ return tmp;
+ }
+
+ // NOTE(review): int vs _V.size() comparison is signed/unsigned; harmless
+ // here but flagged for a future cleanup pass.
+ void allocatePlace(const int numOnNodes,const int alphabetSize) {
+ _V.resize(numOnNodes);
+ for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(alphabetSize);}
+ }
+ bool isEmpty (){return (_V.empty());};
+ int size() const {return _V.size();}
+ private:
+ vector<suffStatSpecHomPos> _V;//size = number of nodes.
+};
+//-------------------------------------------------------------
+// Per-rate-category (outer) x per-node x per-letter values for one position.
+class suffStatGlobalGamPos{ // this is for all nodes
+ public:
+ void set(const int categor,const int nodeId,const int letter,const doubleRep val) {
+ _V[categor].set(nodeId,letter,val);
+ }
+
+ doubleRep get(const int categor,const int nodeId,const int letter) const {
+ return _V[categor].get(nodeId,letter);
+ }
+
+ void allocatePlace(const int categor,const int numOnNodes,const int alphabetSize) {
+ _V.resize(categor);
+ for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(numOnNodes,alphabetSize);}
+ }
+ bool isEmpty (){return (_V.empty());}
+ int size() const {return _V.size();}
+
+ suffStatGlobalHomPos& operator[] (int index) {return _V[index];}
+ const suffStatGlobalHomPos& operator[] (int index) const {return _V[index];}
+ private:
+ vector<suffStatGlobalHomPos> _V;//size = number of categories
+};
+//-------------------------------------------------------------
+// Top-level container with rate variation: position (outer) x category x
+// node x letter.
+class suffStatGlobalGam{ // this is for all positions (and for all nodes).
+ public:
+ void set(const int pos,const int categor,const int nodeId,const int letter,const doubleRep val) {
+ _V[pos].set(categor,nodeId,letter,val);
+ }
+
+ doubleRep get(const int pos,const int categor,const int nodeId,const int letter) const {
+ return _V[pos].get(categor,nodeId,letter);
+ }
+
+ void allocatePlace(const int pos,const int categor,const int numOnNodes,const int alphabetSize) {
+ _V.resize(pos);
+ for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(categor,numOnNodes,alphabetSize);}
+ }
+ bool isEmpty (){return (_V.empty());}
+ int size() const {return _V.size();}
+ suffStatGlobalGamPos& operator[] (int index) {return _V[index];}
+ const suffStatGlobalGamPos& operator[] (int index) const {return _V[index];}
+ private:
+ vector<suffStatGlobalGamPos> _V;
+};
+
+// from ItayM not to use with the EM algorithm.
+// from ItayM not to use with the EM algorithm.
+// Homogeneous-rate variant: position (outer) x node x letter.
+class suffStatGlobalHom{ // this is for all positions (and for all nodes).
+ public:
+ void set(const int pos, const int nodeId, const int letter,const doubleRep val) {
+ _V[pos].set(nodeId, letter, val);
+ }
+
+ doubleRep get(const int pos, const int nodeId, const int letter) const {
+ return _V[pos].get(nodeId, letter);
+ }
+
+ void allocatePlace(const int pos, const int numOnNodes, const int alphabetSize) {
+ _V.resize(pos);
+ for (int i=0;i<_V.size();++i) {_V[i].allocatePlace(numOnNodes, alphabetSize);}
+ }
+ bool isEmpty (){return (_V.empty());};
+ suffStatGlobalHomPos& operator[] (int index) {return _V[index];}
+ const suffStatGlobalHomPos& operator[] (int index) const {return _V[index];}
+ private:
+ vector<suffStatGlobalHomPos> _V;
+};
+
+
+#endif
+
diff --git a/libs/phylogeny/suffStatGammaMixture.cpp b/libs/phylogeny/suffStatGammaMixture.cpp
new file mode 100644
index 0000000..d5fe4a7
--- /dev/null
+++ b/libs/phylogeny/suffStatGammaMixture.cpp
@@ -0,0 +1,236 @@
+#include "suffStatGammaMixture.h"
+#include "mixtureDistribution.h"
+#include "computePijComponent.h"
+#include "likelihoodComputation.h"
+#include "gammaUtilities.h"
+#include "uniDistribution.h"
+
+
+#include <cmath>
+#include <fstream>
+using namespace likelihoodComputation;
+
+
+// Stores non-owning pointers to the process, alignment, and tree; all three
+// must outlive this object.
+suffStatGammaMixture::suffStatGammaMixture(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree)
+{
+ _pSp = &cur_sp;
+ _pSc = &sc;
+ _pTree = &inTree;
+}
+
+// Nothing to release: the class holds only non-owning pointers.
+suffStatGammaMixture::~suffStatGammaMixture()
+{
+}
+
+
+// Resets the three sufficient-statistic accumulators (M_k, A_k, B_k) to
+// zero, one slot per mixture component.
+void suffStatGammaMixture::allocatePlaceForSuffStat() {
+ mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ int componentNum = pMixture->getComponentsNum();
+ _MkVec.clear();
+ _MkVec.resize(componentNum, 0);
+ _AkVec.clear();
+ _AkVec.resize(componentNum, 0);
+ _BkVec.clear();
+ _BkVec.resize(componentNum, 0);
+}
+
+// For each mixture component, builds a single-component stochastic process
+// and fills its Pij table over the tree. cpgVec must already be sized to the
+// number of components; spVec receives the per-component processes.
+void suffStatGammaMixture::computePijForEachComponent(vector<computePijGam>& cpgVec,
+ vector<stochasticProcess>& spVec) {
+ mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ int componentNum = pMixture->getComponentsNum();
+ for (int comp = 0; comp < componentNum; ++comp) {
+ //create a local sp so to compute likelihoods of this component only
+ stochasticProcess compSp(pMixture->getComponent(comp), _pSp->getPijAccelerator());
+ cpgVec[comp].fillPij(*_pTree, compSp);
+ spVec.push_back(compSp);
+ }
+}
+
+// Computes the three EM sufficient statistics M_k, A_k, B_k (see the header
+// for their definitions), summing per-position contributions over the whole
+// alignment. Cleanup: a ~45-line block of dead commented-out code, two
+// unused locals (Exp_Rk, Exp_LogRk), and a stray double semicolon were
+// removed; the computation itself is unchanged.
+void suffStatGammaMixture::computeStatistics()
+{
+ allocatePlaceForSuffStat();
+ mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ int componentNum = pMixture->getComponentsNum();
+
+ //compute Pij for each component
+ vector<computePijGam> cpgVec(componentNum);
+ vector<stochasticProcess> spVec;
+ computePijForEachComponent(cpgVec,spVec);
+
+ //compute statistics: M_k, A_k, B_k
+ //Here we sum over all positions.
+ //go over all positions [pos] and compute for each component [k]: M_k(pos), E[R]_k(pos), E[logR]_k(pos)
+ //Then compute A_k and B_k for that position.
+ for (int pos = 0; pos < _pSc->seqLen(); ++pos)
+ {
+ MDOUBLE sumAllComponents = 0.0;
+ Vdouble MkPosVec(componentNum, 0.0); //the contribution of position pos to the M_K statistic
+ Vdouble Exp_RkVec(componentNum, 0.0);
+ Vdouble Exp_LogRkVec(componentNum, 0.0);
+ int comp;
+ for (comp = 0; comp < componentNum; ++comp)
+ {
+ // here we compute P(H[i]=k, data| cur_mixtureDistribution)
+ //P(H[i]=k, data| teta) = P(H[i]=k)* (sum_over_all_categories{P(data|r)P(r))
+ const generalGammaDistribution* pDist = pMixture->getComponent(comp);
+ MDOUBLE sum = 0.0;
+ for (int cat=0; cat < pDist->categories(); ++cat)
+ {
+ MDOUBLE LofP = convert(likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, cpgVec[comp][cat], spVec[comp]));
+ MDOUBLE Pr = pDist->ratesProb(cat) * LofP;
+ sum += Pr;
+ Exp_RkVec[comp] += Pr * pDist->rates(cat);
+ Exp_LogRkVec[comp] += Pr * log(pDist->rates(cat));
+ }
+ MkPosVec[comp] = sum;
+ sumAllComponents += MkPosVec[comp] * pMixture->getComponentProb(comp);
+ }
+
+ // Normalize by the total likelihood of the position and accumulate.
+ for (comp = 0; comp < componentNum; ++comp)
+ {
+ MDOUBLE factor = pMixture->getComponentProb(comp)/ sumAllComponents;
+ _MkVec[comp] += factor* MkPosVec[comp] ;
+ _AkVec[comp] += factor * Exp_RkVec[comp];
+ _BkVec[comp] += factor * Exp_LogRkVec[comp];
+ }
+ }// end of loop over positions
+ spVec.clear();
+ cpgVec.clear();
+}
+
+
+#include "uniformDistribution.h"
+// Writes a table of rate r vs. posterior mass and the E[r]/E[log r]
+// contributions, discretizing [0, maxR] into numCat uniform intervals.
+// Only supports a single-component mixture (errors out otherwise).
+void suffStatGammaMixture::plotStatistics(ofstream& outFile)
+{
+ mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ if (pMixture->getComponentsNum() != 1)
+ errorMsg::reportError("Sorry, I plot only 1 component");
+
+ outFile <<"R"<<"\t"<<"Postr"<<"\t"<<"Er"<<"\t"<<"Elog_r"<<endl;
+ const generalGammaDistribution* pDist = pMixture->getComponent(0);
+ int numCat = 200, maxR = 10;
+ uniformDistribution uniDist(numCat, 0, maxR);
+ /////////calc the prior of each interval
+ // Prior of interval i = F(border[i+1]) - F(border[i]), computed by
+ // carrying the previous upper cumulative probability forward.
+ Vdouble priorProbs(uniDist.categories());
+ MDOUBLE upperP, lowerP = 0;
+ for (int i = 0; i<uniDist.categories();++i)
+ {
+ upperP = pDist->getCumulativeProb(uniDist.getBorder(i+1));
+ priorProbs[i] = upperP - lowerP;
+ lowerP = upperP;
+ }
+
+ // Temporary uniform-rate process; uniSp clones pUni, and pUni itself is
+ // deleted below.
+ distribution * pUni = new uniDistribution;
+
+ stochasticProcess uniSp(pUni, _pSp->getPijAccelerator());
+ //loop over all r
+ for (int ri=0; ri < uniDist.categories(); ++ri)
+ {
+ MDOUBLE Exp_R = 0.0;
+ MDOUBLE Exp_LogR = 0.0;
+ MDOUBLE PosteriorR = 0.0;
+ MDOUBLE rate = uniDist.rates(ri);
+ if (rate == 0.0)
+ rate = 0.000001; // avoid log(0) below
+
+ //Here we sum over all positions.
+ //go over all positions [pos] and compute: PosrteriorR(=P(D|r)*P(r)), E[R]_k(pos), E[logR]_k(pos)
+ for (int pos = 0; pos < _pSc->seqLen(); ++pos)
+ {
+ MDOUBLE PrPos = priorProbs[ri] * convert(likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, uniSp, rate));
+ PosteriorR += PrPos;
+ Exp_R += PrPos * rate;
+ Exp_LogR += PrPos * log(rate);
+
+ }
+
+ outFile <<rate<<"\t"<<PosteriorR<<"\t"<<Exp_R<<"\t"<<Exp_LogR<<endl;
+ }
+
+ delete pUni;
+}
+
+
+// NOTE(review): unimplemented stub — always returns 0. Kept for interface
+// compatibility; see computeQ() for the real computation.
+MDOUBLE suffStatGammaMixture::computeQ2()
+{
+ MDOUBLE res=0;
+
+ return res;
+}
+
+
+
+// Computes the EM Q function (expected complete-data log-likelihood) of the
+// current mixture parameters from the sufficient statistics gathered by
+// computeStatistics():
+//   Q = sum_k [ M_k*(log P_k + alpha_k*log(beta_k) - lnGamma(alpha_k))
+//               - A_k*beta_k + B_k*(alpha_k - 1) ]
+// Cleanup: removed dead locals (first..fifth, res2) and the discarded call
+// to the computeQ2() stub; the returned value is unchanged.
+MDOUBLE suffStatGammaMixture::computeQ()
+{
+ mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
+ MDOUBLE res = 0.0;
+ int compNum = pMixture->getComponentsNum();
+ for (int comp = 0;comp < compNum ; ++comp)
+ {
+ MDOUBLE P_k = pMixture->getComponentProb(comp);
+ MDOUBLE alpha_k = pMixture->getAlpha(comp);
+ MDOUBLE beta_k = pMixture->getBeta(comp);
+ res += _MkVec[comp] * (log(P_k) + alpha_k*log(beta_k) - gammln(alpha_k))
+ - (_AkVec[comp]*beta_k)
+ + _BkVec[comp]*(alpha_k-1);
+ }
+ return res;
+}
diff --git a/libs/phylogeny/suffStatGammaMixture.h b/libs/phylogeny/suffStatGammaMixture.h
new file mode 100644
index 0000000..7e8d7ae
--- /dev/null
+++ b/libs/phylogeny/suffStatGammaMixture.h
@@ -0,0 +1,58 @@
+#ifndef ___SUFF_STAT_GAMMA_MIXTURE
+#define ___SUFF_STAT_GAMMA_MIXTURE
+/************************************************************
+The suffStatGammaMixture class is used to obtain the sufficient statistics
+that are neccessary for the EM algorithm to compute the mixture distribution parameters.
+The following notations are used below:
+P(h[i]=k): the probability that position i belongs to the Kth Gamma component.
+teta_t: the current mixture distribution parameters (the alpha, beta, and the probability of each component).
+
+There are 3 sufficient statistics:
+M_k: the expected number of positions belong to the Kth component.
+ sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution)}
+A_k: sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution) * E[r|h[i] = k, data, cur_mixtureDistribution]}
+B_k: sigma(i = 1 to seqLen){P(h[i] = k|data, cur_mixtureDistribution) * E[log(r)|h[i] = k, data, cur_mixtureDistribution]}
+************************************************************/
+#include "definitions.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "tree.h"
+#include "mixtureDistribution.h"
+#include "computePijComponent.h"
+
+// Accumulates the sufficient statistics (M_k, A_k, B_k for each mixture
+// component k -- see the header comment above) needed by the EM updates of
+// a gamma-mixture rate distribution. Holds non-owning pointers to the
+// stochastic process, alignment and tree being summarized.
+class suffStatGammaMixture{
+
+public:
+
+	// cur_sp is expected to carry a mixtureDistribution (computeQ casts to
+	// it); sc and inTree are the data the statistics are computed over.
+	explicit suffStatGammaMixture(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree);
+	virtual ~suffStatGammaMixture();
+
+	// Fills _MkVec/_AkVec/_BkVec from the current data and parameters.
+	void computeStatistics();
+
+	// Writes per-rate diagnostics to outF.
+	void plotStatistics(ofstream & outF);
+	MDOUBLE getMk(int comp) const {return _MkVec[comp];}
+	MDOUBLE getAk(int comp) const {return _AkVec[comp];}
+	MDOUBLE getBk(int comp) const {return _BkVec[comp];}
+	// Expected complete-data log-likelihood given the statistics.
+	MDOUBLE computeQ();
+	MDOUBLE computeQ2();	// stub; returns 0.0 (see .cpp)
+
+
+private:
+	MDOUBLE computeStatisticsForComponent(int pos, int componentNum, const computePijGam& cpg);
+	void allocatePlaceForSuffStat();
+	void computePijForEachComponent(vector<computePijGam>& cpgVec,vector<stochasticProcess>& spVec);
+
+private:
+	Vdouble _MkVec;	// M_k: expected number of positions in component k
+	Vdouble _AkVec;	// A_k: posterior-weighted sum of E[r]
+	Vdouble _BkVec;	// B_k: posterior-weighted sum of E[log r]
+
+	const stochasticProcess* _pSp;	// not owned
+	const sequenceContainer* _pSc;	// not owned
+	const tree* _pTree;	// not owned
+};
+
+
+
+#endif
+
diff --git a/libs/phylogeny/talRandom.cpp b/libs/phylogeny/talRandom.cpp
new file mode 100644
index 0000000..f91ccbb
--- /dev/null
+++ b/libs/phylogeny/talRandom.cpp
@@ -0,0 +1,73 @@
+// $Id: talRandom.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "talRandom.h"
+
+RandintTal talRandom::r = static_cast<long>(time(0)) ;
+
+// Rejection sampler for a Gamma(dblAlpha, 1) deviate, valid for
+// dblAlpha > 1. Loops until a candidate is accepted; the trailing
+// assert/return after the infinite loop are unreachable.
+MDOUBLE talRandom::DblGammaGreaterThanOne(MDOUBLE dblAlpha) {
+	// Code adopted from David Heckerman
+	//-----------------------------------------------------------
+	// DblGammaGreaterThanOne(dblAlpha)
+	//
+	// routine to generate a gamma random variable with unit scale and
+	// alpha > 1
+	// reference: Ripley, Stochastic Simulation, p.90
+	// Chang and Feast, Appl.Stat. (28) p.290
+	//-----------------------------------------------------------
+	MDOUBLE rgdbl[6];	// precomputed constants; slot [0] is intentionally unused
+
+	rgdbl[1] = dblAlpha - 1.0;
+	rgdbl[2] = (dblAlpha - (1.0 / (6.0 * dblAlpha))) / rgdbl[1];
+	rgdbl[3] = 2.0 / rgdbl[1];
+	rgdbl[4] = rgdbl[3] + 2.0;
+	rgdbl[5] = 1.0 / sqrt(dblAlpha);
+
+	for (;;)
+	{
+		MDOUBLE dblRand1;
+		MDOUBLE dblRand2;
+		// draw a uniform pair; for large alpha, squeeze dblRand1 toward dblRand2
+		do
+		{
+			dblRand1 = giveRandomNumberBetweenZeroAndEntry(1.0);
+			dblRand2 = giveRandomNumberBetweenZeroAndEntry(1.0);
+
+			if (dblAlpha > 2.5)
+				dblRand1 = dblRand2 + rgdbl[5] * (1.0 - 1.86 * dblRand1);
+
+		} while (!(0.0 < dblRand1 && dblRand1 < 1.0));
+
+		MDOUBLE dblTemp = rgdbl[2] * dblRand2 / dblRand1;
+
+		// fast acceptance test, then the exact (log-based) test
+		if (rgdbl[3] * dblRand1 + dblTemp + 1.0 / dblTemp <= rgdbl[4] ||
+			rgdbl[3] * log(dblRand1) + dblTemp - log(dblTemp) < 1.0)
+		{
+			return dblTemp * rgdbl[1];
+		}
+	}
+	assert(false);	// unreachable
+	return 0.0;
+}
+
+// Rejection sampler for a Gamma(dblAlpha, 1) deviate, valid for
+// dblAlpha < 1. Loops until a candidate is accepted; the trailing
+// assert/return after the infinite loop are unreachable.
+MDOUBLE talRandom::DblGammaLessThanOne(MDOUBLE dblAlpha){
+//routine to generate a gamma random variable with
+//unit scale and alpha < 1
+//reference: Ripley, Stochastic Simulation, p.88
+	MDOUBLE dblTemp;
+	const MDOUBLE dblexp = exp(1.0);
+	for (;;){
+		MDOUBLE dblRand0 = giveRandomNumberBetweenZeroAndEntry(1.0);
+		MDOUBLE dblRand1 = giveRandomNumberBetweenZeroAndEntry(1.0);
+		// split the proposal between a power-law branch and a log branch
+		if (dblRand0 <= (dblexp / (dblAlpha + dblexp))){
+			dblTemp = pow(((dblAlpha + dblexp) * dblRand0) /
+				dblexp, 1.0 / dblAlpha);
+			if (dblRand1 <= exp(-1.0 * dblTemp)) return dblTemp;
+		} else {
+			dblTemp = -1.0 * log((dblAlpha + dblexp) * (1.0 - dblRand0) /
+				(dblAlpha * dblexp));
+			if (dblRand1 <= pow(dblTemp,dblAlpha - 1.0)) return dblTemp;
+		}
+	}
+	assert(false);	// unreachable
+	return 0.0;
+} // DblGammaLessThanOne
+
diff --git a/libs/phylogeny/talRandom.h b/libs/phylogeny/talRandom.h
new file mode 100644
index 0000000..6d26ad4
--- /dev/null
+++ b/libs/phylogeny/talRandom.h
@@ -0,0 +1,98 @@
+// $Id: talRandom.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___TAL_RANDOM
+#define ___TAL_RANDOM
+
+#include "definitions.h"
+#include "logFile.h"
+#include <cmath>
+#include <cassert>
+#include <ctime>
+
+// Minimal linear congruential generator (the classic C rand() constants
+// a=1103515245, c=12345 modulo the unsigned long width) used as the engine
+// behind talRandom. Not cryptographic and not thread-safe.
+class RandintTal {
+	unsigned long randx;	// current LCG state
+public:
+	RandintTal(long s=0) {randx=s;}
+	void seedTal(long s) {randx=s;}
+	// Clears the sign bit so the result is non-negative.
+	int absTal(int x) {return x&0x7fffffff;}
+	static MDOUBLE maxTal() {return 2147483648.0;}	// 2^31, divisor used by fdrawTal
+	// Advances the state and returns it as int.
+	// NOTE(review): converting the unsigned state to the int return value is
+	// implementation-defined for values > INT_MAX (well-behaved on the usual
+	// two's-complement targets) -- confirm if porting.
+	int drawTal() {return randx = randx*1103515245+12345;}
+	MDOUBLE fdrawTal() {return absTal(drawTal())/maxTal();} //random number between zero and 1
+};
+
+// Static collection of random-sampling utilities (uniform, gaussian, gamma,
+// exponential) built on one shared RandintTal generator `r`. All members
+// are static; seed via setSeed(). Not thread-safe: every draw mutates the
+// shared generator state.
+class talRandom {
+public:
+	// note the number you get is between 0 and entry not including entry!
+	static MDOUBLE giveRandomNumberBetweenZeroAndEntry(MDOUBLE entry) {
+		MDOUBLE tm=r.fdrawTal();
+		return (tm * entry);
+	}
+
+	// Fair coin: true with probability ~0.5.
+	static bool flipCoin() {
+		return ((talRandom::giveRandomNumberBetweenZeroAndEntry(1.0)-0.5)>0);
+	}
+
+	// note the number you get is between 0 and entry not including entry!
+	static int giveIntRandomNumberBetweenZeroAndEntry(int entry) {
+		return (int)(giveRandomNumberBetweenZeroAndEntry(entry));
+	}
+
+	// Re-seeds the shared generator (affects every subsequent draw).
+	static void setSeed(const unsigned long seed) {
+		r.seedTal(seed);
+	}
+
+	// Approximate N(mean, variance) sample via the central limit theorem:
+	// sums N=100 uniforms and rescales.
+	// NOTE(review): X is static but re-initialized on every call, so the
+	// static storage only makes the function non-reentrant; a plain local
+	// would behave identically -- confirm before changing.
+	static const MDOUBLE rand_gaussian(const MDOUBLE mean, const MDOUBLE variance) {
+		const int N=100;
+		static MDOUBLE X;
+		X=0.0-N/2; /* set mean to 0 */
+		for (int ri = 0;ri< N;ri++){
+			// X += 1.0*rand()/RAND_MAX;
+			X += giveRandomNumberBetweenZeroAndEntry(1.0);
+		}
+
+		/* for uniform randoms in [0,1], mu = 0.5 and var = 1/12 */
+		/* adjust X so mu = 0 and var = 1 */
+
+		// X = X * sqrt(12 / N); /* adjust variance to 1 */
+		// cout <<X * sqrt(variance*12.0/N) + mean<<" ";
+		MDOUBLE g = X * sqrt(variance*12.0/N) + mean;
+		return (g);
+	}
+
+	// Gamma(Alpha, Beta) sample: a unit-scale gamma divided by the rate Beta.
+	static MDOUBLE SampleGamma(MDOUBLE Alpha, MDOUBLE Beta) {
+		MDOUBLE x= SampleGammaNorm(Alpha)/Beta;
+		//LOG(700, << "SampleGamma(" << Alpha << " " << Beta << ") = " << x << "\n");
+		return x;
+	}
+	// Gamma(Alpha, Alpha) sample -- mean 1, as used for rate variation.
+	static MDOUBLE SampleGamma(MDOUBLE Alpha) {
+		MDOUBLE x= SampleGamma(Alpha, Alpha);
+		//LOG(700, << "SampleGamma(" << Alpha << ") = " << x << "\n");
+		return x;
+	}
+	// Exponential sample with the given mean (inverse-CDF method).
+	static MDOUBLE rand_exp(const MDOUBLE mean) {
+		return - mean * log(giveRandomNumberBetweenZeroAndEntry(1.0));//pg 64: Ross, Simulation 2nd.
+	}
+
+	// Uniform sample in [lower_point, upper_point).
+	static MDOUBLE giveRandomNumberBetweenTwoPoints(const MDOUBLE lower_point, const MDOUBLE upper_point) {
+		MDOUBLE u = giveRandomNumberBetweenZeroAndEntry(upper_point - lower_point);
+		return (u + lower_point);
+	}
+
+
+private:
+	static RandintTal r;	// shared engine, defined in talRandom.cpp
+
+	// Routine to generate a gamma random variable with unit scale (beta = 1)
+	static MDOUBLE SampleGammaNorm(MDOUBLE dblAlpha) {
+		assert(dblAlpha > 0.0);
+		if( dblAlpha < 1.0 ) return DblGammaLessThanOne(dblAlpha);
+		else if( dblAlpha > 1.0 ) return DblGammaGreaterThanOne(dblAlpha);
+		// alpha == 1 is the exponential distribution
+		return -log(giveRandomNumberBetweenZeroAndEntry(1.0));
+	}
+	static MDOUBLE DblGammaGreaterThanOne(MDOUBLE dblAlpha);
+	static MDOUBLE DblGammaLessThanOne(MDOUBLE dblAlpha);
+
+
+};
+#endif
+
diff --git a/libs/phylogeny/tamura92.cpp b/libs/phylogeny/tamura92.cpp
new file mode 100644
index 0000000..cd21c09
--- /dev/null
+++ b/libs/phylogeny/tamura92.cpp
@@ -0,0 +1,167 @@
+// $Id: tamura92.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "tamura92.h"
+#include "errorMsg.h"
+
+// This implementation was copied from the Bio++ Phyl library (by Julien Dutheil) - file T92.cpp
+
+// Builds a Tamura (1992) model with GC content `theta` and
+// transition/transversion ratio `TrTv`; frequency setup is delegated to
+// changeTheta so construction and later updates share one code path.
+tamura92::tamura92(const MDOUBLE theta,
+	const MDOUBLE TrTv)
+	: _theta(theta), _TrTv(TrTv) {
+
+	_freq.resize(4);
+	changeTheta(theta);
+}
+
+// Sets the equilibrium GC content and refreshes the stationary
+// frequencies: A and T/U each get (1-theta)/2, C and G each get theta/2.
+void tamura92::changeTheta(const MDOUBLE theta) {
+	_theta = theta;
+	_freq[0] = _freq[3] = (1.0 - theta) / 2.0;
+	_freq[1] = _freq[2] = theta / 2.0;
+}
+
+// Transition probability P(i->j | t) for the T92 model.
+// States: 0=A, 1=C, 2=G, 3=T/U. k folds TrTv into the transition rate and
+// r rescales time so t is in expected substitutions per site.
+// Formulas follow Bio++'s T92.cpp (see file header).
+// NOTE(review): the outer switch has no break statements -- for a valid j
+// every inner case returns, and an invalid (i,j) pair falls through to the
+// final `return -1`, which callers are expected never to see.
+const MDOUBLE tamura92::Pij_t(const int i, const int j, const MDOUBLE t) const {
+	double k = (_TrTv + 1.0) / 2.0;
+	double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv);
+	double l = r * t;
+	double exp1 = exp(-l);
+	double exp2 = exp(-k * l);
+
+	switch(i) {
+	//A
+	case 0 : {
+		switch(j) {
+		case 0 : return _freq[0] * (1.0 + exp1) + _theta * exp2; //A
+		case 1 : return _freq[1] * (1.0 - exp1); //C
+		case 2 : return _freq[2] * (1.0 + exp1) - _theta * exp2; //G
+		case 3 : return _freq[3] * (1.0 - exp1); //T, U
+		}
+	}
+	//C
+	case 1 : {
+		switch(j) {
+		case 0 : return _freq[0] * (1.0 - exp1); //A
+		case 1 : return _freq[1] * (1.0 + exp1) + (1. - _theta) * exp2; //C
+		case 2 : return _freq[2] * (1.0 - exp1); //G
+		case 3 : return _freq[3] * (1.0 + exp1) - (1. - _theta) * exp2; //T, U
+		}
+	}
+	//G
+	case 2 : {
+		switch(j) {
+		case 0 : return _freq[0] * (1.0 + exp1) - (1. - _theta) * exp2; //A
+		case 1 : return _freq[1] * (1.0 - exp1); //C
+		case 2 : return _freq[2] * (1.0 + exp1) + (1. - _theta) * exp2; //G
+		case 3 : return _freq[3] * (1.0 - exp1); //T, U
+		}
+	}
+	//T, U
+	case 3 : {
+		switch(j) {
+		case 0 : return _freq[0] * (1.0 - exp1); //A
+		case 1 : return _freq[1] * (1.0 + exp1) - _theta * exp2; //C
+		case 2 : return _freq[2] * (1.0 - exp1); //G
+		case 3 : return _freq[3] * (1.0 + exp1) + _theta * exp2; //T, U
+		}
+	}
+	}
+	return -1;	// unreachable for valid (i,j) in [0,3]
+}
+
+// First derivative of Pij_t with respect to t (same parameterization and
+// case layout as Pij_t above; each term picks up a factor r, and the exp2
+// terms a further factor -k, from the chain rule).
+// NOTE(review): as in Pij_t, the switch relies on every valid case
+// returning; invalid (i,j) falls through to `return -1`.
+const MDOUBLE tamura92::dPij_dt(const int i,const int j, const MDOUBLE t) const {
+	double k = (_TrTv + 1.0) / 2.0;
+	double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv);
+	double l = r * t;
+	double exp1 = exp(-l);
+	double exp2 = exp(-k * l);
+
+	switch(i) {
+	//A
+	case 0 : {
+		switch(j) {
+		case 0 : return r * (_freq[0] * - exp1 + _theta * -k * exp2); //A
+		case 1 : return r * (_freq[1] * exp1); //C
+		case 2 : return r * (_freq[2] * - exp1 - _theta * -k * exp2); //G
+		case 3 : return r * (_freq[3] * exp1); //T, U
+		}
+	}
+	//C
+	case 1 : {
+		switch(j) {
+		case 0 : return r * (_freq[0] * exp1); //A
+		case 1 : return r * (_freq[1] * - exp1 + (1.0 - _theta) * -k * exp2); //C
+		case 2 : return r * (_freq[2] * exp1); //G
+		case 3 : return r * (_freq[3] * - exp1 - (1.0 - _theta) * -k * exp2); //T, U
+		}
+	}
+	//G
+	case 2 : {
+		switch(j) {
+		case 0 : return r * (_freq[0] * - exp1 - (1.0 - _theta) * -k * exp2); //A
+		case 1 : return r * (_freq[1] * exp1); //C
+		case 2 : return r * (_freq[2] * - exp1 + (1.0 - _theta) * -k * exp2); //G
+		case 3 : return r * (_freq[3] * exp1); //T, U
+		}
+	}
+	//T, U
+	case 3 : {
+		switch(j) {
+		case 0 : return r * (_freq[0] * exp1); //A
+		case 1 : return r * (_freq[1] * - exp1 - _theta * -k * exp2); //C
+		case 2 : return r * (_freq[2] * exp1); //G
+		case 3 : return r * (_freq[3] * - exp1 + _theta * -k * exp2); //T, U
+		}
+	}
+	}
+	return -1;	// unreachable for valid (i,j) in [0,3]
+}
+
+// Second derivative of Pij_t with respect to t (each exp1 term picks up
+// r^2, each exp2 term r^2*k^2, from differentiating twice).
+// NOTE(review): as in Pij_t, the switch relies on every valid case
+// returning; invalid (i,j) falls through to `return -1`.
+const MDOUBLE tamura92::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
+	double k = (_TrTv + 1.0) / 2.;
+	double k2 = k * k;
+	double r = 2.0 / (1.0 + 2.0 * _theta * _TrTv - 2.0 * _theta * _theta * _TrTv);
+	double l = r * t;
+	double r2 = r * r;
+	double exp1 = exp(-l);
+	double exp2 = exp(-k * l);
+
+	switch(i) {
+	//A
+	case 0 : {
+		switch(j) {
+		case 0 : return r2 * (_freq[0] * exp1 + _theta * k2 * exp2); //A
+		case 1 : return r2 * (_freq[1] * - exp1); //C
+		case 2 : return r2 * (_freq[2] * exp1 - _theta * k2 * exp2); //G
+		case 3 : return r2 * (_freq[3] * - exp1); //T, U
+		}
+	}
+	//C
+	case 1 : {
+		switch(j) {
+		case 0 : return r2 * (_freq[0] * - exp1); //A
+		case 1 : return r2 * (_freq[1] * exp1 + (1.0 - _theta) * k2 * exp2); //C
+		case 2 : return r2 * (_freq[2] * - exp1); //G
+		case 3 : return r2 * (_freq[3] * exp1 - (1.0 - _theta) * k2 * exp2); //T, U
+		}
+	}
+	//G
+	case 2 : {
+		switch(j) {
+		case 0 : return r2 * (_freq[0] * exp1 - (1.0 - _theta) * k2 * exp2); //A
+		case 1 : return r2 * (_freq[1] * - exp1); //C
+		case 2 : return r2 * (_freq[2] * exp1 + (1.0 - _theta) * k2 * exp2); //G
+		case 3 : return r2 * (_freq[3] * - exp1); //T, U
+		}
+	}
+	//T, U
+	case 3 : {
+		switch(j) {
+		case 0 : return r2 * (_freq[0] * - exp1); //A
+		case 1 : return r2 * (_freq[1] * exp1 - _theta * k2 * exp2); //C
+		case 2 : return r2 * (_freq[2] * - exp1); //G
+		case 3 : return r2 * (_freq[3] * exp1 + _theta * k2 * exp2); //T, U
+		}
+	}
+	}
+	return -1;	// unreachable for valid (i,j) in [0,3]
+}
+
diff --git a/libs/phylogeny/tamura92.h b/libs/phylogeny/tamura92.h
new file mode 100644
index 0000000..549d030
--- /dev/null
+++ b/libs/phylogeny/tamura92.h
@@ -0,0 +1,36 @@
+// $Id: tamura92.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___TAMURA92
+#define ___TAMURA92
+
+#include "replacementModel.h"
+#include <cmath>
+
+// Tamura (1992) nucleotide substitution model: HKY-like, with the
+// stationary frequencies constrained by a single GC-content parameter
+// theta (A=T=(1-theta)/2, C=G=theta/2) and a transition/transversion
+// ratio TrTv.
+class tamura92 : public replacementModel {
+public:
+	explicit tamura92(const MDOUBLE theta,
+		const MDOUBLE TrTv);
+
+	virtual replacementModel* clone() const { return new tamura92 (*this); }
+
+	const int alphabetSize() const {return 4;}
+	// Stores the new ratio only; unlike changeTheta there is no derived
+	// state (frequencies) to refresh.
+	inline void changeTrTv(const MDOUBLE TrTv) { _TrTv = TrTv; }
+	void changeTheta(const MDOUBLE theta);
+	MDOUBLE getTrTv() const {return _TrTv;}
+	MDOUBLE getTheta() const {return _theta;}
+
+	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const;
+	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const;
+	const MDOUBLE freq(const int i) const {return _freq[i];};
+	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const;
+
+	// NOTE(review): declared here but no definition appears in the
+	// accompanying tamura92.cpp of this patch -- confirm it is defined
+	// elsewhere or unused, otherwise any call will fail to link.
+	const MDOUBLE dPij_tdBeta(const int i, const int j, const MDOUBLE t) const;
+
+private:
+	Vdouble _freq;	// stationary frequencies, derived from _theta
+	MDOUBLE _theta;	// equilibrium GC content in [0,1]
+	MDOUBLE _TrTv;	// transition/transversion ratio
+};
+
+#endif
+
diff --git a/libs/phylogeny/threeStateAlphabet.cpp b/libs/phylogeny/threeStateAlphabet.cpp
new file mode 100644
index 0000000..52c63e1
--- /dev/null
+++ b/libs/phylogeny/threeStateAlphabet.cpp
@@ -0,0 +1,58 @@
+#include "threeStateAlphabet.h"
+
+threeStateAlphabet::threeStateAlphabet() {}
+
+// Maps a single character ('0', '1' or '2') to its numeric state code.
+// Any other character aborts via errorMsg::reportError.
+int threeStateAlphabet::fromChar(const char s) const{
+	switch (s) {
+	case '0': return 0; break;
+	case '1': return 1; break;
+	case '2': return 2; break;
+	default:
+		vector<string> err;
+		err.push_back(" The threeStateAlphabet sequences contained the character: ");
+		err[0]+=s;	// append the offending character to the first message line
+		err.push_back(" threeStateAlphabet was not one of the following: ");
+		err.push_back(" 0, 1, 2");
+		errorMsg::reportError(err);
+	}// end of switch
+	return -99; // never suppose to be here.
+}// end of function
+
+// Translates an entire character string into the corresponding vector of
+// state codes, one entry per input character (see fromChar for the
+// mapping; an illegal character aborts through errorMsg, as there).
+vector<int> threeStateAlphabet::fromString(const string &str) const {
+	vector<int> codes;
+	codes.reserve(str.size());
+	for (string::size_type pos = 0; pos < str.size(); ++pos)
+		codes.push_back(fromChar(str[pos]));
+	return codes;
+}
+
+// Inverse of fromChar: renders state code 0/1/2 as the one-character
+// string "0"/"1"/"2". Any other code aborts via errorMsg::reportError.
+string threeStateAlphabet::fromInt(const int in_id) const{
+	char res = 0;
+	switch (in_id) {
+	case 0 : res = '0' ; break;
+	case 1 : res = '1' ; break;
+	case 2 : res = '2' ; break;
+	default:
+		vector<string> err;
+		err.push_back("unable to print threeState_id. threeState_id was not one of the following: ");
+		err.push_back("0,1,2");
+		errorMsg::reportError(err);
+	}//end of switch
+	string vRes;
+	vRes.append(1,res);	// build the one-character result string
+	return vRes;
+}// end of function
+
+// There are no ambiguity relations for this alphabet: a state matches
+// itself (1) and nothing else (0).
+int threeStateAlphabet::relations(const int charInSeq, const int charToCheck) const{
+	return (charInSeq == charToCheck) ? 1 : 0;
+}
+
+// String-position overload: reads the single character at `pos` and
+// delegates to the character overload above.
+int threeStateAlphabet::fromChar(const string& str, const int pos) const{
+	const char c = str[pos];
+	return fromChar(c);
+}
+
+
+
+
diff --git a/libs/phylogeny/threeStateAlphabet.h b/libs/phylogeny/threeStateAlphabet.h
new file mode 100644
index 0000000..bea165c
--- /dev/null
+++ b/libs/phylogeny/threeStateAlphabet.h
@@ -0,0 +1,26 @@
+#ifndef ___3STATE_ALPH
+#define ___3STATE_ALPH
+
+#include "alphabet.h"
+#include "errorMsg.h"
+
+
+// Alphabet with three single-character states written '0', '1', '2'
+// (two plain states plus an intermediate -- see threeStateModel).
+class threeStateAlphabet : public alphabet {
+public:
+	explicit threeStateAlphabet();
+	virtual ~threeStateAlphabet() {}
+	virtual alphabet* clone() const { return new threeStateAlphabet(*this); }
+	int unknown() const {return -2;}
+	// Gaps are not supported for this alphabet; calling gap() aborts.
+	int gap() const {errorMsg::reportError("The method indel::gap() is used"); return -1;} // What is it for ? I don't need this !!!
+	int size() const {return 3;}
+	int stringSize() const {return 1;} // one letter code.
+	// Identity relation only: a state matches itself and nothing else.
+	int relations(const int charInSeq, const int charToCheck) const;
+	int fromChar(const string& str, const int pos) const;
+	int fromChar(const char s) const;
+	string fromInt(const int in_id) const;
+	vector<int> fromString(const string& str) const;
+	// A "specific" id is a concrete state code in [0, size).
+	bool isSpecific(const int id) const {return (id>=0 && id < size());}
+
+};
+
+#endif
diff --git a/libs/phylogeny/threeStateModel.cpp b/libs/phylogeny/threeStateModel.cpp
new file mode 100644
index 0000000..041bacc
--- /dev/null
+++ b/libs/phylogeny/threeStateModel.cpp
@@ -0,0 +1,254 @@
+#include "threeStateModel.h"
+#include "matrixUtils.h"
+#include "someUtil.h"
+
+///////////////////////////////////////////////////////////
+//non reversible model
+///////////////////////////////////////////////////////////
+
+const MDOUBLE EPSILON_3STATEMODEL = 1e-04;
+
+
+// Builds the model from its four rates (m1..m4 = gain to 1, gain to 0,
+// loss of 1, loss of 0) and the initial root frequencies, sizes the rate
+// matrix and the P(t) cache, and computes Q.
+// NOTE(review): _lastTcalculated is never initialized here (and the
+// visible Pij_t never assigns it), so the first cache comparison in Pij_t
+// reads an indeterminate value -- confirm intended.
+threeStateModel::threeStateModel(const MDOUBLE m1, const MDOUBLE m2,
+	const MDOUBLE m3, const MDOUBLE m4,const Vdouble &freq, bool useMarkovLimiting)
+	:_gainState1(m1),_gainState0(m2), _lossState1(m3),_lossState0(m4),_freq(freq),_useMarkovLimiting(useMarkovLimiting){
+	resizeMatrix(_Q,alphabetSize(),alphabetSize());
+	resizeMatrix(_lastPtCalculated, alphabetSize(), alphabetSize());
+	updateQ();
+	}
+
+// Member-wise assignment, including the mutable P(t) cache fields so the
+// copy starts with the same cached matrix (and cache validity) as the
+// source. Also used by the copy constructor in the header.
+threeStateModel& threeStateModel::operator=(const threeStateModel &other){
+	_gainState1 = other._gainState1;
+	_gainState0 = other._gainState0;
+	_lossState1 = other._lossState1;
+	_lossState0 = other._lossState0;
+	_freq = other._freq;
+	_useMarkovLimiting = other._useMarkovLimiting;
+	_Q = other._Q;
+	_bQchanged = other._bQchanged;
+	_lastPtCalculated = other._lastPtCalculated;
+	_lastTcalculated = other._lastTcalculated;
+
+	return *this;
+}
+
+// Rebuilds the 3x3 rate matrix Q from the four rate parameters:
+//   0 -> 2 at _gainState1, 1 -> 2 at _gainState0,
+//   2 -> 0 at _lossState1, 2 -> 1 at _lossState0;
+// direct 0 <-> 1 moves are impossible (rate 0). Each diagonal entry makes
+// its row sum to zero, which is verified below.
+void threeStateModel::updateQ(){
+	setEpsilonForZeroParams();	// exactly-zero rates underflow downstream; clamp first
+	_Q[0][0] = -_gainState1;
+	_Q[0][1] = 0;
+	_Q[0][2] = _gainState1;
+	_Q[1][0] = 0;
+	_Q[1][1] = -_gainState0;
+	_Q[1][2] = _gainState0;
+	_Q[2][0] = _lossState1;
+	_Q[2][1] = _lossState0;
+	_Q[2][2] = - _Q[2][0] - _Q[2][1];
+	for (int i=0; i<_Q.size();i++) {
+		MDOUBLE sum = _Q[i][0]+_Q[i][1]+_Q[i][2];
+		// NOTE(review): abs() applied to an MDOUBLE -- if only the C int
+		// overload is in scope this truncates; fabs would be unambiguous.
+		// Confirm which overload the included headers provide.
+		if ((abs(sum)>err_allow_for_pijt_function()))
+			errorMsg::reportError("Error in threeStateModel::updateQ, sum of row is not 0");
+	}
+	// Optionally re-derive the root frequencies as the chain's limiting
+	// distribution (skipped for the null model, whose chain is degenerate).
+	if ((!checkIsNullModel()) && (_useMarkovLimiting))
+		computeMarkovLimitingDistribution();
+	_bQchanged = true;	// invalidate the cached P(t) in Pij_t
+}
+
+// when Q matrix parameters are zero the lib code underflows and the likelihood is set to EPSILON;
+// therefore any rate within EPSILON_3STATEMODEL of zero is clamped up to
+// EPSILON_3STATEMODEL. checkIsNullModel() relies on this exact clamp value.
+void threeStateModel::setEpsilonForZeroParams(){
+	if (DEQUAL(_gainState0,0.0,EPSILON_3STATEMODEL))
+		_gainState0 = EPSILON_3STATEMODEL;
+	if (DEQUAL(_gainState1,0.0,EPSILON_3STATEMODEL))
+		_gainState1 = EPSILON_3STATEMODEL;
+	if (DEQUAL(_lossState0,0.0,EPSILON_3STATEMODEL))
+		_lossState0 = EPSILON_3STATEMODEL;
+	if (DEQUAL(_lossState1,0.0,EPSILON_3STATEMODEL))
+		_lossState1 = EPSILON_3STATEMODEL;
+}
+
+// Rate setters. Each stores one of the four rate parameters and rebuilds Q
+// (which also refreshes the root frequencies when _useMarkovLimiting is on
+// and invalidates the P(t) cache).
+
+// Sets the 0 -> 2 gain rate.
+void threeStateModel::setMu1(const MDOUBLE val) {
+	_gainState1 = val;
+	updateQ();
+}
+
+// Sets the 1 -> 2 gain rate.
+void threeStateModel::setMu2(const MDOUBLE val) {
+	_gainState0 = val;
+	updateQ();
+}
+
+// Sets the 2 -> 0 loss rate.
+void threeStateModel::setMu3(const MDOUBLE val) {
+	_lossState1 = val;
+	updateQ();
+}
+
+// Sets the 2 -> 1 loss rate.
+void threeStateModel::setMu4(const MDOUBLE val) {
+	_lossState0 = val;
+	updateQ();
+}
+
+
+
+// Returns true when val is a legal probability, allowing the numerical
+// tolerance err_allow_for_pijt_function() on both sides of [0,1].
+// BUG FIX: the original lower-bound test was
+//   (abs(val)+err_allow_for_pijt_function()<0)
+// which is always false (abs(val) >= 0 and the tolerance is positive), so
+// negative probabilities were never rejected here. The intended test,
+// val below -tolerance, is used instead.
+bool threeStateModel::pijt_is_prob_value(MDOUBLE val) const {
+	if ((val < -err_allow_for_pijt_function()) || (val>1+err_allow_for_pijt_function()))
+		return false;
+	else
+		return true;
+}
+
+// Returns true when every frequency is non-negative and they sum to 1
+// (within DEQUAL's default tolerance).
+// NOTE(review): freq is taken by value to match the header declaration --
+// changing it to const& would require touching the header as well.
+bool threeStateModel::areFreqsValid(Vdouble freq) const{
+	MDOUBLE sum=0.0;
+	for (int i=0; i<freq.size(); ++i){
+		if (freq[i]<0.0)
+			return false;
+		sum+=freq[i];
+	}
+	if (!DEQUAL(sum,1.0)) {
+		return false;
+	}
+	return true;
+}
+
+// Returns true for the "null" model: both gain rates were zeroed out (and
+// therefore clamped to exactly EPSILON_3STATEMODEL by
+// setEpsilonForZeroParams) and essentially all stationary frequency mass
+// sits on state 2. Exact float comparison against the clamp constant is
+// deliberate: the clamp assigns that exact value.
+bool threeStateModel::checkIsNullModel(){
+	if (_gainState0!=EPSILON_3STATEMODEL)
+		return false;
+	// BUG FIX: the original tested _gainState0 twice; the second test must
+	// examine _gainState1, otherwise a model with a non-clamped gainState1
+	// could be mis-classified as the null model.
+	if (_gainState1!=EPSILON_3STATEMODEL)
+		return false;
+	if (!(DEQUAL(_freq[2],1.0,EPSILON_3STATEMODEL)))
+		return false;
+	return true;
+}
+
+// Replaces the root/stationary frequencies after validating that the new
+// vector has the right length, is non-negative, and sums to 1.
+void threeStateModel::setFreq(const Vdouble &freq){
+	// Reject a vector of the wrong length outright.
+	if (freq.size()!=_freq.size()) {
+		errorMsg::reportError("Error in threeStateModel::setFreq, size of freq is different than member");
+	}
+	// Reject negative entries or a sum different from 1.
+	if (!areFreqsValid(freq)) {
+		string strErr = "Error in threeStateModel::setFreq, sum of freq is different than 1 or negative freq value";
+		errorMsg::reportError(strErr);
+	}
+	// Sizes match, so the element-wise copy is a plain vector assignment.
+	_freq = freq;
+}
+
+
+
+
+
+
+// Estimates the chain's stationary distribution P(infinity) by repeatedly
+// squaring P(1) until successive iterates agree entry-wise, then installs
+// the first row as the root frequencies via setFreq. Aborts if the
+// squaring does not converge within 100 iterations or if the converged
+// rows disagree.
+void threeStateModel::computeMarkovLimitingDistribution(){
+
+	VVdouble P;
+	int as = alphabetSize();
+	resizeMatrix(P,as, as);
+	// initializing P with P at time 1
+	for (int i=0; i< as; ++i) {
+		for (int j=0; j< as; ++j) {
+			P[i][j]=Pij_t(i,j,1.0);
+		}
+	}
+	VVdouble previous_P = P;
+	int numIterations = 0;
+	Vdouble freqs(3,-1.0);
+	bool converged = false;
+	MDOUBLE epsilon=0.000001;	// per-entry convergence tolerance
+	int row, col;
+
+	while ( converged==false ) {
+		previous_P = P;
+		// squaring doubles the time horizon each iteration: P(2^k)
+		P = multiplyMatrixes(P,P);
+		// due to rounding errors, we set the diagonal to be 1-(the rest)
+		P[0][0]=1.0-P[0][1]-P[0][2];
+		P[1][1]=1.0-P[1][0]-P[1][2];
+		P[2][2]=1.0-P[2][0]-P[2][1];
+		for (int d=0; d<as;++d){
+			freqs[d] = P[0][d];// ** taking the freqs as the first row; this is not necessarily correct if 3 rows are different
+		}
+		// converged only if every entry moved less than epsilon AND the
+		// candidate frequencies form a valid distribution
+		converged = true;
+		for (row = 0; row < P.size(); ++row) {
+			for (col = 0; col < P.size(); ++col)
+			{
+				MDOUBLE diff = abs(convert(previous_P[row][col] - P[row][col]));
+				if ( ( ( ( !DEQUAL(diff,0.0,epsilon) ) || (!areFreqsValid(freqs) ) ) )){
+					converged = false;
+				}
+			}
+		}
+		numIterations++;
+		if (numIterations>100) {
+			string err = "Error in threeStateModel::computeMarkovLimitingDistribution, too many iterations =" + double2string(numIterations);
+			errorMsg::reportError(err);
+		}
+
+	}
+//making sure that the three rows are the same
+	for (row =1; row < P.size(); ++row) {
+		for (col = 0; col < P.size(); ++col)
+		{
+			if (!(DEQUAL(P[row][col],P[row-1][col],epsilon))) {
+				errorMsg::reportError("Error in threeStateModel::computeMarkovLimitingDistribution, rows are not equal" );
+
+			}
+
+		}
+
+	}
+
+	setFreq(freqs);
+}
+
+// P(i -> j | t) via a truncated Taylor expansion of the matrix exponential:
+//   P(t) = I + Qt + (Qt)^2/2! + ...
+// adding terms until no entry moves by more than err_allow_for_pijt_function().
+// The whole P(t) matrix is cached (implementation copied from Itay Mayrose)
+// and reused while Q is unchanged and the same t is requested again.
+const MDOUBLE threeStateModel::Pij_t(const int i,const int j, const MDOUBLE d) const
+{
+	// Cache hit: Q unchanged since the last full computation for this t.
+	if (!_bQchanged && DEQUAL(d, _lastTcalculated))
+		return convert(_lastPtCalculated[i][j]);
+	// converting Q into doubleRep format
+	VVdoubleRep QdblRep;
+	resizeMatrix(QdblRep,_Q.size(),_Q.size());
+	for (int row=0;row<_Q.size();row++){
+		for (int col=0;col<_Q[row].size();col++)
+			QdblRep[row][col]=convert(_Q[row][col]);
+	}
+
+	VVdoubleRep Qt = multiplyMatrixByScalar(QdblRep, d);
+	VVdoubleRep unit;
+	unitMatrix(unit,_Q.size());
+	_lastPtCalculated = add(unit,Qt) ; // I + Qt
+	VVdoubleRep Qt_power = Qt;
+	VVdoubleRep prevIter_matrix = _lastPtCalculated;
+	// NOTE: an unused local (diffM) was removed here.
+	int n=2;
+	bool bConverged = false;
+	while (bConverged == false)
+	{
+		prevIter_matrix = _lastPtCalculated;
+		VVdoubleRep tempQ = multiplyMatrixByScalar(Qt,1.0/n);
+		Qt_power = multiplyMatrixes(Qt_power,tempQ);	// Qt^n/n!
+		_lastPtCalculated = add(_lastPtCalculated,Qt_power); // I + Qt + Qt^2/2! + .... + Qt^n/n!
+		//check if the difference between the cur and prev iteration is smaller than the allowed error of all matrix entries
+		bConverged = true;
+		for (int row = 0; row < _lastPtCalculated.size(); ++row) {
+			for (int col = 0; col < _lastPtCalculated.size(); ++col)
+			{
+				MDOUBLE diff = abs(convert(_lastPtCalculated[row][col] - prevIter_matrix[row][col]));
+				if ((diff > err_allow_for_pijt_function()) || (!pijt_is_prob_value(convert(_lastPtCalculated[i][j]))))
+					bConverged = false;
+			}
+		}
+		n++;
+		if (n>150) {
+			string err = "Error in threeStateModel::Pij_t, too many iterations for t = " + double2string(d);
+			errorMsg::reportError(err);
+		}
+	}
+	MDOUBLE val = convert(_lastPtCalculated[i][j]);
+	if (!pijt_is_prob_value(val))
+		errorMsg::reportError("Error in threeStateModel::Pij_t, pijt <0 or >1");
+	if (val<0.0)
+		val = EPSILON; // absolute zero creates a problem later on in computations
+	if (val>1.0)
+		val = 1.0;
+	// BUG FIX: remember which t the cached P(t) belongs to. The original
+	// never assigned _lastTcalculated, so the cache test above compared
+	// against an uninitialized value and never validly hit.
+	_lastTcalculated = d;
+	_bQchanged = false;
+	return val;
+}
diff --git a/libs/phylogeny/threeStateModel.h b/libs/phylogeny/threeStateModel.h
new file mode 100644
index 0000000..dc238fb
--- /dev/null
+++ b/libs/phylogeny/threeStateModel.h
@@ -0,0 +1,131 @@
+#ifndef ___3STATE_MODEL
+#define ___3STATE_MODEL
+
+#include "definitions.h"
+#include "replacementModel.h"
+#include "fromQtoPt.h"
+#include "errorMsg.h"
+#include "matrixUtils.h"
+
+// Non-reversible 3-state replacement model over states {0, 1, 2 = both}.
+// Gains move 0->2 / 1->2 and losses move 2->0 / 2->1; direct 0<->1 moves
+// are impossible. P(t) is computed numerically in the .cpp (Taylor series,
+// no spectral decomposition), and the root frequencies may optionally be
+// re-derived as the chain's limiting distribution on every Q update.
+class threeStateModel : public replacementModel {
+public:
+	// m1..m4: gain-to-1, gain-to-0, loss-of-1, loss-of-0 rates; freq must
+	// have 3 entries; useMarkovLimiting enables the limiting-distribution
+	// frequency update in updateQ.
+	explicit threeStateModel(const MDOUBLE m1, const MDOUBLE m2,
+		const MDOUBLE m3, const MDOUBLE m4,const Vdouble &freq, bool useMarkovLimiting = true);
+	// copy construction delegates to the member-wise operator= in the .cpp
+	threeStateModel(const threeStateModel& other) {*this = other;}
+	virtual threeStateModel& operator=(const threeStateModel &other);
+	virtual threeStateModel* clone() const { return new threeStateModel(*this); }
+	virtual ~threeStateModel() {}
+	const int alphabetSize() const {return 3;} // two states and an intermediate (both states at once)
+	const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions
+	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const ;
+	// Derivative is only defined here at t == 0 (where dP/dt == Q);
+	// any other t aborts.
+	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+		if (d==0.0)
+			return _Q[i][j];
+		errorMsg::reportError("Error in threeStateModel, dPij_dt called");
+		return 0.0; // not supposed to be here
+	}
+	// Second derivative is not supported at all; always aborts.
+	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+		errorMsg::reportError("Error in threeStateModel, d2Pij_dt2 called");
+		return 0.0; // not supposed to be here
+	}
+	const MDOUBLE freq(const int i) const {
+		if (i >= _freq.size())
+			errorMsg::reportError("Error in threeStateModel::freq, i > size of frequency vector");
+		return _freq[i];
+	}
+	const Vdouble getFreqs() const {return _freq;}
+	void setFreq(const Vdouble &freq);
+	// Each setter rebuilds Q (see .cpp); mu1..mu4 mirror ctor args m1..m4.
+	void setMu1(const MDOUBLE val) ;
+	void setMu2(const MDOUBLE val) ;
+	void setMu3(const MDOUBLE val) ;
+	void setMu4(const MDOUBLE val) ;
+	const MDOUBLE getMu1() const {return _gainState1;}
+	const MDOUBLE getMu2() const {return _gainState0;}
+	const MDOUBLE getMu3() const {return _lossState1;}
+	const MDOUBLE getMu4() const {return _lossState0;}
+	void computeMarkovLimitingDistribution(); // compute P(infinity), which specifies the stationary distribution
+
+private:
+	virtual void updateQ();
+	void setEpsilonForZeroParams();
+	bool checkIsNullModel();
+	bool pijt_is_prob_value(MDOUBLE val) const;
+	bool areFreqsValid(Vdouble freq) const; // tests if frequencies are valid (>0, sum=1)
+
+private:
+
+	MDOUBLE _gainState1; // _Q[0][2]
+	MDOUBLE _gainState0; // _Q[1][2]
+	MDOUBLE _lossState1; // _Q[2][0]
+	MDOUBLE _lossState0; // _Q[2][1]
+	VVdouble _Q;
+	Vdouble _freq;
+	bool _useMarkovLimiting; // should the markov limiting distribution be used to estimate the root frequencies
+	mutable bool _bQchanged; //indicates whether the Q matrix was changed after the last Pij_t call
+	mutable MDOUBLE _lastTcalculated;	// t of the cached P(t) below
+	mutable VVdoubleRep _lastPtCalculated;	// cached full P(t) matrix
+
+
+
+};
+
+/*class gainLossModel : public replacementModel {
+public:
+explicit gainLossModel(const MDOUBLE m1, const MDOUBLE m2, const Vdouble freq);
+virtual replacementModel* clone() const { return new gainLossModel(*this); }
+gainLossModel(const gainLossModel& other): _q2pt(NULL) {*this = other;}
+virtual gainLossModel& operator=(const gainLossModel &other);
+
+virtual ~gainLossModel() {if (_q2pt) delete _q2pt;}
+const int alphabetSize() const {return 3;} // two states and an intermediate (both states at once)
+const MDOUBLE err_allow_for_pijt_function() const {return 1e-4;} // same as q2p definitions
+const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
+return _q2pt->Pij_t(i,j,d);
+}
+const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+return _q2pt->dPij_dt(i,j,d);
+}
+const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+return _q2pt->d2Pij_dt2(i,j,d);
+}
+const MDOUBLE freq(const int i) const {
+if (i >= _freq.size())
+errorMsg::reportError("Error in gainLossModel::freq, i > size of frequency vector");
+return _freq[i];
+}
+void setMu1(const MDOUBLE val, bool isReversible=true) { _gainState1 = val; updateQ(isReversible);}
+void setMu2(const MDOUBLE val,bool isReversible=true) { _gainState0 = val; updateQ(isReversible);}
+const MDOUBLE getMu1() const {return _gainState1;}
+const MDOUBLE getMu2() const {return _gainState0;}
+
+
+protected:
+virtual void updateQ(bool isReversible=true);
+virtual void normalizeQ();
+
+
+protected:
+Vdouble _freq;
+MDOUBLE _gainState1;
+MDOUBLE _gainState0;
+VVdouble _Q;
+q2pt *_q2pt;
+
+
+
+};
+*/
+/*
+Q is a matrix of the following form:
+
+0 1 01
+0 1-m1 0 m1
+1 0 1-m2 m2
+01 (filled in assuming reversibility)
+
+i.e. no direct change from state 0 to state 1 is allowed
+*/
+
+#endif // ___3STATE_MODEL
+
+
diff --git a/libs/phylogeny/tree.cpp b/libs/phylogeny/tree.cpp
new file mode 100644
index 0000000..fd7b357
--- /dev/null
+++ b/libs/phylogeny/tree.cpp
@@ -0,0 +1,1150 @@
+// $Id: tree.cpp 5806 2009-01-20 09:21:15Z adido $
+
+#include "definitions.h"
+#include "tree.h"
+#include "treeUtil.h"
+#include "logFile.h"
+#include "someUtil.h"
+#include <cassert>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <ctime>
+
+using namespace std;
+
+const MDOUBLE tree::FLAT_LENGTH_VALUE = 0.3f;
+const int tree::TREE_NULL = -1;
+const MDOUBLE tree::SHORT_LENGTH_VALUE = 0.000001f;
+
+
+//removeSon: remove pSon from sons list.
+//does not delete pSon
+void tree::TreeNode::removeSon(TreeNode* pSon) {
+ vector<nodeP>::iterator vec_iter = remove(_sons.begin(), _sons.end(), pSon);
+ _sons.erase(vec_iter,_sons.end()); // pg 1170, primer.
+}
+
+void tree::TreeNode::claimSons(){
+ for(int i=0;i<getNumberOfSons();i++) {
+ getSon(i)->setFather(this);
+ }
+}
+//*******************************************************************************
+// Constructors Destructors
+//*******************************************************************************
+tree::tree() {
+ _root=NULL;
+}
+
// Builds a tree from the file treeFileName, filling isFixed with one
// "branch is fixed" flag per node id (see readPhylipTreeTopology).
// Accepts "-" as the file name, meaning the tree is read from standard input.
// On any failure, errorMsg::reportError(...,1) aborts the program.
tree::tree(const string& treeFileName, vector<char>& isFixed) {
	ifstream in;
	istream* inPtr = &cin; // default: standard input
	if (treeFileName != "-"){
		in.open(treeFileName.c_str());
		if (! in.is_open())
			errorMsg::reportError(string("Error - unable to open tree file ")+treeFileName,1);
		inPtr = &in;
	}
	if (readPhylipTreeTopology(*inPtr,isFixed)) {
		if (in.is_open())
			in.close();
		create_names_to_internal_nodes();
		makeSureAllBranchesArePositive();
		return;
	}
	if (in.is_open())
		in.close();
	errorMsg::reportError(string("Unable to read tree from the file ")+treeFileName,1);
}
+
// Builds a tree from the file treeFileName (no isFixed flags collected).
// Accepts "-" as the file name, meaning the tree is read from standard input.
// On any failure, errorMsg::reportError(...,1) aborts the program.
tree::tree(const string& treeFileName) {
	ifstream in;
	istream* inPtr = &cin; // default: standard input
	if (treeFileName != "-"){
		in.open(treeFileName.c_str());
		if (! in.is_open())
			errorMsg::reportError(string("Error - unable to open tree file ")+treeFileName,1);
		inPtr = &in;
	}
	if (readPhylipTreeTopology(*inPtr)) {
		if (in.is_open())
			in.close();
		create_names_to_internal_nodes();
		makeSureAllBranchesArePositive();
		return;
	}
	if (in.is_open())
		in.close();
	errorMsg::reportError(string("Unable to read tree from the file ")+treeFileName,1);
}
+
+tree::tree(istream &in) {
+ if (readPhylipTreeTopology(in)) {
+ create_names_to_internal_nodes();
+ makeSureAllBranchesArePositive();
+ return;
+ }
+ errorMsg::reportError("Unable to read phylip tree file",1);// also quit the program
+}
+
+tree::tree(istream &in,vector<char>& isFixed) {
+ if (readPhylipTreeTopology(in,isFixed)) {
+ create_names_to_internal_nodes();
+ makeSureAllBranchesArePositive();
+ return;
+ }
+ errorMsg::reportError("Unable to read phylip tree file",1);// also quit the program
+}
+
+tree::tree(const vector<char>& tree_contents) {
+ readPhylipTreeTopology(tree_contents);
+ create_names_to_internal_nodes();
+ makeSureAllBranchesArePositive();
+ return;
+}
+
+tree::tree(const vector<char>& tree_contents, vector<char>& isFixed) {
+ readPhylipTreeTopology(tree_contents,isFixed);
+ create_names_to_internal_nodes();
+ makeSureAllBranchesArePositive();
+ return;
+}
+
//copy constructor: deep copy of otherTree - nodes, ids, names, comments
//and branch lengths. An empty source tree yields an empty tree.
tree::tree(const tree &otherTree) {
	_root = NULL;
	if (otherTree._root == NULL)
		return; // if tree to copy is empty.
	createRootNode();
	_root->setName(otherTree._root->name());
	_root->setID(otherTree._root->id());
	_root->setComment(otherTree._root->getComment());
	// recursiveBuildTree copies each son's whole subtree under our root
	for (int i=0; i <otherTree._root->getNumberOfSons(); ++i) {
		recursiveBuildTree( _root, otherTree.getRoot()->getSon(i));
	}
}
+
+
+tree& tree::operator=(const tree &otherTree) {
+ if (this == &otherTree)
+ return *this;
+ if (otherTree._root == NULL) {
+ clear();
+ return *this; // if tree to copy is empty.
+ }
+ createRootNode();
+ _root->setName(otherTree._root->name());
+ _root->setComment(otherTree._root->getComment());
+ for (int i=0; i <otherTree._root->getNumberOfSons(); ++i) {
+ recursiveBuildTree( _root, otherTree.getRoot()->getSon(i));
+ }
+ return *this;
+}
+
+void tree::clear() {
+ vector<nodeP> vec;
+ getAllNodes(vec, _root);
+
+ for (int k=0; k < vec.size(); k++) {
+ delete(vec[k]);
+ }
+
+ _nodes = 0;
+ _leaves =0;
+ _root = NULL;
+
+}
+
+//*******************************************************************************
+// questions on the tree topology
+//*******************************************************************************
+
+//stores the father and sons of node inNodeP in vNeighbourVector
+void tree::getNeigboursOfNode(vector<nodeP> &vNeighbourVector, const nodeP inNodeP) const {
+ vNeighbourVector.clear();
+ for (int i=0; i < inNodeP->getNumberOfSons();++i) {
+ vNeighbourVector.push_back(inNodeP->getSon(i));
+ }
+ if (getRoot() != inNodeP)
+ vNeighbourVector.push_back(inNodeP->father());
+}
+
+
+// get nodePTR from name
+// "myNode" is a pointer to the root of the subtree in which we want to find the node "inName"
+tree::nodeP tree::findNodeByName(const string inName, nodeP myNode) const{
+ if (myNode==NULL) myNode=_root;
+ if (myNode->name() == inName) return myNode;
+ for (int i=0 ; i < myNode->getNumberOfSons(); i++ ) {
+ nodeP answer = findNodeByName(inName, myNode->getSon(i));
+ if (answer!=NULL) return answer;
+ }
+ return NULL;
+}
+
+
+// get nodePTR from id
+// similar to tree::findNodeByName
+// "myNode" is a pointer to the root of the subtree in which we want to find the node "inId"
+tree::nodeP tree::findNodeById(const int inId, nodeP myNode) const{
+ if (myNode==NULL) myNode=_root;
+ if (myNode->id() == inId) return myNode;
+ for (int i=0 ; i < myNode->getNumberOfSons(); i++ ) {
+ nodeP answer = findNodeById(inId, myNode->getSon(i));
+ if (answer!=NULL) return answer;
+ }
+ return NULL;
+}
+
//getPathBetweenAnyTwoNodes: store all nodes on the path from node1 to node2 in path
//the first node in path is node1. the last node is node2
//1. store all nodes from node1 to the root and node2 to the root
//2. starting from the root - finds the first node (common_father) which is father to both node1 and node2
//3. store in <path> all nodes in the path from node1 to common_father, from node2 to common_father and common_father itself
void tree::getPathBetweenAnyTwoNodes(vector<nodeP> &path, const nodeP node1, const nodeP node2) const {

	path.clear();
	vector<nodeP> pathMatrix1; // node1 up to the root, inclusive
	vector<nodeP> pathMatrix2; // node2 up to the root, inclusive

	nodeP nodeup = node1;
	while (nodeup != _root) {
		pathMatrix1.push_back(nodeup);
		nodeup = nodeup->father();
	}
	pathMatrix1.push_back(_root);

	nodeup = node2;
	while (nodeup != _root) {
		pathMatrix2.push_back(nodeup);
		nodeup = nodeup->father();
	}
	pathMatrix2.push_back(_root);

	// walk both lists backwards (from the root) while they agree; afterwards
	// tmp1/tmp2 index the first nodes BELOW the lowest common ancestor.
	int tmp1 = pathMatrix1.size()-1;
	int tmp2 = pathMatrix2.size()-1;

	while ((tmp1 >= 0) && (tmp2 >= 0)) {
		if (pathMatrix1[tmp1] != pathMatrix2[tmp2])
			break;
		tmp1--;
		tmp2--;
	}

	// node1 side of the path, down to (but excluding) the common ancestor
	for (int y=0; y <= tmp1; ++y)
		path.push_back(pathMatrix1[y]);
	// both lists end at _root, so the backwards walk always makes at least
	// one step and tmp1+1 is a valid index (the common ancestor itself).
	path.push_back(pathMatrix1[tmp1+1]); // pushing once, the TreeNode that was common father to both.
	for (int j=tmp2; j >= 0; --j) {
		path.push_back(pathMatrix2[j]);
	}
	return;
}
+
+
+void tree::getFromLeavesToRoot(vector<nodeP> &vNeighbourVector) const {
+ getFromRootToLeaves(vNeighbourVector);
+ reverse(vNeighbourVector.begin(),vNeighbourVector.end());
+}
+
+
+void tree::getFromRootToLeaves(vector<nodeP> &vec) const {
+ getFromNodeToLeaves(vec,_root);
+}
+
+
+void tree::getFromNodeToLeaves(vector<nodeP> &vec, const nodeP fromHereDown) const {
+ vec.push_back(fromHereDown);
+ for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
+ getFromNodeToLeaves(vec, fromHereDown->getSon(k));
+ }
+ return;
+}
+
+
+void tree::getAllHTUs(vector<nodeP> &vec, const nodeP fromHereDown ) const {
+ vec.clear();
+ getAllHTUsPrivate(vec,fromHereDown);
+}
+
+
+void tree::getAllHTUsPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
+ if (fromHereDown == NULL) return;
+ if (fromHereDown->isInternal()) vec.push_back(fromHereDown);
+ for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
+ getAllHTUsPrivate(vec,fromHereDown->getSon(k));
+ }
+ return;
+}
+
+
+void tree::getAllNodes(vector<nodeP> &vec, const nodeP fromHereDown ) const {
+ vec.clear();
+ getAllNodesPrivate(vec,fromHereDown);
+}
+
+
+void tree::getAllNodesPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
+ //DFS: depth first search
+ if (fromHereDown == NULL)
+ return;
+ vec.push_back(fromHereDown);
+ for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
+ getAllNodesPrivate(vec,fromHereDown->getSon(k));
+ }
+ return;
+}
+
+
+void tree::getAllLeaves(vector<nodeP> &vec, const nodeP fromHereDown ) const {
+ vec.clear();
+ getAllLeavesPrivate(vec,fromHereDown);
+}
+
+
+void tree::getAllLeavesPrivate(vector<nodeP> &vec, const nodeP fromHereDown ) const {
+ if (fromHereDown == NULL) return;
+ if (fromHereDown->isLeaf()) vec.push_back(fromHereDown);
+ for (int k=0; k < fromHereDown->getNumberOfSons(); k++) {
+ getAllLeavesPrivate(vec,fromHereDown->getSon(k));
+ }
+ return;
+}
+
//findLengthBetweenAnyTwoNodes: total branch length along the path from node1
//to node2. For each node on the path, its dis2father is counted only when
//that father is itself on the path (i.e. the edge belongs to the path).
MDOUBLE tree::findLengthBetweenAnyTwoNodes(const nodeP node1, const nodeP node2) const {
	vector<nodeP> pathMatrix;
	MDOUBLE sumOfDistances =0;
	getPathBetweenAnyTwoNodes(pathMatrix, node1, node2);
	for (int i=0; i < pathMatrix.size() ; i++) {
		// two cases: first, the previous node is closer to the root
		// than the current one. NOTE: this can not be the case for the
		// first node in the path
		if (i>0 && pathMatrix[i]->father() == pathMatrix[i-1])
			sumOfDistances += pathMatrix[i]->dis2father();
		else
			// else: the next node is closer to the root than this node
			// again, it can not be the last node in the path
			if (i<pathMatrix.size()-1 && pathMatrix[i]->father() == pathMatrix[i+1])
				sumOfDistances += pathMatrix[i]->dis2father();
		// if both cases are false, then the current node is the
		// closest to the root over the path, and therefor the
		// distance to its father is not in the path at all.
	}
	return sumOfDistances;
}
+
+// simular to above, but for all nodes at once. O(n^3) or so, but this should not be an issue
+// in any reasonable scenario
+// only disTab[i][j] is filled. disTab[j][i] remains zero.
+void tree::getTreeDistanceTableAndNames(VVdouble& disTab, vector <string>& vNames) const {
+ vector<nodeP> nodepV;
+ getAllLeaves(nodepV, _root);
+ disTab.resize(nodepV.size());
+ vNames.resize(nodepV.size());
+ for (int i=0;i<nodepV.size();++i) {
+ disTab[i].resize(nodepV.size());
+ vNames[i]=nodepV[i]->name();
+ for(int j=i+1;j<nodepV.size();++j){
+ disTab[i][j]=findLengthBetweenAnyTwoNodes(nodepV[i],nodepV[j]);
+ }
+ }
+}
+
+
+// find length between two neighbouring nodes only
+MDOUBLE tree::lengthBetweenNodes(const nodeP i, const nodeP j) const {
+ if (i->father() == j)
+ return i->dis2father();
+ assert (j->father() == i);
+ return j->dis2father();
+}
+
+//*******************************************************************************
+// change tree topology parameters - should be applied carefully
+//*******************************************************************************
+
//set the new root at p_iNewRoot
// The method doesn't convert an "unrooted tree" = "a tree in which the root has 3 sons"
// to a rooted one = "a tree in which the root has <= 2 sons".
// The new root will still have 3 sons.
void tree::rootAt(const nodeP p_iNewRoot) {
	if (_root == p_iNewRoot)
		return;
	vector<nodeP> pathMatrix;
	getPathBetweenAnyTwoNodes(pathMatrix, _root, p_iNewRoot);
	//pathMatrix size is always bigger than 2.

	// Reverse every father/son edge along the old-root -> new-root path.
	// Statement order inside the loop matters: at iteration i,
	// pathMatrix[i+1]->father() still points at pathMatrix[i] and
	// pathMatrix[i+1]->dis2father() still holds the length of edge (i,i+1).
	for (int i = 0; i < pathMatrix.size() - 1 ; i++) {
		pathMatrix[i]->_father = pathMatrix[i+1];
		// the (i,i+1) edge length was stored on the old son (i+1); move it to i
		pathMatrix[i]->setDisToFather( pathMatrix[i+1]->dis2father() );
		pathMatrix[i]->removeSon(pathMatrix[i+1]);
		// pathMatrix[i+1]->father() is still pathMatrix[i] here (see above)
		pathMatrix[i+1]->_sons.push_back(pathMatrix[i+1]->father());
		pathMatrix[i+1]->_father = NULL;
	}
	_root = p_iNewRoot;
}
+
+
+void tree::makeSureAllBranchesArePositive() {
+ vector<nodeP> _nodevec;
+ getAllNodes(_nodevec,_root);
+ for (int i=0; i < _nodevec.size(); ++i) {
+ if (_nodevec[i]!=_root) {
+ if (_nodevec[i]->dis2father()<=0) {
+ _nodevec[i]->setDisToFather(tree::SHORT_LENGTH_VALUE);
+ }
+ }
+ }
+}
+void tree::makeSureAllBranchesAreLargerThanEpsilon(MDOUBLE epsilon) {
+ vector<nodeP> _nodevec;
+ getAllNodes(_nodevec,_root);
+ for (int i=0; i < _nodevec.size(); ++i) {
+ if (_nodevec[i]!=_root) {
+ if (_nodevec[i]->dis2father()<epsilon) {
+ LOGnOUT(4,<<" @@@ Warning: brachLength too short:"<<endl
+ <<" - the node: "<<_nodevec[i]->name()<<", length: "<<_nodevec[i]->dis2father()<<" is changed to: "<<epsilon<<endl);
+ _nodevec[i]->setDisToFather(epsilon);
+ }
+ }
+ }
+}
+
+//create new names to all internal nodes.
+//the new name will be NXX, where XX is htu number
+void tree::create_names_to_internal_nodes() {
+ vector<nodeP> htuVec;
+ getAllHTUs(htuVec,_root);
+
+ for (int i=0; i<htuVec.size(); ++i) {
+ string name = int2string(i+1);
+ htuVec[i]->setName((string)"N" + name);
+ }
+}
+
+
+void tree::multipleAllBranchesByFactor(MDOUBLE InFactor) {
+ vector<nodeP> vec;
+ getAllNodes(vec,_root );
+ for (int i = 0; i < vec.size(); ++i) {
+ if (vec[i]->father() != NULL)
+ vec[i]->setDisToFather(vec[i]->dis2father() * InFactor);
+ }
+ _root->setDisToFather(TREE_NULL);
+}
+
+
+void tree::createFlatLengthMatrix(const MDOUBLE newFlatDistance) {
+ vector<nodeP> vec;
+ getAllNodes(vec,_root );
+ for (int i=0; i< vec.size(); ++i) {
+ if (vec[i]->father() != NULL) vec[i]->setDisToFather(newFlatDistance);
+ }
+}
+
+/*
+void tree::set_length_to_father(nodeP iSon, MDOUBLE dLength) {
+ iSon->setDisToFather(dLength);
+}
+*/
+
+// helper function
+class eqNameVLOCAL {
+ public:
+ explicit eqNameVLOCAL(const string& x) : _x(x) {}
+ const string& _x;
+ bool operator() (const tree::nodeP y){
+ return _x == y->name();
+ }
+};
+
// removes sonNode from its father according to the name of sonNode
// this function should ONLY be used when the node, sonNode, is to be recycled soon!
// because this function does not change the number of leaves nor the number of nodes!
// nor does it change the father of sonNode.
// NOTE(review): matching is by NAME, so any same-named siblings would be
// removed as well - callers appear to rely on names being unique; confirm.
void tree::removeNodeFromSonListOfItsFather(nodeP sonNode) {
	vector<tree::nodeP>::iterator vec_iter;
	vec_iter = remove_if(sonNode->_father->_sons.begin(), sonNode->_father->_sons.end(), eqNameVLOCAL(sonNode->name()));
	sonNode->father()->_sons.erase(vec_iter,sonNode->father()->_sons.end()); // pg 1170, primer.
}
+
+
+//*******************************************************************************
+// Input-Output
+//*******************************************************************************
+
+
+void tree::output(string treeOutFile, TREEformats fmt, bool withHTU ) const {
+ ofstream os(treeOutFile.c_str());
+ output(os, fmt, withHTU);
+ os.close();
+}
+
+void tree::output(ostream& os, TREEformats fmt, bool withHTU) const {
+ if (_root == NULL) {
+ LOG(1,<<" empty tree ");
+ return;
+ }
+ if (fmt == PHYLIP)
+ outputInPhylipTreeFormat(os, withHTU);
+ else if (fmt == PAML)
+ outputInPamlTreeFormat(os, withHTU);
+ else if (fmt == ANCESTOR)
+ outputInAncestorTreeFormat(os,withHTU);
+ else if (fmt == ANCESTORID)
+ outputInAncestorIdTreeFormat(os,withHTU);
+ os<<endl;
+ //this returns the ostream properies to its previos ones (it was changed to ios::fixed in function outputInPhylipTreeFormat())
+ os<<setiosflags(ios::scientific);
+}
+
+void tree::outputInAncestorTreeFormat(ostream& treeOutStream, bool distances) const{
+ time_t ltime;
+ int i,k,spaces;
+ vector<nodeP> vec;
+ int maxNameLen = 0;
+
+ getAllLeaves(vec,_root);
+ for (int w=0; w<vec.size();++w) {
+ if (maxNameLen<vec[w]->name().size()) maxNameLen = vec[w]->name().size();
+ }
+ maxNameLen++; // this is just the longest name of taxa plus one
+
+
+
+ time( <ime );
+ treeOutStream<<"# created on "<< ctime( <ime ) ;
+
+ treeOutStream<<"name";
+ spaces = maxNameLen-4;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ treeOutStream<<" parent";
+ spaces = 7-6;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ if (distances) {
+ treeOutStream<<"disance to father";
+ treeOutStream<<" ";
+ }
+
+ treeOutStream<<" child";
+ spaces = maxNameLen-4;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ treeOutStream<<endl;
+
+
+ for (i=0; i<vec.size();++i) {
+ treeOutStream<<vec[i]->name();
+ spaces = maxNameLen-vec[i]->name().size();
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ if (vec[i] != _root) {
+ treeOutStream<<vec[i]->father()->name();
+ spaces = 7-vec[i]->father()->name().size();
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+ }
+ else {
+ treeOutStream<<"root!";
+ spaces = 7-5;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+ }
+
+ if ((vec[i] != _root) && distances) {
+ treeOutStream<<vec[i]->dis2father();
+ }
+
+ for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
+ treeOutStream<<" "<<vec[i]->_sons[j]->name();
+ }
+ treeOutStream<<endl;
+ }
+
+ vec.clear();
+ getAllHTUs(vec,_root );
+
+ for (i=0; i<vec.size();++i) {
+ treeOutStream<<vec[i]->name();
+ spaces = maxNameLen-vec[i]->name().size();
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ if (vec[i] != _root) {
+ treeOutStream<<vec[i]->father()->name();
+ spaces = 7-vec[i]->father()->name().size();
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+ }
+ else {
+ treeOutStream<<"root!";
+ spaces = maxNameLen-5;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+ }
+
+ if (vec[i] != _root && distances) treeOutStream<<vec[i]->dis2father();
+
+ for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
+ treeOutStream<<" "<<vec[i]->_sons[j]->name();
+ }
+ treeOutStream<<endl;
+ }
+}
+
//outputInPhylipTreeFormat: write the tree as newick text, optionally with
//internal (HTU) names; node comments are emitted as [&&NHX...] annotations.
void tree::outputInPhylipTreeFormat(ostream& os, bool withHTU ) const {
	// special case of a tree with 1 or 2 taxa.
	if (getLeavesNum() == 1) {
		os<<"("<<_root->name()<<")"<<endl;
		return;
	}
	else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son.
		os<<"("<<_root->name()<<":0.0";
		if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
		os<<",";
		os<<_root->getSon(0)->name()<<":" <<setiosflags(ios::fixed) <<_root->getSon(0)->dis2father();
		if (_root->getSon(0)->getComment().length()) os << "[&&NHX" << _root->getSon(0)->getComment() <<"]";
		os <<")"<<endl;
		return;
	}
	// ========================================
	os<<"(";
	// going over all the sons of the root; the last one gets no trailing comma
	int i;
	for (i=0; i<_root->getNumberOfSons()-1; ++i)
	{
		print_from(_root->getSon(i),os, withHTU);
		os<<",";
	}

	print_from(_root->getSon(i),os, withHTU);
	os<<")";
	if (withHTU==true) os<<_root->name();
	if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
	char c=';';// 59 is dot-line
	os<<c;
}
+
//this format is like phylip format except first line is the number of leaves in the tree and the number of trees (1)
void tree::outputInPamlTreeFormat(ostream& os, bool withHTU ) const {
	// special case of a tree with 1 or 2 taxa.
	if (getLeavesNum() == 1) {
		os<<"("<<_root->name()<<")"<<endl;
		return;
	}
	else if ((getLeavesNum() == 2) && (_root->getNumberOfSons()==1)) { // very special case of a root with one son.
		os<<"("<<_root->name()<<":0.0";
		if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
		os<<",";
		os<<_root->getSon(0)->name()<<":" <<setiosflags(ios::fixed) <<_root->getSon(0)->dis2father();
		if (_root->getSon(0)->getComment().length()) os << "[&&NHX" << _root->getSon(0)->getComment() <<"]";
		os <<")"<<endl;
		return;
	}
	// ========================================
	// PAML header line: "<number of leaves> <number of trees>"
	vector<nodeP> vec;
	getAllLeaves(vec, _root);
	int num = vec.size();
	os<<num<<" 1"<<endl;
	os<<"(";
	// going over all the sons of the root; the last one gets no trailing comma
	int i;
	for (i=0; i<_root->getNumberOfSons()-1; ++i)
	{
		print_from(_root->getSon(i),os, withHTU);
		os<<",";
	}

	print_from(_root->getSon(i),os, withHTU);
	os<<")";
	if (withHTU==true) os<<_root->name();
	if (_root->getComment().length()) os << "[&&NHX" << _root->getComment() <<"]";
	char c=';';// 59 is dot-line
	os<<c;
}
+
+
//print_from: recursively write the subtree rooted at from_node in newick
//syntax onto os. Always appends ":<dis2father>" (with ios::fixed), so it is
//meant for non-root nodes; the root is handled by the callers.
//Returns 0 (the return value is unused by all callers in this file).
int tree::print_from(nodeP from_node, ostream& os, bool withHTU ) const {
	int i;
	if (from_node->isLeaf())
		os<<from_node->name();
	else {
		os<<"(";
		for (i=0; i<from_node->getNumberOfSons()-1; ++i) {
			print_from(from_node->getSon(i),os,withHTU);
			os<<",";
		}
		print_from(from_node->getSon(i),os,withHTU);
		os<<")";
		if (withHTU==true)
			os<<from_node->name();
	}
	os<<":"<<setiosflags(ios::fixed) <<from_node->dis2father();
	if (from_node->getComment().length()) os << "[&&NHX" << from_node->getComment() <<"]";

	return 0;
}
+
+
+bool tree::readPhylipTreeTopology(istream &in) {
+ const vector<char> tree_contents = PutTreeFileIntoVector(in);
+ return readPhylipTreeTopology(tree_contents);
+}
+
+bool tree::readPhylipTreeTopology(istream &in,vector<char>& isFixed) {
+ const vector<char> tree_contents = PutTreeFileIntoVector(in);
+ return readPhylipTreeTopology(tree_contents,isFixed);
+}
+
+
+
+bool tree::readPhylipTreeTopology(const vector<char>& tree_contents) {
+ vector<char> isFixed;
+ return readPhylipTreeTopology(tree_contents,isFixed);
+}
+
//getName: accumulate characters until a newick delimiter - one of
//")(:,}{"  - is reached. The iterator is advanced past the name and left
//pointing at the delimiter. An immediate delimiter yields an empty name.
std::string getName(std::vector<char>::const_iterator& p_itCurrent) {
	static const std::string delimiters = ")(:,}{";
	std::string result;
	while (delimiters.find(*p_itCurrent) == std::string::npos) {
		result += *p_itCurrent;
		++p_itCurrent;
	}
	return result;
}
+
//readPhylipTreeTopology: parse newick text (tree_contents) into this tree.
//isFixed is resized to one flag per node id: 1 = the branch above that node
//is fixed (all leaves, and internal nodes closed with '}'), 0 = free.
//Returns true on success; malformed input aborts via errorMsg::reportError.
bool tree::readPhylipTreeTopology(const vector<char>& tree_contents,vector<char>& isFixed) {


	int nextFreeID =0; // to give id's for nodes.
	_leaves = GetNumberOfLeaves(tree_contents);
	_root = new TreeNode(nextFreeID);
	if (_leaves == 1) {// very special case of a tree that is only 1 leaf...
		vector<char>::const_iterator itCurrent = tree_contents.begin();
		itCurrent++;
		_root->setName(getName(itCurrent));
		return true;
	}

	++nextFreeID;
	_nodes = GetNumberOfInternalNodes(tree_contents) + _leaves;

	isFixed.resize(_nodes,0); // 0 = not fixed, 1 = fixed.
	nodeP conection2part=NULL;
	vector<char>::const_iterator itCurrent = tree_contents.begin();

	// outermost "(A,B,...)": each readPart() call consumes one whole subtree
	if (verifyChar(itCurrent,OPENING_BRACE)||verifyChar(itCurrent,OPENING_BRACE2)){
		do {
			itCurrent++;
			conection2part = readPart(itCurrent,nextFreeID,isFixed);
			// readPart returns a pointer to himself
			_root->_sons.push_back(conection2part);
			conection2part->_father = _root;

		} while (verifyChar(itCurrent, COMMA));
	}
	if (!(verifyChar(itCurrent, CLOSING_BRACE)||verifyChar(itCurrent, CLOSING_BRACE2))) {
		errorMsg::reportError("Bad format in tree file.",1); // also quit
	} else itCurrent++; // skip closing brace
	_root->setComment(readPosibleComment(itCurrent));
	if (verifyChar(itCurrent, SEMI_COLLON)) itCurrent++;
	// this part is for the cases where all the edges are fixed. In such case - this part changes
	// all the branches to not fixed.
	int z=0;
	bool allFixed = true;
	for (z=1; z< isFixed.size(); ++z) {
		if (isFixed[z] == 0) {
			allFixed = false;
			break;
		}
	}
	if (allFixed) {
		for (z=1; z< isFixed.size(); ++z) {
			isFixed[z] = 0;
		}
	}


	return true;
}
+
+
+
// isFixed is actually a bool vector. Sometimes we want to fix a subtree of the tree, for example
// "human and chimp" so we won't try any topologies that interrupt with this constraint.
// When isFixed[i] == 1, it means that the branch above node i is fixed. This happens for every leaf,
// and for nodes indicated by CLOSING_BRACE2 which is '}'.
// readPart: recursively parse one newick subtree starting at p_itCurrent,
// allocating nodes with ids taken from nextFreeID; returns the subtree root.
tree::nodeP tree::readPart(	vector<char>::const_iterator& p_itCurrent,
						   int& nextFreeID,
						   vector<char> & isFixed) {
	if ( IsAtomicPart(p_itCurrent) ) {
		// leaf case: read the name, i.e. - the content from the file
		nodeP newLeaf = new TreeNode(nextFreeID);
		isFixed[nextFreeID] = 1; // all edges to the leaves are fixed...
		++nextFreeID;

		string tmpname = getName(p_itCurrent);
		newLeaf->setName(tmpname);

		// if a number(==distance) exists on the right-hand, update the distance table
		if ( DistanceExists(p_itCurrent) )
			newLeaf->setDisToFather(getDistance(p_itCurrent));
		//		clearPosibleComment(p_itCurrent);
		newLeaf->setComment(readPosibleComment(p_itCurrent));
		return newLeaf;

	}
	else // this is a complex part
	{
		nodeP newHTU = new TreeNode(nextFreeID);
		++nextFreeID;
		nodeP conection2part=NULL;

		// consume "(sub1,sub2,...)" - one recursive call per comma-separated son
		do {
			++p_itCurrent;
			conection2part = readPart(p_itCurrent,nextFreeID,isFixed);
			conection2part->_father = newHTU;
			newHTU->_sons.push_back(conection2part);
		} while (verifyChar(p_itCurrent, COMMA));
		// ')' marks a fixed internal branch, '}' an unfixed one
		if (verifyChar(p_itCurrent, CLOSING_BRACE)) {
			isFixed[newHTU->id()] = 1;
		} else if (verifyChar(p_itCurrent, CLOSING_BRACE2)) {
			isFixed[newHTU->id()] = 0;
		} else {
			errorMsg::reportError("Bad format in tree file (2)");
		}
		++p_itCurrent;

		// if a number(==distance) exists on the right-hand, update the distance table
		if ( DistanceExists(p_itCurrent) )
			newHTU->setDisToFather(getDistance(p_itCurrent));
		//			clearPosibleComment(p_itCurrent);
		newHTU->setComment(readPosibleComment(p_itCurrent));
		return newHTU;

	}
}
+
+//copy the information from other_nodePTR to a new node, and set the father to father_nodePTR
+//does not update the number of nodes and leaves
+tree::nodeP tree::recursiveBuildTree(tree::nodeP father_nodePTR, const tree::nodeP other_nodePTR) {
+
+ tree::nodeP childPTR = createNode(father_nodePTR, other_nodePTR->id());
+ childPTR->setName(other_nodePTR->name());
+ childPTR->setComment(other_nodePTR->getComment());
+ childPTR->setDisToFather(other_nodePTR->dis2father());
+ for (int k = 0 ; k < other_nodePTR->getNumberOfSons() ; ++k) {
+ recursiveBuildTree(childPTR, other_nodePTR->getSon(k));
+ }
+ return childPTR;
+}
+
+
+
+void tree::updateNumberofNodesANDleaves() {
+ vector<nodeP> vec;
+ getAllLeaves(vec,getRoot());
+ _leaves = vec.size();
+ vec.clear();
+ getAllNodes(vec,getRoot());
+ _nodes = vec.size();
+}
+
+//removeLeaf: removes nodePTR from tree. also deletes nodePTR
+void tree::removeLeaf(nodeP nodePTR) {
+ if (!(nodePTR->isLeaf())) {
+ errorMsg::reportError("Error in function deleteLeaf - Unable to remove a node, which is not a leaf ");
+ }
+
+ if (getNodesNum() == 1) {
+ delete getRoot();
+ _root = NULL;
+ }
+
+ if (nodePTR->isRoot()) {
+ assert (nodePTR->getNumberOfSons() == 1);
+ nodeP sonOfRoot = nodePTR->getSon(0);
+ rootAt(sonOfRoot);
+ }
+
+ // leaf is not the root:
+ nodeP fatheOfLeafToRemove = nodePTR->father();
+ fatheOfLeafToRemove->removeSon(nodePTR);
+ delete nodePTR;
+
+ int tmpSons = fatheOfLeafToRemove->getNumberOfSons();
+ if (tmpSons == 1)
+ shrinkNode(fatheOfLeafToRemove);
+ else if ((_root == fatheOfLeafToRemove) && (tmpSons == 2)) {
+ nodeP tmp = _root;
+ rootAt(_root->getSon(0));
+ shrinkNode(tmp);
+ }
+ if (_root->isLeaf() && _root->getNumberOfSons() >0 )
+ rootAt(_root->getSon(0));
+ updateNumberofNodesANDleaves();
+ return;
+}
+
+
+//getAllBranches: returns two vectors such that nodesUp[i] is the father of nodesDown[i]
+void tree::getAllBranches(vector<nodeP> &nodesUp, vector<nodeP> & nodesDown){
+ vector<nodeP> localVec;
+ getAllNodes(localVec, _root);
+ for (int i=0 ; i < localVec.size() ; i++) {
+ if (localVec[i]->father() != NULL) {
+ nodesUp.push_back(localVec[i]->father());
+ nodesDown.push_back(localVec[i]);
+ }
+ }
+ return;
+}
+
+
+
+
+
// the idea is that if we have a node with only one son (a tree like: node1---node2---node3)
// we can eliminate node2 (which is nodePTR)
// The eliminated node's branch length is folded into its neighbours so the
// total path length is preserved. nodePTR is deleted; counters are refreshed.
void tree::shrinkNode(nodeP nodePTR) {

	if (nodePTR->getNumberOfSons() != 1) {
		vector<string> err;
		err.push_back("you requested to eliminate a node with more than 1 sons.");
		err.push_back(" error in function shrink node");
		errorMsg::reportError(err); // also quit the program.
	}


	nodeP fatherNode = nodePTR->father();
	nodeP sonNode = nodePTR->getSon(0);

	if( (nodePTR->isRoot())&&(nodePTR->getNumberOfSons() == 1) ) // refering the root to be sonNode.
	{
		// the root itself is shrunk: its only son becomes the new root, and
		// the son's old branch length is added onto each grandchild's branch.
		MDOUBLE dis2root = sonNode->dis2father();
		sonNode->setFather(NULL);
		delete(_root);
		_root = sonNode;

		for (int i=0; i < sonNode->getNumberOfSons(); ++i)
		{
			MDOUBLE oldDis2Father = sonNode->getSon(i)->dis2father();
			sonNode->getSon(i)->setDisToFather(oldDis2Father + dis2root);
		}

		_root->setDisToFather(TREE_NULL); // the root carries no branch

		updateNumberofNodesANDleaves();
		return;
	}

	// taking care of the son node:
	sonNode->_father = fatherNode;
	sonNode->setDisToFather(sonNode->dis2father() + nodePTR->dis2father());//if it is the root dont add the distance

	// taking care of the father node
	fatherNode->removeSon(nodePTR);
	fatherNode->_sons.push_back(sonNode);

	// delete the nodePTR
	delete nodePTR;
	updateNumberofNodesANDleaves();
}
+
+
//createRootNode: erase the current tree and create a tree with one node.
//That single node counts as both the root and a leaf, hence _leaves == 1.
void tree::createRootNode() {
	clear();
	_root = new TreeNode(0);
	_leaves=1;
	_nodes=1;
}
+
+
+tree::nodeP tree::createNode(nodeP fatherNode, const int id) {
+ nodeP tmp = new TreeNode(id);
+ _nodes++;
+ if (!fatherNode->isLeaf()) {
+ // if fatherNode is a leaf then we remove one leaf and add one leaf, so no change.
+ ++_leaves;
+ }
+ // there is one case when your father IS a leaf and yet you have to increase the number of leaves
+ // this is when you father is the root, and you add the first child
+ if (fatherNode->isRoot() && fatherNode->getNumberOfSons()==0) {
+ ++_leaves;
+ }
+ tmp->_father = fatherNode;
+ fatherNode->setSon(tmp);
+ return tmp;
+}
+
+// check whether the tree contains information about branch length
+bool tree::withBranchLength() const{
+ if (_root->_sons.empty()) return false;
+ else if (_root->getSon(0)->dis2father() != TREE_NULL) return true;
+ return false;
+}
+
//stream insertion: prints the tree in the human-readable ANCESTOR table format.
ostream &operator<<(ostream &out, const tree &tr){
	tr.output(out,tree::ANCESTOR);
	return out;
}
+
+/*
+void tree::fillNodesID() {
+ vector<nodeP> vec;
+ getAllNodes(vec,_root );
+ for (int i=0; i< vec.size(); ++i) {
+ vec[i]->setID( i);
+ }
+}
+*/
+
+
+
+/*
+void tree::cut_tree_in_two_leaving_interMediate_node(nodeP node2split,tree &small1,tree &small2) const {
+ tree tmpCopyOfThisTree = (*this);
+ nodeP node2splitOnNewTree = tmpCopyOfThisTree.getNodeByName(node2split->name());
+ string interNode = "interNode";
+ assert(node2split->father() != NULL);
+ nodeP tmp = tmpCopyOfThisTree.makeNodeBetweenTwoNodes(node2splitOnNewTree->father(),node2splitOnNewTree, interNode);
+ tmpCopyOfThisTree.rootAt(tmp);
+ tmpCopyOfThisTree.cut_tree_in_two_special(tmp, small1,small2);
+ nodeP toDel1 = small1.getNodeByName(interNode);
+};
+*/
+
+
+void tree::outputInAncestorIdTreeFormat(
+ ostream& treeOutStream, bool distances) const{
+ time_t ltime;
+ int i,k,spaces;
+ vector<nodeP> vec;
+ int maxNameLen = 0;
+
+ getAllLeaves(vec,_root);
+ for (int w=0; w<vec.size();++w) {
+ if (maxNameLen<vec[w]->name().size()) maxNameLen = vec[w]->name().size();
+ }
+ maxNameLen++; // this is just the longest name of taxa plus one
+ maxNameLen+=5; // MN
+
+
+ time( <ime );
+ treeOutStream<<"# created on "<< ctime( <ime ) ;
+
+ treeOutStream<<"name";
+ spaces = maxNameLen-4;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ treeOutStream<<"father";
+ spaces = 7-6;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ if (distances) {
+ treeOutStream<<"disance to father";
+ treeOutStream<<" ";
+ }
+
+ treeOutStream<<" sons";
+ spaces = maxNameLen-4;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ treeOutStream<<endl;
+
+
+ for (i=0; i<vec.size();++i) {
+ treeOutStream<<vec[i]->name()<<"("<<vec[i]->id()<<")";
+ int len=3; if (vec[i]->id()>=10) len++;if (vec[i]->id()>=100) len++;
+ spaces = maxNameLen-vec[i]->name().size()-len;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ if (vec[i] != _root) {
+ treeOutStream<<vec[i]->father()->name();
+ spaces = 7-vec[i]->father()->name().size();
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+ }
+ else {
+ treeOutStream<<"root!";
+ spaces = 7-5;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+ }
+
+ if ((vec[i] != _root) && distances) {
+ treeOutStream<<vec[i]->dis2father();
+ }
+ //else treeOutStream<<" ";
+
+ for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
+ treeOutStream<<" "<<vec[i]->_sons[j]->name();
+ }
+ treeOutStream<<endl;
+ }
+
+ vec.clear();
+ getAllHTUs(vec,_root );
+
+ for (i=0; i<vec.size();++i) {
+ treeOutStream<<vec[i]->name()<<"("<<vec[i]->id()<<")";
+ int len=3; if (vec[i]->id()>=10) len++;if (vec[i]->id()>=100) len++;
+ spaces = maxNameLen-vec[i]->name().size()-len;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+
+ if (vec[i] != _root) {
+ treeOutStream<<vec[i]->father()->name();
+ spaces = 7-vec[i]->father()->name().size();
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+ }
+ else {
+ treeOutStream<<"root!";
+ spaces = maxNameLen-5;
+ for (k=0;k<spaces;++k) treeOutStream<<" ";
+ }
+
+ if (vec[i] != _root && distances) treeOutStream<<vec[i]->dis2father();
+
+ for (int j=0; j < vec[i]->getNumberOfSons(); j++) {
+ treeOutStream<<" "<<vec[i]->_sons[j]->name();
+ }
+ treeOutStream<<endl;
+ }
+}
+
+//1. remove one of the root's sons. this node is called "toRemove"
+//2. attach the sons of toRemove to the root.
+//toRemove must have 2 sons so that the the root will have 3 sons.
+//3. change the distToFather of the root's other son to be the sum of the distances of the root and its two sons
+//in practice: this func erase the root and makes toRemove the new root
+void tree::rootToUnrootedTree() {
+ if (getRoot()->getNumberOfSons() > 2) return; // tree is already unrooted!
+ if (getLeavesNum() <= 2) return; // Cannot be unrooted if the tree has less than 3 leaves.
+
+ if (getRoot()->getSon(0)->getNumberOfSons() == 0) {
+ tree::nodeP toRemove = getRoot()->getSon(1);
+ getRoot()->getSon(0)->setDisToFather(getRoot()->getSon(1)->dis2father() + getRoot()->getSon(0)->dis2father());
+ getRoot()->setSon(toRemove->getSon(0));
+ for (int k = 1; k < toRemove->getNumberOfSons(); ++k) {
+ getRoot()->setSon(toRemove->getSon(k));
+ }
+ delete toRemove;
+ getRoot()->removeSon(getRoot()->getSon(1));
+ getRoot()->claimSons();
+ }
+ else {
+ tree::nodeP toRemove = getRoot()->getSon(0);
+ getRoot()->getSon(1)->setDisToFather(getRoot()->getSon(0)->dis2father() + getRoot()->getSon(1)->dis2father());
+ getRoot()->setSon(toRemove->getSon(0));
+ for (int k = 1; k < toRemove->getNumberOfSons(); ++k) {
+ getRoot()->setSon(toRemove->getSon(k));
+ }
+ delete toRemove;
+ getRoot()->removeSon(getRoot()->getSon(0));
+ getRoot()->claimSons();
+ }
+ updateNumberofNodesANDleaves();
+}
diff --git a/libs/phylogeny/tree.h b/libs/phylogeny/tree.h
new file mode 100644
index 0000000..fc3abf7
--- /dev/null
+++ b/libs/phylogeny/tree.h
@@ -0,0 +1,208 @@
+// $Id: tree.h 3996 2008-05-13 13:06:16Z adido $
+
+#ifndef ___TREE
+#define ___TREE
+
+#include "definitions.h"
+#include "readTree.h"
+#include "errorMsg.h"
+#include "logFile.h"
+
+
+//***********************************************************************************
+// class tree represents only the topology. It has no MSA and assumes no model of evolution.
+//***********************************************************************************
+
+
+class tree {
+public:
+ // Named branch-length constants; the actual values are defined in tree.cpp
+ // (shown here in the trailing comments for reference).
+ static const MDOUBLE FLAT_LENGTH_VALUE;// = 0.3;
+ static const int TREE_NULL;// = -1;
+ static const MDOUBLE SHORT_LENGTH_VALUE;// = 0.000001f;
+
+//---------------------------- TREE NODE ----------------------
+public:
+ // A single tree node: holds its sons, father, id, name, the branch length
+ // to its father (_dis2father, TREE_NULL until set) and a free-text comment.
+ class TreeNode {
+ public:
+ explicit TreeNode(const int id) :_sons(0),_father(NULL),_id(id),_name( (string)"" ),_dis2father(TREE_NULL),_comment((string)"") {}
+ const int id() const {return _id;}
+ const string name() const {return _name;}
+ const MDOUBLE dis2father() const {return _dis2father;}
+ TreeNode* father() {return _father;}
+ void setName(const string &inS) {_name = inS;}
+ void setID(const int inID) {_id = inID;}
+ void setDisToFather(const MDOUBLE dis) {_dis2father = dis;}
+ void setFather(TreeNode* tn){_father=tn;}
+ int getNumberOfSons() const {return _sons.size();}
+ TreeNode* getSon (int i) {return _sons[i];}
+ TreeNode* getLastSon () {return _sons.back();}
+ void removeLastSon() {_sons.pop_back();}
+ void removeSon(TreeNode* pSon);
+ //setSon: updates only the father pointer to the son!
+ void setSon(TreeNode* pSon) {_sons.push_back(pSon);}
+ void setSon(TreeNode* pSon, int i) {_sons[i]=pSon;} // this will overwrite previous pointer!
+ bool isRoot() const {return (_father == NULL);}
+ // A node is a leaf when it has no sons; a root with exactly one son is
+ // also treated as a leaf.
+ bool isLeaf() const {
+ return (
+ (getNumberOfSons() ==0) ||
+ (isRoot() && (getNumberOfSons() ==1))
+ ) ;
+ }
+ bool isInternal() const {return (!isLeaf());}
+ //claimSons: sets the _father pointer of all sons to (this)
+ //this function is used after setSon has been called without updating the son pointer.
+ void claimSons();
+ void removeAllSons() {_sons.clear();}
+ void copySons(TreeNode* other) {//copy the vector of nodeP only from one node to the other
+ _sons=other->_sons;
+ }
+ void setComment(string comment) {_comment = comment;
+ if (comment.length())
+ LOG(16,<<"comment for "<<_name<<" set to "<<comment<<endl );}
+ const string getComment(void) const {return _comment;}
+ private:
+ vector<TreeNode*> _sons; // owned by the enclosing tree, not by the node
+ TreeNode* _father; // NULL for the root
+ int _id;
+ string _name;
+ MDOUBLE _dis2father; // branch length to _father; TREE_NULL if unset
+ string _comment;
+ friend class tree;
+ };
+//------------------------------------------------------------
+
+
+public:
+ //NEWICK is the standard format
+ //ANCESTOR/ANCESTORID are for debugging purposes: output a list of nodes one for each line.
+ //for each node print the name, dist2father and its sons. id are printed only in ANCESTORID.
+ //PAML is like Newick format but with extra line: #of leaves space and #of trees
+ typedef enum { PHYLIP, ANCESTOR, ANCESTORID, PAML } TREEformats;
+ typedef TreeNode* nodeP;
+
+public:
+//*******************************************************************************
+// constructors
+//*******************************************************************************
+ tree();
+ tree(const string& treeFileName);
+ tree(istream &treeFile);
+ tree(const vector<char>& tree_contents);
+
+ // The isFixed variants also read per-branch constraint flags.
+ tree(const string& treeFileName,vector<char>& isFixed);
+ tree(const vector<char>& tree_contents, vector<char>& isFixed);
+ tree(istream &in, vector<char>& isFixed);
+
+ tree(const tree &otherTree);
+ tree& operator=(const tree &otherTree);
+
+ virtual ~tree() {clear();};
+
+//*******************************************************************************
+// questions on the tree topology
+//*******************************************************************************
+
+ nodeP getRoot() const {return _root;};
+ inline int getLeavesNum() const;
+ inline int getNodesNum() const;
+ inline int getInternalNodesNum() const;
+ //findNodeByName: searches the subtree of myNode for a node with a specified name.
+ //if myNode==NULL: the search starts from the root
+ nodeP findNodeByName(const string inName, nodeP myNode=NULL) const;
+ nodeP findNodeById(const int inId, nodeP myNode=NULL) const;
+ bool withBranchLength() const;
+ //getNeigboursOfNode: stores into neighbourVec the father and sons of myNode
+ void getNeigboursOfNode(vector<nodeP> &neighbourVec, const nodeP myNode) const;
+ void getTreeDistanceTableAndNames(VVdouble& disTab, vector <string>& vNames) const;
+ MDOUBLE findLengthBetweenAnyTwoNodes(const nodeP node1,const nodeP node2) const;
+ //lengthBetweenNodes: find length between two neighbouring nodes only
+ MDOUBLE lengthBetweenNodes(const nodeP i, const nodeP j) const;
+
+ void getPathBetweenAnyTwoNodes(vector<nodeP> &path,const nodeP node1, const nodeP node2) const;
+ void getFromLeavesToRoot(vector<nodeP> &vNeighbourVector) const;
+ void getFromRootToLeaves(vector<nodeP> &vec) const;
+ void getFromNodeToLeaves(vector<nodeP> &vec, const nodeP fromHereDown) const;
+
+ // HTUs are the internal (hypothetical taxonomic unit) nodes.
+ void getAllHTUs(vector<nodeP> &vec,const nodeP fromHereDown) const ;
+ void getAllNodes(vector<nodeP> &vec,const nodeP fromHereDown) const ;
+ void getAllLeaves(vector<nodeP> &vec,const nodeP fromHereDown) const;
+
+//*******************************************************************************
+// change tree topology parameters - should be applied carefully
+//*******************************************************************************
+ //rootAt: sets newRoot as the root. updates the iterator order lists.
+ void rootAt(const nodeP newRoot);
+ void rootToUnrootedTree();
+ void multipleAllBranchesByFactor(const MDOUBLE InFactor);
+ void create_names_to_internal_nodes();
+ void makeSureAllBranchesArePositive();
+ void makeSureAllBranchesAreLargerThanEpsilon(MDOUBLE epsilon);
+
+ // removeNodeFromSonListOfItsFather:
+ // removes sonNode from its father according to the name of sonNode
+ // this function should ONLY be used when sonNode is to be recycled soon!
+ // because this function does not change the number of leaves nor the number of nodes!
+ // nor does it change the father of sonNode.
+ void removeNodeFromSonListOfItsFather(nodeP sonNode);
+
+ void shrinkNode(nodeP nodePTR);
+ //removeLeaf: removes nodePTR from tree. also deletes nodePTR
+ void removeLeaf(nodeP nodePTR);
+ //getAllBranches: returns two vectors such that nodesUp[i] is the father of nodesDown[i]
+ void getAllBranches(vector<nodeP> &nodesUP, vector<nodeP> & nodesDown);
+ //createRootNode: erase the current tree and create a tree with one node.
+ void createRootNode();
+ nodeP createNode(nodeP fatherNode, const int id);
+ // recomputes the cached _nodes/_leaves counters after a topology change
+ void updateNumberofNodesANDleaves();
+
+// **********************************************************
+// initialization
+// **********************************************************
+
+ //createFlatLengthMatrix: sets the distance of all branches to newFlatDistance
+ void createFlatLengthMatrix(const MDOUBLE newFlatDistance = FLAT_LENGTH_VALUE);
+ //recursiveBuildTree: copy the information from other_nodePTR to a new node, and set the father to father_nodePTR
+ //used by treeUtil
+ nodeP recursiveBuildTree(tree::nodeP father_nodePTR,const tree::nodeP other_nodePTR);
+
+//*******************************************************************************
+// Input-Output
+//*******************************************************************************
+ void output(string treeOutFile, TREEformats fmt= PHYLIP,bool withHTU=false) const;
+ void output(ostream& os, TREEformats fmt= PHYLIP,bool withHTU=false) const;
+
+private:
+ void clear();
+
+ // one writer per TREEformats value
+ void outputInAncestorTreeFormat(ostream& treeOutStream, bool withDist = false) const;
+ void outputInPhylipTreeFormat(ostream& treeOutStream,bool withHTU=false) const;
+ void outputInAncestorIdTreeFormat(ostream& treeOutStream, bool withDist = false) const;
+ void outputInPamlTreeFormat(ostream& treeOutStream, bool withHTU = false) const;
+ int print_from(nodeP from_node, ostream& os, bool withHTU) const;
+ int print_from(nodeP from_node, ostream& os, bool withHTU);
+
+ bool readPhylipTreeTopology(istream& in,vector<char>& isFixed); //same as the constructor with file name
+ bool readPhylipTreeTopology(const vector<char>& tree_contents,vector<char>& isFixed);
+ bool readPhylipTreeTopology(istream& in); //same as the constructor with file name
+ bool readPhylipTreeTopology(const vector<char>& tree_contents);
+ nodeP readPart(vector<char>::const_iterator& p_itCurrent, int& nextFreeID, vector<char> & isFixed);
+
+ void getAllHTUsPrivate(vector<nodeP> &vec,nodeP fromHereDown) const ;
+ void getAllNodesPrivate(vector<nodeP> &vec,nodeP fromHereDown) const ;
+ void getAllLeavesPrivate(vector<nodeP> &vec,nodeP fromHereDown) const;
+
+
+protected:
+ TreeNode *_root;
+ int _leaves; // cached leaf count (see updateNumberofNodesANDleaves)
+ int _nodes; // cached total node count
+};
+
+inline int tree::getLeavesNum() const {return _leaves;}
+inline int tree::getNodesNum() const {return _nodes;}
+// internal (HTU) nodes = all nodes minus the leaves
+inline int tree::getInternalNodesNum() const {return getNodesNum() - getLeavesNum();}
+
+// prints the tree to the stream in the default (PHYLIP) format
+ostream &operator<<(ostream &out, const tree &tr);
+
+#endif
+
diff --git a/libs/phylogeny/treeInference.cpp b/libs/phylogeny/treeInference.cpp
new file mode 100644
index 0000000..8a7c53a
--- /dev/null
+++ b/libs/phylogeny/treeInference.cpp
@@ -0,0 +1,16 @@
+// $Id: treeInference.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "treeInference.h"
+#include "likeDist.h"
+#include "distanceTable.h"
+
+// Builds a neighbour-joining tree from likelihood-based pairwise distances.
+// An optional constraint tree and per-position weights may be supplied.
+tree treeInference::computeNJtreeWithLikeDist(const stochasticProcess &sp, const sequenceContainer &sc,
+ const tree * const constraintTreePtr, const vector<MDOUBLE> * const weights) {
+
+ likeDist pairwiseDist(sp, 0.01);
+ VVdouble distanceTable;
+ vector<string> taxonNames;
+ giveDistanceTable(&pairwiseDist, sc, distanceTable, taxonNames, weights);
+ NJalg njBuilder;
+ return njBuilder.computeTree(distanceTable, taxonNames, constraintTreePtr);
+}
+
diff --git a/libs/phylogeny/treeInference.h b/libs/phylogeny/treeInference.h
new file mode 100644
index 0000000..4b8f27d
--- /dev/null
+++ b/libs/phylogeny/treeInference.h
@@ -0,0 +1,26 @@
+// $Id: treeInference.h 962 2006-11-07 15:13:34Z privmane $
+//
+
+// version 1.01
+// last modified 23 May 2005
+
+#ifndef ___TREE_INFERENCE
+#define ___TREE_INFERENCE
+
+#include "definitions.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "nj.h"
+#include <vector>
+using namespace std;
+
+class treeInference {
+public:
+ static tree computeNJtreeWithLikeDist(const stochasticProcess &sp, const sequenceContainer &sc,
+ const tree * const constraintTreePtr = NULL, const vector<MDOUBLE> * const weights = NULL);
+
+};
+#endif
+
+
diff --git a/libs/phylogeny/treeIt.cpp b/libs/phylogeny/treeIt.cpp
new file mode 100644
index 0000000..77aef26
--- /dev/null
+++ b/libs/phylogeny/treeIt.cpp
@@ -0,0 +1,6 @@
+// $Id: treeIt.cpp 962 2006-11-07 15:13:34Z privmane $
+
+#include "definitions.h"
+#include "treeIt.h"
+
+
diff --git a/libs/phylogeny/treeIt.h b/libs/phylogeny/treeIt.h
new file mode 100644
index 0000000..5c8b350
--- /dev/null
+++ b/libs/phylogeny/treeIt.h
@@ -0,0 +1,128 @@
+// $Id: treeIt.h 962 2006-11-07 15:13:34Z privmane $
+
+#ifndef ___TREE_IT
+#define ___TREE_IT
+#include "definitions.h"
+#include "errorMsg.h"
+#include "tree.h"
+
+
+// Pre-order (father before sons) iterator over a mutable tree.
+// _childCheck is an explicit stack: entry d holds the index of the next son
+// still to be visited at depth d along the current root-to-node path.
+class treeIterTopDown{
+public:
+ treeIterTopDown(tree& t) : _t(t) , _current(_t.getRoot()) {
+ _childCheck.push_back(0);
+ }
+ // Resets the traversal; the first node visited is the root itself.
+ tree::nodeP first() {
+ _childCheck.clear();
+ _childCheck.push_back(0);
+ _current = _t.getRoot();
+ return _t.getRoot();
+ }
+ // Returns the next node in pre-order, or NULL when the traversal is done.
+ tree::nodeP next() {
+ if (_childCheck.empty()) return NULL;
+ if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) {
+ // descend into the next unvisited son
+ _current = _current->getSon(_childCheck[_childCheck.size()-1]);
+ _childCheck[_childCheck.size()-1]++;
+ _childCheck.push_back(0);
+ }
+ else {
+ // all sons visited: climb back to the father and continue from there
+ _current = _current->father();
+ _childCheck.pop_back();
+ return next();
+ }
+ return _current;
+ }
+ tree::nodeP operator++(int) {return next();}
+ tree::nodeP operator++() {return next();}
+ tree::nodeP end(){ return NULL;}
+ tree::nodeP operator-> (){ return _current;}
+ tree::TreeNode& operator* (){return *_current;}
+ bool operator!= (tree::nodeP t) {return (t != this->_current);}
+private:
+ vector<int> _childCheck; // per-depth index of the next son to visit
+ tree& _t;
+ tree::nodeP _current;
+};
+
+// Pre-order iterator over a const tree. Same algorithm as treeIterTopDown.
+// NOTE(review): despite the name, it hands out non-const tree::nodeP values,
+// so callers can still mutate nodes — confirm whether that is intended.
+class treeIterTopDownConst{
+public:
+ treeIterTopDownConst(const tree& t) : _t(t) , _current(_t.getRoot()) {
+ _childCheck.push_back(0);
+ }
+ // Resets the traversal; the first node visited is the root itself.
+ tree::nodeP first() {
+ _childCheck.clear();
+ _childCheck.push_back(0);
+ _current = _t.getRoot();
+ return _t.getRoot();
+ }
+ // Returns the next node in pre-order, or NULL when the traversal is done.
+ tree::nodeP next() {
+ if (_childCheck.empty()) return NULL;
+ if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) {
+ // descend into the next unvisited son
+ _current = _current->getSon(_childCheck[_childCheck.size()-1]);
+ _childCheck[_childCheck.size()-1]++;
+ _childCheck.push_back(0);
+ }
+ else {
+ // all sons visited: climb back to the father and continue from there
+ _current = _current->father();
+ _childCheck.pop_back();
+ return next();
+ }
+ return _current;
+ }
+ tree::nodeP operator++(int) {return next();}
+ tree::nodeP operator++() {return next();}
+ tree::nodeP end(){ return NULL;}
+ tree::nodeP operator-> (){ return _current;}
+ tree::TreeNode& operator* (){return *_current;}
+ bool operator!= (tree::nodeP t) {return (t != this->_current);}
+private:
+ vector<int> _childCheck; // per-depth index of the next son to visit
+ const tree& _t;
+ tree::nodeP _current;
+};
+
+// Post-order (sons before father) iterator over a const tree; the traversal
+// therefore starts at a leaf and ends at the root.
+// _childCheck[d] counts how many sons have been handled at depth d; the value
+// getNumberOfSons() means "all sons done, yield this node", and one past that
+// means "this node already yielded, climb to the father".
+class treeIterDownTopConst{
+public:
+ treeIterDownTopConst(const tree& t) : _t(t) , _current(_t.getRoot()) {
+ _childCheck.push_back(0);
+ }
+ // Unlike the top-down iterators, first() delegates to next(), so the first
+ // node returned is the deepest first leaf, not the root.
+ const tree::nodeP first() {
+ _childCheck.clear();
+ _childCheck.push_back(0);
+ _current = _t.getRoot();
+ return next();
+ }
+ // Returns the next node in post-order, or NULL after the root was yielded.
+ const tree::nodeP next() {
+ if (_childCheck[_childCheck.size()-1]>_current->getNumberOfSons()) {//checked
+ // current node already yielded: climb back to its father
+ _current = _current->father();
+ if (!_current) return NULL;
+ _childCheck.pop_back();
+ _childCheck[_childCheck.size()-1]++;
+ return next();
+ }
+ else if (_childCheck[_childCheck.size()-1]<_current->getNumberOfSons()) {
+ // descend into the next unvisited son
+ _current = _current->getSon(_childCheck[_childCheck.size()-1]);
+ _childCheck.push_back(0);
+ return next();
+ }
+// else //if (_childCheck[_childCheck.size()-1]==_current->getNumberOfSons())
+// {
+ // all sons done: mark this node as yielded and return it
+ _childCheck[_childCheck.size()-1]++;
+ return _current;
+// }
+
+// return next();
+ }
+ const tree::nodeP operator++(int) {return next();}
+ const tree::nodeP operator++() {return next();}
+ const tree::nodeP end(){ return NULL;}
+ const tree::nodeP operator-> (){ return _current;}
+ const tree::TreeNode& operator* (){return *_current;}
+ bool operator!= (tree::nodeP t) {return (t != this->_current);}
+private:
+ vector<int> _childCheck; // per-depth count of sons already handled
+ const tree& _t;
+ tree::nodeP _current;
+};
+
+#endif
diff --git a/libs/phylogeny/treeUtil.cpp b/libs/phylogeny/treeUtil.cpp
new file mode 100644
index 0000000..027f756
--- /dev/null
+++ b/libs/phylogeny/treeUtil.cpp
@@ -0,0 +1,348 @@
+// $Id: treeUtil.cpp 6091 2009-04-20 08:31:23Z rubi $
+
+#include "definitions.h"
+#include "treeUtil.h"
+#include "treeIt.h"
+#include <fstream>
+#include <iostream>
+#include <cassert>
+#include <map>
+using namespace std;
+
+// Reads every newick tree found in fileName and returns them in order.
+// A fileName of "-" means: read the trees from standard input instead.
+vector<tree> getStartingTreeVecFromFile(string fileName) {
+ vector<tree> treeVec;
+ ifstream fileStream;
+ istream* streamPtr = &cin; // default: standard input
+ if (fileName != "-"){
+ fileStream.open(fileName.c_str());
+ if (! fileStream.is_open())
+ errorMsg::reportError(string("Error - unable to open tree vector file ")+fileName,1);
+ streamPtr = &fileStream;
+ }
+
+ while (!streamPtr->eof()) {
+ //inputf.eatwhite();// do not remove. Tal: 1.1.2003
+ vector<char> treeChars = PutTreeFileIntoVector(*streamPtr);
+ if (treeChars.size() > 0) {
+ tree parsedTree(treeChars);
+ //LOGDO(5,parsedTree.output(myLog::LogFile()));
+ treeVec.push_back(parsedTree);
+ }
+ }
+ if (fileStream.is_open())
+ fileStream.close();
+ return treeVec;
+}
+
+// Same as the single-argument overload, except that the tree read on the
+// FIRST loop iteration also fills constraintsOfT0 with per-branch flags.
+void getStartingTreeVecFromFile(string fileName,
+ vector<tree>& vecT,
+ vector<char>& constraintsOfT0) {
+ ifstream fileStream;
+ istream* streamPtr = &cin; // default: standard input
+ if (fileName != "-"){
+ fileStream.open(fileName.c_str());
+ if (! fileStream.is_open())
+ errorMsg::reportError(string("Error - unable to open tree vector file ")+fileName,1);
+ streamPtr = &fileStream;
+ }
+ //inputf.eatwhite();
+ for (int iter = 0; !streamPtr->eof(); ++iter) {
+ vector<char> treeChars = PutTreeFileIntoVector(*streamPtr);
+ if (treeChars.size() == 0)
+ continue;
+ if (iter == 0) {
+ tree constrainedTree(treeChars, constraintsOfT0);
+ vecT.push_back(constrainedTree);
+ } else {
+ tree plainTree(treeChars);
+ vecT.push_back(plainTree);
+ }
+ }
+ if (fileStream.is_open())
+ fileStream.close();
+}
+
+
+
+
+
+
+
+#include <algorithm>
+using namespace std;
+
+// Builds, for every node of the (rooted) tree, a canonical string: "(" +
+// the sorted concatenation of its sons' canonical strings + ")", with a
+// leaf contributing its own name. The root's canonical string uniquely
+// identifies the rooted topology. Extracted from the two copy-pasted loops
+// the original sameTreeTolopogy carried.
+static string canonicalTopologyString(const tree& t) {
+ map<int,string> canonical;
+ treeIterDownTopConst tIt(t);
+ for (tree::nodeP node = tIt.first(); node != tIt.end(); node = tIt.next()) {
+ vector<string> childNames;
+ for (int i=0; i < node->getNumberOfSons(); ++i) {
+ childNames.push_back(canonical[node->getSon(i)->id()]);
+ }
+ if (node->getNumberOfSons()==0) childNames.push_back(node->name());
+ sort(childNames.begin(),childNames.end());
+ string res = "(";
+ for (size_t k=0; k < childNames.size(); ++k) {
+ res += childNames[k];
+ }
+ res += ")";
+ canonical[node->id()] = res;
+ }
+ return canonical[t.getRoot()->id()];
+}
+
+// Returns true iff t1 and t2 describe the same topology. Both trees are
+// passed by value because they are re-rooted at the father of a common leaf
+// before their canonical strings are compared.
+bool sameTreeTolopogy(tree t1, tree t2){
+ if (t1.getNodesNum() != t2.getNodesNum()) {
+ errorMsg::reportError("error in function same tree topology (1)");
+ }
+ // find some leaf of t2 and root both trees at that leaf's father
+ tree::nodeP x = t2.getRoot();
+ while (x->getNumberOfSons() > 0) x= x->getSon(0);
+ t1.rootAt(t1.findNodeByName(x->name())->father()); // now they have the same root
+ t2.rootAt(t2.findNodeByName(x->name())->father()); // now they have the same root
+ return canonicalTopologyString(t1) == canonicalTopologyString(t2);
+}
+
+// bigTree is passed by value and not by reference. Therefore, this method doesn't change the original bigTree,
+// but allocates a new bigTree to be split.
+// Renumbers the node ids of t consecutively (0,1,2,...) in pre-order.
+// Extracted from the two copy-pasted loops in the original cutTreeToTwo.
+static void resetNodeIds(tree& t) {
+ treeIterTopDown tIt(t);
+ int newId = 0;
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ mynode->setID(newId);
+ ++newId;
+ }
+}
+
+// Splits bigTree into two subtrees by cutting the branch above the node
+// named nameOfNodeToCut. Returns false if that node is the root, or if
+// either resulting subtree would have fewer than 5 nodes.
+bool cutTreeToTwo(tree bigTree,
+ const string& nameOfNodeToCut,
+ tree &small1,
+ tree &small2){// cutting above the NodeToCut.
+ // step 1: insert a new node ("interNode") on the branch to be cut and
+ // re-root the tree there, so its two sons are the two future halves.
+ tree::nodeP node2splitOnNewTree = bigTree.findNodeByName(nameOfNodeToCut);
+ string interNode = "interNode";
+ if (node2splitOnNewTree->father() == NULL) return(false);
+ tree::nodeP tmp = makeNodeBetweenTwoNodes(bigTree,node2splitOnNewTree->father(),node2splitOnNewTree, interNode);
+ bigTree.rootAt(tmp); // tmp is the interNode and it's now the root of the tree.
+ // step 2: copy each son's subtree into its own tree.
+ cutTreeToTwoSpecial(bigTree,tmp, small1,small2);
+
+ if (small1.getNodesNum() < 5 || small2.getNodesNum() < 5) return (false);
+ LOGDO(15,small1.output(myLog::LogFile(),tree::ANCESTORID));
+ LOGDO(15,small2.output(myLog::LogFile(),tree::ANCESTORID));
+
+ // step 3: drop the artificial interNode leaf from both halves.
+ tree::nodeP toDel1 = small1.findNodeByName(interNode);
+ small1.removeLeaf(toDel1);
+ tree::nodeP toDel2 = small2.findNodeByName(interNode);
+ small2.removeLeaf(toDel2);
+ // step 4: make the ids of both halves dense again.
+ resetNodeIds(small1);
+ resetNodeIds(small2);
+ return (true); // success!
+} // (the stray ';' that followed this function body was removed)
+
+// pre-request:
+// the intermediateNode is the root.
+// and it has two sons.
+// resultT1PTR & resultT2PTR are empty trees (root=NULL);
+// Copies the two subtrees hanging off intermediateNode (which must be the
+// root of source and have exactly two sons) into the two given empty trees.
+void cutTreeToTwoSpecial(const tree& source, tree::nodeP intermediateNode,
+ tree &resultT1PTR, tree &resultT2PTR) {
+ // both output trees must arrive empty
+ if (resultT1PTR.getRoot() != NULL)
+ errorMsg::reportError("got a non empty tree1 in function cutTreeToTwoSpecial");
+ else if (resultT2PTR.getRoot() != NULL)
+ errorMsg::reportError("got a non empty tree2 in function cutTreeToTwoSpecial");
+
+ // the split point must be the root and must have exactly two sons
+ if ((intermediateNode->getNumberOfSons() !=2 ) || (source.getRoot() != intermediateNode)) {
+ errorMsg::reportError("intermediateNode in function cutTreeToTwoSpecial, is not a real intermediate node ");
+ }
+
+ // each result gets a fresh root named after the split point, then a full
+ // copy of the corresponding son's subtree
+ tree* results[2] = { &resultT1PTR, &resultT2PTR };
+ for (int half = 0; half < 2; ++half) {
+ results[half]->createRootNode();
+ results[half]->getRoot()->setName(intermediateNode->name());
+ results[half]->recursiveBuildTree(results[half]->getRoot(), intermediateNode->getSon(half));
+ }
+}
+
+
+
+
+
+//insert a new node between fatherNode and sonNode
+// Inserts a new node named interName in the middle of the branch between
+// fatherNode and sonNode, splitting the branch length in half, and returns
+// the new node. The two arguments may be given in either order.
+tree::nodeP makeNodeBetweenTwoNodes(tree& et,
+ tree::nodeP fatherNode,
+ tree::nodeP sonNode,
+ const string &interName){
+ // normalise the arguments so that fatherNode really is the father
+ if (fatherNode->father() == sonNode) {
+ swap(fatherNode, sonNode);
+ }
+ else if (sonNode->father() != fatherNode) {
+ errorMsg::reportError("Error in function 'cut_tree_in_two'. the two nodes are not neighbours ");
+ }
+
+ // the new node gets the next free id and sits halfway along the branch
+ tree::nodeP newNodePTR = new tree::TreeNode(et.getNodesNum());
+ newNodePTR->setName(interName);
+ const MDOUBLE halfBranch = sonNode->dis2father() * 0.5;
+ newNodePTR->setDisToFather(halfBranch);
+ newNodePTR->setFather(fatherNode);
+ newNodePTR->setSon(sonNode);
+
+ // re-wire the father: drop the old son, adopt the new node instead
+ fatherNode->removeSon(sonNode);
+ fatherNode->setSon(newNodePTR);
+
+ // re-wire the son to hang off the new node
+ sonNode->setFather(newNodePTR);
+ sonNode->setDisToFather(halfBranch);
+ return newNodePTR;
+}
+
+// Returns the names of all leaves of t — i.e. the sequence names.
+vector<string> getSequencesNames(const tree& t){
+ vector<tree::nodeP> leaves;
+ t.getAllLeaves(leaves, t.getRoot());
+ vector<string> names;
+ names.reserve(leaves.size());
+ for (size_t k = 0; k < leaves.size(); ++k) {
+ names.push_back(leaves[k]->name());
+ }
+ return names;
+}
+
+// Builds a star topology: a single root with one leaf per name, every
+// branch set to the default flat length.
+tree starTree(const vector<string>& names) {
+ tree starT;
+ starT.createRootNode();
+ for (size_t k = 0; k < names.size(); ++k) {
+ tree::nodeP leafNode = starT.createNode(starT.getRoot(), starT.getNodesNum());
+ leafNode->setDisToFather(tree::FLAT_LENGTH_VALUE);
+ leafNode->setName(names[k]);
+ }
+ starT.create_names_to_internal_nodes();
+ return starT;
+}
+
+
+// Total tree length: the sum of dis2father over every non-root node,
+// i.e. over every branch of the tree.
+MDOUBLE getSumOfBranchLengths(const tree &t){
+ MDOUBLE total = 0;
+ treeIterDownTopConst tIt(t);
+ for (tree::nodeP node = tIt.first(); node != tIt.end(); node = tIt.next()) {
+ if (node->isRoot()) continue; // the root has no branch above it
+ total += node->dis2father();
+ }
+ return total;
+}
+
+// Sum of the branch lengths on the path from myNode up to the root
+// (iterative version of the original recursion; same result).
+MDOUBLE getDistanceFromNode2ROOT(const tree::nodeP &myNode){
+ MDOUBLE pathLength = 0.0;
+ for (tree::nodeP cur = myNode; !cur->isRoot(); cur = cur->father()) {
+ pathLength += cur->dis2father();
+ }
+ return pathLength;
+}
+
+// Fills Vnames so that Vnames[id] is the name of the node with that id.
+// NOTE(review): the vector is resized to the node COUNT but indexed by node
+// id — this assumes ids form a dense 0..n-1 range; confirm with callers.
+void fillAllNodesNames(Vstring& Vnames,const tree& tr){
+ vector<tree::nodeP> vAllNodes;
+ tr.getAllNodes(vAllNodes,tr.getRoot());
+ Vnames.resize(vAllNodes.size());
+ for (int i = 0; i<vAllNodes.size();++i)
+ Vnames[vAllNodes[i]->id()] = vAllNodes[i]->name();
+}
+
+// Prints the whole tree in newick-like form with per-node values attached
+// as bootstrap-style annotations; the root's value is appended in brackets.
+void printTreeWithValuesAsBP(ostream &out, const tree &tr, Vstring values, VVVdouble *probs, int from, int to) {
+ printTreeWithValuesAsBP(out,tr.getRoot(), values,probs,from,to);
+ out<<"["<<values[tr.getRoot()->id()]<<"];";
+}
+
+// Recursive worker for printTreeWithValuesAsBP: writes the subtree rooted at
+// myNode. When probs is non-NULL, the entry (*probs)[nodeId][from][to] is
+// appended as "_P_<value>:<branch length>" after the node's name.
+// NOTE(review): the two inner loops only ever match the fixed (from,to)
+// pair, so they act as a bounds-checked lookup — presumably the indices are
+// father/son character states; verify against the caller.
+void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs, int from, int to) {
+ int fatherNodeIndex,sonNodeIndex;
+ if (myNode->isLeaf()) {
+ out<< myNode->name();
+ if(probs){
+ for(fatherNodeIndex = 0;fatherNodeIndex < (*probs)[myNode->id()].size();++fatherNodeIndex){
+ for(sonNodeIndex = 0;sonNodeIndex < (*probs)[myNode->id()][fatherNodeIndex].size();++sonNodeIndex){
+ if((from == fatherNodeIndex)&&(to == sonNodeIndex)){
+ out<<"_P_"<<(*probs)[myNode->id()][fatherNodeIndex][sonNodeIndex]<< ":"<<myNode->dis2father();
+ }
+ }
+ }
+ }
+ return;
+ } else {
+ // internal node: print "(son,son,...)" then this node's own annotation
+ out <<"(";
+ for (int i=0;i<myNode->getNumberOfSons();++i) {
+ if (i>0) out <<",";
+ printTreeWithValuesAsBP(out, myNode->getSon(i), values,probs,from,to);
+ }
+ out <<")";
+ if (myNode->isRoot()==false) {
+ out<< myNode->name();
+ if(probs){
+ for(fatherNodeIndex = 0;fatherNodeIndex < (*probs)[myNode->id()].size();++fatherNodeIndex){
+ for(sonNodeIndex = 0;sonNodeIndex < (*probs)[myNode->id()][fatherNodeIndex].size();++sonNodeIndex){
+ if((from == fatherNodeIndex)&&(to == sonNodeIndex)){
+ out<<"_P_"<<(*probs)[myNode->id()][fatherNodeIndex][sonNodeIndex]<< ":"<<myNode->dis2father(); //< "["<<values[myNode->id()]<<"]";
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+// Prints the whole tree in newick form with data[nodeId] attached to each
+// internal node as a bootstrap-style value, terminated by ';'.
+void printDataOnTreeAsBPValues(ostream &out, Vstring &data, tree &tr) {
+ printDataOnTreeAsBPValues(out,data, tr.getRoot());
+ out<<";";
+}
+
+// Recursive worker for printDataOnTreeAsBPValues: writes the subtree rooted
+// at myNode. Leaves print "name:branchLength"; internal nodes print their
+// sons in parentheses followed by data[nodeId] and (except for the root)
+// their branch length.
+void printDataOnTreeAsBPValues(ostream &out, Vstring &data, const tree::nodeP &myNode) {
+ if (myNode->isLeaf()) {
+ out << myNode->name()<< ":"<<myNode->dis2father();
+ return;
+ } else {
+ out <<"(";
+ for (int i=0;i<myNode->getNumberOfSons();++i) {
+ if (i>0) out <<",";
+ printDataOnTreeAsBPValues(out,data,myNode->getSon(i));
+ }
+ out <<")";
+ // NOTE: this permanently lowers the stream's precision to 3 digits for
+ // everything printed afterwards as well (ostream precision is sticky).
+ out.precision(3);
+ out<<data[myNode->id()];
+ if (myNode->isRoot()==false) {
+ out<<":"<<myNode->dis2father();
+ }
+ }
+}
+
+
diff --git a/libs/phylogeny/treeUtil.h b/libs/phylogeny/treeUtil.h
new file mode 100644
index 0000000..5ab0153
--- /dev/null
+++ b/libs/phylogeny/treeUtil.h
@@ -0,0 +1,49 @@
+// $Id: treeUtil.h 6091 2009-04-20 08:31:23Z rubi $
+
+#ifndef ___TREE_UTIL
+#define ___TREE_UTIL
+#include "definitions.h"
+#include "tree.h"
+
+// Reads all newick trees from fileName ("-" means standard input).
+vector<tree> getStartingTreeVecFromFile(string fileName);
+
+// Builds a star topology: one root with a leaf per name.
+tree starTree(const vector<string>& names);
+
+// As above, but the first tree read also fills per-branch constraint flags.
+void getStartingTreeVecFromFile(string fileName,
+ vector<tree>& vecT,
+ vector<char>& constraintsOfT0);
+
+
+// true iff the two trees share the same topology (trees passed by value
+// because they are re-rooted internally).
+bool sameTreeTolopogy(tree t1, tree t2);
+
+// Splits bigTree above the named node into small1/small2; false on failure.
+bool cutTreeToTwo(tree bigTree,
+ const string& nameOfNodeToCut,
+ tree &small1,
+ tree &small2);
+
+// Inserts (and returns) a new node halfway along the branch between the two
+// neighbouring nodes.
+tree::nodeP makeNodeBetweenTwoNodes( tree& et,
+ tree::nodeP nodePTR1,
+ tree::nodeP nodePTR2,
+ const string &interName);
+
+// Copies the two subtrees below intermediateNode (the root, with 2 sons)
+// into the two given empty trees.
+void cutTreeToTwoSpecial(const tree& source,
+ tree::nodeP intermediateNode,
+ tree &resultT1PTR,
+ tree &resultT2PTR);
+
+// Names of all leaves (i.e. the sequence names).
+vector<string> getSequencesNames(const tree& t);
+
+// Sum of all branch lengths.
+MDOUBLE getSumOfBranchLengths(const tree &t);
+
+void printDataOnTreeAsBPValues(ostream &out, Vstring &data, tree &tr) ;
+void printDataOnTreeAsBPValues(ostream &out, Vstring &data, const tree::nodeP &myNode) ;
+
+// Path length from myNode up to the root.
+MDOUBLE getDistanceFromNode2ROOT(const tree::nodeP &myNode);
+// Fills Vnames[id] with the name of the node carrying that id.
+void fillAllNodesNames(Vstring& Vnames,const tree& tr);
+
+void printTreeWithValuesAsBP(ostream &out, const tree &tr, Vstring values, VVVdouble *probs, int from, int to);
+void printTreeWithValuesAsBP(ostream &out, const tree::nodeP &myNode, Vstring values, VVVdouble *probs, int from, int to);
+
+
+#endif
+
diff --git a/libs/phylogeny/trivialAccelerator.h b/libs/phylogeny/trivialAccelerator.h
new file mode 100644
index 0000000..a035288
--- /dev/null
+++ b/libs/phylogeny/trivialAccelerator.h
@@ -0,0 +1,32 @@
+// $Id: trivialAccelerator.h 1925 2007-04-04 16:40:22Z privmane $
+
+#ifndef ___TRIVIAL_ACCELERATOR
+#define ___TRIVIAL_ACCELERATOR
+
+#include "pijAccelerator.h"
+#include "replacementModel.h"
+
+// A pass-through pij "accelerator": every call is forwarded verbatim to an
+// owned clone of the underlying replacement model; nothing is cached.
+class trivialAccelerator : public pijAccelerator {
+public:
+
+ explicit trivialAccelerator(const replacementModel* pb): _pb(pb->clone()) {};
+ trivialAccelerator(const trivialAccelerator& other):_pb(NULL){if (other._pb != NULL) _pb = other._pb->clone();}
+ // Copy assignment added (Rule of Three): the class owns _pb through a raw
+ // pointer, so the compiler-generated assignment would copy the pointer and
+ // cause a double delete / leak.
+ trivialAccelerator& operator=(const trivialAccelerator& other) {
+ if (this != &other) {
+ delete _pb;
+ _pb = (other._pb != NULL) ? other._pb->clone() : NULL;
+ }
+ return *this;
+ }
+ const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {return _pb->Pij_t(i,j,d);}
+ const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{return _pb->dPij_dt(i,j,d);};
+ const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{return _pb->d2Pij_dt2(i,j,d);};
+ const MDOUBLE freq(const int i) const{return _pb->freq(i);}
+ virtual pijAccelerator* clone() const { return new trivialAccelerator(*this);}
+ virtual ~trivialAccelerator() {delete _pb;}
+ virtual const int alphabetSize() const {return _pb->alphabetSize();}
+ virtual replacementModel* getReplacementModel() const {return (_pb);}
+
+private:
+ replacementModel* _pb; // owned clone of the wrapped model
+};
+
+#endif
+
+// There is no distribution in the trivial accelerator. Actually, it's just an interface
+// to the replacement model and it doesn't accelerate anything.
+// Every method returns exactly the corresponding replacementModel method's result.
+
diff --git a/libs/phylogeny/unObservableData.cpp b/libs/phylogeny/unObservableData.cpp
new file mode 100644
index 0000000..c2e5bb4
--- /dev/null
+++ b/libs/phylogeny/unObservableData.cpp
@@ -0,0 +1,82 @@
+#include "unObservableData.h"
+#include "likelihoodComputation.h"
+#include "likelihoodComputationGL.h"
+#include <math.h>
+
+
+using namespace std;
+
+// Builds the container of "unobservable" site patterns (patterns with fewer
+// than minNumOfOnes ones) whose probability the likelihood must be
+// conditioned on, and allocates one likelihood slot per rate category.
+unObservableData::unObservableData(const sequenceContainer& sc,const stochasticProcess* sp ,const gainLossAlphabet alph, const int minNumOfOnes)
+{
+ _scZero.startZeroSequenceContainerGL(sc,alph, minNumOfOnes);
+ _LforMissingDataPerCat.resize(sp->categories());
+}
+
+// Member-wise copy; equivalent to the compiler-generated copy constructor
+// (the class owns no raw resources).
+unObservableData::unObservableData(const unObservableData& other) //const
+{
+ _scZero = other._scZero;
+ _pi = other._pi;
+ _logLforMissingData = other._logLforMissingData;
+ _LforMissingDataPerCat = other._LforMissingDataPerCat;
+}
+// Trivial accessors for the cached "missing data" likelihood terms; the
+// values are only meaningful after a call to setLforMissingData().
+Vdouble* unObservableData::getpLforMissingDataPerCat(){return &_LforMissingDataPerCat;}
+Vdouble unObservableData::getLforMissingDataPerCat(){return _LforMissingDataPerCat;}
+MDOUBLE unObservableData::getlogLforMissingData(){return _logLforMissingData;}
+int unObservableData::getNumOfUnObservablePatterns(){return _scZero.seqLen();}
+
+
+//void unObservableData::setLforMissingData(const tree& _tr, const stochasticProcess* _sp){
+// _pi.fillPij(_tr,*_sp);
+//// NOTE: The "perCat" is out
+// _LforMissingDataPerCat = likelihoodComputation::getLofPosPerCat(0,_tr,_scZero,_pi,*_sp); // L * sp.ratesProb(i)
+// _logLforMissingData = 0;
+// for (int i=0; i < _sp->categories();++i) {
+// _logLforMissingData += _LforMissingDataPerCat[i];
+// }
+// _logLforMissingData = log(_logLforMissingData);
+//}
+
+/********************************************************************************************
+*********************************************************************************************/
+// Recomputes _logLforMissingData = log( sum over all unobservable patterns
+// of P(pattern | tr, sp) ): the probability mass that can never be observed,
+// later used to correct the data likelihood.
+void unObservableData::setLforMissingData(const tree& tr, const stochasticProcess* sp){
+ _pi.fillPij(tr,*sp);
+ _logLforMissingData = 0;
+ for(int pos=0; pos<_scZero.seqLen(); ++pos){
+ // accumulate the (non-log) likelihood of each unobservable pattern
+ _logLforMissingData += convert(likelihoodComputation::getLofPos(pos,tr,_scZero,_pi,*sp));
+ }
+ _logLforMissingData = log(_logLforMissingData);
+}
+/********************************************************************************************
+*********************************************************************************************/
+// Gain/loss-mixture variant: recomputes the missing-data correction when the
+// model is a matrix of stochastic processes over gain/loss distributions.
+// NOTE(review): for each pattern this computes exp( sum_r p(r) * lnL_r ) — a
+// geometric-style mean over rate categories rather than the arithmetic
+// average of the likelihoods; confirm this weighting is intended.
+void unObservableData::setLforMissingData(const tree& tr, const vector<vector<stochasticProcess*> >& spVVec,
+ const distribution* distGain, const distribution* distLoss)
+{
+
+ _logLforMissingData = 0;
+ int numOfRateCategories = spVVec[0][0]->categories();
+ vector<computePijGam> pi_vec(numOfRateCategories);
+ vector<suffStatGlobalGam> ssc_vec(numOfRateCategories);
+ vector<computeUpAlg> cup_vec(numOfRateCategories);
+ likelihoodComputationGL::fillPijAndUp(tr,_scZero, spVVec,distGain,distLoss,pi_vec,ssc_vec,cup_vec);
+
+ // one pass per unobservable pattern
+ for (int k=0; k < _scZero.seqLen(); ++k) {
+ MDOUBLE resGivenRate = 0.0;
+ MDOUBLE lnL = 0;
+ for(int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
+ lnL = log(likelihoodComputationGL::getProbOfPosUpIsFilledSelectionGam(k,//pos,
+ tr,//const tree&
+ _scZero,// sequenceContainer& sc,
+ spVVec, // only needed for sp.freq(let)
+ ssc_vec[rateIndex][k],//const computePijGam& ,
+ distGain, distLoss)); // distributions
+ resGivenRate += lnL * spVVec[0][0]->ratesProb(rateIndex);
+ }
+ _logLforMissingData += exp(resGivenRate);
+ }
+ _logLforMissingData = log(_logLforMissingData);
+ //for(int rateIndex=0 ; rateIndex<numOfRateCategories; ++rateIndex){
+ // _logLforMissingData += likelihoodComputationGL::getTreeLikelihoodFromUp2(tr,_scZero,spVVec,ssc_vec[rateIndex], distGain,distLoss,NULL)
+ // * spVVec[0][0]->ratesProb(rateIndex);
+ //}
+}
+
diff --git a/libs/phylogeny/unObservableData.h b/libs/phylogeny/unObservableData.h
new file mode 100644
index 0000000..8d8dc79
--- /dev/null
+++ b/libs/phylogeny/unObservableData.h
@@ -0,0 +1,45 @@
+#ifndef ___unObservableData___GL
+#define ___unObservableData___GL
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "gainLossAlphabet.h"
+#include "computePijComponent.h"
+
+/********************************************************************************************
+unObservableData
+Holds the likelihood correction for data patterns that can never be observed
+(e.g. all-zero gain/loss patterns): _scZero stores those patterns and
+_logLforMissingData caches log L(unobservable) for the current model/tree.
+*********************************************************************************************/
+class unObservableData{
+public:
+	// Builds the container of unobservable patterns for the given alphabet.
+	// NOTE(review): 'alph' is passed by value (const gainLossAlphabet) -- presumably cheap
+	// to copy, but a const reference would be the conventional signature; confirm.
+	explicit unObservableData(const sequenceContainer& sc,const stochasticProcess* sp ,const gainLossAlphabet alph, const int minNumOfOnes);
+	unObservableData(const unObservableData& other); //const
+	virtual ~unObservableData(){};
+	virtual unObservableData* clone() const {return new unObservableData(*this);}
+	Vdouble* getpLforMissingDataPerCat();
+	Vdouble getLforMissingDataPerCat();
+	MDOUBLE getlogLforMissingData();	// cached log-likelihood of the unobservable patterns
+	int getNumOfUnObservablePatterns();
+	// Recompute the cached correction for a single stochastic process...
+	void setLforMissingData(const tree& _tr, const stochasticProcess* _sp);
+	//void setLforMissingData(const tree& _tr, const stochasticProcess* _sp);
+	// ...or for a gain/loss mixture of processes.
+	void setLforMissingData(const tree& _tr, const vector<vector<stochasticProcess*> >& spVVec, const distribution * distGain, const distribution* distLoss);
+
+
+
+	//MDOUBLE getCorrectedLikelihood(MDOUBLE likePre){return }
+
+
+protected:
+//func
+
+protected:
+//members
+	sequenceContainer _scZero;	// the unobservable site patterns
+	Vdouble _LforMissingDataPerCat;	// used foreach rate category
+	MDOUBLE _logLforMissingData;	// cached log L of _scZero under the current model
+	computePijGam _pi;	// transition probabilities for the current tree/process
+};
+
+
+#endif
diff --git a/libs/phylogeny/uniDistribution.cpp b/libs/phylogeny/uniDistribution.cpp
new file mode 100644
index 0000000..0a0e212
--- /dev/null
+++ b/libs/phylogeny/uniDistribution.cpp
@@ -0,0 +1,11 @@
+// $Id: uniDistribution.cpp 2711 2007-11-19 14:49:54Z itaymay $
+
+#include "uniDistribution.h"
+#include "errorMsg.h"
+
+
+// A uniDistribution has exactly one category by definition; requesting any
+// other count is a programming error and is reported as such.
+void uniDistribution::change_number_of_categories(int in_number_of_categories)
+{
+	if (in_number_of_categories == 1)
+		return;	// already the only legal value - nothing to do
+	errorMsg::reportError("error in uniDistribution::change_number_of_categories() - number of categories is not 1");
+}
diff --git a/libs/phylogeny/uniDistribution.h b/libs/phylogeny/uniDistribution.h
new file mode 100644
index 0000000..2c32c5c
--- /dev/null
+++ b/libs/phylogeny/uniDistribution.h
@@ -0,0 +1,37 @@
+// $Id: uniDistribution.h 2812 2007-11-25 10:32:11Z itaymay $
+
+ // version 2.00
+// last modified 21 Mar 2004
+#ifndef ___UNIFORM_DIST
+#define ___UNIFORM_DIST
+
+#include "distribution.h"
+
+/***********************************************************
+ This represents a distribution of one line over the value 1:
+		|
+________|________
+		1
+_globalRate represents the rate for two joint genes.
+************************************************************/
+
+// Degenerate "distribution" with a single category at rate _globalRate
+// (probability 1). Used where the rate-distribution interface is required
+// but no among-site rate variation is modelled.
+class uniDistribution : public distribution {
+
+public:
+	uniDistribution() {_globalRate=1;}
+	virtual const int categories() const { return 1;}	// always exactly one category
+	// Errors out unless the requested count is 1 (see .cpp).
+	virtual void change_number_of_categories(int in_number_of_categories);
+	virtual const MDOUBLE rates(const int i) const { return _globalRate;};	// index ignored: single category
+	virtual const MDOUBLE ratesProb(const int i) const { return 1.0;};	// the single category has probability 1
+	virtual distribution* clone() const { return new uniDistribution(*this); }
+	virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;}
+	virtual MDOUBLE getGlobalRate() const{return _globalRate;}
+	// Step function: all probability mass sits at 1.
+	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const {
+		if (x<1.0) return 0.0; else return 1.0;
+	}
+
+	// NOTE(review): public data member -- presumably kept public for legacy
+	// direct access elsewhere in the library; confirm before encapsulating.
+	MDOUBLE _globalRate;
+};
+
+#endif
+
diff --git a/libs/phylogeny/uniformDistribution.cpp b/libs/phylogeny/uniformDistribution.cpp
new file mode 100644
index 0000000..990b48c
--- /dev/null
+++ b/libs/phylogeny/uniformDistribution.cpp
@@ -0,0 +1,64 @@
+// $Id: uniformDistribution.cpp 2712 2007-11-19 14:50:12Z itaymay $
+
+#include "uniformDistribution.h"
+
+
+// Construct a uniform (rectangular) distribution over [lowerBound, upperBound],
+// discretized into numOfCategories equal-probability categories.
+uniformDistribution::uniformDistribution(const int numOfCategories, MDOUBLE lowerBound,
+										 MDOUBLE upperBound) :distribution() {
+	setUniformParameters(numOfCategories, lowerBound, upperBound);
+	_globalRate = 1.0;	// default global rate scaler
+}
+
+
+// Copy constructor: duplicates the discretization state and the global rate.
+uniformDistribution::uniformDistribution(const uniformDistribution& other)
+{
+	_rates       = other._rates;
+	_ratesProb   = other._ratesProb;
+	_globalRate  = other._globalRate;
+	_interval    = other._interval;
+	_upperBound  = other._upperBound;
+	_lowerBound  = other._lowerBound;
+}
+
+
+
+// (Re)discretize the uniform distribution on [lowerBound, upperBound] into
+// number_of_categories equal-width slices. Every category carries probability
+// 1/number_of_categories and is represented by the midpoint of its slice.
+void uniformDistribution::setUniformParameters(const int number_of_categories,
+								MDOUBLE lowerBound, MDOUBLE upperBound){
+	_upperBound = upperBound;
+	_lowerBound = lowerBound;
+
+	_interval = (upperBound - lowerBound) / (number_of_categories + 0.0);
+	_rates.assign(number_of_categories, 0.0);
+	_ratesProb.assign(number_of_categories, 1.0/number_of_categories);
+	// each representative rate is the middle value of its category
+	for (int i = 0; i < number_of_categories; ++i)
+		_rates[i] = _lowerBound + _interval * (i + 0.5);
+}
+
+// Returns the i-th border between categories:
+// getBorder(0) == _lowerBound, getBorder(categories()) == _upperBound.
+MDOUBLE uniformDistribution::getBorder(int i) const {
+	if (i == categories())
+		return _upperBound;
+	return _rates[i] - _interval/2;	// left edge of category i (midpoint minus half a slice)
+}
+
+// P(X <= x) for the continuous uniform on [_lowerBound, _upperBound):
+// 0 below the support, 1 at or above the upper bound, linear in between.
+const MDOUBLE uniformDistribution::getCumulativeProb(const MDOUBLE x) const
+{
+	if (x < _lowerBound)
+		return 0;
+	if (x >= _upperBound)
+		return 1;
+	return (x - _lowerBound) / (_upperBound - _lowerBound);
+}
+
+// Re-discretize into a different number of categories, keeping the same
+// bounds. No-op when the requested count already matches the current one.
+void uniformDistribution::change_number_of_categories(int in_number_of_categories)
+{
+	if (in_number_of_categories != categories())
+		setUniformParameters(in_number_of_categories, _lowerBound, _upperBound);
+}
+
diff --git a/libs/phylogeny/uniformDistribution.h b/libs/phylogeny/uniformDistribution.h
new file mode 100644
index 0000000..59d7f7c
--- /dev/null
+++ b/libs/phylogeny/uniformDistribution.h
@@ -0,0 +1,66 @@
+// $Id: uniformDistribution.h 5807 2009-01-20 09:23:51Z adido $
+
+ // version 2.00
+// last modified 21 Mar 2004
+#ifndef ___FLAT_DIST
+#define ___FLAT_DIST
+
+/************************************************************
+This represents a uniform distribution of one column (rectangular distribution) between
+a (lower_bound) and b (upper_bound)
+
+		|---|
+________|___|_____
+		a   b
+the distribution (or rather (a,b)) is divided into categories (portions of the distribution)
+, where _rates is a vector with the median value for each category. _ratesProb represents
+the probability of each category.
+_globalRate represents the rate for two joint genes.
+************************************************************/
+
+
+#include "definitions.h"
+#include "distribution.h"
+
+// Discretized uniform (rectangular) rate distribution on [_lowerBound, _upperBound].
+class uniformDistribution : public distribution {
+
+public:
+	explicit uniformDistribution(const int numOfCategories, MDOUBLE lowerBound,
+		MDOUBLE upperBound);
+	// Default: empty discretization, global rate 1; call setUniformParameters() before use.
+	explicit uniformDistribution(){_globalRate=1.0;};
+	explicit uniformDistribution(const uniformDistribution& other);
+
+	virtual ~uniformDistribution() {};
+
+	const int categories() const {return _rates.size();}
+	virtual void change_number_of_categories(int in_number_of_categories);
+	// Representative rate of category i, scaled by the global rate.
+	virtual const MDOUBLE rates(const int i) const {return _rates[i]*_globalRate;}
+	virtual const MDOUBLE ratesProb(const int i) const {return _ratesProb[i];}
+	virtual distribution* clone() const { return new uniformDistribution(*this); }
+	virtual void setGlobalRate(const MDOUBLE x) {_globalRate = x;}
+	virtual MDOUBLE getGlobalRate() const {return _globalRate;}
+
+	virtual const MDOUBLE getCumulativeProb(const MDOUBLE x) const;
+	MDOUBLE getBorder(const int i) const ;	//return the ith border. Note: _bonderi[0] = m_lowerLimit, _bondery[categories()] = m_upperLimit
+
+	void setUniformParameters(const int numOfCategories, MDOUBLE lowerBound, MDOUBLE upperBound);
+
+
+
+private:
+	Vdouble _rates;		// midpoint of each category
+	Vdouble _ratesProb;	// probability of each category (all equal: 1/categories)
+	MDOUBLE _globalRate;	// multiplicative scaler applied in rates()
+
+	MDOUBLE _interval;	// width of one category
+	MDOUBLE _upperBound;
+	MDOUBLE _lowerBound;
+};
+
+
+#endif
+
+//TO DO:
+//1. change categories() to numOfCategories()
+
+
diff --git a/libs/phylogeny/ussrvModel.cpp b/libs/phylogeny/ussrvModel.cpp
new file mode 100644
index 0000000..01c2120
--- /dev/null
+++ b/libs/phylogeny/ussrvModel.cpp
@@ -0,0 +1,125 @@
+// $Id: ussrvModel.cpp 962 2006-11-07 15:13:34Z privmane $
+#include "ussrvModel.h"
+
+// Construct the combined USSRV model: deep-copies the base process and the SSRV
+// process, caches alpha from the SSRV gamma distribution, and enforces that the
+// base process uses the identical alpha.
+ussrvModel::ussrvModel(const stochasticProcess& baseSp, const stochasticProcessSSRV& ssrvSp, const MDOUBLE& f)
+: _f(f),_baseSp(NULL),_ssrvSp(NULL)
+{
+	_baseSp = new stochasticProcess(baseSp);
+	_ssrvSp = new stochasticProcessSSRV(ssrvSp);
+
+	// get alpha from sp
+	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel());
+	_alpha = static_cast<gammaDistribution*>(pMulRM->getDistribution())->getAlpha();
+
+	// check that alpha is equal the baseSp alpha
+	// NOTE(review): exact floating-point equality -- both alphas presumably come
+	// from the same source so this holds bit-for-bit, but a tolerance-based
+	// comparison would be more robust; confirm against the callers.
+	MDOUBLE baseSpAlpha = static_cast<gammaDistribution*>(baseSp.distr())->getAlpha();
+	if (_alpha != baseSpAlpha)
+		errorMsg::reportError("Error in the constructor of ussrvModel. alpha of the ssrv stochastic process is different from that of the base model");
+}
+
+// Destructor: releases the deep-copied stochastic processes.
+ussrvModel::~ussrvModel()
+{
+	delete _baseSp;	// delete on a null pointer is a no-op, so no guard is needed
+	delete _ssrvSp;
+}
+
+// Copy constructor: deep-copies both stochastic processes.
+// Bug fix: _alpha was previously left uninitialized here (operator= copies it,
+// but the copy constructor did not), so copies reported a garbage getAlpha().
+ussrvModel::ussrvModel(const ussrvModel& other)
+{
+	_f = other._f;
+	_alpha = other._alpha;
+	_baseSp = new stochasticProcess(*other._baseSp);
+	_ssrvSp = new stochasticProcessSSRV(*other._ssrvSp);
+}
+
+// Assignment operator: deep-copies the processes from 'other'.
+// Bug fix: guard against self-assignment -- without it the two deletes below
+// destroy the very objects we are about to copy from (undefined behavior).
+ussrvModel& ussrvModel::operator=(const ussrvModel& other)
+{
+	if (this == &other)
+		return *this;
+
+	delete _baseSp;	// delete on null is a no-op
+	delete _ssrvSp;
+
+	_f = other._f;
+	_alpha = other._alpha;
+
+	_baseSp = new stochasticProcess(*other._baseSp);
+	_ssrvSp = new stochasticProcessSSRV(*other._ssrvSp);
+
+	return *this;
+}
+
+// Set a new gamma shape parameter alpha on BOTH the SSRV process and the base
+// process (the model invariant is that the two alphas stay equal).
+// Bug fix: validate before mutating. Previously _alpha was assigned even when
+// the new value was negative and the update then aborted, leaving the cached
+// _alpha inconsistent with the two processes.
+void ussrvModel::updateAlpha(const MDOUBLE& alpha)
+{
+	if (alpha<0)
+	{
+		LOG(4, << "ussrvModel::updateAlpha , alpha is < 0 " << endl);
+		return;
+	}
+	_alpha = alpha;
+	// update alpha of the ssrv model
+	replacementModelSSRV* pMulRM = static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel());
+	gammaDistribution* gammaDist = static_cast<gammaDistribution*>(pMulRM->getDistribution());
+	gammaDist->setAlpha(alpha);
+	pMulRM->updateQ();	// the rate matrix depends on the rate categories
+
+	// update alpha of the base model
+	(static_cast<gammaDistribution*>(_baseSp->distr()))->setAlpha(alpha);
+}
+
+// Set the SSRV rate-of-rate parameter nu; negative values are rejected with a
+// log message and the model is left untouched.
+void ussrvModel::updateNu(const MDOUBLE& nu)
+{
+	if (nu<0)
+	{
+		LOG(4,<<"ussrvModel::updateNu , nu is < 0 " <<endl);
+		return;
+	}
+	replacementModelSSRV* ssrvRM =
+		static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel());
+	ssrvRM->setRateOfRate(nu);
+}
+
+// Current rate-of-rate (nu) of the SSRV replacement model.
+MDOUBLE ussrvModel::getNu() const
+{
+	replacementModelSSRV* ssrvRM =
+		static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel());
+	return ssrvRM->getRateOfRate();
+}
+
+// Set the probability _f of the SSRV component; values outside [0,1] are
+// rejected with a log message and the model is left untouched.
+void ussrvModel::updateF(const MDOUBLE& f)
+{
+	if ((f<0) || (f>1))
+	{
+		LOG(4,<<"ussrvModel::updateF , f must be between 0 to 1. f is: "<< f << endl);
+		return;
+	}
+	_f=f;
+}
+
+// In order for the branch lengths and the nu parameter to be meaningfull, one must normalize the
+// matrices of both the replacement models (the base model and the ssrv model)
+// so that f*Sigma[i](PiQij) + (1-f)*Sigma[i](P`iQ`ij) = 1 (for i!=j)
+// where Q and P belong to the ssrv model, P` and Q` belong to the base model. (Q` doesn't include the rates)
+// The normalization doesn't affect the likelihood.
+// see below for more explanations.
+// Theoretically, we should therefore calculate this weighted sumPijQij (Denote by x), and then:
+// 1) devide nu by x.
+// 2) devide all the rates (of the base model and of the ssrv model) by x.
+// (this could be done using the _globalRate member of the gammaDistribution class)
+// 3) multiply every branch length by x.
+// Instead, we just report x, so that the user can do all this whenever he wishes to.
+
+// Returns the normalization factor x described above: the f-weighted expected
+// substitution rate of the two models combined.
+MDOUBLE ussrvModel::calcNormalizeFactor()
+{
+	// calculate sumPijQij
+	MDOUBLE sumPijQij = 0.0;
+	int i;
+	// of the base model: -dPij_dt(i,i,0) equals Q[i][i] up to sign, hence the -=
+	int baseAlphabetSize = _baseSp->alphabetSize();
+	for (i=0; i < baseAlphabetSize; ++i)
+		sumPijQij-= _baseSp->freq(i) * _baseSp->dPij_dt(i,i,0);
+	sumPijQij*=(1-_f);	// weight of the base (non-SSRV) component
+
+	// of the ssrv model, weighted by f
+	sumPijQij+=_f*static_cast<replacementModelSSRV*>(_ssrvSp->getPijAccelerator()->getReplacementModel())->sumPijQij();
+
+	return sumPijQij;
+}
+
+// This is not done when using normal sp (instead of ussrvModel), since:
+// average(rates)=1 -->
+// (for 2 categories, f=0.5, 1-f =0.5) 0.5*r1*Sigma[i](PiQij) + 0.5*r2*Sigma[i](PiQij) = 1 -->
+// (since (r1+r2)*0.5 = 1) Sigma[i](PiQij) = 1 . This is always true, and taken care of in the readMatrix
+// method.
+
diff --git a/libs/phylogeny/ussrvModel.h b/libs/phylogeny/ussrvModel.h
new file mode 100644
index 0000000..d94ecd8
--- /dev/null
+++ b/libs/phylogeny/ussrvModel.h
@@ -0,0 +1,41 @@
+// $Id: ussrvModel.h 962 2006-11-07 15:13:34Z privmane $
+#ifndef _USSRV_MODEL
+#define _USSRV_MODEL
+
+#include "stochasticProcessSSRV.h"
+#include "stochasticProcess.h"
+#include "errorMsg.h"
+#include "gammaDistribution.h"
+#include "replacementModelSSRV.h"
+#include "logFile.h"
+// ussrvModel couples a standard (base) stochastic process with an SSRV
+// (site-specific rate variation) process. _f is the probability that a site
+// evolves under the SSRV process and 1-_f under the base process; the gamma
+// shape parameter alpha must be identical in both processes.
+// Fix: corrected the misspelled runtime error message in the default
+// constructor ("constractor shold" -> "constructor should").
+class ussrvModel
+{
+public:
+	// Default construction is a programming error: the model needs both processes.
+	explicit ussrvModel(){errorMsg::reportError("This constructor should never be used");}
+	explicit ussrvModel(const stochasticProcess& baseSp, const stochasticProcessSSRV& ssrvSp, const MDOUBLE& f);
+	virtual ~ussrvModel();
+	explicit ussrvModel(const ussrvModel& other);
+	ussrvModel& operator=(const ussrvModel& other);
+//	const int alphabetSize() const ;
+	MDOUBLE getF() const {return _f;}	// probability of the SSRV component
+	MDOUBLE getAlpha() const {return _alpha;}	// shared gamma shape parameter
+	MDOUBLE getNu() const ;	// SSRV rate-of-rate parameter
+	const stochasticProcessSSRV& getSSRVmodel() const {return *_ssrvSp;}
+	const stochasticProcess& getBaseModel() const {return *_baseSp;}
+	int noOfCategor() const {return _baseSp->categories();}	// rate categories of the base process
+	MDOUBLE getCategorProb(int i) const {return _baseSp->distr()->ratesProb(i);}
+
+	void updateF(const MDOUBLE& f);		// rejects values outside [0,1]
+	void updateAlpha(const MDOUBLE& alpha);	// updates both processes; rejects negative values
+	void updateNu(const MDOUBLE& nu);	// rejects negative values
+
+	MDOUBLE calcNormalizeFactor(); // return the factor according to which the model should be normalized.
+
+private:
+	MDOUBLE _f; //probability of SSRV model. The probability of the base model, i.e. no SSRV, is 1-_f .
+	MDOUBLE _alpha; // should be always equal to the _baseSp alpha and the _ssrvSp alpha.
+	stochasticProcess* _baseSp; // for the base model
+	stochasticProcessSSRV* _ssrvSp; // for the SSRV model
+};
+
+#endif // _USSRV_MODEL
diff --git a/libs/phylogeny/wYangModel.cpp b/libs/phylogeny/wYangModel.cpp
new file mode 100644
index 0000000..2df3e81
--- /dev/null
+++ b/libs/phylogeny/wYangModel.cpp
@@ -0,0 +1,96 @@
+#include "wYangModel.h"
+#include "codon.h"
+#include "readDatMatrix.h" // for the normalizeQ function.
+
+// Construct the codon model with uniform (homogeneous) codon frequencies.
+// inW = omega (dN/dS selection factor), inK = kappa (transition/transversion
+// ratio); globalW selects a single shared Q matrix vs per-site omega.
+// Takes a deep copy (clone) of the codon alphabet.
+wYangModel::wYangModel(const MDOUBLE inW, const MDOUBLE inK,bool globalW, codon * coAlph):
+	_w(inW),_k(inK),_globalW(globalW),_coAlpha(NULL){
+	_coAlpha = (codon*)(coAlph->clone());
+	codonUtility::initSubMatrices(*_coAlpha);	// cache codon-pair classifications used by updateQ()
+	homogenousFreq();	// all codons equally frequent
+	_Q.resize(alphabetSize());
+	for (int z=0; z < _Q.size();++z) _Q[z].resize(alphabetSize(),0.0);
+	updateQ();
+}
+
+// Same as the constructor above, but with user-supplied codon frequencies
+// instead of the homogeneous default.
+wYangModel::wYangModel(const MDOUBLE inW, const MDOUBLE inK, const Vdouble& freq,bool globalW, codon * coAlph):
+	_w(inW),_k(inK),_globalW(globalW),_freq(freq),_coAlpha(NULL){
+	_coAlpha = (codon*)(coAlph->clone());
+	_Q.resize(alphabetSize());
+	codonUtility::initSubMatrices(*_coAlpha);	// cache codon-pair classifications used by updateQ()
+	for (int z=0; z < _Q.size();++z) _Q[z].resize(alphabetSize(),0.0);
+	updateQ();
+}
+
+
+// Assignment operator: copies all parameters and deep-copies the codon alphabet.
+// Bug fix: guard against self-assignment -- without it, `delete _coAlpha`
+// frees the alphabet and the subsequent clone() reads freed memory (UB).
+wYangModel& wYangModel::operator=(const wYangModel &other) {
+	if (this == &other)
+		return *this;
+	_w = other._w;
+	_k = other._k;
+	_q2pt = other._q2pt;
+	_Q = other._Q;
+	_globalW = other._globalW;
+	_freq = other._freq;
+	if (_coAlpha) delete _coAlpha;
+	if (other._coAlpha)
+		_coAlpha = (codon*)(other._coAlpha->clone());
+	else
+		_coAlpha = NULL;
+	return *this;
+
+}
+
+
+
+// Rebuild the codon rate matrix Q from the current parameters:
+// off-diagonal rate (i->j, single-nucleotide changes only) is
+//   kappa (_k) if the change is a transition, 1 if a transversion,
+//   multiplied by omega (_w) when the change is non-synonymous,
+//   and by the target codon frequency _freq[j]; multi-nucleotide
+//   changes get rate 0. Diagonal entries make each row sum to zero.
+// Finally the eigen-decomposition (_q2pt) is refreshed.
+void wYangModel::updateQ() {
+	int i,j;
+	MDOUBLE sum=0.0;
+	for (i=0; i < _Q.size();++i) {
+		for (j=i+1; j < _Q.size();++j) {	// fill the symmetric part pairwise
+			MDOUBLE val;
+			if (codonUtility::codonReplacement(i,j) == codonUtility::non_synonymous) {
+				if (codonUtility::codonDiff(i,j) == codonUtility::tr) val = _k*_w;
+				else if (codonUtility::codonDiff(i,j) == codonUtility::tv) val = _w;
+				else val = 0;//more than one substitution.
+			}
+			else {//synonymous
+				if (codonUtility::codonDiff(i,j) == codonUtility::tr) val = _k;
+				else if (codonUtility::codonDiff(i,j) == codonUtility::tv) val = 1;
+				else val = 0;//more than one substitution.
+			}
+			_Q[i][j] = val * _freq[j];
+			_Q[j][i] = val * _freq[i];
+		}
+		_Q[i][i] = 0.0; //temporary value, excluded from the row sum below
+	}
+	// filling the diagonal: Q[i][i] = -sum of the rest of row i
+	for (i=0; i < _Q.size(); ++i){
+		sum = 0.0;
+		for (j=0; j < _Q.size(); ++j) {
+			sum += _Q[i][j];
+		}
+		_Q[i][i] = -sum;
+	}
+	// with a global omega there is a single Q, so it can be normalized here;
+	// with per-site omega the normalization is handled externally (see norm()).
+	if (_globalW == true) // w is not distributed, only one Q matrix
+		normalizeQ(_Q,_freq);
+
+	_q2pt.fillFromRateMatrix(_freq,_Q);
+}
+
+
+// Multiply every entry of the rate matrix by 'scale' and rebuild the
+// eigen-decomposition used for Pij_t computations.
+void wYangModel::norm(MDOUBLE scale){
+	for (int row = 0; row < _Q.size(); ++row) {
+		for (int col = 0; col < _Q.size(); ++col) {
+			_Q[row][col] *= scale;
+		}
+	}
+	_q2pt.fillFromRateMatrix(_freq,_Q);
+}
+
+
+// Expected substitution rate of the model: sum over states of
+// freq(i) * (-Q[i][i]), i.e. the frequency-weighted total leaving rate.
+MDOUBLE wYangModel::sumPijQij(){
+	MDOUBLE total = 0.0;
+	for (int i=0; i < _Q.size(); ++i) {
+		total -= _Q[i][i]*_freq[i];	// diagonal entries are negative
+	}
+	return total;
+}
diff --git a/libs/phylogeny/wYangModel.h b/libs/phylogeny/wYangModel.h
new file mode 100644
index 0000000..77602bf
--- /dev/null
+++ b/libs/phylogeny/wYangModel.h
@@ -0,0 +1,59 @@
+#ifndef _W_YANG_MODEL
+#define _W_YANG_MODEL
+
+#include "replacementModel.h"
+#include "fromQtoPt.h"
+#include "codon.h"
+
+
+// Yang-style codon replacement model parameterized by omega (_w, selection)
+// and kappa (_k, transition/transversion ratio); see updateQ() in the .cpp
+// for how the rate matrix is built from these and the codon frequencies.
+class wYangModel : public replacementModel {
+public:
+	explicit wYangModel(const MDOUBLE inW, const MDOUBLE inK,bool globalW, codon * coAlpha);
+	explicit wYangModel(const MDOUBLE inW, const MDOUBLE inK, const Vdouble& freq,bool globalW, codon *coAlpha);
+	// Copy via operator= ; _coAlpha must be nulled first so operator= does not delete garbage.
+	explicit wYangModel(const wYangModel &other): _coAlpha(NULL) {(*this) = other;}
+	virtual wYangModel& operator=(const wYangModel &other);
+	virtual wYangModel* clone() const { return new wYangModel(*this); }
+	virtual ~wYangModel() {
+		if (_coAlpha)
+			delete _coAlpha;
+	}
+
+	const int alphabetSize() const {return _freq.size();}
+	// Transition probability i->j over branch length d (via the eigen-decomposition).
+	const MDOUBLE Pij_t(const int i,const int j, const MDOUBLE d) const {
+		return _q2pt.Pij_t(i,j,d);
+	}
+	const MDOUBLE dPij_dt(const int i,const int j, const MDOUBLE d) const{
+		return _q2pt.dPij_dt(i,j,d);
+	}
+	const MDOUBLE d2Pij_dt2(const int i,const int j, const MDOUBLE d) const{
+		return _q2pt.d2Pij_dt2(i,j,d);
+	}
+	const MDOUBLE freq(const int i) const {return _freq[i];};
+	// Setters rebuild Q (and its decomposition) immediately.
+	void setK(const MDOUBLE newK) { _k = newK; updateQ();}
+	void setW(const MDOUBLE newW) { _w = newW;updateQ();}
+	// Reset to uniform codon frequencies (1/alphabetSize each).
+	void homogenousFreq(){ _freq.erase(_freq.begin(),_freq.end()),_freq.resize(alphabetSize(),1.0/alphabetSize());}
+
+	MDOUBLE getK() const {return _k;}
+	MDOUBLE getW() const {return _w;}
+
+	MDOUBLE getQij(const int i,const int j)const {return _Q[i][j];}
+	void setGlobalW(bool globalW){_globalW = globalW;}
+	void norm(MDOUBLE scale);	// scale Q and refresh the decomposition
+	MDOUBLE sumPijQij();		// expected substitution rate of the model
+private:
+	void updateQ();	// rebuild Q from _w, _k, _freq (see .cpp)
+
+
+private:
+
+	MDOUBLE _w; //selection factor.
+	MDOUBLE _k; // Tr/Tv ratio.
+	q2pt _q2pt;	// eigen-decomposition of Q, serves Pij_t and derivatives
+	VVdouble _Q;	// codon rate matrix
+	bool _globalW; //false when compute w per site
+	Vdouble _freq;	// codon equilibrium frequencies
+	codon *_coAlpha;	// owned deep copy of the codon alphabet
+};
+
+
+#endif
diff --git a/libs/phylogeny/wag.dat.q b/libs/phylogeny/wag.dat.q
new file mode 100644
index 0000000..ad798a3
--- /dev/null
+++ b/libs/phylogeny/wag.dat.q
@@ -0,0 +1,42 @@
+" "
+" 0.551571 "
+" 0.509848 0.635346 "
+" 0.738998 0.147304 5.429420 "
+" 1.027040 0.528191 0.265256 0.0302949 "
+" 0.908598 3.035500 1.543640 0.616783 0.0988179 "
+" 1.582850 0.439157 0.947198 6.174160 0.021352 5.469470 "
+" 1.416720 0.584665 1.125560 0.865584 0.306674 0.330052 0.567717 "
+" 0.316954 2.137150 3.956290 0.930676 0.248972 4.294110 0.570025 0.249410 "
+" 0.193335 0.186979 0.554236 0.039437 0.170135 0.113917 0.127395 0.0304501 0.138190 "
+" 0.397915 0.497671 0.131528 0.0848047 0.384287 0.869489 0.154263 0.0613037 0.499462 3.170970 "
+" 0.906265 5.351420 3.012010 0.479855 0.0740339 3.894900 2.584430 0.373558 0.890432 0.323832 0.257555 "
+" 0.893496 0.683162 0.198221 0.103754 0.390482 1.545260 0.315124 0.174100 0.404141 4.257460 4.854020 0.934276 "
+" 0.210494 0.102711 0.0961621 0.0467304 0.398020 0.0999208 0.0811339 0.049931 0.679371 1.059470 2.115170 0.088836 1.190630 "
+" 1.438550 0.679489 0.195081 0.423984 0.109404 0.933372 0.682355 0.243570 0.696198 0.0999288 0.415844 0.556896 0.171329 0.161444 "
+" 3.370790 1.224190 3.974230 1.071760 1.407660 1.028870 0.704939 1.341820 0.740169 0.319440 0.344739 0.967130 0.493905 0.545931 1.613280 "
+" 2.121110 0.554413 2.030060 0.374866 0.512984 0.857928 0.822765 0.225833 0.473307 1.458160 0.326622 1.386980 1.516120 0.171903 0.795384 4.378020 "
+" 0.113133 1.163920 0.0719167 0.129767 0.717070 0.215737 0.156557 0.336983 0.262569 0.212483 0.665309 0.137505 0.515706 1.529640 0.139405 0.523742 0.110864 "
+" 0.240735 0.381533 1.086000 0.325711 0.543833 0.227710 0.196303 0.103604 3.873440 0.420170 0.398618 0.133264 0.428437 6.454280 0.216046 0.786993 0.291148 2.485390 "
+" 2.006010 0.251849 0.196246 0.152335 1.002140 0.301281 0.588731 0.187247 0.118358 7.821300 1.800340 0.305434 2.058450 0.649892 0.314887 0.232739 1.388230 0.365369 0.314730 "
+" 0.0866279 0.043972 0.0390894 0.0570451 0.0193078 0.0367281 0.0580589 0.0832518 0.0244313 0.048466 0.086209 0.0620286 0.0195027 0.0384319 0.0457631 0.0695179 0.0610127 0.0143859 0.0352742 0.0708956 "
+" A R N D C Q E G H I L K M F P S T W Y V "
+" Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val "
+" "
+" Symmetrical part of the rate matrix and aa frequencies, "
+" estimated from 3905 globular protein amino acid sequences forming 182 "
+" protein families. "
+" The first part above indicates the symmetric 'exchangeability' "
+" parameters, where s_ij = s_ji. The s_ij above are not scaled, but the "
+" PAML package will perform this scaling. "
+" The second part gives the amino acid frequencies (pi_i) "
+" estimated from the 3905 sequences. The net replacement rate from i to "
+" j is Q_ij = s_ij*pi_j. "
+" Prepared by Simon Whelan and Nick Goldman, September 2000. "
+" Citation: "
+" Whelan, S. and N. Goldman. In press. A general empirical model of "
+" protein evolution derived from multiple protein families using "
+" a maximum likelihood approach. Molecular Biology and "
+" Evolution. "
+" See the following reference for notation used here: "
+" Yang, Z., R. Nielsen and M. Hasegawa. 1998. Models of amino acid substitution and "
+" applications to mitochondrial protein evolution. Mol. Biol. Evol. 15:1600-1611. "
diff --git a/manifests/trustyvm.pp b/manifests/trustyvm.pp
new file mode 100644
index 0000000..79a34eb
--- /dev/null
+++ b/manifests/trustyvm.pp
@@ -0,0 +1,11 @@
+# Puppet manifest for the Ubuntu trusty build VM: installs the Debian
+# packaging toolchain and compilers needed to build the fastml package.
+package { "dh-make":
+	ensure => "installed"
+	}
+
+# Compiler and packaging helper tools.
+package { ["gcc", "build-essential", "pkg-config", "devscripts"]:
+	ensure => "installed"
+	}
+
+# English locale, so builds do not fail on missing locale settings.
+package {"language-pack-en":
+	ensure => "installed"
+	}
diff --git a/programs/Makefile.generic b/programs/Makefile.generic
new file mode 100644
index 0000000..27d6544
--- /dev/null
+++ b/programs/Makefile.generic
@@ -0,0 +1,244 @@
+# this looks better in -*- Makefile -*- mode
+# $Id: Makefile.generic 3917 2008-04-22 08:23:04Z cohenofi $
+
+DEBUGEXEC = $(EXEC:=.debug)
+
+
+#TEST_EXEC_SUB =
+TEST_EXEC = $(addprefix tests/,$(TEST_EXEC_SUB))
+
+ifdef LIBNAME
+ ifneq ($(LIBNAME),"")
+ LIB = lib$(LIBNAME).a
+ endif
+endif
+DEBUGLIB = $(LIB:.a=Debug.a)
+DOUBLEREPLIB = $(LIB:.a=DoubleRep.a)
+
+all: lib $(EXEC)
+
+#CC=g++
+CXX=g++
+CC=$(CXX)
+
+libDir=../../libs/phylogeny
+binDir=../../bin
+
+ifndef libEvol
+ libEvol=$(libDir)/libEvolTree.a
+ #libEvol=-lEvolTree
+ libEvolDebug=$(libDir)/libEvolTreeDebug.a
+ libEvolDoubleRep=$(libDir)/libEvolTreedoubleRep.a
+endif
+
+vpath % $(libDir)
+
+
+#CPPFLAGS+= -I/usr/include/g++-v3
+
+LDFLAGS += -L$(libDir)
+
+#LDLIBS = -lEvolTree
+#debug: LDLIBS = -lEvolTreeDebug
+# LOADLIBES = $(LIB)
+
+#LDFLAGS=
+#CPPFLAGS+= -DLOG -DLOGCLS -DMEMCHK
+
+
+#GENGETOPT=/cs/++/phd/ninio/gengetopt-2.11/src/gengetopt
+#GENGETOPT = /opt/local/bin/gengetopt
+#GENGETOPT = ~privmane/code/gengetopt
+GENGETOPT = gengetopt
+
+.SECONDARY: $(addsuffix _cmdline.c,$(EXEC)) $(addsuffix _cmdline.h,$(EXEC)) $(addsuffix .ggo,$(EXEC))
+
+CPPFLAGS= -O3 -Wall -Wno-sign-compare -I. -I$(libDir) -DLOG -ftemplate-depth-32
+CPPFLAGSDEBUG= -g -Wall -Wno-sign-compare -I. -I$(libDir) -DLOG -ftemplate-depth-32
+
+LDFLAGSDEBUG := $(LDFLAGS) -g
+# sources
+sources= $(Libsources) $(LibCsources) $(addsuffix .cpp,$(EXEC) $(TEST_EXEC))
+
+.PHONY: tests lib test debug %.debug DOUBLEREP doubleRep
+
+ifdef DOUBLEREP
+CPPFLAGS+= -DDOUBLEREP
+CPPFLAGSDEBUG += -DDOUBLEREP
+LDFLAGSDEBUG += -DDOUBLEREP
+endif
+
+test: all tests
+ +cd tests && make -k
+
+debug: $(DEBUGLIB) $(DEBUGEXEC)
+
+debug: CPPFLAGS = $(CPPFLAGSDEBUG)
+#debug: LDLIBS = -lEvolTreeDebug
+debug: LIB = $(DEBUGLIB)
+# debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -I$(libDir) -DLOG
+# debug: all
+
+
+
+
+#$(libEvol) le:
+# +cd $(libDir);make -f Makefile all
+
+#$(libEvolDebug):
+# +cd $(libDir);make -f Makefile debug
+
+lib: $(LIB)
+
+#lib$(LIBNAME).a: lib$(LIBNAME).a($(Libsources:.cpp=.o) $(LibCsources:.c=.o))
+lib$(LIBNAME).a: $(Libsources:.cpp=.o) $(LibCsources:.c=.o)
+ ar rv $@ $?
+ ranlib $@
+
+tags: *.cpp *.h
+ etags --members --language=c++ $^
+EVOLLIB=-lEvolTree
+libEvolDebug=-lEvolTreeDebug
+libEvolDoubleRep=-lEvolTreeDoubleRep
+
+debug: EVOLLIB=$(libEvolDebug)
+
+ifdef LIBNAME
+# LocalLib = -l$(LIBNAME)
+ LocalLib = lib$(LIBNAME).a
+endif
+
+#$(EXEC): LDLIBS += $(EVOLLIB)
+#$(EXEC) $(TEST_EXEC): $(LIB) #$(EVOLLIB)
+#$(EXEC) $(TEST_EXEC): $(LIB) $(EVOLLIB)
+$(EXEC) $(TEST_EXEC): $(LocalLib) $(libEvol)
+$(DEBUGEXEC) $(TEST_EXEC): $(DEBUGLIB) $(libEvolDebug)
+
+tests: $(TEST_EXEC) $(EXEC)
+
+-include make.dep
+
+install: $(addprefix $(binDir)/,$(EXEC))
+$(binDir)/%: %
+ cp $< $@
+
+
+
+clean:
+ -rm -f $(LIB) $(DEBUGLIB) $(DOUBLEREPLIB) $(EXEC) $(TEST_EXEC) $(DEBUGEXEC) $(DOUBLEREPEXEC) *.o
+
+
+ifneq ($(wildcard make.dep), make.dep)
+ make.dep: depend
+endif
+
+
+depend makedep: _make.dep
+ @mv -f _make.dep make.dep
+
+_make.dep: $(sources)
+ @echo making depend
+# $(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed '\''s/\($*\)\.o[ :]*/\1.o $@ : /g'\'' > $@ ; [ -s $@ ] || rm -f $@'
+# @$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ > $@'
+ @$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $^ | sed "s/\(^[^.]*\)\.o/\1.o \1.debug.o/g" > $@'
+
+_fast:
+ +cd fast && make -k all
+
+fast.% _fast.%:
+ +cd fast && make -k $(*)
+
+$(libEvol):
+ +cd $(libDir)&&make -f Makefile all
+
+$(libEvolDebug):
+ +cd $(libDir)&&make -f Makefile debug
+
+define ggo_template
+ifeq ($(wildcard $(1).ggo), $(1).ggo)
+ $(1): $(1)_cmdline.o
+endif
+endef
+
+ $(foreach exec,$(EXEC),$(eval $(call ggo_template,$(exec))))
+
+#$(EXEC): $(addsuffix _cmdline.o,$(EXEC))
+
+define ggo_template_debug
+ $(1).debug: $(1)_cmdline.debug.o
+endef
+
+$(foreach exec,$(EXEC),$(eval $(call ggo_template_debug,$(exec))))
+
+define ggo_template_doublerep
+ifeq ($(wildcard $(1).ggo), $(1).ggo)
+ $(1).doubleRep: $(1)_cmdline.o
+endif
+endef
+
+ $(foreach exec,$(EXEC),$(eval $(call ggo_template_doublerep,$(exec))))
+
+#$(addsuffix .debug,$(EXEC)): $(addsuffix _cmdline.debug.o,$(EXEC))
+
+%.ggo: %.args $(libDir)/evolObjs.args
+ cat $^ > $@
+
+
+# commandline (gengetopts)
+%_cmdline.h %_cmdline.c: %.ggo
+ $(GENGETOPT) -i$< -F$(*)_cmdline
+
+
+debug: CPPFLAGS = $(CPPFLAGSDEBUG)
+debug: $(addsuffix .debug,$(EXEC))
+#$(addsuffix .debug,$(EXEC)): $(libEvolDebug)
+pl:
+ echo $(LIB)
+
+
+%.debug: CPPFLAGS = -g -Wall -Wno-sign-compare -I. -I../.. -DLOG -ftemplate-depth-25
+
+%.debug: %.o
+
+
+
+#debug: LDLIBS = -lEvolTreeDebug
+debug: LIB = $(DEBUGLIB)
+
+%.debug: CPPFLAGS = $(CPPFLAGSDEBUG)
+%.debug: LDFLAGS = $(LDFLAGSDEBUG)
+#%.debug: %
+# @echo "made \""$(*)"\" in debug mode"
+
+
+%.debug.o: %.c
+ $(CC) -c $(CPPFLAGSDEBUG) $(CFLAGS) $< -o $@
+
+%.debug.o: %.cpp
+ $(CXX) -c $(CPPFLAGSDEBUG) $(CXXFLAGS) $< -o $@
+
+#$(DEBUGLIB): $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o)
+
+lib$(LIBNAME)Debug.a: $(Libsources:.cpp=.debug.o) $(LibCsources:.c=.debug.o)
+ ar rv $@ $?
+ ranlib $@
+
+DOUBLEREPEXEC = $(EXEC:=.doubleRep)
+
+doubleRep: LOGREP=t
+doubleRep: CPPFLAGS+= -DLOGREP
+doubleRep: $(DOUBLEREPLIB) $(DOUBLEREPEXEC)
+# echo $@
+$(DOUBLEREPEXEC): $(DOUBLEREPLIB) $(libEvolDoubleRep)
+
+%.doubleRep.o: %.c
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
+
+%.doubleRep.o: %.cpp
+ $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
+
+$(DOUBLEREPLIB): $(Libsources:.cpp=.doubleRep.o) $(LibCsources:.c=.doubleRep.o)
+ ar rv $@ $?
+ ranlib $@
+
+# DO NOT DELETE
diff --git a/programs/fastml/Makefile b/programs/fastml/Makefile
new file mode 100644
index 0000000..e2a35d0
--- /dev/null
+++ b/programs/fastml/Makefile
@@ -0,0 +1,18 @@
+#! /usr/local/bin/gmake
+# $Id: Makefile 1215 2006-11-28 15:53:23Z osnatz $
+
+# In order to compile with doubleRep run make like this: make doubleRep
+
+# C++ sources compiled into lib$(LIBNAME).a and linked into the executable
+# (see ../Makefile.generic for the shared build rules).
+Libsources= fastml.cpp bbAlg.cpp bbComputeDownAlg.cpp bbComputeUpAlg.cpp bbEvaluateSpecificAV.cpp bbfindBestAVDynProg.cpp bbNodeOrderAlg.cpp bb_options.cpp bbReport.cpp computeMarginalReconstruction.cpp jointNoGamma.cpp mainbb.cpp sequenceDataDiff.cpp suffStatComponentJointNoGamma.cpp
+
+#Libsources=
+# Name of the static library built from Libsources (lib$(LIBNAME).a).
+LIBNAME = fastml
+
+# LibCsources= cmdline.c
+# LibCsources += getopt.c getopt1.c
+
+# Name of the executable target produced by the generic rules.
+EXEC = fastml
+
+
+
+# Shared compile/link/debug/doubleRep machinery.
+include ../Makefile.generic
diff --git a/programs/fastml/bbAlg.cpp b/programs/fastml/bbAlg.cpp
new file mode 100644
index 0000000..b40d4d0
--- /dev/null
+++ b/programs/fastml/bbAlg.cpp
@@ -0,0 +1,258 @@
+#include "bbAlg.h"
+#include "computeUpAlg.h"
+#include "likelihoodComputation.h"
+#include "maseFormat.h"
+#include <cmath>
+
+bbAlg::bbAlg(const tree& et,
+ vector<stochasticProcess> &spVec,
+ const sequenceContainer& sc,
+ const bbAlg::boundMethod boundType,
+ const string& reportFileName,
+ const MDOUBLE computeAgainExactTreshold,
+ const distribution * forceDistr) :
+ _reportFileName(reportFileName),
+ BandBReportAllPos1(reportFileName,et.getInternalNodesNum()*spVec[0].alphabetSize()*sc.seqLen()),
+ _et(et), _spVec(spVec), _sc(sc)
+{
+ cout<<"in bbAlg"<<endl;
+ _boundMethod = boundType;
+ _alphabetSize=_spVec[0].alphabetSize();
+ _seqLen=_sc.seqLen();
+ if (_spVec.size()>1) {//w codon model + gamma special case
+ _cpij._V.resize(forceDistr->categories());
+ for (int i=0; i < _spVec.size(); ++i)
+ _cpij._V[i].fillPij(_et,_spVec[i]);
+ _spVec[0].setDistribution(forceDistr);//update the first process with gamma distr
+	//for all the functions that need the number of categories and the category probabilities
+ }
+ else{
+ cout<<"no codon model"<<endl;
+ _cpij.fillPij(_et,_spVec[0]);
+ }
+
+ _bbesavp1 = new bbEvaluateSpecificAV(_et,_spVec[0],_sc,_cpij);
+
+ _bbNodeOrderAlg1 = new bbNodeOrderAlg(_et,_spVec[0],_sc,_cpij,computeAgainExactTreshold);
+ cout<<"after bbNodeOrderAlg"<<endl;
+ _bbfindBestAVDynProg1 = new bbfindBestAVDynProg(&_et,&_spVec[0],_sc,&_cpij);
+ cout<<"after bbfindBestAVDynProg"<<endl;
+ sequence tmp(_sc.getAlphabet());
+ const int startingVal = -2;
+ tmp.resize(_seqLen,&startingVal);
+ cout<<"after resize"<<endl;
+ _internalSequences.resize(_et.getNodesNum(),tmp);
+ cout<<"after _internalSequences resize"<<endl;
+ _bestReconstruction.resize(_et.getNodesNum(),tmp);
+ cout<<"afetr _bestReconstruction resize"<<endl;
+
+}
+
+void bbAlg::outputTheJointProbAtEachSite(const string & outputFileProbJoint) {
+ ofstream jointProbOutput(outputFileProbJoint.c_str());
+ MDOUBLE totalLogLikelihood =0;
+ for (int j=0; j < _jointL.size(); ++j) {
+ totalLogLikelihood+=log(_jointL[j]);
+ jointProbOutput<<"Joint log likelihood of position "<<j+1;// j+1 so that positions start from 1, and not from 0.
+ jointProbOutput<<": "<<log(_jointL[j])<<endl;
+ }
+ jointProbOutput<<"total log likelihood of joint reconstruction: "<<totalLogLikelihood<<endl;
+ jointProbOutput.close();
+}
+
+MDOUBLE bbAlg::bbReconstructAllPositions(sequenceContainer& res){
+ cout<<"in bbAlg::bbReconstructAllPositions"<<endl;
+ MDOUBLE sumLogLikelihood=0;
+ computePijGam cpij;
+ cout<<"Gamma model. Branch and Bound.\nReconstructing position: ";
+ _jointL.clear();
+ for (int i=0 ; i < _seqLen ; ++i) {
+ fillProbOfPosition(i);
+ _bbReport = new BandBReport(_reportFileName,i,_spVec[0].alphabetSize());
+ MDOUBLE tmp = bbReconstructPositions(i);
+ _jointL.push_back(tmp);
+ assert(tmp>0);
+ sumLogLikelihood+=log(tmp);
+ if (_reportFileName!="") {
+ if (_bbReport->size()>20*_et.getInternalNodesNum()) {
+ _bbReport->makeReport();
+ } else if (_bbReport->size()<20*_et.getInternalNodesNum()) {
+ errorMsg::reportError("error in function bbReconstructAllPositions");
+ }
+ BandBReportAllPos1.totalNumberOfNodeVisited += _bbReport->size();
+ }
+ delete _bbReport;
+ }
+ res = fromAncestralSequenceToSeqData(); // returning the ancestral sequences
+ BandBReportAllPos1.printReport();
+ return sumLogLikelihood;
+}
+
+MDOUBLE bbAlg::bbReconstructPositions(const int pos){
+ _bestRecord=0;
+ return bbReconstructPositions(pos,1); // 1 - start the first node in the search tree.
+
+}
+
+MDOUBLE bbAlg::bbReconstructPositions(const int pos,
+ const int nodeNum) {
+ tree::nodeP node2check=NULL;
+ vector<int> charOrder;
+ doubleRep exactVal=0;
+ if (nodeNum == 1) {
+ _bbNodeOrderAlg1->getNextNodeAndCharOrder( node2check,
+ charOrder,
+ _internalSequences,
+ pos,
+ true,
+ exactVal);
+ }
+ else {
+ _bbNodeOrderAlg1->getNextNodeAndCharOrder( node2check,
+ charOrder,
+ _internalSequences,
+ pos,
+ false,
+ exactVal);
+ }
+ int k;
+ for (k = 0; k < charOrder.size(); k++) {
+ _internalSequences[node2check->id()][pos] = charOrder[k];
+ bool haveToGoDown=false;
+ if (nodeNum<_et.getInternalNodesNum()) {
+ MDOUBLE boundSigma,boundMax;
+ haveToGoDown =decideIfHaveToGoDown(pos,boundSigma,boundMax);
+ _bbReport->report( node2check->name(),
+ charOrder[k],
+ nodeNum,
+ _bestRecord/_pOfPos,
+ 0.00,
+ boundSigma/_pOfPos,
+ boundMax/_pOfPos);
+ };
+ if (haveToGoDown == true) {
+ bbReconstructPositions(pos,(nodeNum+1));
+ }
+
+
+ if (nodeNum==_et.getInternalNodesNum()) {
+ MDOUBLE tmp = _bbesavp1->evaluateSpecificAv(pos,&_internalSequences);
+ if (tmp > _bestRecord) {
+ vector<tree::nodeP> allNodes;
+ _et.getAllHTUs(allNodes,_et.getRoot());
+ for (int j = 0 ; j < allNodes.size(); j++) {
+ _bestReconstruction[allNodes[j]->id()][pos]=_internalSequences[allNodes[j]->id()][pos];
+ }
+ _bestRecord = tmp;
+ }
+ _bbReport->report( node2check->name(),
+ charOrder[k],
+ nodeNum,
+ _bestRecord/_pOfPos,
+ tmp/_pOfPos,
+ 0.0,
+ 0.0);
+ }
+ }
+
+ _internalSequences[node2check->id()][pos] = -2;
+ _bbNodeOrderAlg1->putBack(node2check,exactVal);
+ return _bestRecord;
+}
+
+
+
+bbAlg::~bbAlg() { delete _bbNodeOrderAlg1;
+ delete _bbesavp1;
+ delete _bbfindBestAVDynProg1;}
+
+void bbAlg::fillProbOfPosition(const int pos) {
+
+ _pOfPos = likelihoodComputation::getLofPos(pos,_et,_sc,_cpij,_spVec[0]);
+}
+
+
+
+sequenceContainer bbAlg::fromAncestralSequenceToSeqData() {
+ int j=0;
+ sequenceContainer sD;
+ for (j=0; j < _sc.numberOfSeqs(); ++j) {
+ sD.add(_sc[j]);
+ }
+ vector<tree::nodeP> HTUs;
+ _et.getAllHTUs(HTUs,_et.getRoot());
+ for (j=0; j < HTUs.size(); ++j) {
+ sequence tmpSeq(_sc.getAlphabet());
+ for (int pos=0; pos<_seqLen;++pos) {
+ tmpSeq.push_back(_bestReconstruction[HTUs[j]->id()][pos]);
+ }
+ tmpSeq.setID(sD.numberOfSeqs());
+ tmpSeq.setName(HTUs[j]->name());
+ sD.add(tmpSeq);
+ }
+ return sD;
+}
+
+
+
+
+
+bool bbAlg::decideIfHaveToGoDown(const int pos,
+ MDOUBLE& boundSigma,
+ MDOUBLE& boundMax) const {
+//---------------------------------------------------------------------
+// checkBoundSigma and checkBoundMax return true, if we have to go down
+// in the search tree. This is also the output of this function.
+// i.e., the bound is always an upper bound on the results.
+// it is compared with the best score so far, i.e., the lower bound,
+// and if the upper bound < lower bound, then there is no need to go down.
+// When the two bounds are used,
+// it is enough that one is false to indicate no need to go down.
+//---------------------------------------------------------------------
+
+ bool acor1 = false;
+ bool acor2 = false;
+ switch (_boundMethod) {
+ case max: return checkBoundMax(pos,boundMax);
+ break;
+ case sum: return checkBoundSigma(pos,boundSigma);
+ break;
+ case both:
+ acor1 = checkBoundSigma(pos,boundSigma);
+ acor2 = checkBoundMax(pos,boundMax);
+
+// if ((acor1 == true) && (acor2 == false)) {
+// cerr<<"max is better"<<endl;
+// } else if ((acor2 == true) && (acor1 == false)) {
+// cerr<<"sum is better"<<endl;
+// }
+ return (acor1 && acor2);
+ break;
+ default: errorMsg::reportError("Error in function decideIfHaveToGoDown");
+ }
+
+ errorMsg::reportError("Error in function decideIfHaveToGoDown");
+ return true;
+}
+
+bool bbAlg::checkBoundSigma(const int pos,
+ MDOUBLE& inBoundSigma) const {
+ inBoundSigma = _bbesavp1->evaluateSpecificAv(pos,&_internalSequences);
+ if (inBoundSigma < _bestRecord) return false;
+ else return true;
+}
+
+bool bbAlg::checkBoundMax(const int pos, MDOUBLE& inboundMax) const {
+ // to make
+ inboundMax = 0.0;
+// MDOUBLE rate;
+ for (int rateCategor=0; rateCategor < _spVec[0].categories(); rateCategor++) {
+ inboundMax+= (
+ _bbfindBestAVDynProg1->evaluateSpecificAvDP(pos,&_internalSequences,rateCategor)*
+ _spVec[0].ratesProb(rateCategor));
+ }
+ if (inboundMax < _bestRecord) return false;
+ else return true;
+}
+
+
diff --git a/programs/fastml/bbAlg.h b/programs/fastml/bbAlg.h
new file mode 100644
index 0000000..9e71db7
--- /dev/null
+++ b/programs/fastml/bbAlg.h
@@ -0,0 +1,67 @@
+#if !defined ___BB__ALG__
+#define ___BB__ALG__
+
+#include "computePijComponent.h"
+#include "bbNodeOrderAlg.h"
+#include "bbEvaluateSpecificAV.h"
+#include "bbfindBestAVDynProg.h"
+#include "bbReport.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "distribution.h"
+
+class bbAlg {
+public:
+ enum boundMethod {max,sum,both};
+ explicit bbAlg( const tree& et,
+ vector<stochasticProcess> &spVec,
+ const sequenceContainer &sc,
+ const boundMethod boundType,
+ const string& reportFileName,
+ const MDOUBLE computeAgainExactTreshold,
+ const distribution * forceDistr);
+ virtual ~bbAlg();
+ MDOUBLE bbReconstructAllPositions(sequenceContainer& res);
+ sequenceContainer fromAncestralSequenceToSeqData();
+ void outputTheJointProbAtEachSite(const string & outputFileProbJoint);
+
+private:
+ const tree& _et;
+ vector<stochasticProcess> &_spVec;
+ const sequenceContainer& _sc;
+ bbEvaluateSpecificAV* _bbesavp1;
+ computePijGam _cpij;
+ bbNodeOrderAlg* _bbNodeOrderAlg1;
+ bbfindBestAVDynProg* _bbfindBestAVDynProg1;
+
+ boundMethod _boundMethod;
+
+ int _alphabetSize;
+ int _seqLen;
+ MDOUBLE _bestRecord; // for 1 position. =0 when new pos is started...
+ Vdouble _jointL; // the likelihood of the reconstruction, per position.
+ void fillProbOfPosition(const int pos);
+ MDOUBLE bbReconstructPositions(const int pos);
+ MDOUBLE bbReconstructPositions(const int pos, const int nodeNum);
+
+ vector<sequence> _bestReconstruction; // the sequences (nodes * seqLen)
+ vector<sequence> _internalSequences; // the sequences (nodes * seqLen)
+
+ bool decideIfHaveToGoDown(const int pos,
+ MDOUBLE& boundSigma,
+ MDOUBLE& boundMax) const;
+ bool checkBoundSigma(const int pos,
+ MDOUBLE& inBoundSigma) const;
+ bool checkBoundMax(const int pos, MDOUBLE& inboundMax) const;
+
+
+// reporting:
+ BandBReport* _bbReport; // report per position.
+ BandBReportAllPos BandBReportAllPos1; // report for all positions.
+ const string& _reportFileName;
+ doubleRep _pOfPos;
+
+};
+
+
+#endif
diff --git a/programs/fastml/bbComputeDownAlg.cpp b/programs/fastml/bbComputeDownAlg.cpp
new file mode 100644
index 0000000..7aa28c2
--- /dev/null
+++ b/programs/fastml/bbComputeDownAlg.cpp
@@ -0,0 +1,191 @@
+#include "bbComputeDownAlg.h"
+#include "seqContainerTreeMap.h"
+
+void BBfillComputeDown(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalHomPos& ssc,
+ const suffStatGlobalHomPos& cup,
+ const vector<sequence>& ancS){
+ ssc.allocatePlace(et.getNodesNum(), pi.alphabetSize());
+ treeIterTopDownConst tIt(et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ int letter,letterInFather,bro,letterInSon;
+ if (mynode->father()==NULL) {// if root
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ ssc.set(mynode->id(),letter,1.0);
+ }
+ mynode = tIt.next(); //continue
+ }
+ tree::nodeP fatherNode=mynode->father();
+ const int n_bro=fatherNode->getNumberOfSons();
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ if ((ancS[mynode->father()->id()][pos]!=-2)&&(ancS[mynode->father()->id()][pos]!=letter)){
+ ssc.set(mynode->id(),letter,0);
+ continue;
+ } // this if takes care of internal node assignments...
+
+ doubleRep totalProb=1.0;
+ doubleRep fatherTerm=0;
+ if (fatherNode->father()!=NULL) {
+ for(letterInFather=0; letterInFather<pi.alphabetSize();letterInFather++)
+ fatherTerm += pi.getPij(fatherNode->id(),letter,letterInFather)*
+ ssc.get(fatherNode->id(),letterInFather);
+ }
+ else {
+ fatherTerm=1.0;
+ }
+ doubleRep brotherTerm=1.0;
+ for(bro = 0; bro < n_bro; bro++) {
+ tree::nodeP brother = fatherNode->getSon(bro);
+ if (brother != mynode) {
+ doubleRep tmp_bro=0.0;
+ for(letterInSon=0; letterInSon<pi.alphabetSize();letterInSon++) {
+ tmp_bro+=pi.getPij(fatherNode->getSon(bro)->id(),letter,letterInSon)*
+ cup.get(brother->id(),letterInSon);
+ }
+ brotherTerm *=tmp_bro;
+ }
+ }
+ totalProb = fatherTerm * brotherTerm;
+ ssc.set(mynode->id(),letter,totalProb);
+ }
+ }
+}
+/*
+const evolTree* bbComputeDownAlg::_et=NULL;
+const stochasticProcess* bbComputeDownAlg::_sp=NULL;
+const suffStatComponent* bbComputeDownAlg::_cup=NULL;
+const computePij* bbComputeDownAlg::_cpij=NULL;
+suffStatComponent* bbComputeDownAlg::_ssc=NULL;
+const vector<sequence>* bbComputeDownAlg::_ancS = NULL;
+
+void bbComputeDownAlg::bbFillComputeDown(const evolTree* et,
+ const stochasticProcess* sp,
+ const suffStatComponent* cup,
+ const computePij* cpij,
+ suffStatComponent* ssc,
+ vector<sequence>* ancS) {
+
+
+ _et=et;_sp=sp;_cup=cup;_cpij=cpij, _ssc=ssc;_ancS=ancS;
+ _ssc->resize(et->iNodes());
+ if (_ssc->size()>0)
+		if ((*_ssc)[0].isEmpty()==true) {// allocating memory for the pij(t)...
+ for (vector<suffStatComponent::suffStatComponentCell>::iterator it=ssc->_suffCellVec.begin();
+ it !=ssc->_suffCellVec.end();++it) {
+ it->allocatePlace(_et->seqLen(),
+ _sp->categories(),_et->alphabetSize());
+ }
+ }
+ recursiveFillDown(_et->iRoot());
+}
+
+void bbComputeDownAlg::bbFillComputeDownForOnePos(const evolTree* et,
+ const stochasticProcess* sp,
+ const suffStatComponent* cup,
+ const computePij* cpij,
+ suffStatComponent* ssc,
+ vector<sequence>* ancS,
+ const int pos) {
+
+
+ _et=et;_sp=sp;_cup=cup;_cpij=cpij, _ssc=ssc;_ancS=ancS;
+ _ssc->resize(et->iNodes());
+ if (_ssc->size()>0)
+		if ((*_ssc)[0].isEmpty()==true) {// allocating memory for the pij(t)...
+ for (vector<suffStatComponent::suffStatComponentCell>::iterator it=ssc->_suffCellVec.begin();
+ it !=ssc->_suffCellVec.end();++it) {
+ it->allocatePlace(_et->seqLen(),
+ _sp->categories(),_et->alphabetSize());
+ }
+ }
+ recursiveFillDownPos(_et->iRoot(),pos);
+}
+
+void bbComputeDownAlg::recursiveFillDownPos(const evolTree::NodeP& mynode,
+ const int pos) {
+ fillDownNodePos(mynode,pos);
+ for (vector<evolTree::nodeP>::iterator i=mynode->sons.begin(); i != mynode->sons.end();++i) {
+ recursiveFillDownPos(*i,pos);
+ }
+}
+
+void bbComputeDownAlg::recursiveFillDown(const evolTree::NodeP& mynode) {
+ fillDownNode(mynode);
+ for (vector<evolTree::nodeP>::iterator i=mynode->sons.begin(); i != mynode->sons.end();++i) {
+ recursiveFillDown(*i);
+ }
+}
+
+void bbComputeDownAlg::fillDownNode(
+ const evolTree::NodeP& mynode) {
+ for(int pos=0; pos<_et->seqLen();pos++) fillDownNodePos(mynode,pos);
+}
+
+void bbComputeDownAlg::fillDownNodePos(
+ const evolTree::NodeP& mynode,
+ const int pos) {
+
+ int rateCategor,letter,letter_in_father,bro,letter_in_son;
+ if (mynode->father==NULL) {// if root
+ for (rateCategor = 0; rateCategor<_sp->categories(); ++rateCategor) {
+ for(letter=0; letter<_et->alphabetSize();letter++) {
+ (*_ssc)[mynode->id()].set(pos,rateCategor,letter,1.0);
+ }
+ }
+ return;
+ }
+ for (rateCategor = 0; rateCategor<_sp->categories(); ++rateCategor) {
+ evolTree::NodeP father_node=mynode->father;
+ const int n_bro=father_node->sons.size();
+ for(letter=0; letter<_et->alphabetSize();letter++) {//alpha
+ assert(_ancS != NULL);
+ //------------------------------------------------------
+ if (((*_ancS)[mynode->father->id()][pos]!=letter) &&
+ ((*_ancS)[mynode->father->id()][pos]!=-2)) {
+ (*_ssc)[mynode->id()].set(pos,rateCategor,letter,0);
+ continue;
+ } // this if takes care of internal node assignments...
+ //------------------------------------------------------
+
+ MDOUBLE total_prob=1.0;
+ MDOUBLE father_term=0;
+ if (father_node->father!=NULL) {
+ for(letter_in_father=0; letter_in_father<_et->alphabetSize();letter_in_father++)
+ father_term += _cpij->getPij(father_node->id(),letter,letter_in_father,rateCategor)*
+ (*_ssc)[father_node->id()].get(pos,rateCategor,letter_in_father);
+ }
+ else {
+ father_term=1.0;
+ }
+ MDOUBLE brother_term=1.0;
+ for(bro=0;bro<n_bro;bro++) {
+ evolTree::NodeP brother=father_node->sons[bro];
+ if (brother != mynode) {
+ MDOUBLE tmp_bro=0.0;
+ for(letter_in_son=0; letter_in_son<_et->alphabetSize();letter_in_son++) {
+ tmp_bro+=_cpij->getPij(
+ father_node->sons[bro]->id(),
+ letter,
+ letter_in_son,rateCategor)*
+ _cup->get(brother->id(),
+ pos,
+ rateCategor,
+ letter_in_son);
+ }
+ brother_term *=tmp_bro;
+ }
+ }
+ total_prob = father_term * brother_term;
+ (*_ssc)[mynode->id()].set(pos,rateCategor,letter,total_prob);
+ }
+ }
+}
+*/
+
+
+
+
+
diff --git a/programs/fastml/bbComputeDownAlg.h b/programs/fastml/bbComputeDownAlg.h
new file mode 100644
index 0000000..9aa40af
--- /dev/null
+++ b/programs/fastml/bbComputeDownAlg.h
@@ -0,0 +1,23 @@
+#ifndef ___BB_COMPUTE_DOWN_ALG__
+#define ___BB_COMPUTE_DOWN_ALG__
+
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include "sequence.h"
+#include <vector>
+using namespace std;
+
+void BBfillComputeDown(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalHomPos& ssc,
+ const suffStatGlobalHomPos& cup,
+ const vector<sequence>& ancS);
+
+
+
+#endif
+
diff --git a/programs/fastml/bbComputeUpAlg.cpp b/programs/fastml/bbComputeUpAlg.cpp
new file mode 100644
index 0000000..7261298
--- /dev/null
+++ b/programs/fastml/bbComputeUpAlg.cpp
@@ -0,0 +1,46 @@
+#include "bbComputeUpAlg.h"
+#include "seqContainerTreeMap.h"
+
+void BBfillComputeUp(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalHomPos& ssc,
+ const vector<sequence>& ancS) {
+
+ seqContainerTreeMap sctm(sc,et);
+
+ ssc.allocatePlace(et.getNodesNum(),pi.alphabetSize());
+ treeIterDownTopConst tIt(et);
+ for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+ int letter;
+ if (mynode->isLeaf()) {
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ const int seqID = sctm.seqIdOfNodeI(mynode->id());
+ MDOUBLE val = sc.getAlphabet()->relations(sc[seqID][pos],letter);
+ ssc.set(mynode->id(),letter,val);
+ }
+ }
+ else {
+ for(letter=0; letter<pi.alphabetSize();letter++) {
+ if ((ancS[mynode->id()][pos]!=-2) && // if there is already assignments for this node
+ (ancS[mynode->id()][pos]!=letter)) {
+ ssc.set(mynode->id(),letter,0);
+ continue;
+ } // this if takes care of internal node assignments...
+
+
+ doubleRep total_prob=1.0;
+ for(int i=0; i < mynode->getNumberOfSons();++i){
+ doubleRep prob=0.0;
+ for(int letInSon=0; letInSon<pi.alphabetSize();letInSon++) {
+ prob += ssc.get(mynode->getSon(i)->id(), letInSon)*
+ pi.getPij(mynode->getSon(i)->id(),letter,letInSon);
+ }
+ total_prob*=prob;
+ }
+ ssc.set(mynode->id(),letter,total_prob);
+ }
+ }
+ }
+}
diff --git a/programs/fastml/bbComputeUpAlg.h b/programs/fastml/bbComputeUpAlg.h
new file mode 100644
index 0000000..c9bf72e
--- /dev/null
+++ b/programs/fastml/bbComputeUpAlg.h
@@ -0,0 +1,26 @@
+#ifndef ___BB_COMPUTE_UP_ALG__
+#define ___BB_COMPUTE_UP_ALG__
+
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+
+// the only difference from computeUpAlg is that here char assignments to
+// internal nodes are taken into account while calculating compute up.
+
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include "sequence.h"
+#include <vector>
+using namespace std;
+
+void BBfillComputeUp(const tree& et,
+ const sequenceContainer& sc,
+ const int pos,
+ const computePijHom& pi,
+ suffStatGlobalHomPos& ssc,
+ const vector<sequence>& ancS);
+
+#endif
+
diff --git a/programs/fastml/bbEvaluateSpecificAV.cpp b/programs/fastml/bbEvaluateSpecificAV.cpp
new file mode 100644
index 0000000..58735b6
--- /dev/null
+++ b/programs/fastml/bbEvaluateSpecificAV.cpp
@@ -0,0 +1,113 @@
+#include "bbEvaluateSpecificAV.h"
+
+bbEvaluateSpecificAV::bbEvaluateSpecificAV(const tree& et,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const computePijGam& cpij) : _et(et), _sp(sp), _sc(sc), _bbcpij(cpij) {
+ _sctm = new seqContainerTreeMap(_sc,_et);
+
+ _alphabetSize=_sc.alphabetSize();
+ _Lvec.resize(_et.getNodesNum());
+ for (int i=0; i < _Lvec.size(); ++i ) {
+ _Lvec[i].resize(_alphabetSize);
+ }
+}
+
+bbEvaluateSpecificAV::~bbEvaluateSpecificAV() {
+ delete _sctm;
+}
+
+MDOUBLE bbEvaluateSpecificAV::evaluateSpecificAv(
+ const int pos,
+ const vector<sequence>* ancestralSequences) {
+ _ancss = ancestralSequences;
+ return recursiveEvaluateSpecificAv(pos,_et.getRoot());
+}
+
+MDOUBLE bbEvaluateSpecificAV::recursiveEvaluateSpecificAv(
+ const int pos,
+ const tree::nodeP thisNode) {
+
+ MDOUBLE res=0.0;
+ for (int rateCategor=0;rateCategor<_sp.categories();rateCategor++) {
+ res += (
+ recursiveEvaluateSpecificAv(pos,thisNode,rateCategor)*
+ _sp.ratesProb(rateCategor)
+ );
+ }
+ return res;
+}
+
+MDOUBLE bbEvaluateSpecificAV::recursiveEvaluateSpecificAv(const int pos,
+ const tree::nodeP thisNode,
+ const int categor) {
+
+ int letterInNode;
+ if (thisNode->isLeaf() ) {
+ const int seqID = _sctm->seqIdOfNodeI(thisNode->id());
+ letterInNode = _sc[seqID][pos];
+ for (int k = 0; k < _alphabetSize ; ++k) { // taking care of ? by the -2 64 - for codons...
+ if ((letterInNode==-2) || (letterInNode==-1)||(letterInNode==64) ||(letterInNode==k)) _Lvec[thisNode->id()][k] = 1.0;
+ else _Lvec[thisNode->id()][k] = 0.0;
+ }
+ return 0.0;
+ }
+
+ for (int i = 0 ; i < thisNode->getNumberOfSons() ; ++i ) {// recursive call for the childs
+ recursiveEvaluateSpecificAv(pos,thisNode->getSon(i),categor);
+ }
+
+ letterInNode = (*_ancss)[thisNode->id()][pos];
+ if (letterInNode == -2) {// internal node with asterix.
+ for (int y = 0 ; y < _alphabetSize ; ++y) {
+ MDOUBLE rate = _sp.rates(categor); // the r.
+ _Lvec[thisNode->id()][y] = 1.0;
+ for (int u = 0 ; u < thisNode->getNumberOfSons() ; ++u) {
+ MDOUBLE tmp = 0;
+ for (int letInSon = 0 ; letInSon<_alphabetSize; ++letInSon) {
+ tmp+=(
+ _bbcpij.getPij(categor,thisNode->getSon(u)->id(),y,letInSon)*
+ _Lvec[thisNode->getSon(u)->id()][letInSon]
+ );
+ }
+ _Lvec[thisNode->id()][y] *= tmp;
+
+ }
+ }
+ }
+
+ else { // if the character in the HTU is known (not an asterix)
+ for (int w = 0 ; w < _alphabetSize ; ++w) {
+ if (w != letterInNode) _Lvec[thisNode->id()][w] = 0.0;
+ else {
+// MDOUBLE rate = _myStoc_proc.rates(categor); // the r.
+ _Lvec[thisNode->id()][w] = 1.0;
+ for (int z = 0 ; z < thisNode->getNumberOfSons() ; ++z) {
+ MDOUBLE tmp = 0;
+ for (int letInSon = 0 ; letInSon<_alphabetSize; ++letInSon) {
+ tmp += (
+ _bbcpij.getPij(categor,thisNode->getSon(z)->id(),w,letInSon)*
+ _Lvec[thisNode->getSon(z)->id()][letInSon]
+ );
+ }
+ _Lvec[thisNode->id()][w] *= tmp;
+ }
+ }// end of else
+ }
+ }
+
+ MDOUBLE result= 0.0;
+ if (thisNode->father() == NULL){ // tree root
+
+ for (int letRoot = 0 ; letRoot < _alphabetSize; ++letRoot) {
+ result += _sp.freq(letRoot) * _Lvec[thisNode->id()][letRoot];
+ }
+ }
+ return result;
+
+}
+
+
+
+
+
diff --git a/programs/fastml/bbEvaluateSpecificAV.h b/programs/fastml/bbEvaluateSpecificAV.h
new file mode 100644
index 0000000..a143533
--- /dev/null
+++ b/programs/fastml/bbEvaluateSpecificAV.h
@@ -0,0 +1,51 @@
+#if !defined ___BB__EVALUATE_SPECIFIC_AV__
+#define ___BB__EVALUATE_SPECIFIC_AV__
+
+#include "bb_options.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include "sequence.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "tree.h"
+#include "seqContainerTreeMap.h"
+
+#include <vector>
+using namespace std;
+
+class bbEvaluateSpecificAV {
+
+public:
+ explicit bbEvaluateSpecificAV(
+ const tree& et,
+ const stochasticProcess& sp,
+ const sequenceContainer& sc,
+ const computePijGam& cpij);
+ virtual ~bbEvaluateSpecificAV();
+
+ MDOUBLE evaluateSpecificAv( const int pos,
+ const vector<sequence>* ancestralSequences);
+private:
+ const tree& _et;
+ const stochasticProcess& _sp;
+ const computePijGam& _bbcpij;
+ int _alphabetSize;
+ int _pos;
+ const sequenceContainer& _sc;
+ seqContainerTreeMap * _sctm;
+
+
+ const vector<sequence>* _ancss;
+
+ MDOUBLE recursiveEvaluateSpecificAv(
+ const int pos,
+ const tree::nodeP thisNode);
+
+ MDOUBLE recursiveEvaluateSpecificAv(const int pos,
+ const tree::nodeP thisNode,
+ const int categor);
+ VVdouble _Lvec; // inodes * letter
+
+};
+
+#endif
diff --git a/programs/fastml/bbNodeOrderAlg.cpp b/programs/fastml/bbNodeOrderAlg.cpp
new file mode 100644
index 0000000..382eda6
--- /dev/null
+++ b/programs/fastml/bbNodeOrderAlg.cpp
@@ -0,0 +1,134 @@
+#include "bbNodeOrderAlg.h"
+#include "bbComputeUpAlg.h"
+#include "bbComputeDownAlg.h"
+#include "computeMarginalAlg.h"
+#include <algorithm>
+using namespace std;
+
+bbNodeOrderAlg::bbNodeOrderAlg(const tree& et,
+ const stochasticProcess &sp,
+ const sequenceContainer& sc,
+ const computePijGam& cpij,
+ const MDOUBLE computeAgainExactTreshold) :_et(et),_sp(sp),_sc(sc),_cpij(cpij){
+ _alphabetSize=_sp.alphabetSize();
+ _computeAgainExactTreshold = computeAgainExactTreshold;
+ cupbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize());
+ cdownbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize());
+ cmarginalbb.allocatePlace(sp.categories(),et.getNodesNum(),sp.alphabetSize());
+}
+
+bbNodeOrderAlg::~bbNodeOrderAlg(){}
+
+// note: there is a way to dynamically correct exact.
+// it is not implemented here.
+void bbNodeOrderAlg::getNextNodeAndCharOrder(tree::nodeP &nextNode,
+ vector<int> &charOrder,
+ vector<sequence> &ancestralSequences,
+ const int pos,
+ const bool firstTime,
+ doubleRep& exactVal){
+ doubleRep highestProb=0;
+ if (firstTime) {
+ _et.getAllHTUs(_nodesLeft,_et.getRoot());
+ recalculateExact(ancestralSequences,pos);
+ rankRemainingNodesAccordingToTheirMarginalProb(pos);
+ }
+ assert(_nodesLeftExact.size()>=1);
+ assert(_nodesLeftExact.size()==_nodesLeft.size());
+ highestProb = _nodesLeftExact[_nodesLeftExact.size()-1];
+ if (highestProb<_computeAgainExactTreshold) {
+ recalculateExact(ancestralSequences,pos);
+ rankRemainingNodesAccordingToTheirMarginalProb(pos);
+ highestProb = _nodesLeftExact[_nodesLeftExact.size()-1];
+ }
+ _nodesLeftExact.pop_back();
+ nextNode = _nodesLeft[_nodesLeft.size()-1];
+ _nodesLeft.pop_back();
+ charOrder = findBestOrderInNode(nextNode,pos);
+ exactVal = highestProb;
+}
+
+void bbNodeOrderAlg::putBack(tree::nodeP& node2check,const doubleRep & exactVal) {
+ _nodesLeft.push_back(node2check);
+ _nodesLeftExact.push_back(exactVal);
+}
+
+
+void bbNodeOrderAlg::rankRemainingNodesAccordingToTheirMarginalProb(
+ const int pos) {
+
+ typedef pair<doubleRep,tree::nodeP> sortedElement;
+ vector<sortedElement> sortVec;
+ int i;
+ doubleRep tmpVal;
+ for ( i = 0 ; i < _nodesLeft.size() ; ++i) {
+ tmpVal = getNodeHighestMarginal(_nodesLeft[i]);
+ sortedElement elem(tmpVal,_nodesLeft[i]);
+ sortVec.push_back(elem);
+ }
+
+ sort(sortVec.begin(), sortVec.end());
+ _nodesLeft.clear();
+ _nodesLeftExact.clear();
+ _nodesLeft.resize(sortVec.size());
+ _nodesLeftExact.resize(sortVec.size());
+ for ( i = 0 ; i < _nodesLeft.size() ; ++i ) {
+ _nodesLeft[i] = sortVec[i].second;
+ _nodesLeftExact[i]=sortVec[i].first;
+ }
+}
+
+// this function gets as input the "exact" sufficient statistic for a given node
+// for a given position. It goes over all the alphabet, and computes
+// the marginal for each letter. It then returns the highest marginal.
+doubleRep bbNodeOrderAlg::getNodeHighestMarginal(const tree::nodeP& inNodeP) {
+ doubleRep highestProb =0.0;
+
+ int j,s;
+ for (j=0;j<_alphabetSize;++j) {
+ doubleRep tmpVal = 0;
+ for (s=0; s< _sp.categories();++s ) {
+ tmpVal += cmarginalbb.get(s,inNodeP->id(),j)*_sp.ratesProb(s);
+ }
+ if (highestProb<tmpVal) {
+ highestProb=tmpVal;
+ }
+ }
+ return highestProb;
+}
+
+void bbNodeOrderAlg::recalculateExact(vector<sequence> &ancestralSequences,
+ const int pos) {
+ for (int i=0; i < _sp.categories(); ++i) {
+ BBfillComputeUp(_et,_sc,pos,_cpij[i],cupbb[i],ancestralSequences);
+ BBfillComputeDown(_et,_sc,pos,_cpij[i],cdownbb[i],cupbb[i],ancestralSequences);
+ doubleRep posProb = 0.0;
+ computeMarginalAlg cmalg;
+ cmalg.fillComputeMarginal(_et,_sc,_sp,pos,_cpij[i],cmarginalbb[i],cupbb[i],cdownbb[i],posProb);
+ }
+}
+
+vector<int> bbNodeOrderAlg::findBestOrderInNode(const tree::nodeP node2check,
+ const int pos) const {
+ assert (node2check != NULL);
+ typedef pair<doubleRep,int> sortedElement; // (marginal, letter)
+ vector<sortedElement> sortVec;
+ int i,s;
+ for ( i = 0 ; i < _alphabetSize ; i++ ) {
+ doubleRep tmpVal = 0;
+ for (s=0; s< _sp.categories();++s ) {
+ tmpVal += cmarginalbb.get(s,node2check->id(),i)*_sp.ratesProb(s);
+ }
+ sortedElement elem(tmpVal,i);
+ sortVec.push_back(elem);
+ }
+
+ sort(sortVec.begin(), sortVec.end());
+ reverse(sortVec.begin(), sortVec.end());
+ vector<int> bestCharOrder(_alphabetSize);
+ for ( i = 0 ; i < _alphabetSize ; i++ ) {
+ bestCharOrder[i] = sortVec[i].second;
+ }
+ return bestCharOrder;
+}
+
diff --git a/programs/fastml/bbNodeOrderAlg.h b/programs/fastml/bbNodeOrderAlg.h
new file mode 100644
index 0000000..cfe4a99
--- /dev/null
+++ b/programs/fastml/bbNodeOrderAlg.h
@@ -0,0 +1,54 @@
+#if !defined ___BB__NODE_ORDER_ALG__
+#define ___BB__NODE_ORDER_ALG__
+
+#include "definitions.h"
+#include "bb_options.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include "sequence.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+
+class bbNodeOrderAlg {
+public:
+ explicit bbNodeOrderAlg(const tree& et,
+ const stochasticProcess &sp,
+ const sequenceContainer& sc,
+ const computePijGam& cpij,
+ const MDOUBLE computeAgainExactTreshold);
+ virtual ~bbNodeOrderAlg();
+ void getNextNodeAndCharOrder(tree::nodeP &nextNode,
+ vector<int> &charOrder,
+ vector<sequence> &ancestralSequences,
+ const int pos,
+ const bool firstTime,
+ doubleRep& exactVal);
+ void putBack(tree::nodeP& node2check,const doubleRep & exactVal);
+
+private:
+ const tree& _et;
+ const stochasticProcess& _sp;
+ const computePijGam& _cpij;
+ const sequenceContainer& _sc;
+ suffStatGlobalGamPos cmarginalbb;
+ suffStatGlobalGamPos cupbb;
+ suffStatGlobalGamPos cdownbb;
+
+ MDOUBLE _computeAgainExactTreshold;
+ int _alphabetSize;
+ int _pos;
+ vector<tree::nodeP> _nodesLeft;
+ vector<doubleRep> _nodesLeftExact;
+
+ void recalculateExact( vector<sequence> &ancestralSequences,
+ const int pos);
+ vector<int> findBestOrderInNode(const tree::nodeP node2check,
+ const int pos) const;
+ void rankRemainingNodesAccordingToTheirMarginalProb(
+ const int pos);
+ doubleRep getNodeHighestMarginal( const tree::nodeP& inNodeP);
+};
+
+
+#endif
diff --git a/programs/fastml/bbReport.cpp b/programs/fastml/bbReport.cpp
new file mode 100644
index 0000000..f64b9e8
--- /dev/null
+++ b/programs/fastml/bbReport.cpp
@@ -0,0 +1,75 @@
+#include "bbReport.h"
+#include "amino.h"
+#include "nucleotide.h"
+#include "codon.h"
+#include <iomanip>
+#include <iostream>
+#include <cmath>
+using namespace std;
+
+BandBReport::BandBReport( const string& reportFileName, const int position, const int alphabetSize ) :
+ _reportFileName(reportFileName), _position(position), _alphabetSize(alphabetSize)
+{
+// _root = new TreeNode;
+// DecisionNode rootData(-2,"allstar"); // char, node-id
+// _root->Setdata(rootData);
+// _current = _root;
+// _nodes = 1;
+}
+
+void BandBReport::report(
+ const string NodeName,
+ const int charPutInsideNode,
+ const int depth,
+ const doubleRep bestRecord,
+ const doubleRep probOfVector,
+ const doubleRep BoundSigma,
+ const doubleRep boundMax
+ ) {
+
+ VNodeName.push_back(NodeName);
+ VcharPutInsideNode.push_back(charPutInsideNode);
+ VbestRecord.push_back(bestRecord);
+ VprobOfVector.push_back(probOfVector);
+ VBoundSigma.push_back(BoundSigma);
+ VboundMax.push_back(boundMax);
+ Vdepth.push_back(depth);
+
+}
+
+
+void BandBReport::makeReport() const {
+
+ ofstream out;
+ //if (_position==0) out.open("report.txt",ios::trunc);
+ //else {
+ out.open(_reportFileName.c_str(),ios::app);
+ //}
+ out<<" position is: "<<_position<<endl;
+// cerr<<"reportFileIs: "<<_reportFileName<<endl;
+ if (out == NULL) {
+ errorMsg::reportError("unable to open output file for reporting");
+ }
+// exit(555);
+ amino aa;
+ nucleotide nuc;
+ codon co;
+ for (int k=0; k < VNodeName.size(); ++k) {
+ for (int l=0; l < Vdepth[k]; ++l) out<<" ";
+ out<<VNodeName[k]<<" ";
+ if (_alphabetSize==20) out<<aa.fromInt(VcharPutInsideNode[k])<<" ";
+ else if (_alphabetSize==4) out<<nuc.fromInt(VcharPutInsideNode[k])<<" ";
+ else if (_alphabetSize==61) out<<co.fromInt(VcharPutInsideNode[k])<<" ";
+ else errorMsg::reportError(" error in function BandBReport::makeReport( )");
+ out<<setiosflags(ios::scientific);
+ out<<"best Record: "<<VbestRecord[k]<<" ";
+ out<<"BoundSigma: "<<VBoundSigma[k]<<" ";
+ out<<"boundMax: "<<VboundMax[k]<<" ";
+ out<<"probAV: "<<VprobOfVector[k];
+ out<<endl;
+ }
+ out.close();
+
+ return;
+}
+
diff --git a/programs/fastml/bbReport.h b/programs/fastml/bbReport.h
new file mode 100644
index 0000000..b1940e2
--- /dev/null
+++ b/programs/fastml/bbReport.h
@@ -0,0 +1,58 @@
+#ifndef ________BANBREPORT
+#define ________BANBREPORT
+
+#include "definitions.h"
+#include <fstream>
+using namespace std;
+
+// Accumulates, over all positions, the number of branch-and-bound nodes
+// visited, and appends a two-line summary to the report file.
+class BandBReportAllPos {
+public:
+	// BUGFIX: the file name is now stored by value; the original
+	// `const string&` member dangled when constructed from a temporary.
+	explicit BandBReportAllPos(const string& reportFileName, int minNumOfNodesToVisit)
+	: totalNumberOfNodeVisited(0), _minNumOfNodesToVisit(minNumOfNodesToVisit), _reportFileName(reportFileName) {}
+	int totalNumberOfNodeVisited;    // running total, updated by the caller
+	const int _minNumOfNodesToVisit;
+	const string _reportFileName;    // opened in append mode by printReport()
+	// Append the visit totals to the report file.
+	void printReport() const {
+		ofstream out(_reportFileName.c_str(),ios::app);
+		out<<"total positions visited: "<<totalNumberOfNodeVisited<<endl;
+		out<<"min positions to be visited: "<<_minNumOfNodesToVisit<<endl;
+		out.close();
+		return;
+	}
+};
+
+
+// Per-position trace of the branch-and-bound search: one entry per
+// visited node, stored in parallel vectors and flushed to the report
+// file by makeReport().
+class BandBReport
+{
+public:
+	explicit BandBReport( const string& reportFileName,
+		const int position,
+		const int alphabetSize);
+	// Record one visited search-tree node.
+	void report(
+		const string NodeName,
+		const int charPutInsideNode,
+		const int depth,
+		const doubleRep bestRecord,
+		const doubleRep probOfVector,
+		const doubleRep BoundSigma,
+		const doubleRep boundMax);
+	// Append the whole trace for this position to the report file.
+	void makeReport() const;
+	int size() {return VNodeName.size();}
+private:
+	// parallel vectors, one entry per report() call
+	vector<string> VNodeName;
+	vector<int> VcharPutInsideNode;
+	vector<doubleRep> VbestRecord;
+	vector<doubleRep> VprobOfVector;
+	vector<doubleRep> VBoundSigma;
+	vector<doubleRep> VboundMax;
+	vector<int> Vdepth;
+
+	const int _position;
+	const int _alphabetSize;
+	// BUGFIX: stored by value; the original `const string&` member dangled
+	// when the constructor argument was a temporary string.
+	const string _reportFileName;
+};
+
+
+#endif
+
diff --git a/programs/fastml/bb_options.cpp b/programs/fastml/bb_options.cpp
new file mode 100644
index 0000000..f81f513
--- /dev/null
+++ b/programs/fastml/bb_options.cpp
@@ -0,0 +1,159 @@
+#include <cstdlib>
+#include "bb_options.h"
+#include "logFile.h"
+#include "errorMsg.h"
+
+// Parse the fastml command line into this options record (see usage() in
+// bb_options_list.h for the flag reference). Exits with the usage text
+// when -h is given or when no sequence file (-s) was supplied.
+bb_options::bb_options(int& argc, char *argv[]):
+	computeAgainExactTreshold(0.9),
+	optimizeBrLenOnStartingTree(true),
+	doJoint(true),
+	treefile(""),
+	reportFile("log.txt"),
+	outFile_seq_joint("seq.joint.txt"),
+	outFile_seq_marginal("seq.marginal.txt"),
+	outFile_prob_joint("prob.joint.txt"),
+	outFile_prob_marginal("prob.marginal.txt"),
+	seqfile(""),
+	distributionName(hom),
+	seqOutputFormat(clustal),
+	outTreeFileNewick("tree.newick.txt"),
+	outTreeFileAncestor("tree.ancestor.txt"),
+	boundMethod(both),
+	verbose(false), // BUGFIX: was never initialized -- reading it was undefined
+	gammaPar(1.0),
+	userProvideAlpha(false),
+	gammaCategies(8),
+	modelName(jtt),
+	alphabet_size(20),
+	removeGapsPosition(true),
+	useChebyshev(true),
+	treeOutFile("TheTree.txt"),
+	outPtr(&cout){
+	static struct option long_options[] = {{0, 0, 0, 0}}; // no long options
+	int option_index = 0;
+	int c=0;
+	while (c >= 0) { // getopt_long returns -1 once the options are exhausted
+		c = getopt_long(argc, argv,"a:bc:d:e:fghj:k:m:p:q:R:s:t:ux:y:z:", long_options,&option_index);
+
+		switch (c) {
+		case 'a': computeAgainExactTreshold=atof(optarg); break;
+		case 'b': optimizeBrLenOnStartingTree=false; break;
+		case 'c': gammaCategies=atoi(optarg); break;
+		case 'd': outFile_prob_joint=optarg; break;
+		case 'e': outFile_prob_marginal=optarg; break;
+		case 'f': doJoint=false; break;
+		case 'g': distributionName=gam; break;
+		case 'h' : { // print help and stop
+			cout << "USAGE: "<<argv[0]<<" [-options] "<<endl;
+			cout << usage()<<endl;
+			exit (0);
+		} break;
+		case 'j': outFile_seq_joint=optarg; break;
+		case 'k': outFile_seq_marginal=optarg; break;
+		case 'm': { // substitution model; also fixes the alphabet size
+			switch (optarg[0]) {
+			case 'd': case 'D': modelName=day;alphabet_size=20; break;
+			case 'j': case 'J': modelName=jtt;alphabet_size=20; break;
+			case 'l': case 'L': modelName=lg;alphabet_size=20; break;
+			case 'r': case 'R': modelName=rev;alphabet_size=20; break;
+			case 'w': case 'W': modelName=wag;alphabet_size=20; break;
+			case 'c': case 'C': modelName=cprev;alphabet_size=20; break;
+			case 'a': case 'A': modelName=aajc;alphabet_size=20; break;
+			case 'n': case 'N': modelName=nucjc;alphabet_size=4; break;
+			case 'g': case 'G': modelName=nucgtr;alphabet_size=4; break;
+			case 'e': case 'E': modelName=empiriCodon;alphabet_size=61; break;
+			case 'y': case 'Y': modelName=nyCodon;alphabet_size=61; break;
+			default:modelName=jtt;alphabet_size=20;
+				break;
+			}
+		} break;
+		case 'p': { // user-supplied gamma alpha implies the gamma model
+			userProvideAlpha = true;
+			gammaPar=atof(optarg);
+			distributionName=gam;
+
+		} break;
+		case 'q': { // output alignment format
+			switch (optarg[0]) {
+			case 'c': seqOutputFormat=clustal; break;
+			case 'f': seqOutputFormat=fasta; break;
+			case 'm': seqOutputFormat=molphy; break;
+			case 's': seqOutputFormat=mase; break;
+			case 'p': seqOutputFormat=phylip; break;
+			case 'n': seqOutputFormat=nexus; break;
+			default: seqOutputFormat=clustal; break;
+			}
+		} break;
+		case 'R': reportFile=optarg; break;
+		case 's': seqfile=optarg; break;
+		case 't': treefile=optarg; break;
+		case 'u': useChebyshev=false; break;
+		case 'x': outTreeFileNewick=optarg; break;
+		case 'y': outTreeFileAncestor=optarg; break;
+		case 'z': { // bound method for the branch-and-bound search
+			switch (optarg[0]) {
+			case 's': case 'S': boundMethod=sum; break;
+			case 'm': case 'M': boundMethod=max; break;
+			case 'b': case 'B': boundMethod=both; break;
+			default:boundMethod=both;break;
+			}
+		} break;
+
+		//default: printf ("?? getopt returned character code 0%o ??\n", c);
+		} // end of switch c
+	} // end of while (c)
+	if (seqfile=="") { // -s is mandatory
+		cout << "USAGE: "<<argv[0]<<" [-options] "<<endl;
+		//cout << "cat SeqFile |"<<argv[0]<<" [-options]"<<endl <<endl;
+		cout << usage();
+		cout << endl;
+		exit (0);
+	}
+}
+
+
+// Human-readable name of the selected substitution model (for logs and
+// reports). Reports an error for an unrecognized enum value.
+string bb_options::modelNameStr() const
+{
+	switch (modelName) {
+	case day:         return "DAY";
+	case jtt:         return "JTT";
+	case wag:         return "WAG";
+	case lg:          return "LG";
+	case nyCodon:     return "NY_CODON";
+	case rev:         return "REV";
+	case cprev:       return "CPREV";
+	case nucjc:       return "NUC_JC";
+	case nucgtr:      return "NUC_GTR";
+	case aajc:        return "AA_JC";
+	case empiriCodon: return "EMPIRICAL_CODON";
+	default:
+		errorMsg::reportError("unknown type in bb_options::modelNameStr");
+	}
+	return "";
+}
+
diff --git a/programs/fastml/bb_options.h b/programs/fastml/bb_options.h
new file mode 100644
index 0000000..d39d983
--- /dev/null
+++ b/programs/fastml/bb_options.h
@@ -0,0 +1,69 @@
+#if !defined ___BB__OPTION__T__
+#define ___BB__OPTION__T__
+
+
+#ifndef __STDC__
+#define __STDC__ 1
+#include "pgetopt.h"
+#undef __STDC__
+#else
+#include "pgetopt.h"
+#endif
+
+#include "definitions.h"
+#include <iostream>
+#include <fstream>
+using namespace std;
+
+// Bag of command-line options for the fastml branch-and-bound program;
+// filled from argv by the constructor (see bb_options.cpp).
+class bb_options {
+public:
+	MDOUBLE computeAgainExactTreshold; // -a: threshold for exact recomputation
+	mutable bool optimizeBrLenOnStartingTree; // -b turns this off
+	bool doJoint; // -f turns this off
+	string treefile; // -t (may stay empty)
+	string seqfile;  // -s (mandatory; the constructor exits if missing)
+	enum SeqFileFormat {mase,clustal,fasta,molphy,phylip,nexus};
+	SeqFileFormat seqOutputFormat; // -q
+	string treeOutFile;
+	bool userProvideAlpha; // set by -p
+	enum distributionsNames {hom,gam};
+	distributionsNames distributionName; // -g / -p select gam
+	enum boundMethods {max,sum,both};
+	boundMethods boundMethod; // -z
+	bool verbose; // if true: print starting tree to the file: start_tree
+	// NOTE(review): verify verbose is initialized by the constructor
+	// before relying on it.
+// tree::TREEformats outputFormat;
+	enum modelNameOptions {day,jtt,lg,rev,wag,cprev,nucjc,nucgtr,aajc,nyCodon,empiriCodon};
+	modelNameOptions modelName; // -m
+	int alphabet_size; // 20, 4 or 61, implied by the chosen model
+	bool removeGapsPosition;
+	bool useChebyshev; // -u turns this off
+	string outTreeFileNewick;   // -x
+	string outTreeFileAncestor; // -y
+	string outFile_prob_joint;    // -d
+	string outFile_prob_marginal; // -e
+	string outFile_seq_joint;     // -j
+	string outFile_seq_marginal;  // -k
+
+	MDOUBLE gammaPar; // -p: alpha parameter of the gamma distribution
+	int gammaCategies; // -c (sic: "categories")
+	string reportFile; // -R
+private:
+	ostream* outPtr; // points at cout by default
+	ofstream out_f;  // NOTE(review): never opened in the code visible here
+public:
+	ostream& out() const {return *outPtr;};
+	string modelNameStr() const; // human-readable model name
+	explicit bb_options(int& argc, char *argv[]);
+};
+
+#include "bb_options_list.h"
+#include <string>
+using namespace std;
+// Splash-screen text shown at startup; identical to the usage string.
+// (Name kept as-is -- including the "usege" typo -- for compatibility.)
+static const string usege_splash_screen() {
+	return usage();
+}
+
+
+#endif
+
diff --git a/programs/fastml/bb_options_list.h b/programs/fastml/bb_options_list.h
new file mode 100644
index 0000000..8b93a26
--- /dev/null
+++ b/programs/fastml/bb_options_list.h
@@ -0,0 +1,47 @@
+#include <string>
+using namespace std;
+// Returns the formatted command-line help text; the flags are parsed by
+// bb_options. NOTE(review): the "|-m ..." rows lack the leading space of
+// the other rows, so the help box looks misaligned by one column --
+// confirm against the original file before fixing, since the column
+// padding inside these strings was mangled by extraction here.
+static string usage() {
+	string tmp;
+	tmp +=" |-------------------------------- HELP: -------------------------------------+\n";
+	tmp +=" | VALUES IN [] ARE DEFAULT VALUES |\n";
+	tmp +=" |-h help |\n";
+	tmp +=" |-s sequence input file (for example use -s D:\\mySequences\\seq.txt ) |\n";
+	tmp +=" |-t tree input file |\n";
+	tmp +=" | (if tree is not given, a neighbor joining tree is computed). |\n";
+	tmp +=" |-g Assume among site rate variation model (Gamma) [By default the program |\n";
+	tmp +=" | will assume an homogenous model. very fast, but less accurate!] |\n";
+	tmp += "|-m model name |\n";
+	tmp += "|-mj [JTT] |\n";
+	tmp += "|-mr mtREV (for mitochondrial genomes) |\n";
+	tmp += "|-md DAY |\n";
+	tmp += "|-mw WAG |\n";
+	tmp += "|-mc cpREV (for chloroplasts genomes) |\n";
+	tmp += "|-ma Jukes and Cantor (JC) for amino acids |\n";
+	tmp += "|-mn Jukes and Cantor (JC) for nucleotides |\n";
+	tmp +=" +----------------------------------------------------------------------------+\n";
+	tmp +=" |Controling the output options: |\n";
+	tmp +=" |-x tree file output in Newick format [tree.newick.txt] |\n";
+	tmp +=" |-y tree file output in ANCESTOR format [tree.ancestor.txt] |\n";
+	tmp +=" |-j joint sequences output file [seq.joint.txt] |\n";
+	tmp +=" |-k marginal sequences output file [seq.marginal.txt] |\n";
+	tmp +=" |-d joint probabilities output file [prob.joint.txt] |\n";
+	tmp +=" |-e marginal probabilities output file [prob.marginal.txt] |\n";
+	tmp +=" |-q ancestral sequences output format. -qc = [CLUSTAL], -qf = FASTA |\n";
+	tmp +=" | -qm = MOLPHY, -qs = MASE, -qp = PHLIYP, -qn = Nexus |\n";
+	tmp +=" +----------------------------------------------------------------------------+\n";
+	tmp +=" |Advances options: |\n";
+	tmp +=" |-a Treshold for computing again marginal probabilities [0.9] |\n";
+	tmp +=" |-b Do not optimize branch lengths on starting tree |\n";
+	tmp +=" | [by default branches and alpha are ML optimized from the data] |\n";
+	tmp +=" |-c number of discrete Gamma categories for the gamma distribution [8] |\n";
+	tmp +=" |-f don't compute Joint reconstruction (good if the branch and bound |\n";
+	tmp +=" | algorithm takes too much time, and the goal is to compute the |\n";
+	tmp +=" | marginal reconstruction with Gamma). |\n";
+	tmp +=" |-z The bound used. -zs - bound based on sum. -zm based on max. -zb [both] |\n";
+	tmp +=" |-p user alpha parameter of the gamma distribution [if alpha is not given, |\n";
+	tmp +=" | alpha and branches will be evaluated from the data (override -b) |\n";
+// tmp +=" |R report file. Show the choices made by the algorithm |\n";
+// tmp +=" |-u do not use Chebyshev optimization |\n";
+	tmp +=" +----------------------------------------------------------------------------+\n";
+	return tmp;
+}
diff --git a/programs/fastml/bbfindBestAVDynProg.cpp b/programs/fastml/bbfindBestAVDynProg.cpp
new file mode 100644
index 0000000..473613c
--- /dev/null
+++ b/programs/fastml/bbfindBestAVDynProg.cpp
@@ -0,0 +1,116 @@
+#include "bbfindBestAVDynProg.h"
+
+// Bind the joint-reconstruction dynamic program to a tree, a stochastic
+// process, the observed sequences and precomputed transition
+// probabilities (all caller-owned and must outlive this object), and
+// allocate the nodes-by-alphabet L (likelihood) and C (best letter)
+// tables.
+bbfindBestAVDynProg::bbfindBestAVDynProg(const tree* et,
+	const stochasticProcess *sp,
+	const sequenceContainer& sc,
+	const computePijGam* cpij): _sc(sc) {
+	_et = et;
+	_sp = sp;
+	_bbcpij = cpij;
+	_sctm = new seqContainerTreeMap(_sc,*_et); // owned; freed in the destructor
+	_alphabetSize=_sp->alphabetSize();
+	_jointLval.resize(_et->getNodesNum());
+	_jointCval.resize(_et->getNodesNum());
+	for (int i=0; i < _et->getNodesNum(); ++i) {
+		_jointLval[i].resize(_alphabetSize);
+		_jointCval[i].resize(_alphabetSize);
+	}
+}
+
+// Release the sequence<->tree-node map allocated in the constructor.
+bbfindBestAVDynProg::~bbfindBestAVDynProg() {
+	delete _sctm;
+}
+
+// Compute, for one alignment position and one rate category, the
+// likelihood of the best joint assignment of letters to the ancestral
+// nodes still marked unknown (-2) in ancestralSequences. Fills
+// _jointLval/_jointCval bottom-up, then maximizes over the root letter.
+// Returns the best (non-log) likelihood value at the root.
+MDOUBLE bbfindBestAVDynProg::evaluateSpecificAvDP(
+	const int pos,
+	const vector<sequence>* ancestralSequences,
+	const int rateCategor) {
+	_ancss = ancestralSequences;
+
+	// bottom-up pass: fills the L/C tables for every node except the root
+	recursiveComputeLandC(pos,_et->getRoot(),rateCategor);
+// modified from NancestralTree::findBestLetInRoot(const int pos) {
+	MDOUBLE bestLinRoot =0 ;
+	//MDOUBLE bestLetInRoot = -2;
+	MDOUBLE tmp = 0.0;
+	int letInRoot = (*_ancss)[_et->getRoot()->id()][pos];
+	if (letInRoot==-2) {
+		// root letter unknown: maximize over all letters x, scoring each as
+		// freq(x) times the product of the sons' L values for x
+		for (int x = 0 ; x < _alphabetSize; ++x) {
+			tmp = _sp->freq(x);
+			for (int y =0 ; y < _et->getRoot()->getNumberOfSons() ; ++y) {
+				tmp *= _jointLval[_et->getRoot()->getSon(y)->id()][x];
+			}
+			if (tmp > bestLinRoot) {
+				bestLinRoot = tmp;
+				//bestLetInRoot = x;
+			}
+		}
+	}
+	else {//if (letInRoot!=-2)
+		// root letter already assigned: score only that letter
+		tmp = _sp->freq(letInRoot);
+		for (int y =0 ; y < _et->getRoot()->getNumberOfSons() ; ++y) {
+			tmp *= _jointLval[_et->getRoot()->getSon(y)->id()][letInRoot];
+		}
+		if (tmp > bestLinRoot) {
+			bestLinRoot = tmp;
+			//bestLetInRoot = x;
+		}
+	}
+
+	//iRoot()->data()[pos] = bestLetInRoot;
+	return bestLinRoot;
+}
+
+// Post-order dynamic program over the tree (the root must be internal).
+// For every non-root node and every possible father letter F it fills:
+//   _jointLval[node][F] = best contribution of the subtree below node,
+//                         given the father carries letter F
+//   _jointCval[node][F] = the node letter achieving that best value
+// A letter of -2 means "unknown"; known letters come from the alignment
+// (leaves) or from the partially assigned ancestral sequences (HTUs).
+void bbfindBestAVDynProg::recursiveComputeLandC(const int pos,
+	const tree::nodeP inNode,
+	const int rateCategor) {
+// root has to be internal node here.
+	for (int i=0; i<inNode->getNumberOfSons();++i) {
+		recursiveComputeLandC(pos,inNode->getSon(i),rateCategor);
+	}
+	if (inNode->father() == NULL) return; // the root has no father entry
+
+	// determine this node's letter (or -2 if not yet assigned)
+	int letInNode;
+	if (inNode->isLeaf()) {
+		const int seqID = _sctm->seqIdOfNodeI(inNode->id());
+		letInNode=_sc[seqID][pos];
+	}
+	else {
+		letInNode = (*_ancss)[inNode->id()][pos];
+	}
+
+	if (letInNode!=-2){ // known leaf, or known HTU, (no root)
+		// letter fixed: only the transition from each father letter varies
+		for (int FatherLet = 0; FatherLet<_alphabetSize;++FatherLet) {
+			_jointLval[inNode->id()][FatherLet] = _bbcpij->getPij(rateCategor,inNode->id(),FatherLet,letInNode);
+			_jointCval[inNode->id()][FatherLet] = letInNode;
+			for (int k=0; k < inNode->getNumberOfSons() ; ++k) {
+				_jointLval[inNode->id()][FatherLet] *= _jointLval[inNode->getSon(k)->id()][letInNode];
+			}
+		}
+	}
+	else {// unknown leaf or HTU -> no root.
+		// letter free: maximize over the alphabet for every father letter
+		for (int letInFather = 0; letInFather < _alphabetSize; ++letInFather) {
+			MDOUBLE bestVal = 0;
+			int bestLet = -2;
+			for (int lenInNode = 0; lenInNode < _alphabetSize; ++lenInNode) {
+				MDOUBLE tmp = 1;
+				if (inNode->isInternal())
+					tmp*= _bbcpij->getPij(rateCategor,inNode->id(),letInFather,lenInNode);
+				// if it is a leaf, and since it is ? tmp will be 1.0...
+				for (int k=0; k < inNode->getNumberOfSons() ; ++k) {
+					tmp *= _jointLval[inNode->getSon(k)->id()][lenInNode];
+				}
+				if (tmp > bestVal) {
+					bestVal = tmp;
+					bestLet = lenInNode;
+				}
+			}
+			_jointLval[inNode->id()][letInFather] = bestVal;
+			_jointCval[inNode->id()][letInFather] = bestLet;
+		}
+	}
+}
+
+
+
diff --git a/programs/fastml/bbfindBestAVDynProg.h b/programs/fastml/bbfindBestAVDynProg.h
new file mode 100644
index 0000000..b4725f2
--- /dev/null
+++ b/programs/fastml/bbfindBestAVDynProg.h
@@ -0,0 +1,44 @@
+#if !defined ___BB__FIND_BEST_AV_DYN_PROG
+#define ___BB__FIND_BEST_AV_DYN_PROG
+
+
+#include "bb_options.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include "sequence.h"
+#include "tree.h"
+#include "sequenceContainer.h"
+#include "seqContainerTreeMap.h"
+
+// Joint-reconstruction dynamic program used by the branch-and-bound
+// search: given a partially assigned set of ancestral sequences (entries
+// of -2 mean "letter not yet assigned"), computes the likelihood of the
+// best completion at a single position. All pointer/reference members
+// except _sctm are non-owning and must outlive this object.
+class bbfindBestAVDynProg {
+public:
+	explicit bbfindBestAVDynProg(const tree* et,
+		const stochasticProcess *sp,
+		const sequenceContainer& sc,
+		const computePijGam* cpij);
+	virtual ~bbfindBestAVDynProg();
+
+	// Best joint likelihood for position pos under rateCategory, given the
+	// (partially known) ancestral sequences.
+	MDOUBLE evaluateSpecificAvDP( const int pos,
+		const vector<sequence>* ancestralSequences,
+		const int rateCategory
+	);
+
+private:
+	const tree* _et;                // tree (not owned)
+	const stochasticProcess* _sp;   // substitution process (not owned)
+	const computePijGam* _bbcpij;   // precomputed Pij tables (not owned)
+	int _alphabetSize;
+	int _pos;
+	seqContainerTreeMap * _sctm;    // owned; released in the destructor
+	const sequenceContainer& _sc;
+
+	const vector<sequence>* _ancss; // current ancestral assignment (not owned)
+
+	// bottom-up pass filling the tables below
+	void recursiveComputeLandC( const int pos,
+		const tree::nodeP inNode,
+		const int rateCategor);
+	VVdouble _jointLval; // inodes * letter
+	VVdouble _jointCval; // inodes * letter
+};
+
+#endif
diff --git a/programs/fastml/computeMarginalReconstruction.cpp b/programs/fastml/computeMarginalReconstruction.cpp
new file mode 100644
index 0000000..49ac34d
--- /dev/null
+++ b/programs/fastml/computeMarginalReconstruction.cpp
@@ -0,0 +1,152 @@
+#include "computeMarginalReconstruction.h"
+#include "computeUpAlg.h"
+#include "computePijComponent.h"
+
+#include "computeDownAlg.h"
+#include "computeMarginalAlg.h"
+#include "treeIt.h"
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+
+// Pre-allocate the [position][node][letter] marginal-probability table
+// and the [position][node] best-probability table (letters zeroed).
+computeMarginalReconstruction::computeMarginalReconstruction(const tree& et,
+	vector<stochasticProcess>& spVec,
+	const sequenceContainer& sc) : _et(et), _spVec(spVec), _sc(sc) {
+	const int nPositions = _sc.seqLen();
+	const int nNodes = et.getNodesNum();
+	_resultProb.resize(nPositions);
+	_bestProb.resize(nPositions);
+	for (int pos=0; pos < nPositions; ++pos) {
+		_resultProb[pos].resize(nNodes);
+		_bestProb[pos].resize(nNodes);
+		for (int node=0; node < nNodes; ++node) {
+			_resultProb[pos][node].resize(_spVec[0].alphabetSize(),0.0);
+		}
+	}
+}
+
+
+
+// Run the marginal reconstruction over every alignment position: fill the
+// up/down/marginal sufficient statistics per rate category, cache the
+// rate-averaged per-letter marginals in _resultProb, then build the
+// reconstructed ancestral sequences.
+void computeMarginalReconstruction::compute(const distribution * forceDistr){
+	computePijGam pi;
+	if (_spVec.size()>1) {//w codon model + gamma special case
+		// assumes _spVec.size() == forceDistr->categories() -- TODO confirm
+		pi._V.resize(forceDistr->categories());
+		for (int i=0; i < _spVec.size(); ++i)
+			pi._V[i].fillPij(_et,_spVec[i]);
+		_spVec[0].setDistribution(forceDistr);//update the first process with gamma distr
+		//for all the functions that needs no catregor and categor probabilty
+	}
+	else{
+		pi.fillPij(_et,_spVec[0]);
+	}
+
+	cout<<"doing position (marginal): ";
+	for (int pos=0; pos<_sc.seqLen(); ++pos) {
+		// per-position sufficient statistics: [category][node][letter]
+		suffStatGlobalGamPos sscUp;
+		suffStatGlobalGamPos sscDown;
+		suffStatGlobalGamPos sscMarginal;
+		sscUp.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize());
+		sscDown.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize());
+		sscMarginal.allocatePlace(_spVec[0].categories(),_et.getNodesNum(),_sc.alphabetSize());
+
+		cout<<pos+1<<" ";
+		computeUpAlg computeUpAlg1;
+		computeDownAlg computeDownAlg1;
+		computeMarginalAlg computeMarginalAlg1;
+
+		for (int cat = 0; cat < _spVec[0].categories(); ++cat) {
+			computeUpAlg1.fillComputeUp(_et,_sc,pos,pi[cat],sscUp[cat]);
+			computeDownAlg1.fillComputeDown(_et,_sc,pos,pi[cat],sscDown[cat],sscUp[cat]);
+			doubleRep posProb =0;
+			computeMarginalAlg1.fillComputeMarginal(_et,_sc,_spVec[0],pos,pi[cat],sscMarginal[cat],sscUp[cat],sscDown[cat],posProb);
+		}
+
+		fillResultProb(sscMarginal,_spVec[0],_et,pos);
+	}
+	// BUGFIX(perf): fillMarginalReconstruction() itself scans all positions,
+	// so calling it inside the loop above was accidentally O(positions^2);
+	// a single call after _resultProb is fully filled yields the same final
+	// result. Also removed the unused locals totalLikelihoodOfReconstruction
+	// and likelihoodOfPos.
+	fillMarginalReconstruction();
+	cout<<endl;
+}
+
+// Cache, for every node at this position, the per-letter marginal
+// probability: the category-specific marginals weighted by the prior
+// probability of each rate category.
+void computeMarginalReconstruction::fillResultProb(
+	const suffStatGlobalGamPos& ssc,
+	const stochasticProcess & sp,
+	const tree& et,
+	const int pos){
+	treeIterTopDownConst tIt(et);
+	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+		for (int letter=0; letter < sp.alphabetSize(); ++letter) {
+			doubleRep weighted=0; // rate-averaged marginal for this letter
+			for (int cat=0; cat < sp.categories(); ++cat) {
+				weighted += ssc.get(cat,mynode->id(),letter)*sp.ratesProb(cat);
+			}
+			_resultProb[pos][mynode->id()][letter] = convert(weighted);
+		}
+	}
+}
+
+// Build the ancestral sequence container: start from a copy of the
+// observed sequences, then add one (initially empty) sequence for every
+// internal node and fill it with the best marginal letter per position.
+void computeMarginalReconstruction::fillMarginalReconstruction() {
+	_resultSec = _sc;
+	treeIterTopDownConst tIt(_et);
+	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+		if (mynode->isLeaf()) continue; // leaves already carry observed data
+		// reserve a slot for this ancestral node's sequence
+		sequence newSeq("",mynode->name(),"",_resultSec.numberOfSeqs(),_sc.getAlphabet());
+		_resultSec.add(newSeq);
+		fillMarginalReconstructionSpecificNode(mynode);
+	}
+}
+
+// For one internal node, pick at every position the letter with the
+// highest marginal probability; store that probability in _bestProb and
+// append the letter to the node's sequence in _resultSec.
+void computeMarginalReconstruction::fillMarginalReconstructionSpecificNode(tree::nodeP mynode) {
+	for (int pos=0; pos < _sc.seqLen(); ++pos) {
+		MDOUBLE maxProb = -1.0; // below any probability: some letter always wins
+		int maxLetter = -1;
+		for (int letter=0; letter < _spVec[0].alphabetSize(); ++letter) {
+			if (_resultProb[pos][mynode->id()][letter] > maxProb) {
+				maxProb = _resultProb[pos][mynode->id()][letter];
+				maxLetter = letter;
+			}
+		}
+		_bestProb[pos][mynode->id()] = maxProb;
+
+		// append the winning letter to this node's reconstructed sequence
+		string res = _sc.getAlphabet()->fromInt(maxLetter);
+		int id = _resultSec.getId(mynode->name());
+		_resultSec[id].addFromString(res);
+	}
+}
+
+// Write the per-node marginal probability tables of every position to
+// outputFileName (file is created/truncated; one block per position).
+void computeMarginalReconstruction::outputTheMarginalProbForEachCharForEachNode(const string& outputFileName) {
+	ofstream out(outputFileName.c_str());
+	for (int pos=0; pos<_sc.seqLen(); ++pos)
+		outputTheMarginalProbForEachCharForEachNodePos(out,pos);
+	out.close();
+}
+
+// Print, for one position, each node's marginal letter probabilities in
+// decreasing order, skipping probabilities below 0.0001.
+void computeMarginalReconstruction::outputTheMarginalProbForEachCharForEachNodePos(ostream& out,const int pos){//(DEFAULT = JPF, same file as above).
+	treeIterDownTopConst tIt(_et);
+	out<<"marginal probabilities at position: "<<pos+1<<endl;
+	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+		out<<"of node: "<<mynode->name()<<": ";
+		// pair (probability, letter) so sorting orders by probability,
+		// ties broken by the letter string
+		vector<pair< MDOUBLE,string> > charProb;
+		for (int letter=0; letter < _spVec[0].alphabetSize(); ++letter) {
+			charProb.push_back(pair<MDOUBLE,string>(_resultProb[pos][mynode->id()][letter],_sc.getAlphabet()->fromInt(letter)));
+		}
+		sort(charProb.begin(),charProb.end());
+		// walk from most to least probable, dropping negligible entries
+		for (int r = charProb.size()-1; r >= 0; --r) {
+			if (charProb[r].first<0.0001) continue;
+			out<<"p("<<charProb[r].second;
+			out<<")="<<charProb[r].first<<" ";
+		}
+		out<<endl;
+	}
+	out<<endl;
+}
+
diff --git a/programs/fastml/computeMarginalReconstruction.h b/programs/fastml/computeMarginalReconstruction.h
new file mode 100644
index 0000000..f84aaac
--- /dev/null
+++ b/programs/fastml/computeMarginalReconstruction.h
@@ -0,0 +1,39 @@
+#ifndef ___COMPUTE_MARGINAL_RECONSTRUCTION
+#define ___COMPUTE_MARGINAL_RECONSTRUCTION
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "suffStatComponent.h"
+
+// Marginal ancestral-sequence reconstruction: for every position and
+// every internal node, computes the marginal probability of each letter
+// (summed over rate categories) and keeps the most probable letter as
+// the reconstructed state.
+class computeMarginalReconstruction {
+public:
+	explicit computeMarginalReconstruction(
+		const tree& et,
+		vector<stochasticProcess>& spVec,
+		const sequenceContainer& sc);
+
+	// Run the up/down/marginal computations for all positions; forceDistr
+	// becomes the distribution of _spVec[0] in the multi-process case.
+	void compute(const distribution * forceDistr);
+	// Dump the per-letter marginal table of every node and position.
+	void outputTheMarginalProbForEachCharForEachNode(const string& outputFileName);
+	// The reconstructed sequences (observed + ancestral), returned by value.
+	sequenceContainer getResultingMarginalReconstruction() const {return _resultSec;}
+private:
+	const tree& _et;
+	vector<stochasticProcess>& _spVec;
+	const sequenceContainer& _sc;
+	sequenceContainer _resultSec; // observed sequences + reconstructed ancestors
+
+	// this will be the marginal for each node, for each pos, for each letter
+	VVVdouble _resultProb; //_resultProb[pos][node][letter]
+
+	// this will be the marginal for each node, for each pos, of the best reconsturction.
+	VVdouble _bestProb; //_resultProb[pos][node]
+
+	void fillResultProb(const suffStatGlobalGamPos& ssc,const stochasticProcess & sp,const tree& et, const int pos);
+	void fillMarginalReconstruction();
+	void fillMarginalReconstructionSpecificNode(tree::nodeP mynode);
+	void outputTheMarginalProbForEachCharForEachNodePos(ostream& out,const int pos);
+
+};
+
+#endif
diff --git a/programs/fastml/fastml.cpp b/programs/fastml/fastml.cpp
new file mode 100644
index 0000000..23bc4c3
--- /dev/null
+++ b/programs/fastml/fastml.cpp
@@ -0,0 +1,361 @@
+#include "mainbb.h"
+#include "logFile.h"
+
+
+int main(int argc, char* argv[]) { // entry point: all real work happens inside the mainbb constructor
+	myLog::setLog("",10); // default log sink; mainbb re-points it at the user's report file
+	mainbb mainbb1(argc,argv);
+	return 0;
+}
+
+/*
+//------------------------------------------------
+
+
+#include "bbAlg.h"
+#include "sequenceDataDiff.h"
+sequenceContainer main1(const string& seqFile,
+ char format,
+ const string& treeFile,
+ const string& reportFileName,
+ const string& ancestralSequencesFileName,
+ const MDOUBLE alpha,
+ const int categor,
+ time_t& timeTaken,
+ clock_t& ctimeTaken,
+ const MDOUBLE recalculateExactVal); //0 never recalculate...
+
+int veryMainLysSmallCheck() {// the non command line version for debugging and checking.
+ const string seqFile = "C:\\tal\\seq\\lys6\\junk\\seqF1.txt";
+ const string treeFile1 = "C:\\tal\\seq\\lys6\\junk\\tree.txt";
+ const string treeFile2 = "C:\\tal\\seq\\lys6\\junk\\tree.txt";
+ const string reportFileHom = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileHom.txt";
+ const string reportFileGam = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileGam.txt";
+ const string reportFileDiffAndTime = "C:\\tal\\seq\\lys6\\junk\\tmp\\reportFileDif.txt";
+ const string ancstralSeqGam = "C:\\tal\\seq\\lys6\\junk\\tmp\\ancstralSeqGam.txt";
+ const string ancstralSeqHom = "C:\\tal\\seq\\lys6\\junk\\tmp\\ancstralSeqHom.txt";
+ time_t time1;
+ time_t time2;
+clock_t ctime1;
+clock_t ctime2;
+
+ sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,0.924884,4,time1,ctime1,0); // gam
+ sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
+ sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
+ sequenceDataDiff1f.computeDifferences();
+ ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
+ sequenceDataDiff1f.printDiff(outdiff);
+ outdiff.close();
+ ofstream out;
+ out.open(reportFileDiffAndTime.c_str(),ios::app);
+ out<<" time taken for hom was: "<<time2<<endl;
+ out<<" time taken for gam was: "<<time1<<endl;
+ out.close();
+ return 0;
+}
+
+int veryMainLys() {// the non command line version for debugging and checking.
+ const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\lys71.ngap.mase";
+ const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\treehom.txt";
+ const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\treegam.txt";
+ const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\reportFileHom.txt";
+ const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\reportFileGam.txt";
+ const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\reportFileDif.txt";
+ const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\ancstralSeqGam.txt";
+ const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\lys71\\ancstralSeqHom.txt";
+ time_t time1;
+ time_t time2;
+ clock_t ctime1;
+ clock_t ctime2;
+ sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,0.924884,4,time1,ctime1,0); // gam
+ sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
+ sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
+ sequenceDataDiff1f.computeDifferences();
+ ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
+ sequenceDataDiff1f.printDiff(outdiff);
+ outdiff.close();
+ ofstream out;
+ out.open(reportFileDiffAndTime.c_str(),ios::app);
+ out<<" time taken for hom was: "<<time2<<endl;
+ out<<" time taken for gam was: "<<time1<<endl;
+ out.close();
+ return 0;
+}
+
+int veryMainCo1() {// the non command line version for debugging and checking.
+ const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\co1.ngap.aln";
+ const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\treehom.txt";
+ const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\treegam.txt";
+ const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\reportFileHom.txt";
+ const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\reportFileGam.txt";
+ const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\reportFileDif.txt";
+ const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\ancstralSeqGam.txt";
+ const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\co1\\ancstralSeqHom.txt";
+ time_t time1;
+ time_t time2;
+ clock_t ctime1;
+ clock_t ctime2;
+ sequenceContainer sd1 = main1(seqFile,'a',treeFile1,reportFileGam,ancstralSeqGam,0.257432,4,time1,ctime1,0); // gam
+ sequenceContainer sd2 = main1(seqFile,'a',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
+ sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
+ sequenceDataDiff1f.computeDifferences();
+ ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
+ sequenceDataDiff1f.printDiff(outdiff);
+ outdiff.close();
+ ofstream out;
+ out.open(reportFileDiffAndTime.c_str(),ios::app);
+ out<<" time taken for hom was: "<<time2<<endl;
+ out<<" time taken for gam was: "<<time1<<endl;
+ out.close();
+ return 0;
+}
+
+int veryMainCo2() {// the non command line version for debugging and checking.
+ const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\co2ngap.aln";
+ const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\treehom.txt";
+ const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\treegam.txt";
+ const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\reportFileHom.txt";
+ const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\reportFileGam.txt";
+ const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\reportFileDif.txt";
+ const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\ancstralSeqGam.txt";
+ const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\co2\\ancstralSeqHom.txt";
+ time_t time1;
+ time_t time2;
+ clock_t ctime1;
+ clock_t ctime2;
+ sequenceContainer sd1 = main1(seqFile,'a',treeFile1,reportFileGam,ancstralSeqGam,0.476490,4,time1,ctime1,0); // gam
+ sequenceContainer sd2 = main1(seqFile,'a',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
+ sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
+ sequenceDataDiff1f.computeDifferences();
+ ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
+ sequenceDataDiff1f.printDiff(outdiff);
+ outdiff.close();
+ ofstream out;
+ out.open(reportFileDiffAndTime.c_str(),ios::app);
+ out<<" time taken for hom was: "<<time2<<endl;
+ out<<" time taken for gam was: "<<time1<<endl;
+ out.close();
+ return 0;
+}
+
+int veryMainOpsin() {// the non command line version for debugging and checking.
+ const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\opsin.mase";
+ const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\treehom.txt";
+ const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\treegam.txt";
+ const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\reportFileHom.txt";
+ const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\reportFileGam.txt";
+ const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\reportFileDif.txt";
+ const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\ancstralSeqGam.txt";
+ const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\opsin\\ancstralSeqHom.txt";
+ time_t time1;
+ time_t time2;
+ clock_t ctime1;
+ clock_t ctime2;
+ sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,0.331405,4,time1,ctime1,0); // gam
+ sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,ctime2,0); // hom
+ sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
+ sequenceDataDiff1f.computeDifferences();
+ ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
+ sequenceDataDiff1f.printDiff(outdiff);
+ outdiff.close();
+ ofstream out;
+ out.open(reportFileDiffAndTime.c_str(),ios::app);
+ out<<" time taken for hom was: "<<time2<<endl;
+ out<<" time taken for gam was: "<<time1<<endl;
+ out.close();
+ return 0;
+}
+
+
+int veryMainSteroidMase() {// the non command line version for debugging and checking. (renamed in this dead code: it otherwise redefines veryMainSteroid() below; note its two main1() calls also omit the clock_t& argument)
+ const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\noGaps.mase";
+ const string treeFile1 = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\treehom.txt";
+ const string treeFile2 = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\treegam.txt";
+ const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\reportFileHom.txt";
+ const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\reportFileGam.txt";
+ const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\reportFileDif.txt";
+ const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\ancstralSeqGam.txt";
+ const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\ancstralSeqHom.txt";
+ time_t time1;
+ time_t time2;
+ sequenceContainer sd1 = main1(seqFile,'m',treeFile1,reportFileGam,ancstralSeqGam,1.534586,4,time1,0); // gam
+ sequenceContainer sd2 = main1(seqFile,'m',treeFile2,reportFileHom,ancstralSeqHom,-3,1,time2,0); // hom
+ sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
+ sequenceDataDiff1f.computeDifferences();
+ ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
+ sequenceDataDiff1f.printDiff(outdiff);
+ outdiff.close();
+ ofstream out;
+ out.open(reportFileDiffAndTime.c_str(),ios::app);
+ out<<" time taken for hom was: "<<time2<<endl;
+ out<<" time taken for gam was: "<<time1<<endl;
+ out.close();
+ return 0;
+}
+
+
+int veryMainSteroid() {// the non command line version for debugging and checking.
+ const string seqFile = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\dataPreperation\\B4remGap\\ster73.snames.correct.ngap.aln";
+ const string treeFile1 ="C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\buildingTree\\topologyHom.ph";
+ const string treeFile2 ="C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\buildingTree\\topologyGam.ph";
+
+
+
+ const string reportFileHom = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NreportFileHom.txt";
+ const string reportFileGam = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NreportFileGam.txt";
+ const string reportFileDiffAndTime = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NreportFileDif.txt";
+ const string ancstralSeqGam = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NancstralSeqGam.txt";
+ const string ancstralSeqHom = "C:\\tal\\activeProjects\\ancbb\\seq\\steroid\\NancstralSeqHom.txt";
+ time_t time1;
+ time_t time2;
+ clock_t ctime1;
+ clock_t ctime2;
+ sequenceContainer sd1 = main1(seqFile,'a',treeFile1,reportFileHom,ancstralSeqHom,-600,1,time1,ctime1,0); // hom
+ sequenceContainer sd2 = main1(seqFile,'a',treeFile2,reportFileGam,ancstralSeqGam,1.29,4,time2,ctime2,0); // gam
+ sequenceDataDiff sequenceDataDiff1f(&sd1,&sd2);
+ sequenceDataDiff1f.computeDifferences();
+ ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
+ sequenceDataDiff1f.printDiff(outdiff);
+ outdiff.close();
+ ofstream out;
+ out.open(reportFileDiffAndTime.c_str(),ios::app);
+ out<<" time taken for hom was: "<<time1<<endl;
+ out<<" time taken for gam was: "<<time2<<endl;
+ out<<" ctime taken for hom was: "<<ctime1<<endl;
+ out<<" ctime taken for gam was: "<<ctime2<<endl;
+ out.close();
+ return 0;
+}
+
+MDOUBLE totalBranchLengh(const tree& t1) {
+ MDOUBLE sum=0;
+ vector<tree::nodeP> vec;
+ t1.getAllNodes(vec,t1.getRoot());
+ for (int i=0; i< vec.size(); ++i) {
+ if (vec[i]->father != NULL) sum += vec[i]->dis2father();
+ cerr<<sum<<" "<<vec[i]->dis2father()<<endl;
+ }
+ return sum;
+}
+
+
+
+
+*/
+#include "sequenceDataDiff.h"
+#include "amino.h"
+#include <ctime>
+#include "recognizeFormat.h"
+#include "uniDistribution.h"
+#include "gammaDistribution.h"
+#include "replacementModel.h"
+#include "readDatMatrix.h"
+#include "chebyshevAccelerator.h"
+#include "bbAlg.h"
+/*
+sequenceContainer main1(const string& seqFile,
+ char format,
+ const string& treeFile,
+ const string& reportFileName,
+ const string& ancestralSequencesFileName,
+ const MDOUBLE alpha,
+ const int categor,
+ time_t& timeTaken,
+ clock_t& ctimeTaken,
+ const MDOUBLE recalculateExactVal) { // gamma distribution
+
+ alphabet* _alph = new amino;
+ ifstream f(seqFile.c_str());
+ sequenceContainer original = recognizeFormat::read(f,_alph);;
+ tree t1(treeFile); // with sequence data
+// t1.multipleAllBranchesByFactor(10);
+ // stochastic process:
+
+// cerr<<" total br-len is:"<<totalBranchLengh(t1)<<endl;
+// return *sd;
+
+
+ distribution *dist1 = NULL;
+ if (categor ==1 ) dist1 = new uniDistribution;
+ else dist1 = new gammaDistribution(alpha,categor);
+
+ replacementModel *probMod=new pupAll(datMatrixHolder::jones);
+ pijAccelerator *pijAcc1 = new chebyshevAccelerator(probMod);
+
+// replacementModel *probMod1=new nucJC;
+// replacementModel *probMod1=new pupJTT;
+// pijAccelerator *pijAcc1= new chebyshevAccelerator(probMod1);
+// pijAccelerator *pijAcc1= new trivialAccelerator(probMod1);
+ stochasticProcess* _s1 = new stochasticProcess(dist1, pijAcc1);
+ bbAlg bbAlg1(t1,*_s1,original,bbAlg::both,reportFileName,recalculateExactVal);//computeAgainExactTreshold
+// bbAlg bbAlg1(&t1,_s1,bbAlg::sum,0);//computeAgainExactTreshold
+// bbAlg bbAlg1(&t1,_s1,bbAlg::max,0);//computeAgainExactTreshold
+ time_t time1,time2;
+ clock_t ctime1, ctime2;
+ time(&time1);
+ ctime1 = clock();
+ cerr<<"starting time is: "<<time1<<endl;
+ cerr<<"starting clock is: "<<ctime1<<endl;
+ MDOUBLE res = bbAlg1.bbReconstructAllPositions(original);
+ time(&time2);
+ ctime2 = clock();
+ cerr<<"ending time is: "<<time2<<endl;
+ cerr<<"ending clock is: "<<ctime2<<endl;
+ timeTaken=time2-time1;
+ ctimeTaken=ctime2-ctime1;
+
+ ofstream outi;
+ outi.open(reportFileName.c_str(),ios::app);
+ outi<<" the likelihood of the reconstruction is:"<<res<<endl;
+ outi.close();
+ sequenceContainer recS= bbAlg1.fromAncestralSequenceToSeqData();
+
+ delete pijAcc1;
+ delete dist1;
+ return recS;
+}
+*/
+/*
+int mainNoCommandLine() {
+
+// veryMainLysSmallCheck(); // just to check that everything is working...
+// veryMainLys();
+// veryMainCo1();
+// veryMainCo2();
+// veryMainOpsin();
+ veryMainSteroid();
+ return 0;
+}
+// const string seqFile = "C:\\tal\\seq\\lys6\\junk\\seq.txt";
+// const string treeFile = "C:\\tal\\seq\\lys6\\junk\\tree.txt";
+// const string seqFile = "C:\\tal\\seq\\lys6\\seq.txt";
+// const string treeFile = "C:\\tal\\seq\\lys6\\tree.txt";
+// main1(seqFile,treeFile,-3,1,time1);// hom
+
+*/
+
+//int main() {
+int FindDifferencesBetween2SequenceContainerFiles() { // debug utility: diff a joint vs. a marginal reconstruction (hard-coded Windows paths)
+	const string seqFile1 = "D:\\tal\\yaep15\\fastml2.01\\originalDataForPaper\\seq_joint.txt";
+	const string seqFile2 = "D:\\tal\\yaep15\\fastml2.01\\originalDataForPaper\\seq_marginal.txt";
+	const string reportFileDiffAndTime = "D:\\tal\\yaep15\\fastml2.01\\originalDataForPaper\\reportFileDif.txt";
+
+	alphabet* _alph = new amino; // NOTE(review): never deleted - leaked; tolerable in a one-shot debug routine
+	ifstream f(seqFile1.c_str());
+	sequenceContainer sd1 = recognizeFormat::read(f,_alph);
+	f.close();
+
+	ifstream f2(seqFile2.c_str());
+	sequenceContainer sd2 = recognizeFormat::read(f2,_alph);
+	f2.close();
+
+	sequenceDataDiff sequenceDataDiff1f(sd1,sd2);
+	sequenceDataDiff1f.computeDifferences();
+	ofstream outdiff(reportFileDiffAndTime.c_str(),ios::app);
+	sequenceDataDiff1f.printDiff(outdiff);
+	outdiff.close();
+	ofstream out;
+	out.open(reportFileDiffAndTime.c_str(),ios::app); // NOTE(review): opened and closed without writing anything - leftover scaffolding
+	out.close();
+	return 0;
+}
\ No newline at end of file
diff --git a/programs/fastml/jointNoGamma.cpp b/programs/fastml/jointNoGamma.cpp
new file mode 100644
index 0000000..7c6cefa
--- /dev/null
+++ b/programs/fastml/jointNoGamma.cpp
@@ -0,0 +1,140 @@
+#include "jointNoGamma.h"
+#include "treeIt.h"
+#include "seqContainerTreeMap.h"
+#include <fstream>
+#include <cmath>
+using namespace std;
+
+jointNoGamma::jointNoGamma(const tree& et,
+					 const stochasticProcess& sp,
+					 const sequenceContainer& sc)
+	: _et(et), _sp(sp), _sc(sc) {
+	_cpih.fillPij(_et,_sp); // precompute the transition probabilities P(i->j) once per branch
+}
+
+void jointNoGamma::compute() { // joint ML ancestral reconstruction under a homogeneous (no-gamma) model, position by position
+
+	suffStatGlobalHomPos ssc;
+	suffStatGlobalHomPosJointNoGamma sscJointNoGam;
+	ssc.allocatePlace(_et.getNodesNum(),_sc.alphabetSize());
+	sscJointNoGam.allocatePlace(_et.getNodesNum(),_sc.alphabetSize());
+
+	vector<string> ancestralSequences(_et.getNodesNum()); // per-node reconstructed sequence, grown one character per position
+	MDOUBLE totalLikelihoodOfReconstruction = 0; // NOTE(review): never updated or read - dead variable
+	cout<<"doing position (joint): ";
+	for (int pos=0; pos<_sc.seqLen(); ++pos) {
+		cout<<pos+1<<" ";
+		fillComputeUp(pos,ssc,sscJointNoGam); // dynamic-programming up-pass for this position
+		doubleRep likelihoodOfPos = 0;
+
+		vector<int> res =computeJointAncestralFromSSC(pos,ssc,sscJointNoGam,likelihoodOfPos); // traceback: best letter per node
+		treeIterDownTopConst tIt(_et);
+		for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+			if (mynode->isInternal()) {
+				ancestralSequences[mynode->id()]+=_sc.getAlphabet()->fromInt(res[mynode->id()]);
+			}
+		}
+		_jointLikelihoodOfPositions.push_back(likelihoodOfPos);
+	}
+	cout<<endl;
+	fromJointReconstructionToSequenceContainer(ancestralSequences);
+}
+
+void jointNoGamma::fillComputeUp(const int pos,
+					   suffStatGlobalHomPos& ssc,
+					   suffStatGlobalHomPosJointNoGamma& sscJointNoGam) { // up-pass: for each node and each candidate father letter, store the best subtree likelihood (ssc) and the letter achieving it (sscJointNoGam)
+	seqContainerTreeMap sctm(_sc,_et); // NOTE(review): rebuilt for every position; could be constructed once by the caller
+	ssc.allocatePlace(_et.getNodesNum(),_cpih.alphabetSize()); // NOTE(review): caller already allocated - redundant but harmless
+	treeIterDownTopConst tIt(_et);
+	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+		if (mynode->isLeaf()) {// leaf
+			for(int letterInFather=0; letterInFather<_cpih.alphabetSize();letterInFather++) {
+				const int seqID = sctm.seqIdOfNodeI(mynode->id());
+				MDOUBLE totalVal = 0.0;
+				for (int let=0; let<_cpih.alphabetSize();let++) {
+					MDOUBLE val = _sc.getAlphabet()->relations(_sc[seqID][pos],let); // >0 when the observed (possibly ambiguous) char is compatible with let
+					if (val>0) {
+						val*=_cpih.getPij(mynode->id(),letterInFather,let);
+						totalVal +=val;
+					}
+				}
+				//cerr<<"val =" << val <<" "; // REMOVE!
+				//cerr<<"_pi->data(mynode->id(),pos)= "<<_pi->data(mynode->id(),pos)<<" ";//REMOVE
+				ssc.set(mynode->id(),letterInFather,totalVal);
+				sscJointNoGam.set(mynode->id(),letterInFather,_sc[seqID][pos]); // a leaf's "choice" is just its observed character
+			}
+		}
+		else {
+			for(int letterInFather=0; letterInFather<_cpih.alphabetSize();letterInFather++) {
+				doubleRep maxProb=0.0;
+				int bestLet = -1;
+				for (int let=0; let<_cpih.alphabetSize();++let) {
+					doubleRep tmpProb = 1;
+					if (mynode->isRoot() == false) {
+						tmpProb *= _cpih.getPij(mynode->id(),letterInFather,let);
+					}
+					for(int i=0; i < mynode->getNumberOfSons();++i){
+						tmpProb *= ssc.get(mynode->getSon(i)->id(),let); // product over children of their best subtree likelihoods
+					}
+					if (tmpProb>maxProb) {
+						maxProb = tmpProb;
+						bestLet = let;
+					}
+				}
+				ssc.set(mynode->id(),letterInFather,maxProb);
+				assert(bestLet>=0);
+				assert(bestLet<_cpih.alphabetSize());
+
+				sscJointNoGam.set(mynode->id(),letterInFather,bestLet); // remember the argmax for the traceback pass
+				if (mynode->isRoot()) break; // there's no meening to letterInFather in case of root.
+			}
+		}
+	}
+}
+
+vector<int> jointNoGamma::computeJointAncestralFromSSC(
+					const int pos,
+					const suffStatGlobalHomPos& ssc,
+					const suffStatGlobalHomPosJointNoGamma& sscFASTML,
+					doubleRep & likelihoodOfReconstruction) { // traceback: pick each node's letter given its father's chosen letter; also outputs the joint likelihood of this position
+	treeIterTopDownConst tIt(_et);
+	vector<int> res(_et.getNodesNum()); // res[nodeId] = reconstructed character index
+	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
+		if (mynode->isRoot() == false) {
+			int letterInFather = res[mynode->father()->id()]; // father already resolved thanks to top-down order
+			int tmp = sscFASTML.get(mynode->id(),letterInFather);
+			res[mynode->id()] = tmp;
+		} else {//special case of the root
+			MDOUBLE maxL = VERYSMALL; // NOTE(review): unused - dead variable
+			int bestCharInRoot = sscFASTML.get(mynode->id(),0); // root ignores letterInFather; slot 0 holds its best letter
+			likelihoodOfReconstruction = ssc.get(mynode->id(),0)*_sp.freq(bestCharInRoot);; // NOTE(review): stray second ';' - harmless empty statement
+			res[mynode->id()] = bestCharInRoot;
+		}
+	}
+	return res;
+}
+
+void jointNoGamma::fromJointReconstructionToSequenceContainer(const vector<string> & ancestralSequences){ // append the inferred ancestral sequences to a copy of the input alignment
+	_resultSec = _sc;
+	treeIterDownTopConst tIt2(_et);
+	for (tree::nodeP mynode = tIt2.first(); mynode != tIt2.end(); mynode = tIt2.next()) {
+		if (mynode->isInternal()) {
+			sequence tmp(ancestralSequences[mynode->id()],mynode->name(),"joint reconstruction",_resultSec.numberOfSeqs(),_sc.getAlphabet()); // id = next free slot in the container
+			_resultSec.add(tmp);
+		}
+	}
+}
+
+void jointNoGamma::outputTheJointProbAtEachSite(const string & outputFileProbJoint) { // write per-position joint log-likelihoods and their total
+	ofstream jointProbOutput(outputFileProbJoint.c_str());
+	MDOUBLE totalLogLikelihood =0;
+	for (int j=0; j < _jointLikelihoodOfPositions.size(); ++j) { // NOTE(review): signed/unsigned comparison - benign here
+		totalLogLikelihood+=log(_jointLikelihoodOfPositions[j]);
+		jointProbOutput<<"Joint log likelihood of position "<<j+1;// j+1 so that positions start from 1, and not from 0.
+		jointProbOutput<<": "<<log(_jointLikelihoodOfPositions[j])<<endl;
+	}
+	jointProbOutput<<"total log likelihood of joint reconstruction: "<<totalLogLikelihood<<endl;
+	jointProbOutput.close();
+}
+
+
diff --git a/programs/fastml/jointNoGamma.h b/programs/fastml/jointNoGamma.h
new file mode 100644
index 0000000..1ccebca
--- /dev/null
+++ b/programs/fastml/jointNoGamma.h
@@ -0,0 +1,44 @@
+#ifndef ___JOINT_NO_GAMMA
+#define ___JOINT_NO_GAMMA
+
+#include "definitions.h"
+#include "tree.h"
+#include "stochasticProcess.h"
+#include "sequenceContainer.h"
+#include "computePijComponent.h"
+#include "suffStatComponent.h"
+#include "suffStatComponentJointNoGamma.h"
+
+class jointNoGamma { // joint ML ancestral reconstruction under a homogeneous (single-rate) model
+public:
+	explicit jointNoGamma(
+		const tree& et,
+		const stochasticProcess& sp,
+		const sequenceContainer& sc);
+
+	void compute(); // run the reconstruction; must be called before the two accessors below
+	void outputTheJointProbAtEachSite(const string & outputFileProbJoint);
+	sequenceContainer getTheJointReconstruction() const {return _resultSec;}
+
+private:
+	void fillComputeUp(const int pos,
+					   suffStatGlobalHomPos& ssc,
+					   suffStatGlobalHomPosJointNoGamma& sscJointNoGam);
+	vector<int> computeJointAncestralFromSSC(
+					const int pos,
+					const suffStatGlobalHomPos& ssc,
+					const suffStatGlobalHomPosJointNoGamma& sscFASTML,
+					doubleRep & likelihoodOfReconstruction);
+	void fromJointReconstructionToSequenceContainer(const vector<string> & ancestralSequences);
+
+	const tree& _et;
+	const stochasticProcess& _sp;
+	const sequenceContainer& _sc;
+	sequenceContainer _resultSec; // input sequences + reconstructed ancestors (filled by compute)
+	computePijHom _cpih; // per-branch transition probabilities, filled in the constructor
+	vector<doubleRep> _jointLikelihoodOfPositions; // per-position joint likelihood (not log)
+};
+
+
+
+#endif
diff --git a/programs/fastml/mainbb.cpp b/programs/fastml/mainbb.cpp
new file mode 100644
index 0000000..48246c6
--- /dev/null
+++ b/programs/fastml/mainbb.cpp
@@ -0,0 +1,562 @@
+#include "mainbb.h"
+
+#include "aaJC.h"
+#include "amino.h"
+#include "bbAlg.h"
+#include "bestAlpha.h"
+#include "bblEM.h"
+#include "chebyshevAccelerator.h"
+#include "clustalFormat.h"
+#include "computeMarginalReconstruction.h"
+#include "distanceTable.h"
+#include "fastaFormat.h"
+#include "gammaDistribution.h"
+#include "jointNoGamma.h"
+#include "likeDist.h"
+#include "logFile.h"
+#include "maseFormat.h"
+#include "molphyFormat.h"
+#include "nexusFormat.h"
+#include "nucleotide.h"
+#include "nucJC.h"
+#include "gtrModel.h"
+#include "nj.h"
+#include "phylipFormat.h"
+#include "readDatMatrix.h"
+#include "recognizeFormat.h"
+#include "trivialAccelerator.h"
+#include "uniDistribution.h"
+
+//For the codon part
+#include "bestAlphaAndK.h"
+#include "codonUtils.h"
+
+
+#include <fstream>
+#include <iostream>
+using namespace std;
+
+mainbb::mainbb(int argc, char* argv[]) { // driver: parse options, build model + tree, then reconstruct ancestral sequences
+	fillOptionsParameters(argc,argv);
+	myLog::setLog(_options->reportFile,10);
+	printBBProjectInfo();
+	printSearchParameters();
+	getStartingSequenceData();
+	getStartingStochasticProcess();
+	getStartingEvolTreeTopology();
+	_et.rootToUnrootedTree();
+	//_et.createFlatLengthMatrix(0.001); // TO BE USED FOR TESTING ONLY.
+	if (_options->modelName == bb_options::nyCodon)
+		getStartingBLAndModelParam(); //for NY codon Models
+	else
+		getStartingBranchLengthsAndAlpha();
+	printOutputTree();
+	if (_options->doJoint) {
+		if (_options->distributionName == bb_options::gam) {
+			findAncestralSequencesGammaJoint(); // branch-and-bound joint reconstruction (gamma rates)
+		} else {
+			findAncestralSequencesHomJoint(); // exact joint reconstruction, homogeneous model
+		}
+	}
+	else{
+		getMarginalReconstruction();
+	}
+	myLog::endLog();
+}
+
+void mainbb::printAncestralSequencesGammaJoint() { // write the joint reconstruction in the user-selected alignment format
+	replaceSequences(_resulutingJointReconstruction,_originSc); // restore the original (pre gaps-to-missing) leaf sequences
+	ofstream out(_options->outFile_seq_joint.c_str());
+	//out<<"sequences of the joint reconstruction, model: "<<_options->modelNameStr()<<endl;
+	switch (_options->seqOutputFormat){ // NOTE(review): no default case - an unhandled format silently produces an empty file
+		case (bb_options::mase)   : maseFormat::   write(out,_resulutingJointReconstruction); break;
+		case (bb_options::fasta)  : fastaFormat::  write(out,_resulutingJointReconstruction); break;
+		case (bb_options::clustal): clustalFormat::write(out,_resulutingJointReconstruction); break;
+		case (bb_options::phylip) : phylipFormat:: write(out,_resulutingJointReconstruction); break;
+		case (bb_options::molphy) : molphyFormat:: write(out,_resulutingJointReconstruction); break;
+		case (bb_options::nexus)  : nexusFormat::  write(out,_resulutingJointReconstruction); break;
+	}
+	out.close();
+}
+
+mainbb::~mainbb() { // frees objects allocated in getStartingSequenceData / fillOptionsParameters
+	if (_alph) delete _alph;
+	if (_options) delete _options;
+}
+
+void mainbb::getStartingEvolTreeTopology(){ // use the user's tree if one was given, otherwise build an NJ tree
+	if (_options->treefile=="") {
+		getStartingNJtreeNjMLdis();
+	}
+	else getStartingTreeFromTreeFile();
+}
+
+
+
+void mainbb::getStartingNJtreeNjMLdis() { // NJ starting tree built from pairwise ML distances
+	// note that here ALWAYS, the ML distances are computed using
+	// an homogenous rate distribution.
+	uniDistribution lUni;
+//	const pijAccelerator* lpijAcc = _sp->getPijAccelerator();// note this is just a copy of the pointer.
+	const pijAccelerator* lpijAcc = _spVec[0].getPijAccelerator();// note this is just a copy of the pointer.
+	stochasticProcess lsp(&lUni,lpijAcc); // local process over stack/borrowed objects; nothing to free here
+
+	likeDist pd1(lsp,0.01);
+	VVdouble disTab;
+	vector<string> vNames;
+	giveDistanceTable(&pd1,
+					   _sc,
+					   disTab,
+					   vNames);
+	getStartingTreeNJ_fromDistances(disTab,vNames);
+}
+
+void mainbb::getStartingTreeNJ_fromDistances(const VVdouble& disTab,
+									   const vector<string>& vNames) { // neighbor-joining on the ML distance table
+	NJalg nj1;
+	_et= nj1.computeTree(disTab,vNames);
+
+}
+
+void mainbb::getStartingTreeFromTreeFile(){ // read the user tree; if it lacks branch lengths, seed them and request BBL
+	_et= tree(_options->treefile);
+	if (!_et.withBranchLength()) {
+		_et.createFlatLengthMatrix(0.05); // arbitrary small starting length for every branch
+		_options->optimizeBrLenOnStartingTree = true;
+	}
+}
+
+void mainbb::getStartingBranchLengthsAndAlpha(){ // optimize branch lengths and, for the gamma model, the alpha parameter
+	if (_options->distributionName == bb_options::hom) {
+		if (_options->optimizeBrLenOnStartingTree == true) {
+			cout<<"Optimizing branch lengths (Homogenuos model)..."<<endl;
+			bblEM bblem1(_et,_sc,_spVec[0],NULL);
+			//bblEM bblem1(_et,_sc,*_sp,NULL);
+			//brLenOptEM::optimizeBranchLength1G_EM(_et,_sc,*_sp,NULL);
+		}
+	}
+	else { // GAMMA MODEL!
+		// Here we want to optimize branch lengths with a gamma model.
+		// there are three options:
+		//(1) User provides the alpha and no bbl.
+		//(2) User provides the alpha and bbl
+		//(3) Alpha is optimized from the data and bbl.
+
+
+		// option 1 will not enter to any of these options.
+		if ((_options->userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == true)) {
+			cout<<"Optimizing branch lengths (Gamma model, user alpha)..."<<endl;
+			MDOUBLE intitalAlpha = 1.0; // NOTE(review): this overwrites the user-provided alpha with 1.0 before BBL - confirm intended
+			static_cast<gammaDistribution*>(_spVec[0].distr())->setAlpha(intitalAlpha);
+			bblEM bblem1(_et,_sc,_spVec[0],NULL);
+			//static_cast<gammaDistribution*>(_sp->distr())->setAlpha(intitalAlpha);
+			//bblEM bblem1(_et,_sc,*_sp,NULL);
+			//brLenOptEM::optimizeBranchLength1G_EM(_et,_sc,*_sp,NULL);
+		}
+		else if ((_options->userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == false)) {
+			return; // option 1: nothing to optimize
+		}
+		else if (_options->userProvideAlpha == false) {
+			cout<<"Optimizing branch lengths and alpha (Gamma model) ..."<<endl;
+			bestAlphaAndBBL bbl2(_et,_sc,_spVec[0]);
+		}
+	}
+}
+
+void mainbb::getStartingStochasticProcess() { // build the replacement model + rate distribution selected on the command line
+	int numberOfCategories = _options->gammaCategies;
+	MDOUBLE alpha = _options->gammaPar;
+	if (_options->distributionName == bb_options::hom) {
+		numberOfCategories = 1; // forcing homogenous model.
+		alpha = 1.0; // a 1-category gamma stands in for the homogeneous (single-rate) distribution
+		cout<<"Using homogenous model (no among site rate variation)"<<endl;
+	} else {
+		cout<<"Using a Gamma model with: "<<numberOfCategories<<" discrete categories "<<endl;
+	}
+	distribution *dist =  new gammaDistribution(alpha,numberOfCategories);
+	replacementModel *probMod=NULL;
+	pijAccelerator *pijAcc=NULL;
+	switch (_options->modelName){
+		case (bb_options::day):
+			probMod=new pupAll(datMatrixHolder::dayhoff);
+			if (_options->useChebyshev == true) {
+				pijAcc = new chebyshevAccelerator(probMod);
+			} else {
+				pijAcc = new trivialAccelerator(probMod);
+			}
+			cout<<"Amino acid replacement matrix is Dayhoff"<<endl;
+			break;
+		case (bb_options::jtt):
+			probMod=new pupAll(datMatrixHolder::jones);
+			if (_options->useChebyshev == true) {
+				pijAcc = new chebyshevAccelerator(probMod);
+			} else {
+				pijAcc = new trivialAccelerator(probMod);
+			}
+			cout<<"Amino acid replacement matrix is JTT"<<endl;
+			break;
+		case (bb_options::lg):
+			probMod=new pupAll(datMatrixHolder::lg);
+			if (_options->useChebyshev == true) {
+				pijAcc = new chebyshevAccelerator(probMod);
+			} else {
+				pijAcc = new trivialAccelerator(probMod);
+			}
+			cout<<"Amino acid replacement matrix is LG"<<endl;
+			break;
+		case (bb_options::rev):
+			probMod=new pupAll(datMatrixHolder::mtREV24);
+			if (_options->useChebyshev == true) {
+				pijAcc = new chebyshevAccelerator(probMod);
+			} else {
+				pijAcc = new trivialAccelerator(probMod);
+			}
+			cout<<"Amino acid replacement matrix is mtREV24"<<endl;
+			break;
+		case (bb_options::wag):
+			probMod=new pupAll(datMatrixHolder::wag);
+			if (_options->useChebyshev == true) {
+				pijAcc = new chebyshevAccelerator(probMod);
+			} else {
+				pijAcc = new trivialAccelerator(probMod);
+			}
+			cout<<"Amino acid replacement matrix is WAG"<<endl;
+			break;
+		case (bb_options::cprev):
+			probMod=new pupAll(datMatrixHolder::cpREV45);
+			if (_options->useChebyshev == true) {
+				pijAcc = new chebyshevAccelerator(probMod);
+			} else {
+				pijAcc = new trivialAccelerator(probMod);
+			}
+			cout<<"Amino acid replacement matrix is cpREV45"<<endl;
+			break;
+		case (bb_options::empiriCodon):
+			probMod=new pupAll(datMatrixHolder::empiriCodon,61);
+			if (_options->useChebyshev == true) {
+				pijAcc = new chebyshevAccelerator(probMod,61); // codon alphabet: 61 sense codons
+			} else {
+				pijAcc = new trivialAccelerator(probMod);
+			}
+			cout<<"Codon replacement matrix is empiriCodon of adrian"<<endl;
+			break;
+		case (bb_options::nucjc):
+			probMod=new nucJC;
+			pijAcc = new trivialAccelerator(probMod);
+			cout<<"Nucleotide substitution model is Jukes and Cantor"<<endl;
+			break;
+		case (bb_options::nucgtr):
+		  {
+			nucleotide nucAlph;
+			Vdouble freq = computeGTRFreq(nucAlph); // empirical base frequencies from the data
+			probMod=new gtrModel(freq, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25); // exchangeabilities start equal; optimized later
+			pijAcc = new trivialAccelerator(probMod);
+			cout<<"Nucleotide substitution model is General time Reversible"<<endl;
+		  }
+			break;
+		case (bb_options::aajc):
+			probMod=new aaJC; pijAcc = new trivialAccelerator(probMod);
+			cout<<"Amino acid replacement matrix is Jukes and Cantor"<<endl;
+			break;
+		//this part for the codon model c & w init as with no selection
+		case (bb_options::nyCodon):
+		{
+			codon codonAlph;
+			Vdouble freq = computeFreq(codonAlph);
+			probMod = new wYangModel(1.0,1.0,freq, 0, &codonAlph); // kappa=omega=1: neutral starting point
+			pijAcc = new trivialAccelerator(probMod);
+			cout<<"Codon replacement matrix is NY model"<<endl;
+		}
+		break;
+		default:
+			errorMsg::reportError("this probablistic model is not yet available");
+	}
+	stochasticProcess sp(dist, pijAcc); // presumably clones its components, since the locals are freed below - verify
+	_spVec.push_back(sp);
+	if (probMod) delete probMod;
+	if (pijAcc) delete pijAcc;
+	if (dist) delete dist;
+}
+
+void mainbb::printOutputTree() { // write the tree twice: Newick (internal nodes labeled) and ANCESTOR format
+	ofstream f;
+	string fileName1=_options->outTreeFileNewick;
+	f.open(fileName1.c_str());
+	_et.output(f,tree::PHYLIP,true);
+	//_et.output(f,tree::PHYLIP,false);
+	f.close();
+	cout<<"The tree in 'Newick tree format' (with the internal nodes labeled)\nwas written to a file name called "<<fileName1<<endl;
+	fileName1 = _options->outTreeFileAncestor;
+	f.open(fileName1.c_str());
+	_et.output(f,tree::ANCESTOR);
+	f.close();
+	cout<<"The tree in 'ANCESTOR tree format' was written to a file name called "<<fileName1<<endl;
+}
+
+void mainbb::fillOptionsParameters(int argc, char* argv[]) { // _options is deleted in the destructor
+	_options = new bb_options(argc, argv);
+}
+
+void mainbb::getStartingSequenceData(){ // pick the alphabet from alphabet_size (4/20/61) and read the alignment
+	if (_options->alphabet_size==4) _alph = new nucleotide;
+	else if (_options->alphabet_size == 20) _alph = new amino;
+	else if (_options->alphabet_size == 61) _alph = new codon;
+	else errorMsg::reportError("no such alphabet in function rate4site::getStartingSequenceData"); // NOTE(review): message names rate4site - copy/paste from another program
+
+	ifstream fstream1(_options->seqfile.c_str());
+	_sc = recognizeFormat::read(fstream1,_alph);
+	_originSc = _sc; // untouched copy for output; _sc below has gaps recoded as missing data
+	_sc.changeGaps2MissingData();
+}
+
+// Echo the search setup (tree source and sequence file) to the log, but
+// only when the user asked for verbose output.
+void mainbb::printSearchParameters() {
+ if (_options->verbose) {
+ LOG(1,<<"\nBB parameters: "<<endl);
+ LOG(1,<<endl);
+ LOG(1,<<"-------------------------------------------------------------------------------"<<endl);
+ LOG(1,<<endl);
+ // NOTE(review): the first branch wraps LOG in braces and omits the trailing
+ // semicolon — presumably an artifact of the LOG macro expansion; confirm
+ // against logFile.h before normalizing.
+ if (_options->treefile.size()>0) {LOG(1,<<"Tree file is: "<<_options->treefile<<endl)}
+ else LOG(1,<<"Starting tree is the NJ tree "<<endl);
+ if (_options->seqfile.size()>0) LOG(1,<<"Sequence file is: "<<_options->seqfile<<endl);
+ }
+}
+
+// Print the program banner and literature reference to the log.
+void mainbb::printBBProjectInfo() {
+ LOG(1,<<"*******************************************************************************"<<endl);
+ LOG(1,<<"B&B: A Branch and Bound algorithm for Ancestral Sequence Reconstruction. "<<endl);
+ LOG(1,<<"For information, please send email to Tal Pupko: talp at post.tau.ac.il "<<endl);
+ LOG(1,<<"Ref: Pupko, T., Pe'er, I., Graur, D. Hasegawa, M., and Friedman N. 2002. "<<endl);
+ LOG(1,<<"A branch-and-bound algorithm for the inference of ancestral amino-acid "<<endl);
+ LOG(1,<<"sequences when the replacement rate varies among sites: Application to the "<<endl);
+ LOG(1,<<"evolution of five gene families. Bioinformatics 18: 1116-1123. "<<endl);
+ LOG(1,<<"*******************************************************************************"<<endl);
+ LOG(1,<<endl);
+}
+
+// Run the branch-and-bound joint ancestral reconstruction under the gamma
+// model. The bound method is taken from the user options; the reconstruction
+// and its per-site joint probabilities are written to the output files.
+void mainbb::findAncestralSequencesGammaJoint() {
+  // BUG FIX: bm was previously left uninitialized; if boundMethod matched
+  // none of the three options, an indeterminate value was passed to bbAlg.
+  bbAlg::boundMethod bm = bbAlg::both;
+  if (_options->boundMethod == bb_options::max) bm=bbAlg::max;
+  else if (_options->boundMethod == bb_options::sum) bm=bbAlg::sum;
+  else if (_options->boundMethod == bb_options::both) bm=bbAlg::both;
+
+  // (a leftover debug print after this constructor was removed)
+  bbAlg bbAlg1(_et,_spVec,_sc,bm,_options->reportFile,_options->computeAgainExactTreshold,_forceDistr);
+  MDOUBLE res = bbAlg1.bbReconstructAllPositions(_resulutingJointReconstruction);
+  cout<<" the likelihood of this reconstruction is: "<<res<<endl;
+  bbAlg1.outputTheJointProbAtEachSite(_options->outFile_prob_joint);
+  printAncestralSequencesGammaJoint();
+}
+
+// Joint ancestral reconstruction under the homogeneous (no-gamma) model:
+// compute the joint reconstruction, emit the per-site joint probabilities,
+// restore the original (gapped) leaf sequences, and write the resulting
+// alignment in the requested output format.
+void mainbb::findAncestralSequencesHomJoint() {
+  jointNoGamma jng(_et,_spVec[0],_sc);
+  jng.compute();
+  jng.outputTheJointProbAtEachSite(_options->outFile_prob_joint);
+  sequenceContainer reconstructed = jng.getTheJointReconstruction();
+  replaceSequences(reconstructed,_originSc);
+  ofstream outStream(_options->outFile_seq_joint.c_str());
+  switch (_options->seqOutputFormat) {
+    case bb_options::mase:    maseFormat::write(outStream,reconstructed);    break;
+    case bb_options::molphy:  molphyFormat::write(outStream,reconstructed);  break;
+    case bb_options::clustal: clustalFormat::write(outStream,reconstructed); break;
+    case bb_options::fasta:   fastaFormat::write(outStream,reconstructed);   break;
+    case bb_options::phylip:  phylipFormat::write(outStream,reconstructed);  break;
+    case bb_options::nexus:   nexusFormat::write(outStream,reconstructed);   break;
+    default: errorMsg::reportError(" format not implemented yet in this version... ",1);
+  }
+}
+
+
+// Marginal ancestral reconstruction: write the per-node, per-character
+// posterior probabilities, then the reconstructed alignment (with the
+// original leaf sequences restored) in the requested output format.
+void mainbb::getMarginalReconstruction(){
+  computeMarginalReconstruction cmr(_et,_spVec,_sc);
+  cmr.compute(_forceDistr);
+  cmr.outputTheMarginalProbForEachCharForEachNode(_options->outFile_prob_marginal);
+  sequenceContainer reconstructed = cmr.getResultingMarginalReconstruction();
+  replaceSequences(reconstructed,_originSc);
+  ofstream outStream(_options->outFile_seq_marginal.c_str());
+  outStream<<"sequences of the marginal reconstruction, model: "<<_options->modelNameStr()<<endl;
+  switch (_options->seqOutputFormat) {
+    case bb_options::mase:    maseFormat::write(outStream,reconstructed);    break;
+    case bb_options::molphy:  molphyFormat::write(outStream,reconstructed);  break;
+    case bb_options::clustal: clustalFormat::write(outStream,reconstructed); break;
+    case bb_options::fasta:   fastaFormat::write(outStream,reconstructed);   break;
+    case bb_options::phylip:  phylipFormat::write(outStream,reconstructed);  break;
+    case bb_options::nexus:   nexusFormat::write(outStream,reconstructed);   break;
+    default: errorMsg::reportError(" format not implemented yet in this version... ",1);
+  }
+  outStream.close();
+}
+
+
+// This part is for the NY codon model:
+// optimize the w Yang model parameters under a gamma model, with BBL
+// (branch-length optimization) where requested.
+ void mainbb::getStartingBLAndModelParam()
+ {
+ // GAMMA MODEL FOR W Yang Model.
+ // We want to optimize branch lengths under a gamma model. Three cases:
+ // (1) the user provides alpha and no BBL is done;
+ // (2) the user provides alpha and BBL is done;
+ // (3) alpha is optimized from the data, together with BBL.
+ // In every case the optimizeSelectonParameters constructor performs the
+ // optimization as a side effect; the positional flags select which
+ // parameters are free (their exact meaning is defined in bestAlphaAndK.h).
+ cout<<"Optimization of NY model with gamma - M5 in PAML"<<endl<<endl;
+ createStochasticProcessVec();
+ if ((_options->userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == true)) {
+ // case (2): user alpha, optimize branch lengths + beta + k
+ cout<<"Optimizing branch lengths & parametrs model: beta + k (Gamma model, user alpha)..."<<endl;
+ optimizeSelectonParameters bestParams(_et,_sc,_spVec,_forceDistr,true,true,false,false,false,true,false,3,3,0.01,0.01,0.1,20,20);
+ }
+
+ else if ((_options->userProvideAlpha == true) && (_options->optimizeBrLenOnStartingTree == false)) {
+ // case (1): user alpha and user branch lengths, optimize k + beta only
+ cout<<"Optimizing parametrs model: k + beta (Gamma model, user alpha, user branch lengths)..."<<endl;
+ optimizeSelectonParameters bestParams(_et,_sc,_spVec,_forceDistr,0,1,0,0,0,1,0);
+
+ }
+ else if (_options->userProvideAlpha == false) {
+ // case (3): optimize alpha + beta + k and the branch lengths
+ cout<<"Optimizing branch lengths and model parametrs alpha + beta +k (Gamma model) ... "<<endl;
+ optimizeSelectonParameters bestParams(_et,_sc,_spVec,_forceDistr,1,1,0,0,0,0,0);
+ }
+ }
+
+
+ // Expand _spVec into one stochastic process per w (selection) category:
+ // a general gamma distribution over w is built from the user options, and
+ // each category gets a copy of the base wYang model with its w set to the
+ // category rate. Matrices are normalized across the resulting vector.
+ void mainbb::createStochasticProcessVec()
+ {
+ wYangModel * baseModel = static_cast<wYangModel*>(_spVec[0].getPijAccelerator()->getReplacementModel());
+ wYangModel tmp(*baseModel);
+ // _forceDistr is owned by this class and used later by the reconstruction.
+ _forceDistr = new generalGammaDistribution(_options->gammaPar,_options->gammaPar,_options->gammaCategies);
+ _spVec.resize(_forceDistr->categories());
+ uniDistribution dist;
+ for (int categor=0; categor<_forceDistr->categories();categor++){
+ // NOTE(review): dist, pijAcc and tmpModel are stack locals — this is only
+ // safe if stochasticProcess clones its distribution/accelerator arguments;
+ // verify against stochasticProcess.h.
+ wYangModel tmpModel(tmp);
+ tmpModel.setW(_forceDistr->rates(categor));
+ trivialAccelerator pijAcc(&tmpModel);
+ stochasticProcess tmpSp(&dist,&pijAcc);
+ _spVec[categor] = tmpSp;
+ }
+ normalizeMatrices(_spVec,_forceDistr);
+
+ }
+
+ // Estimate empirical codon frequencies (F3x4) from the unaligned
+ // nucleotide input file; zero frequencies are nudged away from zero so the
+ // model matrix stays well-defined.
+ Vdouble mainbb::computeFreq(codon &codonAlph){
+  nucleotide nucAlphabet;
+  ifstream seqIn(_options->seqfile.c_str());
+  sequenceContainer nucSeqs = recognizeFormat::readUnAligned(seqIn, &nucAlphabet);
+  nucSeqs.changeGaps2MissingData();
+  seqIn.close();
+  Vdouble pi = freqCodonF3x4(nucSeqs,&codonAlph);
+  makeSureNoZeroFreqs(pi);
+  return pi;
+}
+
+ // Estimate empirical nucleotide frequencies for the GTR model from the
+ // unaligned input file; zero frequencies are nudged away from zero.
+ Vdouble mainbb::computeGTRFreq(nucleotide &nucAlph){
+  nucleotide nucAlphabet;
+  ifstream seqIn(_options->seqfile.c_str());
+  sequenceContainer nucSeqs = recognizeFormat::readUnAligned(seqIn, &nucAlphabet);
+  nucSeqs.changeGaps2MissingData();
+  seqIn.close();
+  Vdouble pi = freqGTR(nucSeqs,&nucAlph);
+  makeSureNoZeroFreqs(pi);
+  return pi;
+}
+// Count nucleotide occurrences over all sequences in nucSc and convert the
+// counts to frequencies. Character codes outside [0, alphabetSize) are
+// skipped, except code 4 (T) which is folded into index 3 (U).
+Vdouble mainbb::freqGTR(const sequenceContainer &nucSc, nucleotide * nucAlph){
+  Vdouble freqGTR(nucAlph->size(),0.0);
+
+  sequenceContainer::constTaxaIterator tIt;
+  sequenceContainer::constTaxaIterator tItEnd;
+  tIt.begin(nucSc);
+  tItEnd.end(nucSc);
+  while (tIt != tItEnd) {
+    sequence::constIterator sIt;
+    sequence::constIterator sItEnd;
+    sIt.begin(*tIt);
+    sItEnd.end(*tIt);
+    while (sIt != sItEnd) {
+      // explicit cast fixes the signed/unsigned comparison with size()
+      if ((*sIt >= 0) && (*sIt < (int)freqGTR.size())) ++freqGTR[(*sIt)];
+      if (*sIt == 4) ++freqGTR[3]; // T (4) is counted as U (3)
+      ++sIt;
+    }
+    ++tIt;
+  }
+  // (removed: unused position counters and a dead commented-out block that
+  // referenced variables not defined in this function)
+  changeCountsToFreqs(freqGTR);
+  return freqGTR;
+}
+
+ // Overwrite each sequence of sc2change with the equally-named sequence
+ // from originSc (used to restore the original, gapped leaf sequences after
+ // a reconstruction). Names present in only one container are left alone.
+ void mainbb::replaceSequences(sequenceContainer &sc2change,sequenceContainer &originSc)
+ {
+  for (int srcIdx = 0; srcIdx < originSc.numberOfSeqs(); ++srcIdx) {
+    const string srcName = originSc[srcIdx].name();
+    for (int dstIdx = 0; dstIdx < sc2change.numberOfSeqs(); ++dstIdx) {
+      if (sc2change[dstIdx].name() != srcName) continue;
+      sc2change[dstIdx] = originSc[srcIdx];
+      break; // first match wins, as before
+    }
+  }
+ }
diff --git a/programs/fastml/mainbb.h b/programs/fastml/mainbb.h
new file mode 100644
index 0000000..12bbf00
--- /dev/null
+++ b/programs/fastml/mainbb.h
@@ -0,0 +1,74 @@
+#ifndef ___BB__MAIN__FILE
+#define ___BB__MAIN__FILE
+
+#include "bb_options.h"
+#include "sequenceContainer.h"
+#include "stochasticProcess.h"
+#include "tree.h"
+#include "codon.h"
+#include "nucleotide.h"
+
+#include "suffStatComponent.h"
+
+#include <vector>
+using namespace std;
+
+
+// Driver class of the FASTML branch-and-bound ancestral sequence
+// reconstruction program: parses the options, loads data and tree, and runs
+// the joint/marginal reconstruction algorithms.
+class mainbb {
+public:
+ explicit mainbb(int argc, char* argv[]);
+ virtual ~mainbb();
+
+private:
+ const bb_options* _options; // parsed command-line options (owned)
+ sequenceContainer _sc; // working copy (gaps converted to missing data)
+ sequenceContainer _originSc; // holds the sc before the gaps were changed
+ tree _et; // the evolutionary tree
+ vector<stochasticProcess> _spVec; // holds the stochastic processes;
+ // for the codon Yang model with gamma it
+ // has one entry per replacement-model category
+ distribution *_forceDistr; // holds the w distribution of the Yang codon model
+
+ alphabet* _alph; // nucleotide/amino/codon alphabet (owned)
+ sequenceContainer _resulutingJointReconstruction;
+
+ void getStartingStochasticProcess();
+ void createStochasticProcessVec();
+ Vdouble computeFreq(codon &codonAlph);
+ Vdouble computeGTRFreq(nucleotide &nucAlph);
+ Vdouble freqGTR(const sequenceContainer &nucSc, nucleotide * nucAlph);
+
+ // get starting tree
+ void getStartingEvolTreeTopology();
+ void getStartingNJtreeNjMLdis();
+ void getStartingTreeNJ_fromDistances(const VVdouble& disTab,const vector<string>& vNames);
+ void getStartingTreeFromTreeFile();
+ void getStartingBranchLengthsAndAlpha();
+ void printOutputTree();
+
+ // get starting tree and codon model
+ void getStartingBLAndModelParam();
+
+ // JOINT WITH GAMMA
+ void printAncestralSequencesGammaJoint();
+ void findAncestralSequencesGammaJoint();
+
+ // JOINT WITHOUT GAMMA
+ void findAncestralSequencesHomJoint();
+
+ // MARGINAL RECONSTRUCTION:
+ void getMarginalReconstruction();
+
+
+ void fillOptionsParameters(int argc, char* argv[]);
+ void getStartingSequenceData();
+ void printSearchParameters();
+ void printBBProjectInfo();
+ // restore original (gapped) leaf sequences by name after reconstruction
+ void replaceSequences(sequenceContainer &sc2change,sequenceContainer &originSc);
+
+
+};
+
+
+#endif
+
diff --git a/programs/fastml/make.dep b/programs/fastml/make.dep
new file mode 100644
index 0000000..71bcc01
--- /dev/null
+++ b/programs/fastml/make.dep
@@ -0,0 +1,254 @@
+fastml.o fastml.debug.o: fastml.cpp mainbb.h bb_options.h ../../libs/phylogeny/pgetopt.h \
+ ../../libs/phylogeny/definitions.h bb_options_list.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/sequence.h \
+ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/gainLossAlphabet.h \
+ ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/codon.h \
+ ../../libs/phylogeny/geneticCodeHolder.h \
+ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/logFile.h \
+ sequenceDataDiff.h ../../libs/phylogeny/amino.h \
+ ../../libs/phylogeny/codon.h ../../libs/phylogeny/recognizeFormat.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/uniDistribution.h \
+ ../../libs/phylogeny/gammaDistribution.h \
+ ../../libs/phylogeny/generalGammaDistribution.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/readDatMatrix.h \
+ ../../libs/phylogeny/datMatrixHolder.h \
+ ../../libs/phylogeny/chebyshevAccelerator.h bbAlg.h \
+ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/stochasticProcess.h bbNodeOrderAlg.h \
+ ../../libs/phylogeny/sequence.h bbEvaluateSpecificAV.h \
+ ../../libs/phylogeny/seqContainerTreeMap.h \
+ ../../libs/phylogeny/treeIt.h bbfindBestAVDynProg.h bbReport.h \
+ ../../libs/phylogeny/distribution.h
+bbAlg.o bbAlg.debug.o: bbAlg.cpp bbAlg.h ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h bbNodeOrderAlg.h \
+ ../../libs/phylogeny/definitions.h bb_options.h bb_options_list.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/tree.h ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/gainLossAlphabet.h \
+ bbEvaluateSpecificAV.h ../../libs/phylogeny/seqContainerTreeMap.h \
+ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/sequenceContainer.h \
+ bbfindBestAVDynProg.h bbReport.h ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/computeUpAlg.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/likelihoodComputation.h \
+ ../../libs/phylogeny/unObservableData.h \
+ ../../libs/phylogeny/maseFormat.h
+bbComputeDownAlg.o bbComputeDownAlg.debug.o: bbComputeDownAlg.cpp bbComputeDownAlg.h \
+ ../../libs/phylogeny/tree.h ../../libs/phylogeny/definitions.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/gainLossAlphabet.h \
+ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/sequence.h \
+ ../../libs/phylogeny/seqContainerTreeMap.h \
+ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/sequenceContainer.h
+bbComputeUpAlg.o bbComputeUpAlg.debug.o: bbComputeUpAlg.cpp bbComputeUpAlg.h \
+ ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/gainLossAlphabet.h ../../libs/phylogeny/sequence.h \
+ ../../libs/phylogeny/seqContainerTreeMap.h \
+ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/sequenceContainer.h
+bbEvaluateSpecificAV.o bbEvaluateSpecificAV.debug.o: bbEvaluateSpecificAV.cpp bbEvaluateSpecificAV.h \
+ bb_options.h ../../libs/phylogeny/pgetopt.h \
+ ../../libs/phylogeny/definitions.h bb_options_list.h \
+ ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/gainLossAlphabet.h \
+ ../../libs/phylogeny/stochasticProcess.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/seqContainerTreeMap.h \
+ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/sequenceContainer.h
+bbfindBestAVDynProg.o bbfindBestAVDynProg.debug.o: bbfindBestAVDynProg.cpp bbfindBestAVDynProg.h \
+ bb_options.h ../../libs/phylogeny/pgetopt.h \
+ ../../libs/phylogeny/definitions.h bb_options_list.h \
+ ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/tree.h ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/gainLossAlphabet.h \
+ ../../libs/phylogeny/seqContainerTreeMap.h \
+ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/sequenceContainer.h
+bbNodeOrderAlg.o bbNodeOrderAlg.debug.o: bbNodeOrderAlg.cpp bbNodeOrderAlg.h \
+ ../../libs/phylogeny/definitions.h bb_options.h bb_options_list.h \
+ ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/tree.h ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/gainLossAlphabet.h \
+ bbComputeUpAlg.h bbComputeDownAlg.h \
+ ../../libs/phylogeny/computeMarginalAlg.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/computePijComponent.h
+bb_options.o bb_options.debug.o: bb_options.cpp bb_options.h ../../libs/phylogeny/pgetopt.h \
+ ../../libs/phylogeny/definitions.h bb_options_list.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/errorMsg.h
+bbReport.o bbReport.debug.o: bbReport.cpp bbReport.h ../../libs/phylogeny/definitions.h \
+ ../../libs/phylogeny/amino.h ../../libs/phylogeny/definitions.h \
+ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/geneticCodeHolder.h ../../libs/phylogeny/codon.h \
+ ../../libs/phylogeny/someUtil.h ../../libs/phylogeny/logFile.h \
+ ../../libs/phylogeny/nucleotide.h ../../libs/phylogeny/codon.h
+computeMarginalReconstruction.o computeMarginalReconstruction.debug.o: computeMarginalReconstruction.cpp \
+ computeMarginalReconstruction.h ../../libs/phylogeny/definitions.h \
+ ../../libs/phylogeny/tree.h ../../libs/phylogeny/definitions.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/errorMsg.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/gainLossAlphabet.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/computeUpAlg.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/computeDownAlg.h \
+ ../../libs/phylogeny/computeMarginalAlg.h ../../libs/phylogeny/treeIt.h
+jointNoGamma.o jointNoGamma.debug.o: jointNoGamma.cpp jointNoGamma.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/readTree.h \
+ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/logFile.h \
+ ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/sequence.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/gainLossAlphabet.h \
+ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ suffStatComponentJointNoGamma.h ../../libs/phylogeny/treeIt.h \
+ ../../libs/phylogeny/seqContainerTreeMap.h \
+ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/sequenceContainer.h
+mainbb.o mainbb.debug.o: mainbb.cpp mainbb.h bb_options.h ../../libs/phylogeny/pgetopt.h \
+ ../../libs/phylogeny/definitions.h bb_options_list.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/sequence.h \
+ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/gainLossAlphabet.h \
+ ../../libs/phylogeny/stochasticProcess.h \
+ ../../libs/phylogeny/pijAccelerator.h \
+ ../../libs/phylogeny/replacementModel.h \
+ ../../libs/phylogeny/distribution.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/readTree.h ../../libs/phylogeny/codon.h \
+ ../../libs/phylogeny/geneticCodeHolder.h \
+ ../../libs/phylogeny/suffStatComponent.h ../../libs/phylogeny/aaJC.h \
+ ../../libs/phylogeny/amino.h ../../libs/phylogeny/codon.h bbAlg.h \
+ ../../libs/phylogeny/computePijComponent.h ../../libs/phylogeny/tree.h \
+ ../../libs/phylogeny/stochasticProcess.h bbNodeOrderAlg.h \
+ ../../libs/phylogeny/sequence.h bbEvaluateSpecificAV.h \
+ ../../libs/phylogeny/seqContainerTreeMap.h \
+ ../../libs/phylogeny/treeIt.h ../../libs/phylogeny/sequenceContainer.h \
+ bbfindBestAVDynProg.h bbReport.h ../../libs/phylogeny/distribution.h \
+ ../../libs/phylogeny/bestAlpha.h \
+ ../../libs/phylogeny/likelihoodComputation.h \
+ ../../libs/phylogeny/computePijComponent.h \
+ ../../libs/phylogeny/suffStatComponent.h \
+ ../../libs/phylogeny/unObservableData.h \
+ ../../libs/phylogeny/gammaDistribution.h \
+ ../../libs/phylogeny/generalGammaDistribution.h \
+ ../../libs/phylogeny/bblEM.h ../../libs/phylogeny/countTableComponent.h \
+ ../../libs/phylogeny/chebyshevAccelerator.h \
+ ../../libs/phylogeny/clustalFormat.h computeMarginalReconstruction.h \
+ ../../libs/phylogeny/distanceTable.h \
+ ../../libs/phylogeny/distanceMethod.h \
+ ../../libs/phylogeny/fastaFormat.h \
+ ../../libs/phylogeny/gammaDistribution.h jointNoGamma.h \
+ suffStatComponentJointNoGamma.h ../../libs/phylogeny/likeDist.h \
+ ../../libs/phylogeny/jcDistance.h ../../libs/phylogeny/logFile.h \
+ ../../libs/phylogeny/maseFormat.h ../../libs/phylogeny/molphyFormat.h \
+ ../../libs/phylogeny/nexusFormat.h ../../libs/phylogeny/nucleotide.h \
+ ../../libs/phylogeny/nucJC.h ../../libs/phylogeny/nj.h \
+ ../../libs/phylogeny/njConstrain.h \
+ ../../libs/phylogeny/distances2Tree.h \
+ ../../libs/phylogeny/phylipFormat.h \
+ ../../libs/phylogeny/readDatMatrix.h \
+ ../../libs/phylogeny/datMatrixHolder.h \
+ ../../libs/phylogeny/recognizeFormat.h \
+ ../../libs/phylogeny/trivialAccelerator.h \
+ ../../libs/phylogeny/uniDistribution.h \
+ ../../libs/phylogeny/bestAlphaAndK.h \
+ ../../libs/phylogeny/likelihoodComputation2Codon.h \
+ ../../libs/phylogeny/wYangModel.h ../../libs/phylogeny/fromQtoPt.h \
+ ../../libs/phylogeny/bblEM2codon.h ../../libs/phylogeny/computeUpAlg.h \
+ ../../libs/phylogeny/numRec.h \
+ ../../libs/phylogeny/uniformDistribution.h \
+ ../../libs/phylogeny/codonUtils.h ../../libs/phylogeny/nucleotide.h \
+ ../../libs/phylogeny/amino.h ../../libs/phylogeny/fastaFormat.h \
+ ../../libs/phylogeny/clustalFormat.h \
+ ../../libs/phylogeny/recognizeFormat.h \
+ ../../libs/phylogeny/evaluateCharacterFreq.h
+sequenceDataDiff.o sequenceDataDiff.debug.o: sequenceDataDiff.cpp sequenceDataDiff.h \
+ ../../libs/phylogeny/sequenceContainer.h \
+ ../../libs/phylogeny/definitions.h ../../libs/phylogeny/sequence.h \
+ ../../libs/phylogeny/errorMsg.h ../../libs/phylogeny/alphabet.h \
+ ../../libs/phylogeny/mulAlphabet.h ../../libs/phylogeny/someUtil.h \
+ ../../libs/phylogeny/logFile.h ../../libs/phylogeny/gainLossAlphabet.h
+suffStatComponentJointNoGamma.o suffStatComponentJointNoGamma.debug.o: suffStatComponentJointNoGamma.cpp \
+ suffStatComponentJointNoGamma.h ../../libs/phylogeny/definitions.h
diff --git a/programs/fastml/sequenceDataDiff.cpp b/programs/fastml/sequenceDataDiff.cpp
new file mode 100644
index 0000000..c4e1caf
--- /dev/null
+++ b/programs/fastml/sequenceDataDiff.cpp
@@ -0,0 +1,49 @@
+#include "sequenceDataDiff.h"
+#include <iostream>
+using namespace std;
+
+// Compare the two sequence containers and record every discrepancy:
+// sequences missing from _sc2, length mismatches, and per-position
+// character differences.
+void sequenceDataDiff::computeDifferences(){
+  for (int i=0;i<_sc1.numberOfSeqs();++i) {
+    string name1 = _sc1[i].name();
+    int idOf1in2 = _sc2.getId(name1,false);//return -1 if not found...
+    if (idOf1in2==-1) {
+      string x = "sequence does not exist ";
+      x+=name1;
+      unitDiff ud(x);
+      _differences.push_back(ud);
+      continue;
+    }
+    const sequence& sequence1 = _sc1[i];
+    // BUG FIX: the counterpart in _sc2 is the one found by name (idOf1in2);
+    // the original indexed _sc2 with i, pairing unrelated sequences whenever
+    // the two containers are ordered differently.
+    const sequence& sequence2 = _sc2[idOf1in2];
+    // BUG FIX: compare the lengths of the TWO sequences; the original
+    // compared sequence1 with itself, so this check could never fire.
+    if (sequence1.seqLen() != sequence2.seqLen()) {
+      string x = "sequences don't have the same length ";
+      x+=name1;
+      unitDiff ud(x);
+      _differences.push_back(ud);
+      continue;
+    }
+
+    for (int j=0; j < sequence1.seqLen(); ++j) {
+      if (sequence1[j] != sequence2[j]) {
+        unitDiff ud(name1,j,sequence1.toString(j),sequence2.toString(j));
+        _differences.push_back(ud);
+      }
+    }
+  }
+}
+
+
+// Dump every recorded difference, one per line, as:
+// "<sequence name> <position> <char in sc1> <char in sc2>"
+void sequenceDataDiff::printDiff(ostream& out) {
+  for (size_t k = 0; k < _differences.size(); ++k) {
+    const unitDiff& d = _differences[k];
+    out<<d._seqName<<" "<<d._pos<<" "<<d._letInSd1<<" "<<d._letInSd2<<endl;
+  }
+}
+
+
diff --git a/programs/fastml/sequenceDataDiff.h b/programs/fastml/sequenceDataDiff.h
new file mode 100644
index 0000000..52f5741
--- /dev/null
+++ b/programs/fastml/sequenceDataDiff.h
@@ -0,0 +1,45 @@
+#ifndef ___SEQ__DATA__DIF
+#define ___SEQ__DATA__DIF
+
+#include "sequenceContainer.h"
+
+#include <fstream>
+#include <iostream>
+#include <string>
+using namespace std;
+
+// This class represents a single difference between a pair of sequences.
+// It is used here to show a difference between two approaches for ancestral
+// sequence reconstruction, e.g. Joint vs. Marginal, or with and without Gamma.
+
+// One recorded difference between two sequence containers: either a single
+// mismatching position, or (pos == -1 and '?' letters) a sequence that is
+// present in only one of the containers.
+class unitDiff{
+	friend class sequenceDataDiff;
+public:
+	explicit unitDiff(const string& seqName,const int pos, const string letInSd1,const string letInSd2)
+		: _seqName(seqName), _pos(pos), _letInSd1(letInSd1), _letInSd2(letInSd2) {}
+	explicit unitDiff(const string& seqName) // in case one seq is only in one
+		: _seqName(seqName), _pos(-1), _letInSd1("?"), _letInSd2("?") {}
+private:
+	string _seqName;  // sequence name the difference belongs to
+	int _pos;         // position of the difference, or -1 for a missing sequence
+	string _letInSd1; // character in the first container
+	string _letInSd2; // character in the second container
+};
+
+// This class prints differences between two reconstructions (or, in general,
+// between any two sequence containers).
+
+class sequenceDataDiff {
+public:
+ // keeps references only — both containers must outlive this object
+ sequenceDataDiff(const sequenceContainer& sc1, const sequenceContainer& sc2) :_sc1(sc1) ,_sc2(sc2) {}
+ void computeDifferences(); // fills _differences; call before printDiff
+ void printDiff(ostream& out);
+private:
+ vector<unitDiff> _differences; // all differences found so far
+ const sequenceContainer& _sc1;
+ const sequenceContainer& _sc2;
+};
+
+#endif
+
diff --git a/programs/fastml/suffStatComponentJointNoGamma.cpp b/programs/fastml/suffStatComponentJointNoGamma.cpp
new file mode 100644
index 0000000..bc4286f
--- /dev/null
+++ b/programs/fastml/suffStatComponentJointNoGamma.cpp
@@ -0,0 +1 @@
+#include "suffStatComponentJointNoGamma.h"
diff --git a/programs/fastml/suffStatComponentJointNoGamma.h b/programs/fastml/suffStatComponentJointNoGamma.h
new file mode 100644
index 0000000..453aa30
--- /dev/null
+++ b/programs/fastml/suffStatComponentJointNoGamma.h
@@ -0,0 +1,50 @@
+#ifndef SUFF_STAT_COMPONENT_JOINT_NO_GAMMA_H___
+#define SUFF_STAT_COMPONENT_JOINT_NO_GAMMA_H___
+
+#include "definitions.h"
+#include <vector>
+#include <cassert>
+using namespace std;
+
+// Sufficient statistic for one node at one position: for every possible
+// letter in the father node, an integer value (one entry per alphabet
+// character).
+class suffStatSpecHomPosJointNoGamma{
+public:
+	void set(const int letterInFather,const int val) {
+		_V[letterInFather] = val;
+	}
+
+	int get(const int letterInFather) const {
+		return _V[letterInFather];
+	}
+
+	void allocatePlace(const int alphabetSize) {
+		_V.resize(alphabetSize);
+	}
+	bool isEmpty() { return _V.empty(); }
+	size_t size() { return _V.size(); }
+private:
+	Vint _V; // size = alphabet size
+};
+
+// Joint-reconstruction sufficient statistic for all nodes: one
+// suffStatSpecHomPosJointNoGamma per node, addressed by node id.
+class suffStatGlobalHomPosJointNoGamma{
+public:
+	void set(const int nodeId,const int letterInFather,const int val) {
+		_V[nodeId].set(letterInFather,val);
+	}
+
+	int get(const int nodeId,const int letterInFather) const {
+		return _V[nodeId].get(letterInFather);
+	}
+
+	void allocatePlace(const int numOnNodes,const int alphabetSize) {
+		_V.resize(numOnNodes);
+		// size_t index fixes the signed/unsigned comparison with _V.size()
+		for (size_t i=0;i<_V.size();++i) {_V[i].allocatePlace(alphabetSize);}
+	}
+	bool isEmpty (){return (_V.empty());}
+	size_t size() {return _V.size();}
+
+private:
+	// one entry per node (the original comment said "size = letter", which
+	// contradicts allocatePlace resizing to numOnNodes)
+	vector<suffStatSpecHomPosJointNoGamma> _V;
+};
+
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastml2.git
More information about the debian-med-commit
mailing list