[med-svn] [consensuscore2] 01/05: Imported Upstream version 0.13.0+20160719
Afif Elghraoui
afif at moszumanska.debian.org
Sun Jul 24 07:22:19 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository consensuscore2.
commit 4ed3a06a6a5170ea28f30af4f45342a3ec187c95
Author: Afif Elghraoui <afif at debian.org>
Date: Sat Jul 23 21:14:20 2016 -0700
Imported Upstream version 0.13.0+20160719
---
CMakeLists.txt | 2 +-
circle.yml | 13 +-
.../{Integrator.h => AbstractIntegrator.h} | 123 +++----
include/pacbio/consensus/Evaluator.h | 42 +--
include/pacbio/consensus/Exceptions.h | 22 +-
include/pacbio/consensus/ModelConfig.h | 12 +-
.../{Polish.h => MonoMolecularIntegrator.h} | 51 +--
.../{Polish.h => MultiMolecularIntegrator.h} | 48 +--
include/pacbio/consensus/Polish.h | 3 +-
.../consensus/{Exceptions.h => PolishResult.h} | 29 +-
include/pacbio/consensus/Read.h | 13 +-
include/pacbio/consensus/{Exceptions.h => State.h} | 28 +-
.../consensus/{Exceptions.h => StrandType.h} | 26 +-
include/pacbio/consensus/Template.h | 46 ++-
setup.py | 2 +-
src/AbstractIntegrator.cpp | 182 +++++++++
src/Evaluator.cpp | 146 ++++----
src/EvaluatorImpl.cpp | 4 +-
src/EvaluatorImpl.h | 5 +
src/Integrator.cpp | 408 ---------------------
src/MonoMolecularIntegrator.cpp | 182 +++++++++
src/MultiMolecularIntegrator.cpp | 134 +++++++
src/Polish.cpp | 42 ++-
src/Read.cpp | 30 +-
src/Recursor.h | 4 +-
src/Template.cpp | 29 +-
src/matrix/SparseMatrix.cpp | 7 +
src/matrix/SparseMatrix.h | 5 +-
src/matrix/SparseVector.h | 16 +-
src/models/P6C4NoCovModel.cpp | 55 ++-
src/models/S_P1C1Beta_Model.cpp | 41 +--
src/models/S_P1C1v1_Model.cpp | 70 +---
src/models/S_P1C1v2_Model.cpp | 358 +++++++++---------
swig/Integrator.i | 12 +-
swig/Polish.i | 9 +-
swig/Read.i | 2 +
tests/CMakeLists.txt | 4 +
tests/TestIntegrator.cpp | 79 ++--
tests/TestMutationEnumerator.cpp | 3 +-
tests/TestPolish.cpp | 22 +-
tests/TestTemplate.cpp | 49 +--
41 files changed, 1208 insertions(+), 1150 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 77e09a5..40c56fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@
########################################################################
cmake_policy(SET CMP0048 NEW) # lets us set the version in project()
-project(PacBioConsensus VERSION 0.12.0 LANGUAGES CXX C)
+project(PacBioConsensus VERSION 0.13.0 LANGUAGES CXX C)
cmake_minimum_required(VERSION 3.0)
# set magic variable
diff --git a/circle.yml b/circle.yml
index ec5c2d9..fdfff89 100644
--- a/circle.yml
+++ b/circle.yml
@@ -10,6 +10,8 @@ dependencies:
- sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
- sudo apt-get update
- sudo apt-get install g++-4.8
+ - curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
+ - sudo apt-get install git-lfs=1.1.0
- if [ ! -d _deps ] ; then mkdir _deps ; fi # Create a directory for dependencies, These are static, cache them.
- pushd _deps ; if [ ! -d cmake-3.3.0-Linux-x86_64 ] ; then wget --no-check-certificate https://www.cmake.org/files/v3.3/cmake-3.3.0-Linux-x86_64.tar.gz ; tar xzf cmake-3.3.0-Linux-x86_64.tar.gz ; fi
- pushd _deps ; if [ ! -d boost_1_58_0 ] ; then wget https://downloads.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.tar.bz2 ; tar xjf boost_1_58_0.tar.bz2 ; fi
@@ -17,17 +19,20 @@ dependencies:
- mkdir _rev_deps # Create a directory for reverse-dependencies, ie things that depend on us. These are not static, do not cache them.
- pushd _rev_deps ; git clone https://github.com/PacificBiosciences/htslib.git
- pushd _rev_deps ; git clone https://github.com/PacificBiosciences/pbbam.git
- - pushd _rev_deps ; git clone -b richQuals https://github.com/PacificBiosciences/pbccs.git
- - pushd _rev_deps ; git clone -b richQuals https://${TOKEN}@github.com/PacificBiosciences/pblaa.git
+ - pushd _rev_deps ; git clone https://github.com/PacificBiosciences/pbccs.git
+ - pushd _rev_deps ; git clone https://${TOKEN}@github.com/PacificBiosciences/pblaa.git
- pushd _rev_deps ; git clone https://${TOKEN}@github.com/PacificBiosciences/pbsparse.git
- pushd _rev_deps ; git clone https://${TOKEN}@github.com/PacificBiosciences/pbchimera.git
- pushd _rev_deps ; git clone https://github.com/PacificBiosciences/seqan.git
+ - pushd _rev_deps ; git clone https://github.com/PacificBiosciences/PacBioTestData.git
- pip install --upgrade pip
- pip install numpy
- pip install cython
- pip install h5py
- pip install pysam
- pip install git+https://github.com/PacificBiosciences/pbcommand.git git+https://github.com/PacificBiosciences/pbcore.git
+ - pushd _rev_deps/PacBioTestData ; git lfs pull && make python
+
override:
- echo "Building PBBAM"
- mkdir _build_bam
@@ -36,11 +41,11 @@ dependencies:
test:
pre:
- mkdir _build
- - pushd _build ; CC=gcc-4.8 CXX=g++-4.8 $(readlink -f ../_deps/cmake-3.3.0-Linux-x86_64/bin/cmake) -DCMAKE_BUILD_TYPE=Debug -DBoost_INCLUDE_DIRS=$(readlink -f ../_deps/boost_1_58_0) ..
+ - pushd _build ; CC=gcc-4.8 CXX=g++-4.8 $(readlink -f ../_deps/cmake-3.3.0-Linux-x86_64/bin/cmake) -DCMAKE_BUILD_TYPE=Debug -DBoost_INCLUDE_DIRS=$(readlink -f ../_deps/boost_1_58_0) -DEXTENSIVE_TESTING=1 ..
override:
- pushd _build ; make
- pushd _build ; make check
- - pushd _build ; rm -rf * ; CC=gcc-4.8 CXX=g++-4.8 $(readlink -f ../_deps/cmake-3.3.0-Linux-x86_64/bin/cmake) -DCMAKE_BUILD_TYPE=Release -DBoost_INCLUDE_DIRS=$(readlink -f ../_deps/boost_1_58_0) ..
+ - pushd _build ; rm -rf * ; CC=gcc-4.8 CXX=g++-4.8 $(readlink -f ../_deps/cmake-3.3.0-Linux-x86_64/bin/cmake) -DCMAKE_BUILD_TYPE=Release -DBoost_INCLUDE_DIRS=$(readlink -f ../_deps/boost_1_58_0) -DEXTENSIVE_TESTING=1 ..
- pushd _build ; make clean ; make test_pbconsensus
- tests/bin/test_pbconsensus --gtest_filter=IntegratorTest.TestLongTemplateTiming*
- CC=gcc-4.8 CXX=g++-4.8 CMAKE_COMMAND=$(readlink -f _deps/cmake-3.3.0-Linux-x86_64/bin/cmake) Boost_INCLUDE_DIRS=$(readlink -f _deps/boost_1_58_0) SWIG_COMMAND=$(readlink -f _deps/swig-3.0.8/bin/swig) VERBOSE=1 pip install --verbose --upgrade --no-deps .
diff --git a/include/pacbio/consensus/Integrator.h b/include/pacbio/consensus/AbstractIntegrator.h
similarity index 61%
rename from include/pacbio/consensus/Integrator.h
rename to include/pacbio/consensus/AbstractIntegrator.h
index f9a4ef4..2632fc2 100644
--- a/include/pacbio/consensus/Integrator.h
+++ b/include/pacbio/consensus/AbstractIntegrator.h
@@ -35,15 +35,18 @@
#pragma once
+#include <cmath>
#include <cstdint>
#include <functional>
#include <iostream>
#include <memory>
+#include <numeric>
#include <set>
#include <pacbio/consensus/Evaluator.h>
#include <pacbio/consensus/Exceptions.h>
#include <pacbio/consensus/Mutation.h>
+#include <pacbio/consensus/State.h>
namespace PacBio {
namespace Consensus {
@@ -56,26 +59,9 @@ struct IntegratorConfig
IntegratorConfig(double minZScore = -3.5, double scoreDiff = 12.5);
};
-enum struct AddReadResult : uint8_t
+inline std::ostream& operator<<(std::ostream& os, State result)
{
- SUCCESS,
- ALPHA_BETA_MISMATCH,
- POOR_ZSCORE,
- OTHER,
- /*
- This enum is used in other places to
- both size an array and index into it. So
- users can know the number of elements needed in
- an array that could count valus of this enum, this
- should always be the last value in the enum.
- */
- SIZE
-};
-
-inline std::ostream& operator<<(std::ostream& os, AddReadResult result)
-{
- static const char* names[] = {"SUCCESS", "ALPHA/BETA MISMATCH", "POOR Z-SCORE", "OTHER"};
- os << names[static_cast<size_t>(result)];
+ os << StateName[static_cast<size_t>(result)];
return os;
}
@@ -96,12 +82,13 @@ public:
double AvgZScore() const;
std::vector<double> ZScores() const;
+
std::vector<std::pair<double, double>> NormalParameters() const;
virtual void ApplyMutation(const Mutation& mut) = 0;
virtual void ApplyMutations(std::vector<Mutation>* muts) = 0;
- virtual AddReadResult AddRead(const MappedRead& read) = 0;
+ virtual State AddRead(const MappedRead& read) = 0;
// For debugging purposes
// (Note that these include results include all evaluators, even the inactive ones)
@@ -109,6 +96,44 @@ public:
std::vector<double> LLs() const;
std::vector<std::string> ReadNames() const;
+ std::vector<int> NumFlipFlops() const;
+ int MaxNumFlipFlops() const;
+
+ std::vector<float> AlphaPopulated() const;
+ float MaxAlphaPopulated() const;
+
+ std::vector<float> BetaPopulated() const;
+ float MaxBetaPopulated() const;
+
+ std::vector<State> States() const;
+ std::vector<StrandType> StrandTypes() const;
+
+ // TODO(atoepfer) Does anyone have a clue if we can make one function out
+ // of those two?
+ template <typename T>
+ inline std::vector<T> TransformEvaluators(std::function<T(Evaluator&)> functor)
+ {
+ std::vector<T> vec;
+ vec.reserve(evals_.size());
+ std::transform(evals_.begin(), evals_.end(), std::back_inserter(vec), functor);
+ return vec;
+ }
+
+ template <typename T>
+ inline std::vector<T> TransformEvaluators(std::function<T(const Evaluator&)> functor) const
+ {
+ std::vector<T> vec;
+ vec.reserve(evals_.size());
+ std::transform(evals_.begin(), evals_.end(), std::back_inserter(vec), functor);
+ return vec;
+ }
+
+ template <typename T>
+ inline T MaxElement(const std::vector<T>& in) const
+ {
+ return *std::max_element(in.cbegin(), in.end());
+ }
+
protected:
Mutation ReverseComplement(const Mutation& mut) const;
@@ -117,63 +142,17 @@ protected:
// move constructor
AbstractIntegrator(AbstractIntegrator&&);
- AddReadResult AddRead(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& read);
+ State AddRead(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& read);
IntegratorConfig cfg_;
std::vector<Evaluator> evals_;
-};
-
-class MonoMolecularIntegrator : public AbstractIntegrator
-{
-public:
- MonoMolecularIntegrator(const std::string& tpl, const IntegratorConfig& cfg, const SNR& snr,
- const std::string& model);
-
- // move constructor
- MonoMolecularIntegrator(MonoMolecularIntegrator&&);
-
- size_t TemplateLength() const;
-
- char operator[](size_t i) const;
- operator std::string() const;
-
- double LL(const Mutation& mut);
- inline double LL() const { return AbstractIntegrator::LL(); }
- void ApplyMutation(const Mutation& mut);
- void ApplyMutations(std::vector<Mutation>* muts);
-
- AddReadResult AddRead(const MappedRead& read);
-
-protected:
- std::string mdl_;
- SNR snr_;
- Template fwdTpl_;
- Template revTpl_;
-};
-
-class MultiMolecularIntegrator : public AbstractIntegrator
-{
-public:
- MultiMolecularIntegrator(const std::string& tpl, const IntegratorConfig& cfg);
-
- size_t TemplateLength() const;
-
- char operator[](size_t i) const;
- operator std::string() const;
-
- void ApplyMutation(const Mutation& mut);
- void ApplyMutations(std::vector<Mutation>* muts);
-
- AddReadResult AddRead(const MappedRead& read);
-
-protected:
- std::unique_ptr<AbstractTemplate> GetTemplate(const MappedRead& read, const SNR& snr);
-
- std::string fwdTpl_;
- std::string revTpl_;
private:
- friend struct std::hash<MultiMolecularIntegrator>;
+ inline double AccumulateNoInf(std::vector<double> input) const
+ {
+ const auto AddNoInf = [](double a, double b) { return a + (std::isinf(b) ? 0.0 : b); };
+ return std::accumulate(input.cbegin(), input.cend(), 0.0, AddNoInf);
+ }
};
} // namespace Consensus
diff --git a/include/pacbio/consensus/Evaluator.h b/include/pacbio/consensus/Evaluator.h
index ad4b7d0..27f0d4f 100644
--- a/include/pacbio/consensus/Evaluator.h
+++ b/include/pacbio/consensus/Evaluator.h
@@ -40,45 +40,38 @@
#include <vector>
#include <pacbio/consensus/Read.h>
+#include <pacbio/consensus/State.h>
#include <pacbio/consensus/Template.h>
namespace PacBio {
namespace Consensus {
-//
-// TODO: explain the legal state transitions. What does it mean to be in each state?
-// Can a read "come back" form being DISABLED to being VALID (I hope not!) or from one
-// of the other states?
-// TODO: Get rid of NULL_TEMPLATE state---this should be handled by ReadScoresMutation logic
-enum struct EvaluatorState : uint8_t
-{
- VALID,
- ALPHA_BETA_MISMATCH,
- POOR_ZSCORE,
- NULL_TEMPLATE,
- DISABLED
-};
-
// forward declaration
class EvaluatorImpl;
class Evaluator
{
public:
- Evaluator(EvaluatorState);
+ Evaluator() = delete;
+ Evaluator(State);
Evaluator(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double minZScore,
double scoreDiff);
- // move constructors
+ // copying is verboten
+ Evaluator(const Evaluator&) = delete;
+ Evaluator& operator=(const Evaluator&) = delete;
+
+ // move constructor
Evaluator(Evaluator&&);
+ // move assign operator
Evaluator& operator=(Evaluator&&);
~Evaluator();
size_t Length() const; // TODO: is this used anywhere? If not, delete it.
- StrandEnum Strand() const;
+ StrandType Strand() const;
- operator bool() const;
+ operator bool() const { return IsValid(); }
operator std::string() const;
std::string ReadName() const;
@@ -92,15 +85,22 @@ public:
bool ApplyMutation(const Mutation& mut);
bool ApplyMutations(std::vector<Mutation>* muts);
- EvaluatorState Status() const;
+ State Status() const { return curState_; }
+ int NumFlipFlops() const;
+ float AlphaPopulated() const;
+ float BetaPopulated() const;
+
void Release();
private:
- void CheckInvariants();
+ void CheckZScore(const double minZScore, const std::string& model);
+
+ bool IsValid() const { return curState_ == State::VALID; }
+ void Status(State nextState);
private:
std::unique_ptr<EvaluatorImpl> impl_;
- EvaluatorState state_;
+ State curState_;
};
} // namespace Consensus
diff --git a/include/pacbio/consensus/Exceptions.h b/include/pacbio/consensus/Exceptions.h
index 8179b21..f472382 100644
--- a/include/pacbio/consensus/Exceptions.h
+++ b/include/pacbio/consensus/Exceptions.h
@@ -38,13 +38,31 @@
#include <stdexcept>
#include <string>
+#include <pacbio/consensus/State.h>
+
namespace PacBio {
namespace Consensus {
-class AlphaBetaMismatch : public std::runtime_error
+class StateError : public std::runtime_error
+{
+public:
+ StateError(State state, const std::string& msg) : std::runtime_error(msg), state_(state) {}
+ State WhatState() const { return state_; }
+ virtual const char* what() const noexcept { return std::runtime_error::what(); }
+private:
+ State state_;
+};
+
+class TemplateTooSmall : public StateError
+{
+public:
+ TemplateTooSmall() : StateError(State::TEMPLATE_TOO_SMALL, "Template too short!") {}
+};
+
+class AlphaBetaMismatch : public StateError
{
public:
- AlphaBetaMismatch() : std::runtime_error("alpha/beta mismatch error!") {}
+ AlphaBetaMismatch() : StateError(State::ALPHA_BETA_MISMATCH, "Alpha/beta mismatch!") {}
};
class ChemistryNotFound : public std::runtime_error
diff --git a/include/pacbio/consensus/ModelConfig.h b/include/pacbio/consensus/ModelConfig.h
index d03a5ce..1bd2eca 100644
--- a/include/pacbio/consensus/ModelConfig.h
+++ b/include/pacbio/consensus/ModelConfig.h
@@ -78,6 +78,12 @@ enum struct MoveType : uint8_t
DELETION = 3 // never used for covariate
};
+enum struct MomentType : uint8_t
+{
+ FIRST = 0,
+ SECOND = 1
+};
+
class ModelConfig
{
public:
@@ -85,10 +91,8 @@ public:
virtual std::unique_ptr<AbstractRecursor> CreateRecursor(
std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const = 0;
virtual std::vector<TemplatePosition> Populate(const std::string& tpl) const = 0;
- virtual double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
- virtual double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
- virtual double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
-
+ virtual double ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const = 0;
};
} // namespace Consensus
diff --git a/include/pacbio/consensus/Polish.h b/include/pacbio/consensus/MonoMolecularIntegrator.h
similarity index 65%
copy from include/pacbio/consensus/Polish.h
copy to include/pacbio/consensus/MonoMolecularIntegrator.h
index 4894071..5ed3ef6 100644
--- a/include/pacbio/consensus/Polish.h
+++ b/include/pacbio/consensus/MonoMolecularIntegrator.h
@@ -35,41 +35,48 @@
#pragma once
-#include <tuple>
-#include <vector>
+#include <cstdint>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <pacbio/consensus/AbstractIntegrator.h>
+#include <pacbio/consensus/Evaluator.h>
+#include <pacbio/consensus/Exceptions.h>
#include <pacbio/consensus/Mutation.h>
+#include <pacbio/consensus/State.h>
namespace PacBio {
namespace Consensus {
-// forward declaration
-class AbstractIntegrator;
-
-struct PolishConfig
+class MonoMolecularIntegrator : public AbstractIntegrator
{
- size_t MaximumIterations;
- size_t MutationSeparation;
- size_t MutationNeighborhood;
+public:
+ MonoMolecularIntegrator(const std::string& tpl, const IntegratorConfig& cfg, const SNR& snr,
+ const std::string& model);
- PolishConfig(size_t iterations = 40, size_t separation = 10, size_t neighborhood = 20);
-};
+ // move constructor
+ MonoMolecularIntegrator(MonoMolecularIntegrator&&);
-std::tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& cfg);
+ size_t TemplateLength() const;
-struct QualityValues
-{
- std::vector<int> Qualities;
- std::vector<int> DeletionQVs;
- std::vector<int> InsertionQVs;
- std::vector<int> SubstitutionQVs;
-};
+ char operator[](size_t i) const;
+ operator std::string() const;
-std::vector<int> ConsensusQualities(AbstractIntegrator& ai);
+ double LL(const Mutation& mut);
+ inline double LL() const { return AbstractIntegrator::LL(); }
+ void ApplyMutation(const Mutation& mut);
+ void ApplyMutations(std::vector<Mutation>* muts);
-QualityValues ConsensusQVs(AbstractIntegrator& ai);
+ State AddRead(const MappedRead& read);
-std::vector<Mutation> Mutations(const AbstractIntegrator& ai);
+protected:
+ std::string mdl_;
+ SNR snr_;
+ Template fwdTpl_;
+ Template revTpl_;
+};
} // namespace Consensus
} // namespace PacBio
diff --git a/include/pacbio/consensus/Polish.h b/include/pacbio/consensus/MultiMolecularIntegrator.h
similarity index 68%
copy from include/pacbio/consensus/Polish.h
copy to include/pacbio/consensus/MultiMolecularIntegrator.h
index 4894071..b0dc74c 100644
--- a/include/pacbio/consensus/Polish.h
+++ b/include/pacbio/consensus/MultiMolecularIntegrator.h
@@ -35,41 +35,45 @@
#pragma once
-#include <tuple>
-#include <vector>
+#include <cstdint>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <pacbio/consensus/AbstractIntegrator.h>
+#include <pacbio/consensus/Evaluator.h>
+#include <pacbio/consensus/Exceptions.h>
#include <pacbio/consensus/Mutation.h>
+#include <pacbio/consensus/State.h>
namespace PacBio {
namespace Consensus {
-// forward declaration
-class AbstractIntegrator;
-
-struct PolishConfig
+class MultiMolecularIntegrator : public AbstractIntegrator
{
- size_t MaximumIterations;
- size_t MutationSeparation;
- size_t MutationNeighborhood;
+public:
+ MultiMolecularIntegrator(const std::string& tpl, const IntegratorConfig& cfg);
- PolishConfig(size_t iterations = 40, size_t separation = 10, size_t neighborhood = 20);
-};
+ size_t TemplateLength() const;
-std::tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& cfg);
+ char operator[](size_t i) const;
+ operator std::string() const;
-struct QualityValues
-{
- std::vector<int> Qualities;
- std::vector<int> DeletionQVs;
- std::vector<int> InsertionQVs;
- std::vector<int> SubstitutionQVs;
-};
+ void ApplyMutation(const Mutation& mut);
+ void ApplyMutations(std::vector<Mutation>* muts);
+
+ State AddRead(const MappedRead& read);
-std::vector<int> ConsensusQualities(AbstractIntegrator& ai);
+protected:
+ std::unique_ptr<AbstractTemplate> GetTemplate(const MappedRead& read, const SNR& snr);
-QualityValues ConsensusQVs(AbstractIntegrator& ai);
+ std::string fwdTpl_;
+ std::string revTpl_;
-std::vector<Mutation> Mutations(const AbstractIntegrator& ai);
+private:
+ friend struct std::hash<MultiMolecularIntegrator>;
+};
} // namespace Consensus
} // namespace PacBio
diff --git a/include/pacbio/consensus/Polish.h b/include/pacbio/consensus/Polish.h
index 4894071..5eedc1a 100644
--- a/include/pacbio/consensus/Polish.h
+++ b/include/pacbio/consensus/Polish.h
@@ -39,6 +39,7 @@
#include <vector>
#include <pacbio/consensus/Mutation.h>
+#include <pacbio/consensus/PolishResult.h>
namespace PacBio {
namespace Consensus {
@@ -55,7 +56,7 @@ struct PolishConfig
PolishConfig(size_t iterations = 40, size_t separation = 10, size_t neighborhood = 20);
};
-std::tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& cfg);
+PolishResult Polish(AbstractIntegrator* ai, const PolishConfig& cfg);
struct QualityValues
{
diff --git a/include/pacbio/consensus/Exceptions.h b/include/pacbio/consensus/PolishResult.h
similarity index 77%
copy from include/pacbio/consensus/Exceptions.h
copy to include/pacbio/consensus/PolishResult.h
index 8179b21..8a34655 100644
--- a/include/pacbio/consensus/Exceptions.h
+++ b/include/pacbio/consensus/PolishResult.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -35,26 +35,19 @@
#pragma once
-#include <stdexcept>
-#include <string>
+#include <vector>
namespace PacBio {
namespace Consensus {
-class AlphaBetaMismatch : public std::runtime_error
+struct PolishResult
{
-public:
- AlphaBetaMismatch() : std::runtime_error("alpha/beta mismatch error!") {}
+ bool hasConverged = false;
+ size_t mutationsTested = 0;
+ size_t mutationsApplied = 0;
+ std::vector<float> maxAlphaPopulated;
+ std::vector<float> maxBetaPopulated;
+ std::vector<int> maxNumFlipFlops;
};
-
-class ChemistryNotFound : public std::runtime_error
-{
-public:
- ChemistryNotFound(const std::string& name)
- : std::runtime_error(std::string("chemistry not found: '") + name + "'")
- {
- }
-};
-
-} // namespace Consensus
-} // namespace PacBio
+}
+} // ::PacBio::Consensus
\ No newline at end of file
diff --git a/include/pacbio/consensus/Read.h b/include/pacbio/consensus/Read.h
index b36230a..be12ca2 100644
--- a/include/pacbio/consensus/Read.h
+++ b/include/pacbio/consensus/Read.h
@@ -41,6 +41,8 @@
#include <string>
#include <vector>
+#include <pacbio/consensus/StrandType.h>
+
namespace PacBio {
namespace Consensus {
@@ -90,21 +92,14 @@ struct Read
inline size_t Length() const { return Seq.length(); }
};
-enum struct StrandEnum : uint8_t
-{
- FORWARD,
- REVERSE,
- UNMAPPED
-};
-
struct MappedRead : public Read
{
- MappedRead(const Read& read, StrandEnum strand, size_t templateStart, size_t templateEnd,
+ MappedRead(const Read& read, StrandType strand, size_t templateStart, size_t templateEnd,
bool pinStart = false, bool pinEnd = false);
MappedRead(const MappedRead& read) = default;
MappedRead(MappedRead&& read) = default;
- StrandEnum Strand;
+ StrandType Strand;
size_t TemplateStart;
size_t TemplateEnd;
bool PinStart;
diff --git a/include/pacbio/consensus/Exceptions.h b/include/pacbio/consensus/State.h
similarity index 77%
copy from include/pacbio/consensus/Exceptions.h
copy to include/pacbio/consensus/State.h
index 8179b21..79dd345 100644
--- a/include/pacbio/consensus/Exceptions.h
+++ b/include/pacbio/consensus/State.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -35,26 +35,20 @@
#pragma once
-#include <stdexcept>
-#include <string>
-
namespace PacBio {
namespace Consensus {
-class AlphaBetaMismatch : public std::runtime_error
+enum struct State : uint8_t
{
-public:
- AlphaBetaMismatch() : std::runtime_error("alpha/beta mismatch error!") {}
-};
+ VALID = 0,
+ ALPHA_BETA_MISMATCH,
+ POOR_ZSCORE,
+ TEMPLATE_TOO_SMALL,
+ MANUALLY_RELEASED,
-class ChemistryNotFound : public std::runtime_error
-{
-public:
- ChemistryNotFound(const std::string& name)
- : std::runtime_error(std::string("chemistry not found: '") + name + "'")
- {
- }
+ SIZE
};
-} // namespace Consensus
-} // namespace PacBio
+static const char* StateName[] = {"VALID", "ALPHA/BETA MISMATCH", "POOR Z-SCORE", "NULL TEMPLATE"};
+}
+} //::PacBio::Consensus
\ No newline at end of file
diff --git a/include/pacbio/consensus/Exceptions.h b/include/pacbio/consensus/StrandType.h
similarity index 77%
copy from include/pacbio/consensus/Exceptions.h
copy to include/pacbio/consensus/StrandType.h
index 8179b21..537bb24 100644
--- a/include/pacbio/consensus/Exceptions.h
+++ b/include/pacbio/consensus/StrandType.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -35,26 +35,14 @@
#pragma once
-#include <stdexcept>
-#include <string>
-
namespace PacBio {
namespace Consensus {
-class AlphaBetaMismatch : public std::runtime_error
+enum struct StrandType : uint8_t
{
-public:
- AlphaBetaMismatch() : std::runtime_error("alpha/beta mismatch error!") {}
+ FORWARD,
+ REVERSE,
+ UNMAPPED
};
-
-class ChemistryNotFound : public std::runtime_error
-{
-public:
- ChemistryNotFound(const std::string& name)
- : std::runtime_error(std::string("chemistry not found: '") + name + "'")
- {
- }
-};
-
-} // namespace Consensus
-} // namespace PacBio
+}
+} //::PacBio::Consensus
\ No newline at end of file
diff --git a/include/pacbio/consensus/Template.h b/include/pacbio/consensus/Template.h
index ae126d9..0042130 100644
--- a/include/pacbio/consensus/Template.h
+++ b/include/pacbio/consensus/Template.h
@@ -79,10 +79,9 @@ public:
// access model configuration
virtual std::unique_ptr<AbstractRecursor> CreateRecursor(
std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const = 0;
- virtual double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
- virtual double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
- virtual double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
-
+ virtual double ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const = 0;
+
std::pair<double, double> NormalParameters() const;
// a sad but necessary release valve for MonoMolecularIntegrator Length()
@@ -124,15 +123,8 @@ public:
const MappedRead& mr,
double scoreDiff) const;
- double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- return cfg_->ExpectedLogLikelihoodForMatchEmission(prev, curr, secondMoment);
- }
- double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- return cfg_->ExpectedLogLikelihoodForStickEmission(prev, curr, secondMoment);
- }
- double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- return cfg_->ExpectedLogLikelihoodForBranchEmission(prev, curr, secondMoment);
- }
+ inline double ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const;
private:
std::unique_ptr<ModelConfig> cfg_;
@@ -158,18 +150,14 @@ public:
inline boost::optional<Mutation> Mutate(const Mutation&);
inline void Reset() {}
bool ApplyMutation(const Mutation& mut);
- double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- return master_.ExpectedLogLikelihoodForMatchEmission(prev, curr, secondMoment);
- }
- double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- return master_.ExpectedLogLikelihoodForStickEmission(prev, curr, secondMoment);
- }
- double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- return master_.ExpectedLogLikelihoodForBranchEmission(prev, curr, secondMoment);
- }
+
inline std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr,
double scoreDiff) const;
+
+ inline double ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const;
+
private:
Template const& master_;
};
@@ -184,7 +172,7 @@ public:
AbstractRecursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
double scoreDiff);
virtual ~AbstractRecursor() {}
- virtual size_t FillAlphaBeta(M& alpha, M& beta) const throw(AlphaBetaMismatch) = 0;
+ virtual size_t FillAlphaBeta(M& alpha, M& beta) const = 0;
virtual void FillAlpha(const M& guide, M& alpha) const = 0;
virtual void FillBeta(const M& guide, M& beta) const = 0;
virtual double LinkAlphaBeta(const M& alpha, size_t alphaColumn, const M& beta,
@@ -240,6 +228,11 @@ std::unique_ptr<AbstractRecursor> Template::CreateRecursor(std::unique_ptr<Abstr
return cfg_->CreateRecursor(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
scoreDiff);
}
+inline double Template::ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const
+{
+ return cfg_->ExpectedLLForEmission(move, prev, curr, moment);
+}
const TemplatePosition& VirtualTemplate::operator[](const size_t i) const
{
@@ -260,12 +253,17 @@ boost::optional<Mutation> VirtualTemplate::Mutate(const Mutation& mut)
return boost::optional<Mutation>(Mutation(mut.Type, mut.Start() - start_, mut.Base));
}
-std::unique_ptr<AbstractRecursor> VirtualTemplate::CreateRecursor(
+inline std::unique_ptr<AbstractRecursor> VirtualTemplate::CreateRecursor(
std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const
{
return master_.CreateRecursor(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
scoreDiff);
}
+inline double VirtualTemplate::ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const
+{
+ return master_.ExpectedLLForEmission(move, prev, curr, moment);
+}
} // namespace Consensus
} // namespace PacBio
diff --git a/setup.py b/setup.py
index 48807d3..680684c 100644
--- a/setup.py
+++ b/setup.py
@@ -141,7 +141,7 @@ if not os.path.exists(os.path.join(swigLib, modName)):
setup(
name="ConsensusCore2",
- version="0.11.0",
+ version="0.13.0",
author="PacificBiosciences",
author_email="devnet at pacb.com",
url="http://www.github.com/PacificBiosciences/ConsensusCore2",
diff --git a/src/AbstractIntegrator.cpp b/src/AbstractIntegrator.cpp
new file mode 100644
index 0000000..ce95b46
--- /dev/null
+++ b/src/AbstractIntegrator.cpp
@@ -0,0 +1,182 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <cassert>
+#include <cmath>
+#include <exception>
+#include <limits>
+#include <numeric>
+#include <utility>
+
+#include <pacbio/consensus/AbstractIntegrator.h>
+#include <pacbio/consensus/Sequence.h>
+
+#include "ModelFactory.h"
+
+namespace PacBio {
+namespace Consensus {
+
+namespace {
+
+constexpr double NEG_DBL_INF = -std::numeric_limits<double>::infinity();
+constexpr int NEG_INT_INF = -std::numeric_limits<int>::infinity();
+constexpr float NEG_FLOAT_INF = -std::numeric_limits<float>::infinity();
+
+} // anonymous namespace
+
+std::set<std::string> SupportedChemistries() { return ModelFactory::SupportedChemistries(); }
+IntegratorConfig::IntegratorConfig(const double minZScore, const double scoreDiff)
+ : MinZScore{minZScore}, ScoreDiff{scoreDiff}
+{
+ if (ScoreDiff < 0) {
+ throw std::runtime_error("Score diff must be > 0");
+ }
+}
+
+AbstractIntegrator::AbstractIntegrator(const IntegratorConfig& cfg) : cfg_{cfg} {}
+AbstractIntegrator::AbstractIntegrator(AbstractIntegrator&& ai)
+ : cfg_{ai.cfg_}, evals_{std::move(ai.evals_)}
+{
+}
+
+AbstractIntegrator::~AbstractIntegrator() {}
+State AbstractIntegrator::AddRead(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& read)
+{
+ // TODO(atoepfer) Why don't we add those reads and tag them as TEMPLATE_TOO_SMALL
+ // and effectively keep book about them? This logic should be
+ // in the Evaluator
+ if (read.TemplateEnd <= read.TemplateStart) throw std::invalid_argument("template span < 2!");
+
+ if (read.Length() < 2) throw std::invalid_argument("read span < 2!");
+
+ evals_.emplace_back(Evaluator(std::move(tpl), read, cfg_.MinZScore, cfg_.ScoreDiff));
+ return evals_.back().Status();
+}
+
+double AbstractIntegrator::LL(const Mutation& fwdMut) { return AccumulateNoInf(LLs(fwdMut)); }
+double AbstractIntegrator::LL() const { return AccumulateNoInf(LLs()); }
+std::vector<double> AbstractIntegrator::LLs(const Mutation& fwdMut)
+{
+ const Mutation revMut(ReverseComplement(fwdMut));
+
+ const auto functor = [&fwdMut, &revMut](Evaluator& eval) {
+ switch (eval.Strand()) {
+ case StrandType::FORWARD:
+ return eval.LL(fwdMut);
+ case StrandType::REVERSE:
+ return eval.LL(revMut);
+ case StrandType::UNMAPPED:
+ return NEG_DBL_INF;
+ default:
+ throw std::runtime_error("Unknown StrandType");
+ }
+ };
+
+ return TransformEvaluators<double>(functor);
+}
+
+std::vector<double> AbstractIntegrator::LLs() const
+{
+ const auto functor = [](const Evaluator& eval) { return eval.LL(); };
+ return TransformEvaluators<double>(functor);
+}
+
+std::vector<std::string> AbstractIntegrator::ReadNames() const
+{
+ return TransformEvaluators<std::string>([](const Evaluator& eval) { return eval.ReadName(); });
+}
+
+std::vector<int> AbstractIntegrator::NumFlipFlops() const
+{
+ return TransformEvaluators<int>([](const Evaluator& eval) { return eval.NumFlipFlops(); });
+}
+
+int AbstractIntegrator::MaxNumFlipFlops() const { return MaxElement<int>(NumFlipFlops()); }
+std::vector<float> AbstractIntegrator::AlphaPopulated() const
+{
+ return TransformEvaluators<float>([](const Evaluator& eval) { return eval.AlphaPopulated(); });
+}
+
+float AbstractIntegrator::MaxAlphaPopulated() const { return MaxElement<float>(AlphaPopulated()); }
+std::vector<float> AbstractIntegrator::BetaPopulated() const
+{
+ return TransformEvaluators<float>([](const Evaluator& eval) { return eval.BetaPopulated(); });
+}
+
+float AbstractIntegrator::MaxBetaPopulated() const { return MaxElement<float>(BetaPopulated()); }
+double AbstractIntegrator::AvgZScore() const
+{
+ double mean = 0.0, var = 0.0;
+ size_t n = 0;
+ for (const auto& eval : evals_) {
+ if (eval) {
+ double m, v;
+ std::tie(m, v) = eval.NormalParameters();
+ mean += m;
+ var += v;
+ ++n;
+ }
+ }
+ return (LL() / n - mean / n) / std::sqrt(var / n);
+}
+
+std::vector<double> AbstractIntegrator::ZScores() const
+{
+ return TransformEvaluators<double>([](const Evaluator& eval) { return eval.ZScore(); });
+}
+
+std::vector<std::pair<double, double>> AbstractIntegrator::NormalParameters() const
+{
+ return TransformEvaluators<std::pair<double, double>>(
+ [](const Evaluator& eval) { return eval.NormalParameters(); });
+}
+
+std::vector<State> AbstractIntegrator::States() const
+{
+ return TransformEvaluators<State>([](const Evaluator& eval) { return eval.Status(); });
+}
+
+std::vector<StrandType> AbstractIntegrator::StrandTypes() const
+{
+ return TransformEvaluators<StrandType>([](const Evaluator& eval) { return eval.Strand(); });
+}
+
+Mutation AbstractIntegrator::ReverseComplement(const Mutation& mut) const
+{
+ return Mutation(mut.Type, TemplateLength() - mut.End(), Complement(mut.Base));
+}
+
+} // namespace Consensus
+} // namespace PacBio
diff --git a/src/Evaluator.cpp b/src/Evaluator.cpp
index 98e8d59..f398aa8 100644
--- a/src/Evaluator.cpp
+++ b/src/Evaluator.cpp
@@ -46,137 +46,155 @@ namespace PacBio {
namespace Consensus {
namespace {
-constexpr double NEG_INF = -std::numeric_limits<double>::infinity();
+constexpr double NEG_DBL_INF = -std::numeric_limits<double>::infinity();
+constexpr int NEG_INT_INF = -std::numeric_limits<int>::infinity();
+constexpr float NEG_FLOAT_INF = -std::numeric_limits<float>::infinity();
constexpr size_t EXTEND_BUFFER_COLUMNS = 8;
} // anonymous namespace
-Evaluator::Evaluator(const EvaluatorState state) : impl_{nullptr}, state_{state}
+Evaluator::Evaluator(const State state) : impl_{nullptr}, curState_{state}
{
- if (state_ == EvaluatorState::VALID)
+ if (curState_ == State::VALID)
throw std::invalid_argument("cannot initialize a dummy Evaluator with VALID state");
}
Evaluator::Evaluator(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
const double minZScore, const double scoreDiff)
- : impl_{nullptr}, state_{EvaluatorState::VALID}
+ : impl_{nullptr}, curState_{State::VALID}
{
try {
impl_.reset(
new EvaluatorImpl(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
- const double zScore = impl_->ZScore();
-
- // the zscore filter is disabled under the following conditions
- if ((mr.Model.find("S/P1-C1") != std::string::npos) ||
- (mr.Model.find("S/P2-C2/prospective-compatible") != std::string::npos))
- goto end;
- if (minZScore <= -100.0) goto end;
- if (std::isnan(minZScore)) goto end;
-
- // TODO(lhepler): re-enable this check when the zscore bits are working again
- // assert(std::isfinite(zScore));
- if (!std::isfinite(zScore) || zScore < minZScore) state_ = EvaluatorState::POOR_ZSCORE;
- } catch (AlphaBetaMismatch&) {
- state_ = EvaluatorState::ALPHA_BETA_MISMATCH;
+ CheckZScore(minZScore, mr.Model);
+ } catch (const StateError& e) {
+ Status(e.WhatState());
}
-
-end:
- CheckInvariants();
-}
-
-Evaluator::Evaluator(Evaluator&& eval) : impl_{std::move(eval.impl_)}, state_{eval.state_}
-{
- CheckInvariants();
}
+Evaluator::Evaluator(Evaluator&& eval) : impl_{std::move(eval.impl_)}, curState_{eval.curState_} {}
Evaluator& Evaluator::operator=(Evaluator&& eval)
{
impl_ = move(eval.impl_);
- state_ = eval.state_;
+ curState_ = eval.curState_;
return *this;
}
Evaluator::~Evaluator() {}
size_t Evaluator::Length() const
{
- if (impl_) return impl_->recursor_->tpl_->Length();
+ if (IsValid()) return impl_->recursor_->tpl_->Length();
return 0;
}
-StrandEnum Evaluator::Strand() const
+StrandType Evaluator::Strand() const
{
- if (impl_) return impl_->recursor_->read_.Strand;
- return StrandEnum::UNMAPPED;
+ if (IsValid()) return impl_->recursor_->read_.Strand;
+ return StrandType::UNMAPPED;
}
-Evaluator::operator bool() const { return state_ == EvaluatorState::VALID; }
std::string Evaluator::ReadName() const
{
- if (impl_) {
- return impl_->ReadName();
- } else {
- return "*Inactive evaluator*";
- }
+ if (IsValid()) return impl_->ReadName();
+ return "*Inactive evaluator*";
}
double Evaluator::LL(const Mutation& mut)
{
- if (impl_) return impl_->LL(mut);
- return NEG_INF;
+ if (IsValid()) return impl_->LL(mut);
+ return NEG_DBL_INF;
}
double Evaluator::LL() const
{
- if (impl_) return impl_->LL();
- return NEG_INF;
+ if (IsValid()) return impl_->LL();
+ return NEG_DBL_INF;
}
std::pair<double, double> Evaluator::NormalParameters() const
{
- if (impl_) return impl_->NormalParameters();
- return std::make_pair(NEG_INF, NEG_INF);
+ if (IsValid()) return impl_->NormalParameters();
+ return std::make_pair(NEG_DBL_INF, NEG_DBL_INF);
}
double Evaluator::ZScore() const
{
- if (impl_) return impl_->ZScore();
- return NEG_INF;
+ if (IsValid()) return impl_->ZScore();
+ return NEG_DBL_INF;
+}
+
+int Evaluator::NumFlipFlops() const
+{
+ if (IsValid()) return impl_->NumFlipFlops();
+ return NEG_INT_INF;
+}
+
+float Evaluator::AlphaPopulated() const
+{
+ if (IsValid()) return impl_->AlphaPopulated();
+ return NEG_FLOAT_INF;
+}
+
+float Evaluator::BetaPopulated() const
+{
+ if (IsValid()) return impl_->BetaPopulated();
+ return NEG_FLOAT_INF;
}
bool Evaluator::ApplyMutation(const Mutation& mut)
{
- if (!impl_) return false;
- const bool mutApplied = impl_->ApplyMutation(mut);
- CheckInvariants();
+ bool mutApplied = false;
+ if (IsValid()) {
+ try {
+ mutApplied = impl_->ApplyMutation(mut);
+ } catch (const StateError& e) {
+ Status(e.WhatState());
+ }
+ }
return mutApplied;
}
bool Evaluator::ApplyMutations(std::vector<Mutation>* muts)
{
- if (!impl_) return false;
- const bool mutsApplied = impl_->ApplyMutations(muts);
- CheckInvariants();
+ bool mutsApplied = false;
+ if (IsValid()) {
+ try {
+ mutsApplied = impl_->ApplyMutations(muts);
+ } catch (const StateError& e) {
+ Status(e.WhatState());
+ }
+ }
return mutsApplied;
}
-EvaluatorState Evaluator::Status() const { return state_; }
-void Evaluator::Release()
+void Evaluator::Status(State nextState)
{
- state_ = EvaluatorState::DISABLED;
- impl_.reset(nullptr);
+ // Allow transition from VALID to anything and
+ // from anything to DISABLED
+ if (curState_ == State::VALID)
+ curState_ = nextState;
+ else
+ std::cerr << "Log this behaviour and return" << std::endl;
+
+ if (curState_ != State::VALID) impl_.reset(nullptr);
}
-// TODO: this is no longer a simple "invariants check" function---a CheckInvariants function
-// should be const and only do asserts on whether the state is valid. Rename or refactor this. The
-// state in this class
-// is a bit too complex for my taste.
-void Evaluator::CheckInvariants()
+void Evaluator::Release() { Status(State::MANUALLY_RELEASED); }
+void Evaluator::CheckZScore(const double minZScore, const std::string& model)
{
- if (!impl_) return;
- if (impl_->recursor_->tpl_->Length() < 2) state_ = EvaluatorState::NULL_TEMPLATE;
- if (state_ != EvaluatorState::VALID) impl_.reset(nullptr);
+ // the zscore filter is disabled under the following conditions
+ // - unsupported model
+ for (const auto& m : {"S/P1-C1", "S/P2-C2/prospective-compatible"})
+ if (model.find(m) != std::string::npos) return;
+
+ // - threshold undefined or too low
+ if (std::isnan(minZScore) || minZScore <= -100.0) return;
+
+ const double zScore = impl_->ZScore();
+ // TODO(lhepler): re-enable this check when the zscore bits are working again
+ // assert(std::isfinite(zScore));
+ if (!std::isfinite(zScore) || zScore < minZScore) Status(State::POOR_ZSCORE);
}
-
} // namespace Consensus
} // namespace PacBio
diff --git a/src/EvaluatorImpl.cpp b/src/EvaluatorImpl.cpp
index e93aaf8..25e296f 100644
--- a/src/EvaluatorImpl.cpp
+++ b/src/EvaluatorImpl.cpp
@@ -90,7 +90,7 @@ EvaluatorImpl::EvaluatorImpl(std::unique_ptr<AbstractTemplate>&& tpl, const Mapp
, beta_(mr.Length() + 1, recursor_->tpl_->Length() + 1, ScaledMatrix::REVERSE)
, extendBuffer_(mr.Length() + 1, EXTEND_BUFFER_COLUMNS, ScaledMatrix::FORWARD)
{
- recursor_->FillAlphaBeta(alpha_, beta_);
+ numFlipFlops_ = recursor_->FillAlphaBeta(alpha_, beta_);
}
std::string EvaluatorImpl::ReadName() const { return recursor_->read_.Name; }
@@ -99,7 +99,7 @@ double EvaluatorImpl::LL(const Mutation& mut_)
// apply the virtual mutation
boost::optional<Mutation> mut(recursor_->tpl_->Mutate(mut_));
- // if the resulting template is 0, simulate NULL_TEMPLATE (removal)
+ // if the resulting template is 0, simulate TEMPLATE_TOO_SMALL (removal)
if (recursor_->tpl_->Length() == 0) {
recursor_->tpl_->Reset();
return 0.0;
diff --git a/src/EvaluatorImpl.h b/src/EvaluatorImpl.h
index e64c332..4f5c09f 100644
--- a/src/EvaluatorImpl.h
+++ b/src/EvaluatorImpl.h
@@ -68,6 +68,9 @@ public:
bool ApplyMutation(const Mutation& mut);
bool ApplyMutations(std::vector<Mutation>* muts);
+ int NumFlipFlops() const { return numFlipFlops_; }
+ float AlphaPopulated() const { return alpha_.UsedEntriesRatio(); }
+ float BetaPopulated() const { return beta_.UsedEntriesRatio(); }
private:
void Recalculate();
@@ -80,6 +83,8 @@ private:
ScaledMatrix beta_;
ScaledMatrix extendBuffer_;
+ int numFlipFlops_;
+
friend class Evaluator;
};
diff --git a/src/Integrator.cpp b/src/Integrator.cpp
deleted file mode 100644
index 6a5be4e..0000000
--- a/src/Integrator.cpp
+++ /dev/null
@@ -1,408 +0,0 @@
-// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-#include <cassert>
-#include <cmath>
-#include <limits>
-#include <utility>
-
-#include <pacbio/consensus/Integrator.h>
-#include <pacbio/consensus/Sequence.h>
-
-#include "ModelFactory.h"
-
-namespace PacBio {
-namespace Consensus {
-
-constexpr double NaN = -std::numeric_limits<double>::quiet_NaN();
-
-std::set<std::string> SupportedChemistries() { return ModelFactory::SupportedChemistries(); }
-IntegratorConfig::IntegratorConfig(const double minZScore, const double scoreDiff)
- : MinZScore{minZScore}, ScoreDiff{scoreDiff}
-{
- if (ScoreDiff < 0) {
- throw std::runtime_error("Score diff must be > 0");
- }
-}
-
-AbstractIntegrator::AbstractIntegrator(const IntegratorConfig& cfg) : cfg_{cfg} {}
-AbstractIntegrator::AbstractIntegrator(AbstractIntegrator&& ai)
- : cfg_{ai.cfg_}, evals_{std::move(ai.evals_)}
-{
-}
-
-AbstractIntegrator::~AbstractIntegrator() {}
-AddReadResult AbstractIntegrator::AddRead(std::unique_ptr<AbstractTemplate>&& tpl,
- const MappedRead& read)
-{
- if (read.TemplateEnd <= read.TemplateStart || read.TemplateEnd - read.TemplateStart < 2)
- throw std::invalid_argument("template span < 2!");
-
- if (read.Length() < 2) throw std::invalid_argument("read span < 2!");
-
- evals_.emplace_back(Evaluator(std::move(tpl), read, cfg_.MinZScore, cfg_.ScoreDiff));
-
- const auto status = evals_.back().Status();
-
- if (status == EvaluatorState::ALPHA_BETA_MISMATCH)
- return AddReadResult::ALPHA_BETA_MISMATCH;
- else if (status == EvaluatorState::POOR_ZSCORE)
- return AddReadResult::POOR_ZSCORE;
-
- assert(status == EvaluatorState::VALID);
-
- return AddReadResult::SUCCESS;
-}
-
-double AbstractIntegrator::LL(const Mutation& fwdMut)
-{
- const Mutation revMut(ReverseComplement(fwdMut));
- double ll = 0.0;
- for (auto& eval : evals_) {
- if (eval.Strand() == StrandEnum::FORWARD)
- ll += eval.LL(fwdMut);
- else if (eval.Strand() == StrandEnum::REVERSE)
- ll += eval.LL(revMut);
- }
- return ll;
-}
-
-double AbstractIntegrator::LL() const
-{
- double ll = 0.0;
- for (const auto& eval : evals_) {
- if (eval) ll += eval.LL();
- }
- return ll;
-}
-
-std::vector<double> AbstractIntegrator::LLs(const Mutation& fwdMut)
-{
- const Mutation revMut(ReverseComplement(fwdMut));
- std::vector<double> lls;
- for (auto& eval : evals_) {
- if (eval.Strand() == StrandEnum::FORWARD)
- lls.push_back(eval.LL(fwdMut));
- else if (eval.Strand() == StrandEnum::REVERSE)
- lls.push_back(eval.LL(revMut));
- else {
- // inactive reads get a strand of "unmapped"
- lls.push_back(NaN);
- }
- }
- return lls;
-}
-
-std::vector<double> AbstractIntegrator::LLs() const
-{
- std::vector<double> lls;
- for (auto& eval : evals_) {
- if (eval)
- lls.push_back(eval.LL());
- else
- lls.push_back(NaN);
- }
- return lls;
-}
-
-std::vector<std::string> AbstractIntegrator::ReadNames() const
-{
- std::vector<std::string> readNames;
- for (const Evaluator& eval : evals_) {
- readNames.push_back(eval.ReadName());
- }
- return readNames;
-}
-
-double AbstractIntegrator::AvgZScore() const
-{
- double mean = 0.0, var = 0.0;
- size_t n = 0;
- for (const auto& eval : evals_) {
- if (eval) {
- double m, v;
- std::tie(m, v) = eval.NormalParameters();
- mean += m;
- var += v;
- ++n;
- }
- }
- return (LL() / n - mean / n) / std::sqrt(var / n);
-}
-
-std::vector<double> AbstractIntegrator::ZScores() const
-{
- std::vector<double> results;
- results.reserve(evals_.size());
- for (const auto& eval : evals_) {
- if (eval) {
- double mean, var;
- std::tie(mean, var) = eval.NormalParameters();
- results.emplace_back((eval.LL() - mean) / std::sqrt(var));
- } else
- results.emplace_back(std::numeric_limits<double>::quiet_NaN());
- }
- return results;
-}
-std::vector<std::pair<double, double>> AbstractIntegrator::NormalParameters() const
-{
- std::vector<std::pair<double, double>> results;
- results.reserve(evals_.size());
- for (const auto& eval : evals_) {
- if (eval) {
- results.emplace_back(eval.NormalParameters());
- } else {
- auto nan = std::numeric_limits<double>::quiet_NaN();
- results.emplace_back(std::pair<double, double>(nan, nan));
- }
- }
- return results;
-}
-
-Mutation AbstractIntegrator::ReverseComplement(const Mutation& mut) const
-{
- return Mutation(mut.Type, TemplateLength() - mut.End(), Complement(mut.Base));
-}
-
-MonoMolecularIntegrator::MonoMolecularIntegrator(const std::string& tpl,
- const IntegratorConfig& cfg, const SNR& snr,
- const std::string& model)
- : AbstractIntegrator(cfg)
- , mdl_{model}
- , snr_{snr}
- , fwdTpl_(tpl, ModelFactory::Create(mdl_, snr_))
- , revTpl_(::PacBio::Consensus::ReverseComplement(tpl), ModelFactory::Create(mdl_, snr_))
-{
-}
-
-MonoMolecularIntegrator::MonoMolecularIntegrator(MonoMolecularIntegrator&& mmi)
- : AbstractIntegrator(std::move(mmi))
- , mdl_{mmi.mdl_}
- , snr_{mmi.snr_}
- , fwdTpl_{std::move(mmi.fwdTpl_)}
- , revTpl_{std::move(mmi.revTpl_)}
-{
-}
-
-AddReadResult MonoMolecularIntegrator::AddRead(const MappedRead& read)
-{
- if (read.Model != mdl_) throw std::invalid_argument("invalid model for integrator!");
- if (read.SignalToNoise != snr_) throw std::invalid_argument("invalid SNR for integrator!");
-
- if (read.Strand == StrandEnum::FORWARD)
- return AbstractIntegrator::AddRead(
- std::unique_ptr<AbstractTemplate>(new VirtualTemplate(
- fwdTpl_, read.TemplateStart, read.TemplateEnd, read.PinStart, read.PinEnd)),
- read);
-
- else if (read.Strand == StrandEnum::REVERSE)
- return AbstractIntegrator::AddRead(
- std::unique_ptr<AbstractTemplate>(new VirtualTemplate(
- revTpl_, TemplateLength() - read.TemplateEnd, TemplateLength() - read.TemplateStart,
- read.PinEnd, read.PinStart)),
- read);
-
- throw std::invalid_argument("read is unmapped!");
-}
-
-size_t MonoMolecularIntegrator::TemplateLength() const { return fwdTpl_.TrueLength(); }
-char MonoMolecularIntegrator::operator[](const size_t i) const { return fwdTpl_[i].Base; }
-MonoMolecularIntegrator::operator std::string() const
-{
- std::string result;
-
- result.resize(fwdTpl_.Length());
-
- for (size_t i = 0; i < fwdTpl_.Length(); ++i)
- result[i] = fwdTpl_[i].Base;
-
- return result;
-}
-
-double MonoMolecularIntegrator::LL(const Mutation& fwdMut)
-{
- const Mutation revMut(ReverseComplement(fwdMut));
- fwdTpl_.Mutate(fwdMut);
- revTpl_.Mutate(revMut);
- const double ll = AbstractIntegrator::LL(fwdMut);
- fwdTpl_.Reset();
- revTpl_.Reset();
- return ll;
-}
-
-void MonoMolecularIntegrator::ApplyMutation(const Mutation& fwdMut)
-{
- const Mutation revMut(ReverseComplement(fwdMut));
-
- fwdTpl_.ApplyMutation(fwdMut);
- revTpl_.ApplyMutation(revMut);
-
- for (auto& eval : evals_) {
- if (eval.Strand() == StrandEnum::FORWARD)
- eval.ApplyMutation(fwdMut);
- else if (eval.Strand() == StrandEnum::REVERSE)
- eval.ApplyMutation(revMut);
- }
-
- assert(fwdTpl_.Length() == revTpl_.Length());
-
-#ifndef NDEBUG
- std::string fwd;
- std::string rev;
-
- for (size_t i = 0; i < TemplateLength(); ++i) {
- fwd.push_back(fwdTpl_[i].Base);
- rev.push_back(revTpl_[i].Base);
- }
-
-#endif
-
- assert(fwd == ::PacBio::Consensus::ReverseComplement(rev));
-}
-
-void MonoMolecularIntegrator::ApplyMutations(std::vector<Mutation>* fwdMuts)
-{
- std::vector<Mutation> revMuts;
-
- for (auto it = fwdMuts->crbegin(); it != fwdMuts->crend(); ++it)
- revMuts.emplace_back(ReverseComplement(*it));
-
- fwdTpl_.ApplyMutations(fwdMuts);
- revTpl_.ApplyMutations(&revMuts);
-
- for (auto& eval : evals_) {
- if (eval.Strand() == StrandEnum::FORWARD)
- eval.ApplyMutations(fwdMuts);
- else if (eval.Strand() == StrandEnum::REVERSE)
- eval.ApplyMutations(&revMuts);
- }
-
- assert(fwdTpl_.Length() == revTpl_.Length());
-
-#ifndef NDEBUG
- std::string fwd;
- std::string rev;
-
- for (size_t i = 0; i < TemplateLength(); ++i) {
- fwd.push_back(fwdTpl_[i].Base);
- rev.push_back(revTpl_[i].Base);
- }
-#endif
-
- assert(fwd == ::PacBio::Consensus::ReverseComplement(rev));
-}
-
-MultiMolecularIntegrator::MultiMolecularIntegrator(const std::string& tpl,
- const IntegratorConfig& cfg)
- : AbstractIntegrator(cfg), fwdTpl_{tpl}, revTpl_{::PacBio::Consensus::ReverseComplement(tpl)}
-{
-}
-
-AddReadResult MultiMolecularIntegrator::AddRead(const MappedRead& read)
-{
- return AbstractIntegrator::AddRead(GetTemplate(read, read.SignalToNoise), read);
-}
-
-size_t MultiMolecularIntegrator::TemplateLength() const { return fwdTpl_.length(); }
-char MultiMolecularIntegrator::operator[](const size_t i) const { return fwdTpl_[i]; }
-MultiMolecularIntegrator::operator std::string() const { return fwdTpl_; }
-void MultiMolecularIntegrator::ApplyMutation(const Mutation& fwdMut)
-{
- const Mutation revMut(ReverseComplement(fwdMut));
-
- std::vector<Mutation> fwdMuts = {fwdMut};
- std::vector<Mutation> revMuts = {revMut};
-
- fwdTpl_ = ::PacBio::Consensus::ApplyMutations(fwdTpl_, &fwdMuts);
- revTpl_ = ::PacBio::Consensus::ApplyMutations(revTpl_, &revMuts);
-
- for (auto& eval : evals_) {
- if (eval.Strand() == StrandEnum::FORWARD)
- eval.ApplyMutation(fwdMut);
- else if (eval.Strand() == StrandEnum::REVERSE)
- eval.ApplyMutation(revMut);
- }
-
- assert(fwdTpl_.length() == revTpl_.length());
- assert(fwdTpl_ == ::PacBio::Consensus::ReverseComplement(revTpl_));
-}
-
-void MultiMolecularIntegrator::ApplyMutations(std::vector<Mutation>* fwdMuts)
-{
- std::vector<Mutation> revMuts;
-
- for (auto it = fwdMuts->crbegin(); it != fwdMuts->crend(); ++it)
- revMuts.emplace_back(ReverseComplement(*it));
-
- fwdTpl_ = ::PacBio::Consensus::ApplyMutations(fwdTpl_, fwdMuts);
- revTpl_ = ::PacBio::Consensus::ApplyMutations(revTpl_, &revMuts);
-
- for (auto& eval : evals_) {
- if (eval.Strand() == StrandEnum::FORWARD)
- eval.ApplyMutations(fwdMuts);
- else if (eval.Strand() == StrandEnum::REVERSE)
- eval.ApplyMutations(&revMuts);
- }
-
- assert(fwdTpl_.length() == revTpl_.length());
- assert(fwdTpl_ == ::PacBio::Consensus::ReverseComplement(revTpl_));
-}
-
-std::unique_ptr<AbstractTemplate> MultiMolecularIntegrator::GetTemplate(const MappedRead& read,
- const SNR& snr)
-{
- const size_t len = read.TemplateEnd - read.TemplateStart;
-
- if (read.Strand == StrandEnum::FORWARD) {
- const size_t start = read.TemplateStart;
- const size_t end = read.TemplateEnd;
-
- return std::unique_ptr<AbstractTemplate>(
- new Template(fwdTpl_.substr(start, len), ModelFactory::Create(read.Model, snr), start,
- end, read.PinStart, read.PinEnd));
- } else if (read.Strand == StrandEnum::REVERSE) {
- const size_t start = revTpl_.size() - read.TemplateEnd;
- const size_t end = revTpl_.size() - read.TemplateStart;
-
- return std::unique_ptr<AbstractTemplate>(
- new Template(revTpl_.substr(start, len), ModelFactory::Create(read.Model, snr), start,
- end, read.PinEnd, read.PinStart));
- }
-
- throw std::invalid_argument("read is unmapped!");
-}
-
-} // namespace Consensus
-} // namespace PacBio
diff --git a/src/MonoMolecularIntegrator.cpp b/src/MonoMolecularIntegrator.cpp
new file mode 100644
index 0000000..fc80c55
--- /dev/null
+++ b/src/MonoMolecularIntegrator.cpp
@@ -0,0 +1,182 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include <pacbio/consensus/MonoMolecularIntegrator.h>
+#include <pacbio/consensus/Sequence.h>
+
+#include "ModelFactory.h"
+
+namespace PacBio {
+namespace Consensus {
+
+MonoMolecularIntegrator::MonoMolecularIntegrator(const std::string& tpl,
+ const IntegratorConfig& cfg, const SNR& snr,
+ const std::string& model)
+ : AbstractIntegrator(cfg)
+ , mdl_{model}
+ , snr_{snr}
+ , fwdTpl_(tpl, ModelFactory::Create(mdl_, snr_))
+ , revTpl_(::PacBio::Consensus::ReverseComplement(tpl), ModelFactory::Create(mdl_, snr_))
+{
+}
+
+MonoMolecularIntegrator::MonoMolecularIntegrator(MonoMolecularIntegrator&& mmi)
+ : AbstractIntegrator(std::move(mmi))
+ , mdl_{mmi.mdl_}
+ , snr_{mmi.snr_}
+ , fwdTpl_{std::move(mmi.fwdTpl_)}
+ , revTpl_{std::move(mmi.revTpl_)}
+{
+}
+
+State MonoMolecularIntegrator::AddRead(const MappedRead& read)
+{
+ if (read.Model != mdl_) throw std::invalid_argument("invalid model for integrator!");
+ if (read.SignalToNoise != snr_) throw std::invalid_argument("invalid SNR for integrator!");
+
+ try {
+ if (read.Strand == StrandType::FORWARD)
+ return AbstractIntegrator::AddRead(
+ std::unique_ptr<AbstractTemplate>(new VirtualTemplate(
+ fwdTpl_, read.TemplateStart, read.TemplateEnd, read.PinStart, read.PinEnd)),
+ read);
+
+ else if (read.Strand == StrandType::REVERSE)
+ return AbstractIntegrator::AddRead(
+ std::unique_ptr<AbstractTemplate>(new VirtualTemplate(
+ revTpl_, TemplateLength() - read.TemplateEnd,
+ TemplateLength() - read.TemplateStart, read.PinEnd, read.PinStart)),
+ read);
+ } catch (const TemplateTooSmall& e) {
+ return State::TEMPLATE_TOO_SMALL;
+ }
+
+ throw std::invalid_argument("read is unmapped!");
+}
+
+size_t MonoMolecularIntegrator::TemplateLength() const { return fwdTpl_.TrueLength(); }
+char MonoMolecularIntegrator::operator[](const size_t i) const { return fwdTpl_[i].Base; }
+MonoMolecularIntegrator::operator std::string() const
+{
+ std::string result;
+
+ result.resize(fwdTpl_.Length());
+
+ for (size_t i = 0; i < fwdTpl_.Length(); ++i)
+ result[i] = fwdTpl_[i].Base;
+
+ return result;
+}
+
+double MonoMolecularIntegrator::LL(const Mutation& fwdMut)
+{
+ const Mutation revMut(ReverseComplement(fwdMut));
+ fwdTpl_.Mutate(fwdMut);
+ revTpl_.Mutate(revMut);
+ const double ll = AbstractIntegrator::LL(fwdMut);
+ fwdTpl_.Reset();
+ revTpl_.Reset();
+ return ll;
+}
+
+void MonoMolecularIntegrator::ApplyMutation(const Mutation& fwdMut)
+{
+ const Mutation revMut(ReverseComplement(fwdMut));
+
+ fwdTpl_.ApplyMutation(fwdMut);
+ revTpl_.ApplyMutation(revMut);
+
+ for (auto& eval : evals_) {
+ if (eval.Strand() == StrandType::FORWARD)
+ eval.ApplyMutation(fwdMut);
+ else if (eval.Strand() == StrandType::REVERSE)
+ eval.ApplyMutation(revMut);
+ }
+
+ assert(fwdTpl_.Length() == revTpl_.Length());
+
+#ifndef NDEBUG
+ std::string fwd;
+ std::string rev;
+
+ for (size_t i = 0; i < TemplateLength(); ++i) {
+ fwd.push_back(fwdTpl_[i].Base);
+ rev.push_back(revTpl_[i].Base);
+ }
+
+#endif
+
+ assert(fwd == ::PacBio::Consensus::ReverseComplement(rev));
+}
+
+void MonoMolecularIntegrator::ApplyMutations(std::vector<Mutation>* fwdMuts)
+{
+ std::vector<Mutation> revMuts;
+
+ for (auto it = fwdMuts->crbegin(); it != fwdMuts->crend(); ++it)
+ revMuts.emplace_back(ReverseComplement(*it));
+
+ fwdTpl_.ApplyMutations(fwdMuts);
+ revTpl_.ApplyMutations(&revMuts);
+
+ for (auto& eval : evals_) {
+ if (eval.Strand() == StrandType::FORWARD)
+ eval.ApplyMutations(fwdMuts);
+ else if (eval.Strand() == StrandType::REVERSE)
+ eval.ApplyMutations(&revMuts);
+ }
+
+ assert(fwdTpl_.Length() == revTpl_.Length());
+
+#ifndef NDEBUG
+ std::string fwd;
+ std::string rev;
+
+ for (size_t i = 0; i < TemplateLength(); ++i) {
+ fwd.push_back(fwdTpl_[i].Base);
+ rev.push_back(revTpl_[i].Base);
+ }
+#endif
+
+ assert(fwd == ::PacBio::Consensus::ReverseComplement(rev));
+}
+
+} // namespace Consensus
+} // namespace PacBio
diff --git a/src/MultiMolecularIntegrator.cpp b/src/MultiMolecularIntegrator.cpp
new file mode 100644
index 0000000..0206f80
--- /dev/null
+++ b/src/MultiMolecularIntegrator.cpp
@@ -0,0 +1,134 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include <pacbio/consensus/MultiMolecularIntegrator.h>
+#include <pacbio/consensus/Sequence.h>
+
+#include "ModelFactory.h"
+
+namespace PacBio {
+namespace Consensus {
+
+MultiMolecularIntegrator::MultiMolecularIntegrator(const std::string& tpl,
+ const IntegratorConfig& cfg)
+ : AbstractIntegrator(cfg), fwdTpl_{tpl}, revTpl_{::PacBio::Consensus::ReverseComplement(tpl)}
+{
+}
+
+State MultiMolecularIntegrator::AddRead(const MappedRead& read)
+{
+ try {
+ return AbstractIntegrator::AddRead(GetTemplate(read, read.SignalToNoise), read);
+ } catch (const TemplateTooSmall& e) {
+ return State::TEMPLATE_TOO_SMALL;
+ }
+}
+
+size_t MultiMolecularIntegrator::TemplateLength() const { return fwdTpl_.length(); }
+char MultiMolecularIntegrator::operator[](const size_t i) const { return fwdTpl_[i]; }
+MultiMolecularIntegrator::operator std::string() const { return fwdTpl_; }
+void MultiMolecularIntegrator::ApplyMutation(const Mutation& fwdMut)
+{
+ const Mutation revMut(ReverseComplement(fwdMut));
+
+ std::vector<Mutation> fwdMuts = {fwdMut};
+ std::vector<Mutation> revMuts = {revMut};
+
+ fwdTpl_ = ::PacBio::Consensus::ApplyMutations(fwdTpl_, &fwdMuts);
+ revTpl_ = ::PacBio::Consensus::ApplyMutations(revTpl_, &revMuts);
+
+ for (auto& eval : evals_) {
+ if (eval.Strand() == StrandType::FORWARD)
+ eval.ApplyMutation(fwdMut);
+ else if (eval.Strand() == StrandType::REVERSE)
+ eval.ApplyMutation(revMut);
+ }
+
+ assert(fwdTpl_.length() == revTpl_.length());
+ assert(fwdTpl_ == ::PacBio::Consensus::ReverseComplement(revTpl_));
+}
+
+void MultiMolecularIntegrator::ApplyMutations(std::vector<Mutation>* fwdMuts)
+{
+ std::vector<Mutation> revMuts;
+
+ for (auto it = fwdMuts->crbegin(); it != fwdMuts->crend(); ++it)
+ revMuts.emplace_back(ReverseComplement(*it));
+
+ fwdTpl_ = ::PacBio::Consensus::ApplyMutations(fwdTpl_, fwdMuts);
+ revTpl_ = ::PacBio::Consensus::ApplyMutations(revTpl_, &revMuts);
+
+ for (auto& eval : evals_) {
+ if (eval.Strand() == StrandType::FORWARD)
+ eval.ApplyMutations(fwdMuts);
+ else if (eval.Strand() == StrandType::REVERSE)
+ eval.ApplyMutations(&revMuts);
+ }
+
+ assert(fwdTpl_.length() == revTpl_.length());
+ assert(fwdTpl_ == ::PacBio::Consensus::ReverseComplement(revTpl_));
+}
+
+std::unique_ptr<AbstractTemplate> MultiMolecularIntegrator::GetTemplate(const MappedRead& read,
+ const SNR& snr)
+{
+ const size_t len = read.TemplateEnd - read.TemplateStart;
+
+ if (read.Strand == StrandType::FORWARD) {
+ const size_t start = read.TemplateStart;
+ const size_t end = read.TemplateEnd;
+
+ return std::unique_ptr<AbstractTemplate>(
+ new Template(fwdTpl_.substr(start, len), ModelFactory::Create(read.Model, snr), start,
+ end, read.PinStart, read.PinEnd));
+ } else if (read.Strand == StrandType::REVERSE) {
+ const size_t start = revTpl_.size() - read.TemplateEnd;
+ const size_t end = revTpl_.size() - read.TemplateStart;
+
+ return std::unique_ptr<AbstractTemplate>(
+ new Template(revTpl_.substr(start, len), ModelFactory::Create(read.Model, snr), start,
+ end, read.PinEnd, read.PinStart));
+ }
+
+ throw std::invalid_argument("read is unmapped!");
+}
+
+} // namespace Consensus
+} // namespace PacBio
diff --git a/src/Polish.cpp b/src/Polish.cpp
index 3a5ec5b..bdc91b7 100644
--- a/src/Polish.cpp
+++ b/src/Polish.cpp
@@ -46,7 +46,7 @@
#include <boost/optional.hpp>
-#include <pacbio/consensus/Integrator.h>
+#include <pacbio/consensus/AbstractIntegrator.h>
#include <pacbio/consensus/Polish.h>
using namespace std;
@@ -121,8 +121,8 @@ vector<Mutation> BestMutations(list<ScoredMutation>* scoredMuts, const size_t se
if (separation == 0) throw invalid_argument("nonzero separation required");
while (!scoredMuts->empty()) {
- const auto& mut = *max_element(scoredMuts->begin(), scoredMuts->end(),
- ScoredMutation::ScoreComparer);
+ const auto& mut =
+ *max_element(scoredMuts->begin(), scoredMuts->end(), ScoredMutation::ScoreComparer);
result.emplace_back(mut);
@@ -136,9 +136,8 @@ vector<Mutation> BestMutations(list<ScoredMutation>* scoredMuts, const size_t se
return result;
}
-vector<Mutation> NearbyMutations(vector<Mutation>* applied,
- vector<Mutation>* centers, const AbstractIntegrator& ai,
- const size_t neighborhood)
+vector<Mutation> NearbyMutations(vector<Mutation>* applied, vector<Mutation>* centers,
+ const AbstractIntegrator& ai, const size_t neighborhood)
{
const size_t len = ai.TemplateLength();
const auto clamp = [len](const int i) { return max(0, min<int>(len, i)); };
@@ -191,14 +190,14 @@ vector<Mutation> NearbyMutations(vector<Mutation>* applied,
return result;
}
-tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& cfg)
+PolishResult Polish(AbstractIntegrator* ai, const PolishConfig& cfg)
{
vector<Mutation> muts = Mutations(*ai);
hash<string> hashFn;
size_t oldTpl = hashFn(*ai);
set<size_t> history = {oldTpl};
- size_t nTested = 0;
- size_t nApplied = 0;
+
+ PolishResult result;
for (size_t i = 0; i < cfg.MaximumIterations; ++i) {
// find the best mutations given our parameters
@@ -209,7 +208,7 @@ tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& c
for (const auto& mut : muts) {
const double ll = ai->LL(mut);
if (ll > LL) scoredMuts.emplace_back(mut.WithScore(ll));
- ++nTested;
+ ++result.mutationsTested;
}
// take best mutations in separation window, apply them
@@ -217,10 +216,19 @@ tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& c
}
// convergence!!
- if (muts.empty()) return make_tuple(true, nTested, nApplied);
+ if (muts.empty()) {
+ result.hasConverged = true;
+ return result;
+ }
const size_t newTpl = hashFn(ApplyMutations(*ai, &muts));
+ const auto diagnostics = [&result](AbstractIntegrator* ai) {
+ result.maxAlphaPopulated.emplace_back(ai->MaxAlphaPopulated());
+ result.maxBetaPopulated.emplace_back(ai->MaxBetaPopulated());
+ result.maxNumFlipFlops.emplace_back(ai->MaxNumFlipFlops());
+ };
+
if (history.find(newTpl) != history.end()) {
/* Cyclic behavior guard - Dave A. found some edge cases where the
template was mutating back to an earlier version. This is a bad
@@ -229,10 +237,12 @@ tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& c
made removing muts X + Y beneficial, then you can break that
inifinite loop by just applying X or Y, as presumably this removes
the interaction between them that leads to the cycling behavior.
- This step is just a heuristic work around that was found. */
+ This step is just a heuristic work around that was found. */
ai->ApplyMutation(muts.front());
oldTpl = hashFn(*ai);
- ++nApplied;
+ ++result.mutationsApplied;
+
+ diagnostics(ai);
// get the mutations for the next round
vector<Mutation> applied = {muts.front()};
@@ -240,7 +250,9 @@ tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& c
} else {
ai->ApplyMutations(&muts);
oldTpl = newTpl;
- nApplied += muts.size();
+ result.mutationsApplied += muts.size();
+
+ diagnostics(ai);
// get the mutations for the next round
muts = NearbyMutations(&muts, &muts, *ai, cfg.MutationNeighborhood);
@@ -250,7 +262,7 @@ tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& c
history.insert(oldTpl);
}
- return make_tuple(false, nTested, nApplied);
+ return result;
}
namespace { // anonymous
diff --git a/src/Read.cpp b/src/Read.cpp
index 5f8415f..3779000 100644
--- a/src/Read.cpp
+++ b/src/Read.cpp
@@ -48,17 +48,12 @@ SNR::SNR(const std::vector<double>& snrs) : A(snrs[0]), C(snrs[1]), G(snrs[2]),
}
namespace {
- double clamp(double val, double lo, double hi)
- {
- return std::min(std::max(val, lo), hi);
- }
+double clamp(double val, double lo, double hi) { return std::min(std::max(val, lo), hi); }
} // namespace
SNR ClampSNR(const SNR& val, const SNR& lo, const SNR& hi)
{
- return SNR(clamp(val.A, lo.A, hi.A),
- clamp(val.C, lo.C, hi.C),
- clamp(val.G, lo.G, hi.G),
+ return SNR(clamp(val.A, lo.A, hi.A), clamp(val.C, lo.C, hi.C), clamp(val.G, lo.G, hi.G),
clamp(val.T, lo.T, hi.T));
}
@@ -68,7 +63,7 @@ Read::Read(const std::string& name, const std::string& seq, const std::vector<ui
{
}
-MappedRead::MappedRead(const Read& read, StrandEnum strand, size_t templateStart,
+MappedRead::MappedRead(const Read& read, StrandType strand, size_t templateStart,
size_t templateEnd, bool pinStart, bool pinEnd)
: Read(read)
, Strand{strand}
@@ -82,12 +77,19 @@ MappedRead::MappedRead(const Read& read, StrandEnum strand, size_t templateStart
std::ostream& operator<<(std::ostream& os, const MappedRead& mr)
{
os << "MappedRead(Read(\"" << mr.Name << "\", \"" << mr.Seq << "\", \"" << mr.Model << "\"), ";
- if (mr.Strand == StrandEnum::FORWARD)
- os << "StrandEnum_FORWARD, ";
- else if (mr.Strand == StrandEnum::REVERSE)
- os << "StrandEnum_REVERSE, ";
- else if (mr.Strand == StrandEnum::UNMAPPED)
- os << "StrandEnum_UNMAPPED, ";
+ switch (mr.Strand) {
+ case StrandType::FORWARD:
+ os << "StrandType_FORWARD, ";
+ break;
+ case StrandType::REVERSE:
+ os << "StrandType_REVERSE, ";
+ break;
+ case StrandType::UNMAPPED:
+ os << "StrandType_UNMAPPED, ";
+ break;
+ default:
+ throw std::runtime_error("Unsupported Strand");
+ }
os << mr.TemplateStart << ", " << mr.TemplateEnd << ", ";
os << mr.PinStart << ", " << mr.PinEnd << ")";
return os;
diff --git a/src/Recursor.h b/src/Recursor.h
index 63fe26d..88aa880 100644
--- a/src/Recursor.h
+++ b/src/Recursor.h
@@ -68,7 +68,7 @@ public:
/// This routine will fill the alpha and beta matrices, ensuring
/// that the score computed from the alpha and beta recursions are
/// identical, refilling back-and-forth if necessary.
- size_t FillAlphaBeta(M& alpha, M& beta) const throw(AlphaBetaMismatch);
+ size_t FillAlphaBeta(M& alpha, M& beta) const;
/**
Fill in the alpha matrix. This matrix has the read run along the rows, and
@@ -707,7 +707,7 @@ Recursor<Derived>::Recursor(std::unique_ptr<AbstractTemplate>&& tpl, const Mappe
}
template <typename Derived>
-size_t Recursor<Derived>::FillAlphaBeta(M& a, M& b) const throw(AlphaBetaMismatch)
+size_t Recursor<Derived>::FillAlphaBeta(M& a, M& b) const
{
if (tpl_->Length() == 0) throw std::runtime_error("template length is 0, invalid state!");
diff --git a/src/Template.cpp b/src/Template.cpp
index a457b0f..b316c31 100644
--- a/src/Template.cpp
+++ b/src/Template.cpp
@@ -38,6 +38,7 @@
#include <sstream>
#include <stdexcept>
+#include <pacbio/consensus/Exceptions.h>
#include <pacbio/consensus/Template.h>
namespace PacBio {
@@ -48,6 +49,11 @@ AbstractTemplate::AbstractTemplate(const size_t start, const size_t end, const b
: start_{start}, end_{end}, pinStart_{pinStart}, pinEnd_{pinEnd}
{
assert(start_ <= end_);
+
+ // Templates that are below two bases are not allowed.
+ // This exception will get propagted up the chain up to the Integrator
+ // constructor or Integrator::AddRead.
+ if (end_ - start_ < 2) throw TemplateTooSmall();
}
AbstractTemplate::~AbstractTemplate() {}
@@ -172,17 +178,17 @@ std::pair<double, double> AbstractTemplate::SiteNormalParameters(const size_t i)
// place (or really just move directly to using .Idx without bit shifting
const uint8_t prev = (i == 0) ? 0 : (*this)[i - 1].Idx; // default base : A
const uint8_t curr = params.Idx;
-
+
const double p_m = params.Match, l_m = std::log(p_m), l2_m = l_m * l_m;
const double p_d = params.Deletion, l_d = std::log(p_d), l2_d = l_d * l_d;
const double p_b = params.Branch, l_b = std::log(p_b), l2_b = l_b * l_b;
const double p_s = params.Stick, l_s = std::log(p_s), l2_s = l_s * l_s;
-
+
// First moment expectations (zero terms used for clarity)
- const double E_M = ExpectedLogLikelihoodForMatchEmission(prev, curr, false);
+ const double E_M = ExpectedLLForEmission(MoveType::MATCH, prev, curr, MomentType::FIRST);
const double E_D = 0.0;
- const double E_B = ExpectedLogLikelihoodForBranchEmission(prev, curr, false);
- const double E_S = ExpectedLogLikelihoodForStickEmission(prev, curr, false);
+ const double E_B = ExpectedLLForEmission(MoveType::BRANCH, prev, curr, MomentType::FIRST);
+ const double E_S = ExpectedLLForEmission(MoveType::STICK, prev, curr, MomentType::FIRST);
// Calculate first moment
const double E_MD = (l_m + E_M) * p_m / (p_m + p_d) + (l_d + E_D) * p_d / (p_m + p_d);
@@ -192,13 +198,13 @@ std::pair<double, double> AbstractTemplate::SiteNormalParameters(const size_t i)
// Calculate second momment
// Key expansion used repeatedly here: (A + B)^2 = A^2 + 2AB + B^2
- const double E2_M = ExpectedLogLikelihoodForMatchEmission(prev, curr, true);
- const double E2_S = ExpectedLogLikelihoodForStickEmission(prev, curr, true);
- const double E2_B = ExpectedLogLikelihoodForBranchEmission(prev, curr, true);
+ const double E2_M = ExpectedLLForEmission(MoveType::MATCH, prev, curr, MomentType::SECOND);
+ const double E2_S = ExpectedLLForEmission(MoveType::STICK, prev, curr, MomentType::SECOND);
+ const double E2_B = ExpectedLLForEmission(MoveType::BRANCH, prev, curr, MomentType::SECOND);
const double E2_MD =
(l2_m + 2 * l_m * E_M + E2_M) * p_m / (p_m + p_d) + l2_d * p_d / (p_m + p_d);
- const double E2_I =
- (l2_b + 2 * E_B * l_b + E2_B) * p_b / (p_b + p_s) + (l2_s + 2 * E_S * l_s + E2_S) * p_s / (p_b + p_s);
+ const double E2_I = (l2_b + 2 * E_B * l_b + E2_B) * p_b / (p_b + p_s) +
+ (l2_s + 2 * E_S * l_s + E2_S) * p_s / (p_b + p_s);
const double E2_BS = E2_I * (p_s + p_b) / (p_m + p_d);
const double moment2 = E2_BS + 2 * E_BS * E_MD + E2_MD;
const double var = moment2 - mean * mean;
@@ -280,6 +286,7 @@ boost::optional<Mutation> Template::Mutate(const Mutation& mut)
mutOff_ = mut.LengthDiff();
mutated_ = true;
+ // Either the length is 0 or an operation had been applied
assert(Length() == 0 ||
((*this)[Length() - 1].Match == 1.0 && (*this)[Length() - 1].Branch == 0.0 &&
(*this)[Length() - 1].Stick == 0.0 && (*this)[Length() - 1].Deletion == 0.0));
@@ -354,6 +361,8 @@ finish:
assert(!pinStart_ || start_ == 0);
+ if (Length() < 2) throw TemplateTooSmall();
+
return mutApplied;
}
diff --git a/src/matrix/SparseMatrix.cpp b/src/matrix/SparseMatrix.cpp
index 65e3d79..a9886f5 100644
--- a/src/matrix/SparseMatrix.cpp
+++ b/src/matrix/SparseMatrix.cpp
@@ -83,6 +83,13 @@ size_t SparseMatrix::UsedEntries() const
return filledEntries;
}
+float SparseMatrix::UsedEntriesRatio() const
+{
+ const float filled = static_cast<float>(UsedEntries());
+ const float size = static_cast<float>(Rows() * Columns());
+ return filled / size;
+}
+
size_t SparseMatrix::AllocatedEntries() const
{
size_t sum = 0;
diff --git a/src/matrix/SparseMatrix.h b/src/matrix/SparseMatrix.h
index 14a5b07..dc6329d 100644
--- a/src/matrix/SparseMatrix.h
+++ b/src/matrix/SparseMatrix.h
@@ -72,6 +72,7 @@ public: // Information about entries filled by column
std::pair<size_t, size_t> UsedRowRange(size_t j) const;
bool IsColumnEmpty(size_t j) const;
size_t UsedEntries() const;
+ float UsedEntriesRatio() const;
size_t AllocatedEntries() const; // an entry may be allocated but not used
public: // Accessors
@@ -139,13 +140,13 @@ inline void SparseMatrix::FinishEditingColumn(size_t j, size_t usedRowsBegin, si
inline std::pair<size_t, size_t> SparseMatrix::UsedRowRange(size_t j) const
{
- assert(0 <= j && j < usedRanges_.size());
+ assert(j < usedRanges_.size());
return usedRanges_[j];
}
inline bool SparseMatrix::IsColumnEmpty(size_t j) const
{
- assert(0 <= j && j < usedRanges_.size());
+ assert(j < usedRanges_.size());
size_t begin, end;
std::tie(begin, end) = usedRanges_[j];
return begin >= end;
diff --git a/src/matrix/SparseVector.h b/src/matrix/SparseVector.h
index 6b7442a..46aa18c 100644
--- a/src/matrix/SparseVector.h
+++ b/src/matrix/SparseVector.h
@@ -106,7 +106,7 @@ inline SparseVector::SparseVector(size_t logicalLength, size_t beginRow, size_t
, storage_(allocatedEndRow_ - allocatedBeginRow_, 0.0)
, nReallocs_(0)
{
- assert(beginRow >= 0 && beginRow <= endRow && endRow <= logicalLength);
+ assert(beginRow <= endRow && endRow <= logicalLength);
CheckInvariants();
}
@@ -124,7 +124,7 @@ inline void SparseVector::ResetForRange(size_t beginRow, size_t endRow)
{
// Allows reuse. Destructive.
CheckInvariants();
- assert(beginRow >= 0 && beginRow <= endRow && endRow <= logicalLength_);
+ assert(beginRow <= endRow && endRow <= logicalLength_);
size_t newAllocatedBegin = (beginRow > PADDING) ? beginRow - PADDING : 0;
size_t newAllocatedEnd = std::min(endRow + PADDING, logicalLength_);
if ((newAllocatedEnd - newAllocatedBegin) > (allocatedEndRow_ - allocatedBeginRow_)) {
@@ -150,8 +150,7 @@ inline void SparseVector::ExpandAllocated(size_t newAllocatedBegin, size_t newAl
{
// Expands allocated storage while preserving the contents.
CheckInvariants();
- assert(newAllocatedBegin >= 0 && newAllocatedBegin <= newAllocatedEnd &&
- newAllocatedEnd <= logicalLength_);
+ assert(newAllocatedBegin <= newAllocatedEnd && newAllocatedEnd <= logicalLength_);
assert(newAllocatedBegin <= allocatedBeginRow_ && newAllocatedEnd >= allocatedEndRow_);
// Resize the underlying storage.
storage_.resize(newAllocatedEnd - newAllocatedBegin);
@@ -175,7 +174,7 @@ inline void SparseVector::ExpandAllocated(size_t newAllocatedBegin, size_t newAl
inline bool SparseVector::IsAllocated(size_t i) const
{
- assert(i >= 0 && i < logicalLength_);
+ assert(i < logicalLength_);
return i >= allocatedBeginRow_ && i < allocatedEndRow_;
}
@@ -196,7 +195,7 @@ inline void SparseVector::Set(size_t i, double v)
using std::min;
CheckInvariants();
- assert(i >= 0 && i < logicalLength_);
+ assert(i < logicalLength_);
if (!IsAllocated(i)) {
size_t newBeginRow = min((i > PADDING) ? i - PADDING : 0, allocatedBeginRow_);
size_t newEndRow = min(max(i + PADDING, allocatedEndRow_), logicalLength_);
@@ -216,9 +215,8 @@ inline size_t SparseVector::AllocatedEntries() const
inline void SparseVector::CheckInvariants() const
{
- assert(logicalLength_ >= 0);
- assert(0 <= allocatedBeginRow_ && allocatedBeginRow_ < logicalLength_);
- assert(0 <= allocatedEndRow_ && allocatedEndRow_ <= logicalLength_);
+ assert(allocatedBeginRow_ < logicalLength_);
+ assert(allocatedEndRow_ <= logicalLength_);
assert(allocatedBeginRow_ <= allocatedEndRow_);
assert((allocatedEndRow_ - allocatedBeginRow_) <= storage_.size());
}
diff --git a/src/models/P6C4NoCovModel.cpp b/src/models/P6C4NoCovModel.cpp
index 9a8db3b..f48f700 100644
--- a/src/models/P6C4NoCovModel.cpp
+++ b/src/models/P6C4NoCovModel.cpp
@@ -61,9 +61,9 @@ public:
std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr, double scoreDiff) const;
std::vector<TemplatePosition> Populate(const std::string& tpl) const;
- double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const;
- double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const;
- double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const;
+ double ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const;
+
private:
SNR snr_;
};
@@ -119,13 +119,13 @@ double P6C4NoCovParams[4][2][3][4] = {
{4.21031404956015, -0.347546363361823, 0.0293839179303896, -0.000893802212450644},
{2.33143889851302, -0.586068444099136, 0.040044954697795, -0.000957298861394191}}}};
-
// For P6-C4 we cap SNR at 20.0 (19.0 for C); as the training set only went that
// high; extrapolation beyond this cap goes haywire because of the higher-order
// terms in the regression model. See bug 31423.
P6C4NoCovModel::P6C4NoCovModel(const SNR& snr)
- : snr_(ClampSNR(snr, SNR(0,0,0,0), SNR(20,19,20,20)))
-{}
+ : snr_(ClampSNR(snr, SNR(0, 0, 0, 0), SNR(20, 19, 20, 20)))
+{
+}
std::vector<TemplatePosition> P6C4NoCovModel::Populate(const std::string& tpl) const
{
@@ -179,33 +179,30 @@ std::unique_ptr<AbstractRecursor> P6C4NoCovModel::CreateRecursor(
new P6C4NoCovRecursor(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
}
-
-double P6C4NoCovModel::ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- double probMatch = 1.0 - kEps;
+double P6C4NoCovModel::ExpectedLLForEmission(const MoveType move, const uint8_t prev,
+ const uint8_t curr, const MomentType moment) const
+{
const double lgThird = -std::log(3.0);
- const double lgMatch = std::log(probMatch);
- const double probMismatch = kEps;
- const double lgMismatch = lgThird + std::log(probMismatch);
- if (!secondMoment) {
- return probMatch * lgMatch + probMismatch * lgMismatch;
- } else {
- return probMatch * pow(lgMatch, 2.0) + probMismatch * pow(lgMismatch, 2.0);
+ if (move == MoveType::MATCH) {
+ constexpr double probMatch = 1.0 - kEps;
+ constexpr double probMismatch = kEps;
+ const double lgMatch = std::log(probMatch);
+ const double lgMismatch = lgThird + std::log(probMismatch);
+ if (moment == MomentType::FIRST)
+ return probMatch * lgMatch + probMismatch * lgMismatch;
+ else if (moment == MomentType::SECOND)
+ return probMatch * (lgMatch * lgMatch) + probMismatch * (lgMismatch * lgMismatch);
+ } else if (move == MoveType::BRANCH)
+ return 0.0;
+ else if (move == MoveType::STICK) {
+ if (moment == MomentType::FIRST)
+ return lgThird;
+ else if (moment == MomentType::SECOND)
+ return lgThird * lgThird;
}
+ throw std::invalid_argument("invalid move!");
}
-double P6C4NoCovModel::ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- return 0.0;
-}
-
-double P6C4NoCovModel::ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
- const double lgThird = -std::log(3.0);
- if(!secondMoment) {
- return lgThird;
- } else {
- return pow(lgThird, 2.0);
- }
-}
-
P6C4NoCovRecursor::P6C4NoCovRecursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
double scoreDiff)
: Recursor<P6C4NoCovRecursor>(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
diff --git a/src/models/S_P1C1Beta_Model.cpp b/src/models/S_P1C1Beta_Model.cpp
index 6327a56..e1cba85 100644
--- a/src/models/S_P1C1Beta_Model.cpp
+++ b/src/models/S_P1C1Beta_Model.cpp
@@ -58,12 +58,8 @@ public:
std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr, double scoreDiff) const;
std::vector<TemplatePosition> Populate(const std::string& tpl) const;
- double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
- double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
- double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
+ double ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const;
private:
SNR snr_;
@@ -165,44 +161,23 @@ std::unique_ptr<AbstractRecursor> S_P1C1Beta_Model::CreateRecursor(
std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
}
-inline double CalculateExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t prev,
- const uint8_t curr,
- const bool secondMoment)
+double S_P1C1Beta_Model::ExpectedLLForEmission(const MoveType move, const uint8_t prev,
+ const uint8_t curr, const MomentType moment) const
{
const uint8_t hpAdd = prev == curr ? 0 : 4;
const uint8_t row = curr + hpAdd;
double expectedLL = 0;
for (size_t i = 0; i < 4; i++) {
- double curProb = emissionPmf[index][row][i];
+ double curProb = emissionPmf[static_cast<uint8_t>(move)][row][i];
double lgCurProb = std::log(curProb);
- if (!secondMoment) {
+ if (moment == MomentType::FIRST)
expectedLL += curProb * lgCurProb;
- } else {
- expectedLL += curProb * pow(lgCurProb, 2.0);
- }
+ else if (moment == MomentType::SECOND)
+ expectedLL += curProb * (lgCurProb * lgCurProb);
}
return expectedLL;
}
-double S_P1C1Beta_Model::ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
-{
- return CalculateExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::MATCH), prev,
- curr, secondMoment);
-}
-double S_P1C1Beta_Model::ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
-{
- return CalculateExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::STICK), prev,
- curr, secondMoment);
-}
-double S_P1C1Beta_Model::ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
-{
- return CalculateExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::BRANCH), prev,
- curr, secondMoment);
-}
-
S_P1C1Beta_Recursor::S_P1C1Beta_Recursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr, double scoreDiff)
: Recursor<S_P1C1Beta_Recursor>(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
diff --git a/src/models/S_P1C1v1_Model.cpp b/src/models/S_P1C1v1_Model.cpp
index 8717e90..e12cfd4 100644
--- a/src/models/S_P1C1v1_Model.cpp
+++ b/src/models/S_P1C1v1_Model.cpp
@@ -62,19 +62,13 @@ public:
std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr, double scoreDiff) const;
std::vector<TemplatePosition> Populate(const std::string& tpl) const;
- double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
- double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
- double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
+ double ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const;
private:
SNR snr_;
double ctxTrans_[CONTEXT_NUMBER][4];
double cachedEmissionExpectations_[CONTEXT_NUMBER][3][2];
- double ExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t prev,
- const uint8_t curr, const bool secondMoment) const;
};
REGISTER_MODEL_IMPL(S_P1C1v1_Model);
@@ -259,31 +253,21 @@ constexpr double transProbs[16][3][4] = {
{1.66643125707819, -2.11840547506457, 0.345196264109573, -0.0190747089705634},
{-1.9903454046927, -0.489297962270134, 0.0503913801733079, -0.00174698908180932}}};
-inline double CalculateExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t row,
- const bool secondMoment)
+inline double CalculateExpectedLLForEmission(const size_t move, const uint8_t row,
+ const size_t moment)
{
double expectedLL = 0;
- for (size_t i = 0; i < OUTCOME_NUMBER; i++) {
- double curProb = emissionPmf[index][row][i];
- double lgCurProb = std::log(curProb);
- if (!secondMoment) {
+ for (size_t i = 0; i < OUTCOME_NUMBER; ++i) {
+ const double curProb = emissionPmf[move][row][i];
+ const double lgCurProb = std::log(curProb);
+ if (moment == static_cast<uint8_t>(MomentType::FIRST))
expectedLL += curProb * lgCurProb;
- } else {
- expectedLL += curProb * pow(lgCurProb, 2.0);
- }
+ else if (moment == static_cast<uint8_t>(MomentType::SECOND))
+ expectedLL += curProb * (lgCurProb * lgCurProb);
}
return expectedLL;
}
-double S_P1C1v1_Model::ExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t prev,
- const uint8_t curr,
- const bool secondMoment) const
-{
- const auto row = (prev << 2) | curr;
- const auto moment = secondMoment ? 1 : 0;
- return cachedEmissionExpectations_[row][index][moment];
-}
-
S_P1C1v1_Model::S_P1C1v1_Model(const SNR& snr) : snr_(snr)
{
// Generate cached transistion probabilities
@@ -314,14 +298,11 @@ S_P1C1v1_Model::S_P1C1v1_Model(const SNR& snr) : snr_(snr)
// Generate cached emission expectations
// TODO: These are identical for all instances, either we should enrich the model or avoid doing
// this in a context dependent way
- for (int ctx = 0; ctx < CONTEXT_NUMBER; ctx++) {
- for (int index = 0; index < 3; index++) {
- cachedEmissionExpectations_[ctx][index][0] =
- CalculateExpectedLogLikelihoodOfOutcomeRow(index, ctx, false);
- cachedEmissionExpectations_[ctx][index][1] =
- CalculateExpectedLogLikelihoodOfOutcomeRow(index, ctx, true);
- }
- }
+ for (size_t ctx = 0; ctx < CONTEXT_NUMBER; ++ctx)
+ for (size_t move = 0; move < 3; ++move)
+ for (size_t moment = 0; moment < 2; ++moment)
+ cachedEmissionExpectations_[ctx][move][moment] =
+ CalculateExpectedLLForEmission(move, ctx, moment);
}
std::unique_ptr<AbstractRecursor> S_P1C1v1_Model::CreateRecursor(
@@ -362,23 +343,12 @@ std::vector<TemplatePosition> S_P1C1v1_Model::Populate(const std::string& tpl) c
return result;
}
-double S_P1C1v1_Model::ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
-{
- return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::MATCH), prev, curr,
- secondMoment);
-}
-double S_P1C1v1_Model::ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
-{
- return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::STICK), prev, curr,
- secondMoment);
-}
-double S_P1C1v1_Model::ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
+double S_P1C1v1_Model::ExpectedLLForEmission(const MoveType move, const uint8_t prev,
+ const uint8_t curr, const MomentType moment) const
{
- return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::BRANCH), prev, curr,
- secondMoment);
+ const size_t row = (prev << 2) | curr;
+ return cachedEmissionExpectations_[row][static_cast<uint8_t>(move)]
+ [static_cast<uint8_t>(moment)];
}
S_P1C1v1_Recursor::S_P1C1v1_Recursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
diff --git a/src/models/S_P1C1v2_Model.cpp b/src/models/S_P1C1v2_Model.cpp
index 61e16ee..9b59abf 100644
--- a/src/models/S_P1C1v2_Model.cpp
+++ b/src/models/S_P1C1v2_Model.cpp
@@ -70,19 +70,13 @@ public:
std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr, double scoreDiff) const;
std::vector<TemplatePosition> Populate(const std::string& tpl) const;
- double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
- double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
- double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const;
+ double ExpectedLLForEmission(MoveType move, uint8_t prev, uint8_t curr,
+ MomentType moment) const;
private:
SNR snr_;
double ctxTrans_[nContexts][4];
double cachedEmissionExpectations_[nContexts][3][2];
- double ExpectedLogLikelihoodOfOutcomeRow(const size_t move, const uint8_t prev,
- const uint8_t curr, const bool secondMoment) const;
};
REGISTER_MODEL_IMPL(S_P1C1v2_Model);
@@ -98,202 +92,193 @@ public:
virtual double UndoCounterWeights(size_t nEmissions) const;
};
-constexpr double snrRanges[4][2] = {{4.001438, 9.300400}, // A
- {7.132999, 18.840239}, // C
- {4.017619, 9.839173}, // G
- {5.553696, 15.040482}}; // T
+constexpr double snrRanges[4][2] = {{4.000641, 9.799212}, // A
+ {5.654043, 17.958242}, // C
+ {4.000119, 9.744069}, // G
+ {5.751278, 14.595276}}; // T
constexpr double emissionPmf[3][nContexts][nOutcomes] = {
{// matchPmf
- {0.0538934269, 0.00145009847, 0.000903540653, 5.61451444e-05, 0.0717385523, 0.00138207722,
- 0.000983663388, 2.98716179e-05, 0.863881703, 0.00436234248, 0.00108967626, 0.000162001338},
- {0.00440213807, 0.0125860226, 7.60499349e-05, 2.11752707e-05, 0.0153821856, 0.0427745313,
- 0.000264941701, 0.000280821504, 0.0178753594, 0.905210325, 0.000105142401, 0.000939700363},
- {0.000540246421, 0.000218767963, 0.01836689, 0.00340121939, 0.000730234523, 3.4737868e-05,
- 0.0772714471, 0.00471209957, 0.000843144059, 0.000588970797, 0.870275654, 0.0229348618},
- {0.000111108679, 0.000203758415, 0.00804596969, 0.019115043, 0.000549249541, 9.80612339e-05,
- 0.0344882713, 0.051657787, 0.000221063495, 0.000465020805, 0.0610234598, 0.823952168},
- {0.0202789584, 0.00148903221, 0.000597163411, 0.000259954647, 0.064020036, 0.00104431313,
- 0.00124614496, 0.00013358629, 0.905761027, 0.0029572048, 0.00200379219, 0.000143327084},
- {0.0402724632, 0.0128518475, 6.42501609e-05, 2.72583767e-05, 0.0132786761, 0.0373069841,
- 5.29716119e-05, 9.14398755e-05, 0.0162147808, 0.879564809, 0.000141573402, 5.9945642e-05},
- {0.000201881601, 4.29369365e-05, 0.00728725582, 0.00149461331, 0.000409273215, 0.000167745802,
- 0.0346508126, 0.00200385599, 0.000794645188, 0.000287314863, 0.925552349, 0.0270401396},
- {0.000133065007, 7.1321816e-05, 0.00958536688, 0.0187994622, 0.00028633053, 9.12768574e-05,
- 0.033314986, 0.0553368214, 0.000384136098, 0.000731762669, 0.0598679875, 0.821318207},
- {0.0106324794, 0.00067658496, 0.000409726657, 8.44568278e-05, 0.0352580824, 0.000480117964,
- 0.000394468725, 3.89470939e-05, 0.946170299, 0.00473155911, 0.000572506557, 0.000477970619},
- {0.00197964334, 0.0054988032, 0.000105200025, 1.84904847e-05, 0.00740934367, 0.0233884094,
- 0.000111958009, 5.9308563e-05, 0.0117473508, 0.949256624, 0.000233843345, 0.000134414698},
- {0.03469053, 2.12215021e-05, 0.00596709145, 0.000976029849, 0.000399217726, 0.000137996197,
- 0.0300521512, 0.00181745646, 0.000422486611, 0.000438039184, 0.901604554, 0.0233980917},
- {0.000413418726, 3.87097781e-05, 0.0051991451, 0.00888802651, 7.01912149e-05, 1.93669472e-05,
- 0.0190797047, 0.0304401645, 0.000143986073, 0.00102353111, 0.0393876177, 0.895213831},
- {0.0206986587, 0.00139920374, 0.000577718694, 6.30728439e-05, 0.0638528341, 0.000787266592,
- 0.000963775424, 7.07162141e-05, 0.904225376, 0.00531211609, 0.00172552057, 0.000220755728},
- {0.002398519, 0.00597008512, 6.42789108e-05, 4.3328933e-05, 0.00712968535, 0.0222720582,
- 0.000281381205, 2.45760734e-05, 0.0109258848, 0.950205967, 9.46913182e-05, 0.000517544635},
- {0.000304324236, 2.30017401e-05, 0.00799304005, 0.00141347855, 0.000168190169, 1.64084167e-05,
- 0.0379674798, 0.0021998025, 0.000906751406, 0.000542188155, 0.92718794, 0.0212130746},
- {0.0280311711, 0.00051862335, 0.00593624703, 0.0083684267, 0.00324521337, 0.000994877985,
- 0.0173524216, 0.0263269812, 0.0279441, 0.0276693067, 0.0653178223, 0.788237005}},
+ {0.050560033, 0.00144763859, 0.000163177865, 2.0867935e-05, 0.0726032734, 0.000842439402,
+ 0.000271423794, 8.43974963e-06, 0.869290866, 0.00413508802, 0.000521750699, 0.000118837204},
+ {0.00462830888, 0.0139517191, 0.000101186697, 5.67650713e-05, 0.0160684007, 0.0406243136,
+ 0.000138793033, 5.62552642e-05, 0.0190043135, 0.904879566, 0.000174480994, 0.000295282359},
+ {0.000175052112, 2.26892306e-05, 0.0196019025, 0.00400244294, 0.000509824298, 9.75492549e-06,
+ 0.0802293256, 0.00408560015, 0.000147604012, 0.000331175767, 0.872713612, 0.0181486146},
+ {8.73952452e-05, 6.17770545e-05, 0.00903033597, 0.018502656, 0.000200885068, 9.75016622e-05,
+ 0.0351722645, 0.0517276852, 0.000352994508, 0.000385742081, 0.0644170155, 0.819947012},
+ {0.0198831161, 0.000961303214, 0.000144597389, 4.67305367e-05, 0.0638168971, 0.000837850222,
+ 0.000558459661, 3.99576048e-05, 0.910672574, 0.00217152562, 0.000648278718, 0.000202152294},
+ {0.0410273877, 0.0126381834, 5.80888114e-05, 4.64404749e-05, 0.0138647592, 0.0380978485,
+ 2.49507103e-05, 4.31344748e-05, 0.0155899299, 0.878541588, 1.74872929e-05, 3.0599793e-05},
+ {8.32672171e-05, 2.20656741e-05, 0.00834519645, 0.00156910057, 0.000172765793, 2.1418975e-05,
+ 0.0356742714, 0.00212721578, 0.000622484386, 0.000230268885, 0.933599375, 0.0175160384},
+ {3.86341223e-05, 6.93599568e-05, 0.0106638632, 0.0194476326, 2.80847249e-05, 6.80203472e-05,
+ 0.0350317359, 0.0561405145, 0.000248220527, 0.000735245812, 0.064117019, 0.813389881},
+ {0.01055129, 0.000606336462, 0.000106046244, 6.17777656e-05, 0.0340550092, 0.000538619497,
+ 0.000266054642, 9.96327513e-06, 0.948832735, 0.00433935042, 0.000343332891, 0.000270289762},
+ {0.00199646998, 0.0070224845, 6.10929446e-05, 5.77297092e-05, 0.0076668946, 0.0217966053,
+ 9.74216351e-05, 1.5916467e-05, 0.011502371, 0.949283502, 0.000259732895, 0.000225420187},
+ {0.0363913971, 5.18879484e-05, 0.00586382213, 0.00119694109, 7.89913825e-05, 1.4820869e-05,
+ 0.0288807762, 0.00180293314, 0.000312950446, 0.000119941063, 0.909704767, 0.0155609746},
+ {1.64569235e-05, 4.43086063e-05, 0.00453413579, 0.0102359104, 5.18293729e-05, 6.02931756e-05,
+ 0.0180114607, 0.0306943017, 0.000153656166, 0.000607235317, 0.0413053796, 0.894264433},
+ {0.0214236803, 0.00145057122, 0.000196790896, 7.29502814e-05, 0.0617915495, 0.000820733636,
+ 0.00047074373, 5.43281455e-05, 0.908393448, 0.0044080379, 0.000772738613, 0.000118585931},
+ {0.00254794588, 0.00714226772, 5.36718881e-05, 5.4246971e-05, 0.00790636565, 0.0227387232,
+ 3.97346769e-05, 4.53241014e-05, 0.0116566568, 0.947447228, 0.000175142411, 0.000173810276},
+ {8.63087551e-05, 2.68932315e-05, 0.00961793262, 0.00169384557, 7.28762914e-05, 7.4200117e-06,
+ 0.037210596, 0.00195177916, 0.000458586001, 0.000265371391, 0.933488709, 0.0151035143},
+ {0.0297329186, 0.000570699243, 0.00648572858, 0.00907544748, 0.00299260939, 0.000974535227,
+ 0.0194331272, 0.0272735285, 0.0264500987, 0.0267971751, 0.0666750407, 0.783525197}},
{// branchPmf
- {0.305613932, 0.000558996368, 0.000558996368, 0.000558996368, 0.144283703, 0.000558996368,
- 0.000558996368, 0.000558996368, 0.542276416, 0.000558996368, 0.000558996368, 0.000558996368},
- {0.000912790048, 0.0706793677, 0.000912790048, 0.000912790048, 0.000912790048, 0.0660077837,
- 0.000912790048, 0.000912790048, 0.000912790048, 0.850533788, 0.000912790048, 0.000912790048},
- {0.000370148971, 0.000370148971, 0.263701996, 0.000370148971, 0.000370148971, 0.000370148971,
- 0.136580197, 0.000370148971, 0.000370148971, 0.000370148971, 0.594535721, 0.000370148971},
- {0.000992671062, 0.000992671062, 0.000992671062, 0.0903402269, 0.000992671062, 0.000992671062,
- 0.000992671062, 0.116624552, 0.000992671062, 0.000992671062, 0.000992671062, 0.779137827},
- {0.153053079, 0.000140879113, 0.000140879113, 0.000140879113, 0.129535583, 0.000140879113,
- 0.000140879113, 0.000140879113, 0.71543903, 0.000140879113, 0.000140879113, 0.000140879113},
- {0.000273593053, 0.0390475313, 0.000273026027, 0.000273026027, 0.0002731413, 0.0609617098,
- 0.000273026027, 0.000273026027, 0.00027418863, 0.89616655, 0.000273026027, 0.000273026027},
- {0.00024935686, 0.00024935686, 0.224134214, 0.00024935686, 0.00024935686, 0.00024935686,
- 0.119989707, 0.00024935686, 0.00024935686, 0.00024935686, 0.652385082, 0.00024935686},
- {0.000703357057, 0.000703357057, 0.000703357057, 0.0774320717, 0.000703357057, 0.000703357057,
- 0.000703357057, 0.0881611725, 0.000703357057, 0.000703357057, 0.000703357057, 0.824559757},
- {0.120522857, 0.000164103067, 0.000164103067, 0.000164103067, 0.0827988704, 0.000164103067,
- 0.000164103067, 0.000164103067, 0.79438083, 0.000164103067, 0.000164103067, 0.000164103067},
- {0.000324650526, 0.0436389616, 0.000324650526, 0.000324650526, 0.000324650526, 0.0610063298,
- 0.000324650526, 0.000324650526, 0.000324650526, 0.890809601, 0.000324650526, 0.000324650526},
- {0.00044702111, 0.000447020831, 0.298059864, 0.000447020831, 0.000447537949, 0.000447020831,
- 0.0757672197, 0.000447020831, 0.000447039189, 0.000447020831, 0.619914089, 0.000447020831},
- {0.000460428661, 0.000460428661, 0.000460428661, 0.0445651204, 0.000460428661, 0.000460428661,
- 0.000460428661, 0.0472083803, 0.000460428661, 0.000460428661, 0.000460428661, 0.901780498},
- {0.129409727, 0.000180205845, 0.000180205845, 0.000180205845, 0.110681588, 0.000180205845,
- 0.000180205845, 0.000180205845, 0.757385803, 0.000180205845, 0.000180205845, 0.000180205845},
- {0.000237587758, 0.0323776588, 0.000237587758, 0.000237587758, 0.000237587758, 0.0435983253,
- 0.000237587758, 0.000237587758, 0.000237587758, 0.920697787, 0.000237587758, 0.000237587758},
- {0.000188825576, 0.000188825576, 0.166170812, 0.000188825576, 0.000188825576, 0.000188825576,
- 0.0914469645, 0.000188825576, 0.000188825576, 0.000188825576, 0.739738666, 0.000188825576},
- {0.000120613319, 0.000119901068, 0.000119901068, 0.0411298523, 0.00011990536, 0.000119901068,
- 0.000119901068, 0.0701345987, 0.000119982783, 0.000119901068, 0.000119901068, 0.887056136}},
+ {0.271862342, 0.000108958984, 0.000108958984, 0.000108958984, 0.160747222, 0.000108958984,
+ 0.000108958984, 0.000108958984, 0.56586501, 0.000108958984, 0.000108958984, 0.000108958984},
+ {0.000253399313, 0.0805474032, 0.000253399313, 0.000253399313, 0.000253399313, 0.11493698,
+ 0.000253399313, 0.000253399313, 0.000253399313, 0.800968026, 0.000253399313, 0.000253399313},
+ {9.96116186e-05, 9.96116186e-05, 0.295921139, 9.96116186e-05, 9.96116186e-05, 9.96116186e-05,
+ 0.156000597, 9.96116186e-05, 9.96116186e-05, 9.96116186e-05, 0.546683702, 9.96116186e-05},
+ {0.000266389696, 0.000266389696, 0.000266389696, 0.106949539, 0.000266389696, 0.000266389696,
+ 0.000266389696, 0.117892594, 0.000266389696, 0.000266389696, 0.000266389696, 0.771428411},
+ {0.158494227, 3.22815102e-05, 3.22815102e-05, 3.22815102e-05, 0.135246183, 3.22815102e-05,
+ 3.22815102e-05, 3.22815102e-05, 0.705807649, 3.22815102e-05, 3.22815102e-05, 3.22815102e-05},
+ {5.63990004e-05, 0.0396234254, 5.4605313e-05, 5.4605313e-05, 5.47353715e-05, 0.0555789327,
+ 5.4605313e-05, 5.4605313e-05, 5.64103869e-05, 0.904029439, 5.4605313e-05, 5.4605313e-05},
+ {4.80816837e-05, 4.80816837e-05, 0.214977302, 4.80816837e-05, 4.80816837e-05, 4.80816837e-05,
+ 0.123027519, 4.80816837e-05, 4.80816837e-05, 4.80816837e-05, 0.661322035, 4.80816837e-05},
+ {0.00019927387, 0.00019927387, 0.00019927387, 0.101763692, 0.00019927387, 0.00019927387,
+ 0.00019927387, 0.111141255, 0.00019927387, 0.00019927387, 0.00019927387, 0.784305219},
+ {0.126620811, 3.77512454e-05, 3.77512454e-05, 3.77512454e-05, 0.0949253706, 3.77512454e-05,
+ 3.77512454e-05, 3.77512454e-05, 0.777925301, 3.77512454e-05, 3.77512454e-05, 3.77512454e-05},
+ {8.34845269e-05, 0.0522147481, 8.34845269e-05, 8.34845269e-05, 8.34845269e-05, 0.0565042132,
+ 8.34845269e-05, 8.34845269e-05, 8.34845269e-05, 0.890112255, 8.34845269e-05, 8.34845269e-05},
+ {0.000110815138, 0.000110427066, 0.329742326, 0.000110427066, 0.000113945866, 0.000110427066,
+ 0.140289483, 0.000110427066, 0.000117423649, 0.000110427066, 0.528411309, 0.000110427066},
+ {0.000131337002, 0.000131337002, 0.000131337002, 0.0390710366, 0.000131337002, 0.000131337002,
+ 0.000131337002, 0.0617348823, 0.000131337002, 0.000131337002, 0.000131337002, 0.897355363},
+ {0.121151783, 3.91167535e-05, 3.91167535e-05, 3.91167535e-05, 0.121341222, 3.91167535e-05,
+ 3.91167535e-05, 3.91167535e-05, 0.75695936, 3.91167535e-05, 3.91167535e-05, 3.91167535e-05},
+ {6.4451422e-05, 0.030096713, 6.4451422e-05, 6.4451422e-05, 6.4451422e-05, 0.0520326862,
+ 6.4451422e-05, 6.4451422e-05, 6.4451422e-05, 0.916968281, 6.4451422e-05, 6.4451422e-05},
+ {4.34265926e-05, 4.34265926e-05, 0.172479716, 4.34265926e-05, 4.34265926e-05, 4.34265926e-05,
+ 0.113044043, 4.34265926e-05, 4.34265926e-05, 4.34265926e-05, 0.713868268, 4.34265926e-05},
+ {3.16836313e-05, 2.73694388e-05, 2.73694388e-05, 0.04721317, 2.77977828e-05, 2.73694388e-05,
+ 2.73694388e-05, 0.078916509, 3.46399946e-05, 2.73694388e-05, 2.73694388e-05, 0.873475136}},
{// stickPmf
- {0.000572145812, 0.0249470981, 0.444128106, 0.0284293982, 0.000572145812, 0.00410024964,
- 0.126982064, 0.0141199516, 0.000572145812, 0.0811671877, 0.200116148, 0.0714326301},
- {0.157990255, 0.00032657149, 0.325814692, 0.0288899281, 0.0375283566, 0.00032657149,
- 0.132842697, 0.0224909505, 0.0879452518, 0.00032657149, 0.115500492, 0.0883848042},
- {0.358522418, 0.0235568496, 0.000864084339, 0.12651633, 0.052549674, 0.00798327989,
- 0.000864084339, 0.0533075108, 0.101824542, 0.0657216859, 0.000864084339, 0.203105036},
- {0.327657509, 0.0162688106, 0.283895663, 0.000488837542, 0.0404076842, 0.00606060934,
- 0.121371198, 0.000488837542, 0.073572133, 0.0352442565, 0.0916114349, 0.000488837542},
- {0.000292823503, 0.020723219, 0.422716082, 0.037557393, 0.000292823503, 0.00878801277,
- 0.124731093, 0.00806155344, 0.000292823503, 0.225808232, 0.102362669, 0.0469091586},
- {0.138184081, 0.000176275107, 0.353691494, 0.0366734007, 0.0669928396, 0.000160372684,
- 0.152689688, 0.0113283557, 0.0933947819, 0.000164997607, 0.112044223, 0.0337002216},
- {0.376363111, 0.0147959899, 0.000406742121, 0.0326187747, 0.124202612, 0.00485744034,
- 0.000406742121, 0.0134983573, 0.154095152, 0.172573194, 0.000406742121, 0.103741432},
- {0.262107723, 0.0139625679, 0.198087107, 0.000317282509, 0.0764115908, 0.00637190626,
- 0.0728789745, 0.000317282509, 0.104982564, 0.168668474, 0.0939908336, 0.000317282509},
- {0.000568426528, 0.023701166, 0.506335229, 0.0366977326, 0.000568426528, 0.0181752864,
- 0.100905875, 0.0188386785, 0.000568426528, 0.103148426, 0.104347954, 0.0833022395},
- {0.122032869, 0.000140837724, 0.364441855, 0.0318879599, 0.0573868815, 0.000140837724,
- 0.218659155, 0.0182715104, 0.0477966054, 0.000140837724, 0.0787912025, 0.0596052591},
- {0.396405225, 0.016157559, 0.0022109107, 0.0436250361, 0.107726515, 0.0146690405,
- 0.00100899286, 0.00886596365, 0.127007802, 0.0802697981, 0.000874635434, 0.196943448},
- {0.290414877, 0.0177202936, 0.298960183, 0.000526686986, 0.092257921, 0.00700004966,
- 0.131147972, 0.000526686986, 0.0859777319, 0.0271600613, 0.0451474144, 0.000526686986},
- {0.000428202504, 0.0191583409, 0.293741471, 0.0182063946, 0.000428202504, 0.0180388612,
- 0.128372288, 0.0257390091, 0.000428202504, 0.107512898, 0.199359888, 0.186445229},
- {0.149686594, 0.000207150428, 0.229916753, 0.0144187336, 0.0651607498, 0.000207150428,
- 0.0690123099, 0.0156771722, 0.0675998749, 0.000207150428, 0.109695096, 0.277175513},
- {0.403196746, 0.0177901114, 0.000543576149, 0.0343915039, 0.122395417, 0.00275829252,
- 0.000543576149, 0.0269263943, 0.0864036291, 0.043786384, 0.000543576149, 0.258002912},
- {0.142268585, 0.0113274953, 0.228643486, 0.000236113134, 0.0585796342, 0.00333756943,
- 0.175460415, 0.000247031094, 0.0538439511, 0.0417853213, 0.282875612, 0.00024225267}}};
+ {0.000103010778, 0.0380995742, 0.426353247, 0.0339918103, 0.000103010778, 0.0211011476,
+ 0.126299789, 0.0147406207, 0.000103010778, 0.111827288, 0.148961708, 0.0778007297},
+ {0.142966063, 6.82261622e-05, 0.318828309, 0.0291750067, 0.0612651332, 6.82261622e-05,
+ 0.132226999, 0.0222891262, 0.108765546, 6.82261622e-05, 0.107092044, 0.0768459636},
+ {0.435501353, 0.0281061159, 0.00022795498, 0.112842372, 0.0386560686, 0.0153571105,
+ 0.00022795498, 0.0531675523, 0.0885281789, 0.0680543734, 0.00022795498, 0.157963236},
+ {0.277671243, 0.0199532671, 0.279545911, 9.68193392e-05, 0.0672566924, 0.0113474679,
+ 0.103986414, 9.68193392e-05, 0.0874665425, 0.0477403036, 0.104257603, 9.68193392e-05},
+ {6.26766712e-05, 0.0248510469, 0.399492811, 0.0373275328, 6.26766712e-05, 0.00916381627,
+ 0.119657428, 0.0131950955, 6.26766712e-05, 0.212819044, 0.124778109, 0.0582137031},
+ {0.125185057, 3.47389275e-05, 0.306433733, 0.0308523177, 0.0774246347, 3.52102425e-05,
+ 0.149641423, 0.0134852162, 0.129457219, 3.45901197e-05, 0.128579834, 0.0386692545},
+ {0.314712978, 0.0245469889, 7.44182399e-05, 0.0329602878, 0.124756147, 0.0144931638,
+ 7.44182399e-05, 0.0161076306, 0.157254073, 0.222533644, 7.44182399e-05, 0.0920397407},
+ {0.234807777, 0.0143254662, 0.196699446, 7.60379939e-05, 0.0813954759, 0.0105264534,
+ 0.0840430032, 7.60379939e-05, 0.112867722, 0.174110565, 0.0906157878, 7.60379939e-05},
+ {0.000114280663, 0.0243444281, 0.457199123, 0.0381081135, 0.000114280663, 0.0162510324,
+ 0.124731361, 0.0167679501, 0.000114280663, 0.107039005, 0.107835264, 0.106809478},
+ {0.118927574, 3.10562894e-05, 0.343802957, 0.0333522609, 0.0496397906, 3.10562894e-05,
+ 0.195022097, 0.0163660717, 0.0539508263, 3.10562894e-05, 0.125648942, 0.0630410307},
+ {0.335673887, 0.0267972214, 0.00093244908, 0.0455030532, 0.129837105, 0.0126090082,
+ 0.000180646254, 0.0192047603, 0.142185037, 0.0744691665, 0.000164140614, 0.211647424},
+ {0.299454906, 0.0203545133, 0.270281208, 0.000115353453, 0.10064444, 0.00705182304,
+ 0.114896506, 0.000115353453, 0.100554091, 0.0384270526, 0.0474126337, 0.000115353453},
+ {7.92484966e-05, 0.0190180551, 0.262244947, 0.019509475, 7.92484966e-05, 0.0113606249,
+ 0.0949860067, 0.0183237604, 7.92484966e-05, 0.106081588, 0.21160129, 0.256240265},
+ {0.125916528, 4.15946843e-05, 0.209865333, 0.0203882413, 0.0577305702, 4.15946843e-05,
+ 0.0904461035, 0.018649162, 0.0587107584, 4.15946843e-05, 0.129821536, 0.28813901},
+ {0.33820122, 0.0210315415, 0.000104202635, 0.0409843175, 0.118326009, 0.0105217011,
+ 0.000104202635, 0.0250824494, 0.10336307, 0.0371536554, 0.000104202635, 0.304502416},
+ {0.148349765, 0.0127413602, 0.22312407, 4.96955324e-05, 0.0595806675, 0.00463335009,
+ 0.177668167, 4.83714586e-05, 0.0574131588, 0.0412555016, 0.274858141, 4.85075474e-05}}};
constexpr double transProbs[nContexts][3][4] = {
{// AA
- {-10.2090406, 3.32117257, -0.520117212, 0.0249921413},
- {8.9793309, -5.3961519, 0.763613361, -0.0364406433},
- {7.97224482, -3.91644241, 0.485967707, -0.0210642023}},
+ {5.85956362, -4.41463293, 0.683309462, -0.0346718671},
+ {7.06499623, -4.40259797, 0.607137416, -0.027878395},
+ {1.73648922, -0.935476496, 0.0264529997, 0.00196800649}},
{// AC
- {-8.67385002, 1.79692678, -0.189152589, 0.00593875207},
- {-2.06471994, -0.248833169, 0.0239433129, -0.00081901914},
- {1.9844226, -0.834246008, 0.0238906298, 0.000299399008}},
+ {2.68872981, -1.33398847, 0.0860003242, -0.00189734598},
+ {3.55997286, -1.60728992, 0.130435583, -0.00340039547},
+ {4.95711278, -1.98021021, 0.144128237, -0.00360784403}},
{// AG
- {-5.57888183, 0.8609919, -0.0601847875, -0.00209638118},
- {-0.0651352822, -0.37813552, -0.116036072, 0.0118044463},
- {10.3924646, -4.87649024, 0.583074371, -0.0230668231}},
+ {-3.23979278, 0.356138788, -0.0870220245, 0.00546284744},
+ {2.5291947, -2.13705602, 0.206929395, -0.00511956903},
+ {2.20068033, -0.996606761, -0.00278577326, 0.00492497905}},
{// AT
- {-6.05918189, 1.12045586, -0.175515022, 0.0081588429},
- {-7.72954717, 1.74329164, -0.228908375, 0.00956968799},
- {0.739498015, -0.820985799, 0.034285628, 0.000308222633}},
+ {-1.15847683, -0.771986547, 0.0645814917, -0.0018601879},
+ {-2.78585757, 0.178618309, -0.0499252647, 0.00257301448},
+ {3.08433285, -1.67474855, 0.14589205, -0.00469624396}},
{// CA
- {6.71916863, -4.10669176, 0.61911044, -0.0312872449},
- {-10.7393976, 3.94632964, -0.664301449, 0.0363137581},
- {0.534509761, -0.614208207, -0.0230577035, 0.00504754227}},
+ {-0.451942513, -0.575360527, 0.0401324889, 0.000694689568},
+ {0.58830656, -1.53028855, 0.204160977, -0.00810273241},
+ {2.35067779, -1.31686514, 0.0730142817, -1.00923273e-05}},
{// CC
- {-11.8487027, 2.18336459, -0.165744338, 0.00381936993},
- {-5.40511615, 0.582166777, -0.0352022429, 0.000649948647},
- {3.62707756, -1.60860162, 0.127123969, -0.00333194156}},
+ {-7.2469072, 1.26130953, -0.111198017, 0.0031739936},
+ {-5.43347752, 0.980596171, -0.0930509844, 0.00284106343},
+ {-4.62125702, 0.536176189, -0.0532120198, 0.00156457856}},
{// CG
- {-5.33620523, 1.30599458, -0.227619416, 0.0129127127},
- {-8.16537748, 2.82193426, -0.53094601, 0.0319445398},
- {13.0580439, -6.8013044, 0.926568827, -0.0424609205}},
+ {-1.85080654, -0.271149887, 0.0233007441, -0.000158657488},
+ {3.74845392, -2.86890561, 0.378280475, -0.0155172951},
+ {5.12582688, -3.00674655, 0.342928086, -0.0137785283}},
{// CT
- {12.613656, -5.19052298, 0.561451908, -0.0206707933},
- {-9.73651127, 2.15521846, -0.225290947, 0.00767974576},
- {7.16886431, -2.96320727, 0.2726381, -0.00837654404}},
+ {1.59462161, -1.32139342, 0.112979516, -0.00350654749},
+ {-2.66688067, 0.149169695, -0.0394165557, 0.00221529117},
+ {3.35981388, -1.6812294, 0.144580211, -0.00463679475}},
{// GA
- {-3.6938331, 0.483533725, -0.0450473465, 0.000160356592},
- {6.68949621, -4.66399411, 0.677757326, -0.0318630437},
- {1.33410944, -0.988217134, -0.00422287463, 0.00562702615}},
+ {1.94929663, -1.87204746, 0.262415331, -0.0114952071},
+ {0.776645323, -1.57584288, 0.170612002, -0.00429563295},
+ {9.14677248, -5.01761335, 0.672305833, -0.031991748}},
{// GC
- {2.66843035, -1.62797424, 0.147491681, -0.00444976878},
- {1.14199306, -0.843025302, 0.0604509646, -0.00133222978},
- {5.92824303, -2.33995279, 0.175825931, -0.00449046566}},
+ {10.8094257, -3.57210251, 0.294531042, -0.0079448247},
+ {-1.6188921, -0.212323741, 0.0145898135, -0.000189460808},
+ {0.516468575, -0.980880476, 0.0607592136, -0.00125531492}},
{// GG
- {-9.53335176, 2.51765873, -0.318510141, 0.0117320354},
- {-0.524486685, -0.436816275, -0.0926096178, 0.0112705438},
- {8.88135834, -4.79762534, 0.664134538, -0.0311007622}},
+ {-1.967728, -0.522486336, 0.0601164523, -0.00183881474},
+ {3.12610849, -2.59191292, 0.325979211, -0.0138882827},
+ {3.05491681, -2.21846076, 0.287404314, -0.0128841407}},
{// GT
- {3.35914891, -1.38173016, 0.0781044221, -0.000763136872},
- {10.8476498, -4.75169784, 0.525334182, -0.0193477301},
- {15.5403876, -5.8956994, 0.604296436, -0.0209965825}},
+ {-5.79447669, 0.996639643, -0.120105223, 0.00438200289},
+ {7.06158739, -2.93404754, 0.268804061, -0.00798584198},
+ {6.70262773, -2.87567511, 0.264224627, -0.00821191872}},
{// TA
- {5.16029404, -3.68434804, 0.621640606, -0.0350676726},
- {11.8825308, -7.04640014, 1.07909276, -0.0537996376},
- {-9.20746548, 4.71271573, -0.951489582, 0.0565839306}},
+ {-7.07222018, 2.94211219, -0.543110434, 0.0320758432},
+ {-0.220904721, -0.873637208, 0.06909082, 0.000725498815},
+ {3.06972721, -1.54851697, 0.0802862366, 0.00176909841}},
{// TC
- {2.73609906, -1.3992629, 0.120116348, -0.00349409007},
- {-4.54672948, 0.344140535, -0.0129698225, -0.000227430524},
- {0.840535522, -1.10879024, 0.0800657995, -0.00206209905}},
+ {2.13319889, -1.20882369, 0.0954386191, -0.00244438603},
+ {-2.18846256, 0.022602406, -0.0121265739, 0.000696342338},
+ {-5.49462196, 0.733656731, -0.098767722, 0.00362214647}},
{// TG
- {-6.45925912, 1.63128009, -0.226062028, 0.0099607621},
- {-8.86160161, 3.12245384, -0.583442174, 0.033990528},
- {3.46514973, -1.91203442, 0.12119972, 0.00103341906}},
+ {0.459855955, -1.41666334, 0.205580055, -0.00922652629},
+ {1.40013661, -1.69751326, 0.165896712, -0.00352395246},
+ {3.64491162, -2.25206115, 0.23605282, -0.00926403736}},
{// TT
- {2.70097834, -1.96301816, 0.246186023, -0.00988186677},
- {1.80568256, -1.56446979, 0.176114086, -0.00665610868},
- {1.91982573, -1.50191002, 0.153230363, -0.00550115233}}};
+ {-4.66331239, 0.6977867, -0.0659908318, 0.00212194439},
+ {-0.800912389, -0.311917271, 0.00243369751, 0.000960389041},
+ {6.01995449, -2.74903881, 0.271552639, -0.00905647598}}};
-inline double CalculateExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t row,
- const bool secondMoment)
+inline double CalculateExpectedLLForEmission(const size_t move, const uint8_t row,
+ const size_t moment)
{
double expectedLL = 0;
for (size_t i = 0; i < nOutcomes; i++) {
- double curProb = emissionPmf[index][row][i];
+ double curProb = emissionPmf[move][row][i];
double lgCurProb = std::log(curProb);
- if (!secondMoment)
+ if (moment == static_cast<uint8_t>(MomentType::FIRST))
expectedLL += curProb * lgCurProb;
- else
- expectedLL += curProb * pow(lgCurProb, 2.0);
+ else if (moment == static_cast<uint8_t>(MomentType::SECOND))
+ expectedLL += curProb * (lgCurProb * lgCurProb);
}
return expectedLL;
}
-double S_P1C1v2_Model::ExpectedLogLikelihoodOfOutcomeRow(const size_t move, const uint8_t prev,
- const uint8_t curr,
- const bool secondMoment) const
-{
- const auto row = (prev << 2) | curr;
- const auto moment = secondMoment ? 1 : 0;
- return cachedEmissionExpectations_[row][move][moment];
-}
-
S_P1C1v2_Model::S_P1C1v2_Model(const SNR& snr) : snr_(snr)
{
for (size_t ctx = 0; ctx < nContexts; ++ctx) {
@@ -314,12 +299,10 @@ S_P1C1v2_Model::S_P1C1v2_Model(const SNR& snr) : snr_(snr)
ctxTrans_[ctx][j] /= sum;
// cached expectations
- for (size_t move = 0; move < 3; ++move) {
- cachedEmissionExpectations_[ctx][move][0] =
- CalculateExpectedLogLikelihoodOfOutcomeRow(move, ctx, false);
- cachedEmissionExpectations_[ctx][move][1] =
- CalculateExpectedLogLikelihoodOfOutcomeRow(move, ctx, true);
- }
+ for (size_t move = 0; move < 3; ++move)
+ for (size_t moment = 0; moment < 2; ++moment)
+ cachedEmissionExpectations_[ctx][move][moment] =
+ CalculateExpectedLLForEmission(move, ctx, moment);
}
}
@@ -361,23 +344,12 @@ std::vector<TemplatePosition> S_P1C1v2_Model::Populate(const std::string& tpl) c
return result;
}
-double S_P1C1v2_Model::ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
-{
- return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::MATCH), prev, curr,
- secondMoment);
-}
-double S_P1C1v2_Model::ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
-{
- return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::STICK), prev, curr,
- secondMoment);
-}
-double S_P1C1v2_Model::ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
- bool secondMoment) const
+double S_P1C1v2_Model::ExpectedLLForEmission(const MoveType move, const uint8_t prev,
+ const uint8_t curr, const MomentType moment) const
{
- return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::BRANCH), prev, curr,
- secondMoment);
+ const size_t row = (prev << 2) | curr;
+ return cachedEmissionExpectations_[row][static_cast<uint8_t>(move)]
+ [static_cast<uint8_t>(moment)];
}
S_P1C1v2_Recursor::S_P1C1v2_Recursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
diff --git a/swig/Integrator.i b/swig/Integrator.i
index d11f3e6..fc0968e 100644
--- a/swig/Integrator.i
+++ b/swig/Integrator.i
@@ -1,10 +1,11 @@
%{
-#include <pacbio/consensus/Integrator.h>
+#include <pacbio/consensus/State.h>
+#include <pacbio/consensus/AbstractIntegrator.h>
+#include <pacbio/consensus/MonoMolecularIntegrator.h>
+#include <pacbio/consensus/MultiMolecularIntegrator.h>
%}
-%ignore PacBio::Consensus::AbstractIntegrator::ReadState;
-
%feature("notabstract") PacBio::Consensus::MonoMolecularIntegrator;
%feature("notabstract") PacBio::Consensus::MultiMolecularIntegrator;
@@ -12,4 +13,7 @@ py_tp_str(PacBio::Consensus::AbstractIntegrator);
py_tp_str(PacBio::Consensus::MonoMolecularIntegrator);
py_tp_str(PacBio::Consensus::MultiMolecularIntegrator);
-%include <pacbio/consensus/Integrator.h>
+%include <pacbio/consensus/State.h>
+%include <pacbio/consensus/AbstractIntegrator.h>
+%include <pacbio/consensus/MonoMolecularIntegrator.h>
+%include <pacbio/consensus/MultiMolecularIntegrator.h>
diff --git a/swig/Polish.i b/swig/Polish.i
index f024d2a..a9eef96 100644
--- a/swig/Polish.i
+++ b/swig/Polish.i
@@ -1,13 +1,8 @@
%{
+#include <pacbio/consensus/PolishResult.h>
#include <pacbio/consensus/Polish.h>
%}
-// for results from Polish
-%typemap(out) std::tuple<bool, size_t, size_t> {
- $result = PyTuple_Pack(3, PyBool_FromLong(std::get<0>($1)),
- PyInt_FromSize_t(std::get<1>($1)),
- PyInt_FromSize_t(std::get<2>($1)));
-}
-
+%include <pacbio/consensus/PolishResult.h>
%include <pacbio/consensus/Polish.h>
diff --git a/swig/Read.i b/swig/Read.i
index 7fce42a..6c0b4a4 100644
--- a/swig/Read.i
+++ b/swig/Read.i
@@ -4,10 +4,12 @@
%warnfilter(509) PacBio::Consensus::MappedRead::MappedRead;
%{
+#include <pacbio/consensus/StrandType.h>
#include <pacbio/consensus/Read.h>
%}
%feature("notabstract") PacBio::Consensus::Read;
%feature("notabstract") PacBio::Consensus::MappedRead;
+%include <pacbio/consensus/StrandType.h>
%include <pacbio/consensus/Read.h>
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 54d93b9..f4a3a6f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,6 +2,10 @@
# pthread
find_package(Threads)
+if (EXTENSIVE_TESTING)
+ add_definitions(-DEXTENSIVE_TESTING=1)
+endif()
+
# gmock/gtest
set(GMOCK_RootDir ${PacBioConsensus_RootDir}/third-party/gmock-1.7.0)
set(GMOCK_IncludeDir ${GMOCK_RootDir})
diff --git a/tests/TestIntegrator.cpp b/tests/TestIntegrator.cpp
index c714555..050d47b 100644
--- a/tests/TestIntegrator.cpp
+++ b/tests/TestIntegrator.cpp
@@ -48,7 +48,8 @@
#include <tuple>
#include <vector>
-#include <pacbio/consensus/Integrator.h>
+#include <pacbio/consensus/MonoMolecularIntegrator.h>
+#include <pacbio/consensus/MultiMolecularIntegrator.h>
#include <pacbio/consensus/Mutation.h>
#include <pacbio/consensus/Sequence.h>
@@ -66,6 +67,12 @@ using ::testing::UnorderedElementsAreArray;
namespace {
+#if EXTENSIVE_TESTING
+constexpr int numSamples = 333;
+#else
+constexpr int numSamples = 3;
+#endif
+
const double prec = 0.001; // alpha/beta mismatch tolerance
const SNR snr(10, 7, 5, 11);
const string P6C4 = "P6-C4";
@@ -106,14 +113,15 @@ Read MkRead(const string& seq, const SNR& snr, const string& mdl, const vector<u
return Read("NA", seq, ipd, pw, snr, mdl);
}
+#if EXTENSIVE_TESTING
TEST(IntegratorTest, TestLongTemplate)
{
// TODO: Write a test for a longer molecule
const string mdl = P6C4;
vector<uint8_t> pw(longTpl.length(), 1);
MonoMolecularIntegrator ai(longTpl, cfg, snr, mdl);
- EXPECT_EQ(AddReadResult::SUCCESS,
- ai.AddRead(MappedRead(MkRead(longRead, snr, mdl, pw), StrandEnum::FORWARD, 0,
+ EXPECT_EQ(State::VALID,
+ ai.AddRead(MappedRead(MkRead(longRead, snr, mdl, pw), StrandType::FORWARD, 0,
longTpl.length(), true, true)));
EXPECT_NEAR(-148.92614949338801011, ai.LL(), prec);
}
@@ -125,8 +133,8 @@ void TestTiming(const string& mdl)
MonoMolecularIntegrator ai(longTpl, cfg, snr, mdl);
const auto stime = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < nsamp; ++i)
- EXPECT_EQ(AddReadResult::SUCCESS,
- ai.AddRead(MappedRead(MkRead(longRead, snr, mdl, pws), StrandEnum::FORWARD, 0,
+ EXPECT_EQ(State::VALID,
+ ai.AddRead(MappedRead(MkRead(longRead, snr, mdl, pws), StrandType::FORWARD, 0,
longTpl.length(), true, true)));
const auto etime = std::chrono::high_resolution_clock::now();
const auto duration =
@@ -164,8 +172,9 @@ TEST(IntegratorTest, TestLongTemplateTimingSP1C1v2)
{
TestTiming(SP1C1v2);
}
+#endif
-std::tuple<string, StrandEnum> Mutate(const string& tpl, const size_t nmut, std::mt19937* const gen)
+std::tuple<string, StrandType> Mutate(const string& tpl, const size_t nmut, std::mt19937* const gen)
{
string result;
@@ -190,9 +199,9 @@ std::tuple<string, StrandEnum> Mutate(const string& tpl, const size_t nmut, std:
std::bernoulli_distribution coin(0.5);
- if (coin(*gen)) return std::make_tuple(result, StrandEnum::FORWARD);
+ if (coin(*gen)) return std::make_tuple(result, StrandType::FORWARD);
- return std::make_tuple(ReverseComplement(result), StrandEnum::REVERSE);
+ return std::make_tuple(ReverseComplement(result), StrandType::REVERSE);
}
template <typename F, typename G>
@@ -214,7 +223,7 @@ void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeInt
vector<Mutation> mutations = Mutations(tpl);
for (const auto& mut : mutations) {
string read;
- StrandEnum strand;
+ StrandType strand;
vector<Mutation> muts{mut};
const string app = ApplyMutations(tpl, &muts); // template with mutation applied
std::tie(read, strand) = Mutate(app, nmut, &gen);
@@ -223,8 +232,8 @@ void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeInt
auto ai1 = makeIntegrator(tpl);
const auto arr1 = addRead(ai1, MappedRead(MkRead(read, snr, mdl, pws), strand, 0,
tpl.length(), true, true));
- EXPECT_EQ(AddReadResult::SUCCESS, arr1);
- if (arr1 != AddReadResult::SUCCESS) {
+ EXPECT_EQ(State::VALID, arr1);
+ if (arr1 != State::VALID) {
std::cerr << std::endl << "!! alpha/beta mismatch:" << std::endl;
std::cerr << " " << tpl.length() << ", " << tpl << std::endl;
std::cerr << " " << read.length() << ", " << read << std::endl;
@@ -232,8 +241,8 @@ void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeInt
auto ai2 = makeIntegrator(app);
const auto arr2 = addRead(ai2, MappedRead(MkRead(read, snr, mdl, pws), strand, 0,
app.length(), true, true));
- EXPECT_EQ(AddReadResult::SUCCESS, arr2);
- if (arr2 != AddReadResult::SUCCESS) {
+ EXPECT_EQ(State::VALID, arr2);
+ if (arr2 != State::VALID) {
std::cerr << std::endl << "!! alpha/beta mismatch:" << std::endl;
std::cerr << " " << app.length() << ", " << app << std::endl;
std::cerr << " " << read.length() << ", " << read << std::endl;
@@ -262,8 +271,9 @@ void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeInt
std::cerr << " " << ai1.TemplateLength() << ", " << string(ai1) << std::endl;
std::stringstream result;
std::copy(pws.begin(), pws.end(), std::ostream_iterator<int>(result, " "));
- std::cerr << " " << read.length() << ", " << read << " - " << result.str() << std::endl;
-
+ std::cerr << " " << read.length() << ", " << read << " - " << result.str()
+ << std::endl;
+
++nerror;
}
} catch (const std::exception& e) {
@@ -291,9 +301,9 @@ void MonoEquivalence(const string& mdl)
auto monoRead = [](MonoMolecularIntegrator& ai, const MappedRead& mr) {
return ai.AddRead(mr);
};
- MutationEquivalence(333, 2, makeMono, monoRead, mdl);
- MutationEquivalence(333, 1, makeMono, monoRead, mdl);
- MutationEquivalence(334, 0, makeMono, monoRead, mdl);
+ MutationEquivalence(numSamples, 2, makeMono, monoRead, mdl);
+ MutationEquivalence(numSamples, 1, makeMono, monoRead, mdl);
+ MutationEquivalence(numSamples, 0, makeMono, monoRead, mdl);
}
TEST(IntegratorTest, TestMonoMutationEquivalenceP6C4) { MonoEquivalence(P6C4); }
@@ -305,9 +315,9 @@ void MultiEquivalence(const string& mdl)
auto multiRead = [](MultiMolecularIntegrator& ai, const MappedRead& mr) {
return ai.AddRead(mr);
};
- MutationEquivalence(333, 2, makeMulti, multiRead, mdl);
- MutationEquivalence(333, 1, makeMulti, multiRead, mdl);
- MutationEquivalence(334, 0, makeMulti, multiRead, mdl);
+ MutationEquivalence(numSamples, 2, makeMulti, multiRead, mdl);
+ MutationEquivalence(numSamples, 1, makeMulti, multiRead, mdl);
+ MutationEquivalence(numSamples, 0, makeMulti, multiRead, mdl);
}
TEST(IntegratorTest, TestMultiMutationEquivalenceP6C4) { MultiEquivalence(P6C4); }
@@ -323,8 +333,8 @@ TEST(IntegratorTest, TestP6C4NoCovAgainstCSharpModel)
auto mdl = P6C4;
MultiMolecularIntegrator ai(tpl, cfg);
- EXPECT_EQ(AddReadResult::SUCCESS,
- ai.AddRead(MappedRead(MkRead("ACGTACGT", snr, mdl, pw), StrandEnum::FORWARD, 0,
+ EXPECT_EQ(State::VALID,
+ ai.AddRead(MappedRead(MkRead("ACGTACGT", snr, mdl, pw), StrandType::FORWARD, 0,
tpl.length(), true, true)));
auto score = [&ai](Mutation&& mut) { return ai.LL(mut) - ai.LL(); };
EXPECT_NEAR(-4.74517984808494, ai.LL(), prec);
@@ -337,4 +347,27 @@ TEST(IntegratorTest, TestP6C4NoCovAgainstCSharpModel)
EXPECT_NEAR(-1.60697112438296, score(Mutation(MutationType::INSERTION, 4, 'G')), prec);
}
+TEST(IntegratorTest, TestFailAddRead)
+{
+ const string tpl = "A";
+ const vector<uint8_t> pw(tpl.length(), 1);
+ auto mdl = P6C4;
+ MultiMolecularIntegrator ai(tpl, cfg);
+
+ EXPECT_EQ(State::TEMPLATE_TOO_SMALL,
+ ai.AddRead(MappedRead(MkRead(tpl, snr, mdl, pw), StrandType::FORWARD, 0, tpl.length(),
+ true, true)));
+}
+
+TEST(IntegratorTest, TestSucessAddRead)
+{
+ const string tpl = "AA";
+ const vector<uint8_t> pw(tpl.length(), 1);
+ auto mdl = P6C4;
+ MultiMolecularIntegrator ai(tpl, cfg);
+
+ EXPECT_EQ(State::VALID, ai.AddRead(MappedRead(MkRead(tpl, snr, mdl, pw), StrandType::FORWARD, 0,
+ tpl.length(), true, true)));
+}
+
} // namespace anonymous
diff --git a/tests/TestMutationEnumerator.cpp b/tests/TestMutationEnumerator.cpp
index 3b7a060..f6f0eee 100644
--- a/tests/TestMutationEnumerator.cpp
+++ b/tests/TestMutationEnumerator.cpp
@@ -44,7 +44,8 @@
#include <string>
#include <vector>
-#include <pacbio/consensus/Integrator.h>
+#include <pacbio/consensus/MonoMolecularIntegrator.h>
+#include <pacbio/consensus/MultiMolecularIntegrator.h>
#include <pacbio/consensus/Mutation.h>
using std::string;
diff --git a/tests/TestPolish.cpp b/tests/TestPolish.cpp
index a9abf94..c90bdf3 100644
--- a/tests/TestPolish.cpp
+++ b/tests/TestPolish.cpp
@@ -41,7 +41,8 @@
#include <string>
#include <tuple>
-#include <pacbio/consensus/Integrator.h>
+#include <pacbio/consensus/MonoMolecularIntegrator.h>
+#include <pacbio/consensus/MultiMolecularIntegrator.h>
#include <pacbio/consensus/Polish.h>
#include <pacbio/consensus/Read.h>
#include <pacbio/consensus/Sequence.h>
@@ -62,9 +63,9 @@ TEST(PolishTest, MonoBasic)
{
MonoMolecularIntegrator ai("GCGTCGT", IntegratorConfig(), snr, "P6-C4");
- ai.AddRead(MappedRead(MkRead("ACGTACGT", snr, "P6-C4"), StrandEnum::FORWARD, 0, 7, true, true));
- ai.AddRead(MappedRead(MkRead("ACGACGT", snr, "P6-C4"), StrandEnum::FORWARD, 0, 7, true, true));
- ai.AddRead(MappedRead(MkRead("ACGACGT", snr, "P6-C4"), StrandEnum::FORWARD, 0, 7, true, true));
+ ai.AddRead(MappedRead(MkRead("ACGTACGT", snr, "P6-C4"), StrandType::FORWARD, 0, 7, true, true));
+ ai.AddRead(MappedRead(MkRead("ACGACGT", snr, "P6-C4"), StrandType::FORWARD, 0, 7, true, true));
+ ai.AddRead(MappedRead(MkRead("ACGACGT", snr, "P6-C4"), StrandType::FORWARD, 0, 7, true, true));
Polish(&ai, PolishConfig());
@@ -75,17 +76,14 @@ TEST(PolishTest, MultiBasic)
{
MultiMolecularIntegrator ai("GCGTCGT", IntegratorConfig());
- ai.AddRead(MappedRead(MkRead("ACGTACGT", snr, "P6-C4"), StrandEnum::FORWARD, 0, 7, true, true));
- ai.AddRead(MappedRead(MkRead(ReverseComplement("ACGACGT"), snr, "P6-C4"), StrandEnum::REVERSE,
+ ai.AddRead(MappedRead(MkRead("ACGTACGT", snr, "P6-C4"), StrandType::FORWARD, 0, 7, true, true));
+ ai.AddRead(MappedRead(MkRead(ReverseComplement("ACGACGT"), snr, "P6-C4"), StrandType::REVERSE,
0, 7, true, true));
- ai.AddRead(MappedRead(MkRead("ACGACGT", snr, "P6-C4"), StrandEnum::FORWARD, 0, 7, true, true));
+ ai.AddRead(MappedRead(MkRead("ACGACGT", snr, "P6-C4"), StrandType::FORWARD, 0, 7, true, true));
- bool polished;
- size_t nTested, nApplied;
+ const auto result = Polish(&ai, PolishConfig());
- std::tie(polished, nTested, nApplied) = Polish(&ai, PolishConfig());
-
- EXPECT_TRUE(polished);
+ EXPECT_TRUE(result.hasConverged);
EXPECT_EQ("ACGACGT", std::string(ai));
}
}
diff --git a/tests/TestTemplate.cpp b/tests/TestTemplate.cpp
index c92f42f..e6c27a1 100644
--- a/tests/TestTemplate.cpp
+++ b/tests/TestTemplate.cpp
@@ -45,6 +45,7 @@
#include <utility>
#include <vector>
+#include <pacbio/consensus/Exceptions.h>
#include <pacbio/consensus/ModelConfig.h>
#include <pacbio/consensus/Mutation.h>
#include <pacbio/consensus/Read.h>
@@ -160,14 +161,14 @@ void TemplateEquivalence(const size_t nsamp, const size_t nvirt, const size_t le
vector<VirtualTemplate> vtpls;
vector<Template> rtpls;
for (size_t j = 0; j < nvirt; ++j) {
- size_t start = 0;
- size_t end = len;
+ int start = 0;
+ int end = len;
// roughly 33% of the time having a spanning read
if (randSpanning(gen)) {
do {
start = randIdx(gen);
end = randIdx(gen);
- } while (start == end);
+ } while (std::abs(start - end) < 2);
if (end < start) swap(start, end);
++end; // increment b by one (end-exclusive)
}
@@ -244,8 +245,13 @@ void TemplateEquivalence(const size_t nsamp, const size_t nvirt, const size_t le
TEST(TemplateTest, TestVirtualTemplateEquivalence)
{
- TemplateEquivalence(1000, 20, 10);
- TemplateEquivalence(500, 20, 30);
+#if EXTENSIVE_TESTING
+ const int numSamples = 1000;
+#else
+ const int numSamples = 10;
+#endif
+ TemplateEquivalence(numSamples, 20, 10);
+ TemplateEquivalence(numSamples / 2, 20, 30);
}
TEST(TemplateTest, TestPinning)
@@ -311,36 +317,10 @@ TEST(TemplateTest, NullTemplate)
const size_t len = tpl.length();
const Mutation mut(MutationType::DELETION, 0, '-');
- Template master(tpl, ModelFactory::Create(mdl, snr), 0, len, true, true);
- VirtualTemplate virt(master, 0, len, false, false);
-
- EXPECT_EQ(len, master.Length());
-
- for (size_t i = 1; i <= len; ++i) {
- master.ApplyMutation(mut);
- EXPECT_EQ(len - i, master.Length());
- virt.ApplyMutation(mut);
- EXPECT_EQ(len - i, virt.Length());
- }
+ ASSERT_THROW(Template("A", ModelFactory::Create(mdl, snr), 0, 1, true, true), TemplateTooSmall);
- {
- const string A(1, 'A');
-
- auto mut_ = master.Mutate(mut);
- EXPECT_FALSE(bool(mut_));
- master.ApplyMutation(mut);
-
- mut_ = master.Mutate(Mutation(MutationType::INSERTION, 0, 'A'));
- ASSERT_TRUE(bool(mut_));
- EXPECT_EQ(A, ToString(master));
- master.Reset();
- master.ApplyMutation(*mut_);
- EXPECT_EQ(A, ToString(master));
- virt.ApplyMutation(*mut_);
- EXPECT_EQ(0, virt.Length());
- }
+ ASSERT_NO_THROW(Template("AA", ModelFactory::Create(mdl, snr), 0, 2, true, true));
}
-
TEST(TemplateTest, P6SiteNormalParameters)
{
@@ -349,10 +329,9 @@ TEST(TemplateTest, P6SiteNormalParameters)
auto mdl = "P6-C4";
Template tester(tpl, ModelFactory::Create(mdl, snr));
auto results = tester.NormalParameters();
-
+
EXPECT_EQ(-9.3915588824261888, results.first);
EXPECT_EQ(26.883352639390957, results.second);
}
-
} // namespace anonymous
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/consensuscore2.git
More information about the debian-med-commit
mailing list