[med-svn] [consensuscore2] 04/09: Imported Upstream version 0.12.0
Afif Elghraoui
afif at moszumanska.debian.org
Mon Jul 4 01:31:26 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository consensuscore2.
commit 4d0299d7135b8b3bc8f740cf9cede0a5d2221666
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Sun Jul 3 16:25:30 2016 -0700
Imported Upstream version 0.12.0
---
CMakeLists.txt | 2 +-
README.md | 2 +-
circle.yml | 8 +-
include/pacbio/consensus/Coverage.h | 3 +-
include/pacbio/consensus/Evaluator.h | 34 ++
include/pacbio/consensus/Exceptions.h | 34 ++
include/pacbio/consensus/Integrator.h | 35 ++
include/pacbio/consensus/ModelConfig.h | 39 +-
include/pacbio/consensus/Mutation.h | 34 ++
include/pacbio/consensus/Polish.h | 46 ++-
include/pacbio/consensus/Read.h | 37 ++
include/pacbio/consensus/Sequence.h | 36 ++
include/pacbio/consensus/Template.h | 75 +++-
include/pacbio/consensus/Version.h.in | 37 ++
include/pacbio/consensus/align/AffineAlignment.h | 2 +-
include/pacbio/consensus/align/AlignConfig.h | 2 +-
include/pacbio/consensus/align/LinearAlignment.h | 2 +-
include/pacbio/consensus/align/PairwiseAlignment.h | 2 +-
include/pacbio/consensus/poa/PoaConsensus.h | 2 +-
include/pacbio/consensus/poa/PoaGraph.h | 2 +-
include/pacbio/consensus/poa/RangeFinder.h | 35 ++
setup.py | 135 +++++--
src/Coverage.cpp | 2 +-
src/Evaluator.cpp | 40 +-
src/EvaluatorImpl.cpp | 40 +-
src/EvaluatorImpl.h | 41 +-
src/Integrator.cpp | 66 +++-
src/ModelConfig.cpp | 34 ++
src/ModelFactory.cpp | 34 ++
src/ModelFactory.h | 34 ++
src/Mutation.cpp | 34 ++
src/Polish.cpp | 159 ++++++--
src/Read.cpp | 49 +++
src/Recursor.cpp | 34 ++
src/Recursor.h | 25 +-
src/Sequence.cpp | 34 ++
src/Template.cpp | 65 +++-
src/align/AffineAlignment.cpp | 2 +-
src/align/AlignConfig.cpp | 8 +-
src/align/LinearAlignment.cpp | 2 +-
src/align/PairwiseAlignment.cpp | 2 +-
src/matrix/ScaledMatrix.cpp | 34 ++
src/matrix/ScaledMatrix.h | 55 ++-
src/matrix/SparseMatrix.cpp | 2 +-
src/matrix/SparseMatrix.h | 2 +-
src/matrix/SparseVector.h | 2 +-
src/models/P6C4NoCovModel.cpp | 76 +++-
src/models/SP1C1BetaNoCovModel.cpp | 174 ---------
src/models/SP1C1PwModel.cpp | 301 ---------------
src/models/S_P1C1Beta_Model.cpp | 239 ++++++++++++
src/models/S_P1C1v1_Model.cpp | 423 +++++++++++++++++++++
src/models/S_P1C1v2_Model.cpp | 422 ++++++++++++++++++++
src/poa/PoaConsensus.cpp | 2 +-
src/poa/PoaGraph.cpp | 2 +-
src/poa/PoaGraphImpl.cpp | 34 ++
src/poa/PoaGraphImpl.h | 34 ++
src/poa/PoaGraphTraversals.cpp | 2 +-
src/poa/RangeFinder.cpp | 34 ++
src/poa/VectorL.h | 34 ++
swig/ConsensusCore2.i.in | 12 +
tests/Mutations.cpp | 34 ++
tests/Mutations.h | 36 ++
tests/RandomDNA.cpp | 43 ++-
tests/RandomDNA.h | 36 ++
tests/TestIntegrator.cpp | 136 ++++---
tests/TestTemplate.cpp | 14 +
66 files changed, 2778 insertions(+), 714 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f88853..77e09a5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@
########################################################################
cmake_policy(SET CMP0048 NEW) # lets us set the version in project()
-project(PacBioConsensus VERSION 0.11.0 LANGUAGES CXX C)
+project(PacBioConsensus VERSION 0.12.0 LANGUAGES CXX C)
cmake_minimum_required(VERSION 3.0)
# set magic variable
diff --git a/README.md b/README.md
index 356039c..9416174 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# ConsensusCore2
+# ConsensusCore2 [![CircleCI](https://circleci.com/gh/PacificBiosciences/ConsensusCore2.svg?style=svg)](https://circleci.com/gh/PacificBiosciences/ConsensusCore2)
`ConsensusCore2` embodies core C++ routines underlying the Arrow HMM
algorithm for PacBio multi-sequence consensus. Arrow is the successor
diff --git a/circle.yml b/circle.yml
index 2017dcb..ec5c2d9 100644
--- a/circle.yml
+++ b/circle.yml
@@ -17,11 +17,11 @@ dependencies:
- mkdir _rev_deps # Create a directory for reverse-dependencies, ie things that depend on us. These are not static, do not cache them.
- pushd _rev_deps ; git clone https://github.com/PacificBiosciences/htslib.git
- pushd _rev_deps ; git clone https://github.com/PacificBiosciences/pbbam.git
- - pushd _rev_deps ; git clone https://github.com/PacificBiosciences/seqan.git
- - pushd _rev_deps ; git clone https://github.com/PacificBiosciences/pbccs.git
+ - pushd _rev_deps ; git clone -b richQuals https://github.com/PacificBiosciences/pbccs.git
+ - pushd _rev_deps ; git clone -b richQuals https://${TOKEN}@github.com/PacificBiosciences/pblaa.git
- pushd _rev_deps ; git clone https://${TOKEN}@github.com/PacificBiosciences/pbsparse.git
- pushd _rev_deps ; git clone https://${TOKEN}@github.com/PacificBiosciences/pbchimera.git
- - pushd _rev_deps ; git clone https://${TOKEN}@github.com/PacificBiosciences/pblaa.git
+ - pushd _rev_deps ; git clone https://github.com/PacificBiosciences/seqan.git
- pip install --upgrade pip
- pip install numpy
- pip install cython
@@ -42,7 +42,7 @@ test:
- pushd _build ; make check
- pushd _build ; rm -rf * ; CC=gcc-4.8 CXX=g++-4.8 $(readlink -f ../_deps/cmake-3.3.0-Linux-x86_64/bin/cmake) -DCMAKE_BUILD_TYPE=Release -DBoost_INCLUDE_DIRS=$(readlink -f ../_deps/boost_1_58_0) ..
- pushd _build ; make clean ; make test_pbconsensus
- - tests/bin/test_pbconsensus --gtest_filter=IntegratorTest.TestLongTemplateTiming
+ - tests/bin/test_pbconsensus --gtest_filter=IntegratorTest.TestLongTemplateTiming*
- CC=gcc-4.8 CXX=g++-4.8 CMAKE_COMMAND=$(readlink -f _deps/cmake-3.3.0-Linux-x86_64/bin/cmake) Boost_INCLUDE_DIRS=$(readlink -f _deps/boost_1_58_0) SWIG_COMMAND=$(readlink -f _deps/swig-3.0.8/bin/swig) VERBOSE=1 pip install --verbose --upgrade --no-deps .
- python -c "import ConsensusCore2 ; print ConsensusCore2.__version__"
post:
diff --git a/include/pacbio/consensus/Coverage.h b/include/pacbio/consensus/Coverage.h
index e83cf79..ed9ae58 100644
--- a/include/pacbio/consensus/Coverage.h
+++ b/include/pacbio/consensus/Coverage.h
@@ -1,5 +1,4 @@
-
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/include/pacbio/consensus/Evaluator.h b/include/pacbio/consensus/Evaluator.h
index 3932710..ad4b7d0 100644
--- a/include/pacbio/consensus/Evaluator.h
+++ b/include/pacbio/consensus/Evaluator.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
diff --git a/include/pacbio/consensus/Exceptions.h b/include/pacbio/consensus/Exceptions.h
index c955979..8179b21 100644
--- a/include/pacbio/consensus/Exceptions.h
+++ b/include/pacbio/consensus/Exceptions.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
diff --git a/include/pacbio/consensus/Integrator.h b/include/pacbio/consensus/Integrator.h
index c48e713..f9a4ef4 100644
--- a/include/pacbio/consensus/Integrator.h
+++ b/include/pacbio/consensus/Integrator.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
@@ -62,6 +96,7 @@ public:
double AvgZScore() const;
std::vector<double> ZScores() const;
+ std::vector<std::pair<double, double>> NormalParameters() const;
virtual void ApplyMutation(const Mutation& mut) = 0;
virtual void ApplyMutations(std::vector<Mutation>* muts) = 0;
diff --git a/include/pacbio/consensus/ModelConfig.h b/include/pacbio/consensus/ModelConfig.h
index f2d74d5..d03a5ce 100644
--- a/include/pacbio/consensus/ModelConfig.h
+++ b/include/pacbio/consensus/ModelConfig.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
@@ -51,7 +85,10 @@ public:
virtual std::unique_ptr<AbstractRecursor> CreateRecursor(
std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const = 0;
virtual std::vector<TemplatePosition> Populate(const std::string& tpl) const = 0;
- virtual double SubstitutionRate(uint8_t prev, uint8_t curr) const = 0;
+ virtual double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
+ virtual double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
+ virtual double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
+
};
} // namespace Consensus
diff --git a/include/pacbio/consensus/Mutation.h b/include/pacbio/consensus/Mutation.h
index f12feac..a965f2b 100644
--- a/include/pacbio/consensus/Mutation.h
+++ b/include/pacbio/consensus/Mutation.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
diff --git a/include/pacbio/consensus/Polish.h b/include/pacbio/consensus/Polish.h
index 28bd472..4894071 100644
--- a/include/pacbio/consensus/Polish.h
+++ b/include/pacbio/consensus/Polish.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
@@ -23,7 +57,17 @@ struct PolishConfig
std::tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& cfg);
-std::vector<int> ConsensusQVs(AbstractIntegrator& ai);
+struct QualityValues
+{
+ std::vector<int> Qualities;
+ std::vector<int> DeletionQVs;
+ std::vector<int> InsertionQVs;
+ std::vector<int> SubstitutionQVs;
+};
+
+std::vector<int> ConsensusQualities(AbstractIntegrator& ai);
+
+QualityValues ConsensusQVs(AbstractIntegrator& ai);
std::vector<Mutation> Mutations(const AbstractIntegrator& ai);
diff --git a/include/pacbio/consensus/Read.h b/include/pacbio/consensus/Read.h
index c1dcb72..b36230a 100644
--- a/include/pacbio/consensus/Read.h
+++ b/include/pacbio/consensus/Read.h
@@ -1,6 +1,41 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
+#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
@@ -36,6 +71,8 @@ struct SNR
inline bool operator!=(const SNR& other) const { return !(*this == other); }
};
+SNR ClampSNR(const SNR& val, const SNR& min, const SNR& max);
+
struct Read
{
Read(const std::string& name, const std::string& seq, const std::vector<uint8_t>& ipd,
diff --git a/include/pacbio/consensus/Sequence.h b/include/pacbio/consensus/Sequence.h
index b4407c5..8fac9d4 100644
--- a/include/pacbio/consensus/Sequence.h
+++ b/include/pacbio/consensus/Sequence.h
@@ -1,3 +1,39 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#pragma once
#include <string>
diff --git a/include/pacbio/consensus/Template.h b/include/pacbio/consensus/Template.h
index 8058d0f..ae126d9 100644
--- a/include/pacbio/consensus/Template.h
+++ b/include/pacbio/consensus/Template.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
@@ -45,8 +79,10 @@ public:
// access model configuration
virtual std::unique_ptr<AbstractRecursor> CreateRecursor(
std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const = 0;
- virtual double SubstitutionRate(uint8_t prev, uint8_t curr) const = 0;
-
+ virtual double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
+ virtual double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
+ virtual double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const = 0;
+
std::pair<double, double> NormalParameters() const;
// a sad but necessary release valve for MonoMolecularIntegrator Length()
@@ -87,7 +123,16 @@ public:
inline std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr,
double scoreDiff) const;
- inline double SubstitutionRate(uint8_t prev, uint8_t curr) const;
+
+ double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ return cfg_->ExpectedLogLikelihoodForMatchEmission(prev, curr, secondMoment);
+ }
+ double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ return cfg_->ExpectedLogLikelihoodForStickEmission(prev, curr, secondMoment);
+ }
+ double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ return cfg_->ExpectedLogLikelihoodForBranchEmission(prev, curr, secondMoment);
+ }
private:
std::unique_ptr<ModelConfig> cfg_;
@@ -113,12 +158,18 @@ public:
inline boost::optional<Mutation> Mutate(const Mutation&);
inline void Reset() {}
bool ApplyMutation(const Mutation& mut);
-
+ double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ return master_.ExpectedLogLikelihoodForMatchEmission(prev, curr, secondMoment);
+ }
+ double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ return master_.ExpectedLogLikelihoodForStickEmission(prev, curr, secondMoment);
+ }
+ double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ return master_.ExpectedLogLikelihoodForBranchEmission(prev, curr, secondMoment);
+ }
inline std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr,
double scoreDiff) const;
- inline double SubstitutionRate(uint8_t prev, uint8_t curr) const;
-
private:
Template const& master_;
};
@@ -159,7 +210,6 @@ bool AbstractTemplate::InRange(const size_t start, const size_t end) const
}
size_t Template::Length() const { return tpl_.size() + mutOff_; }
-
const TemplatePosition& Template::operator[](size_t i) const
{
// if no mutation, or everything up to the base before mutStart_, just return
@@ -176,7 +226,6 @@ const TemplatePosition& Template::operator[](size_t i) const
}
bool Template::IsMutated() const { return mutated_; }
-
size_t VirtualTemplate::Length() const
{
if (IsMutated()) return end_ - start_ + master_.mutOff_;
@@ -192,11 +241,6 @@ std::unique_ptr<AbstractRecursor> Template::CreateRecursor(std::unique_ptr<Abstr
scoreDiff);
}
-double Template::SubstitutionRate(uint8_t prev, uint8_t curr) const
-{
- return cfg_->SubstitutionRate(prev, curr);
-}
-
const TemplatePosition& VirtualTemplate::operator[](const size_t i) const
{
if (master_.IsMutated() && !pinStart_ && master_.mutEnd_ <= start_)
@@ -223,10 +267,5 @@ std::unique_ptr<AbstractRecursor> VirtualTemplate::CreateRecursor(
scoreDiff);
}
-double VirtualTemplate::SubstitutionRate(uint8_t prev, uint8_t curr) const
-{
- return master_.SubstitutionRate(prev, curr);
-}
-
} // namespace Consensus
} // namespace PacBio
diff --git a/include/pacbio/consensus/Version.h.in b/include/pacbio/consensus/Version.h.in
index 60b9a90..6b01b1f 100644
--- a/include/pacbio/consensus/Version.h.in
+++ b/include/pacbio/consensus/Version.h.in
@@ -1,3 +1,40 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#pragma once
+
#include <string>
#define CC2_GIT_SHA1 "@CC2_GIT_SHA1@"
diff --git a/include/pacbio/consensus/align/AffineAlignment.h b/include/pacbio/consensus/align/AffineAlignment.h
index e26f614..d426086 100644
--- a/include/pacbio/consensus/align/AffineAlignment.h
+++ b/include/pacbio/consensus/align/AffineAlignment.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/include/pacbio/consensus/align/AlignConfig.h b/include/pacbio/consensus/align/AlignConfig.h
index e338bd9..8afcb1d 100644
--- a/include/pacbio/consensus/align/AlignConfig.h
+++ b/include/pacbio/consensus/align/AlignConfig.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/include/pacbio/consensus/align/LinearAlignment.h b/include/pacbio/consensus/align/LinearAlignment.h
index 48d2fe3..b44a595 100644
--- a/include/pacbio/consensus/align/LinearAlignment.h
+++ b/include/pacbio/consensus/align/LinearAlignment.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/include/pacbio/consensus/align/PairwiseAlignment.h b/include/pacbio/consensus/align/PairwiseAlignment.h
index bc901cb..d978287 100644
--- a/include/pacbio/consensus/align/PairwiseAlignment.h
+++ b/include/pacbio/consensus/align/PairwiseAlignment.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/include/pacbio/consensus/poa/PoaConsensus.h b/include/pacbio/consensus/poa/PoaConsensus.h
index eeac806..e36c14e 100644
--- a/include/pacbio/consensus/poa/PoaConsensus.h
+++ b/include/pacbio/consensus/poa/PoaConsensus.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/include/pacbio/consensus/poa/PoaGraph.h b/include/pacbio/consensus/poa/PoaGraph.h
index 89febf7..1bad05e 100644
--- a/include/pacbio/consensus/poa/PoaGraph.h
+++ b/include/pacbio/consensus/poa/PoaGraph.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/include/pacbio/consensus/poa/RangeFinder.h b/include/pacbio/consensus/poa/RangeFinder.h
index af3b80c..5c2bb17 100644
--- a/include/pacbio/consensus/poa/RangeFinder.h
+++ b/include/pacbio/consensus/poa/RangeFinder.h
@@ -1,3 +1,38 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
#pragma once
#include <cstddef>
diff --git a/setup.py b/setup.py
index 422f076..48807d3 100644
--- a/setup.py
+++ b/setup.py
@@ -5,9 +5,11 @@ import os
import os.path
import sys
+from copy import copy
from distutils import sysconfig
from distutils.command.clean import clean as dclean
from distutils.errors import CompileError
+from distutils.util import strtobool
from setuptools import setup
from setuptools.dist import Distribution
from shutil import rmtree
@@ -18,49 +20,122 @@ thisDir = os.path.dirname(os.path.realpath(__file__))
swigLib = os.path.join(thisDir, "swig", "lib")
modName = "_ConsensusCore2.so"
+def which(program, env=os.environ):
+ def isExe(fpath):
+ return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+ fpath, fname = os.path.split(program)
+ if fpath:
+ if isExe(program):
+ return program
+ else:
+ for path in env.get("PATH", "").split(os.pathsep):
+ path = path.strip("\"")
+ exe = os.path.join(path, program)
+ if isExe(exe):
+ return exe
+ return None
+
+
class MyClean(dclean):
def run(self):
rmtree(swigLib, ignore_errors=True)
dclean.run(self)
+
class MyDist(Distribution):
def has_ext_modules(self):
return True
+
+class CMake(object):
+ def __init__(self, env=os.environ, verbose=False):
+ cmake = env.get("CMAKE_COMMAND", "cmake") or which("cmake", env)
+ if cmake is None:
+ raise RuntimeError("cannot find `cmake` command, "
+ "please populate CMAKE_COMMAND environment variable")
+ self.build = ["make"]
+ self.env = env
+ self.configure = [cmake]
+ self.definitions = dict()
+ self.generator = None
+
+ rpath = self.env.get("CMAKE_SKIP_RPATH", "")
+ try:
+ rpath = bool(strtobool(rpath))
+ except ValueError:
+ rpath = False
+ if rpath:
+ self.add_definition("CMAKE_SKIP_RPATH", "TRUE")
+
+ verbose = self.env.get("VERBOSE", "{0}".format(verbose))
+ try:
+ verbose = bool(strtobool(verbose))
+ except ValueError:
+ verbose = False
+ self.verbose = verbose
+
+ def add_definition(self, key, value):
+ self.definitions[key] = value
+
+ def add_definition_from_env(self, key, default=None):
+ value = env.get(key, default)
+ if value:
+ self.add_definition(key, value)
+
+ def set_build_type(self, buildType):
+ if buildType not in set(["Release", "Debug", "RelWithDebInfo"]):
+ raise ValueError("CMAKE_BUILD_TYPE must be in (Release, Debug, RelWithDebInfo)")
+ self.add_definition("CMAKE_BUILD_TYPE", buildType)
+
+ def set_generator(self, gen):
+ if gen not in set(["Default", "Ninja"]):
+ raise ValueError("valid generators must be in (default, ninja)")
+ if gen == "Default":
+ gen = None
+ self.build = ["make"]
+ if self.verbose:
+ self.build.append("VERBOSE=1")
+ elif gen == "Ninja":
+ self.ninja = self.env.get("NINJA_COMMAND", None) or which("ninja", self.env)
+ if not self.ninja:
+ raise RuntimeError("cannot find `ninja` command, "
+ "please populate NINJA_COMMAND environment variable")
+ self.build = [self.ninja]
+ if self.verbose:
+ self.build.append("-v")
+ self.generator = gen
+
+ def __call__(self, directory):
+ configure = copy(self.configure)
+ if self.generator:
+ configure.append("-G{0}".format(self.generator))
+ for k, v in self.definitions.iteritems():
+ configure.append("-D{0}={1}".format(k, v))
+ configure.append(directory)
+ print("Configuring with command `{0}`".format(" ".join(configure)), file=sys.stderr)
+ retcode = Popen(configure, cwd=buildDir, env=env).wait()
+ if retcode != 0:
+ raise RuntimeError("failed to configure the project")
+ print("Building with command `{0}`".format(" ".join(self.build)), file=sys.stderr)
+ retcode = Popen(self.build, cwd=buildDir).wait()
+ if retcode != 0:
+ raise RuntimeError("failed to build the project")
+
+
+# build the library if we haven't already
if not os.path.exists(os.path.join(swigLib, modName)):
buildDir = mkdtemp()
try:
env = os.environ.copy()
- cmake = env.get("CMAKE_COMMAND", "cmake")
- boost = env.get("Boost_INCLUDE_DIRS", None)
- pyinc = env.get("PYTHON_INCLUDE_DIRS", None)
- rpath = env.get("CMAKE_SKIP_RPATH", None)
- swig = env.get("SWIG_COMMAND", None)
- cmds = [cmake,
- "-DCMAKE_BUILD_TYPE=RelWithDebInfo",
- "-DPYTHON_SWIG=1",
- "-DPYTHON_EXECUTABLE={0}".format(sys.executable)]
- if boost is not None:
- cmds.append("-DBoost_INCLUDE_DIRS={0}".format(boost))
- if pyinc is not None:
- cmds.append("-DPYTHON_INCLUDE_DIRS={0}".format(pyinc))
- if rpath is not None:
- cmds.append("-DCMAKE_SKIP_RPATH=TRUE")
- if swig is not None:
- cmds.append("-DSWIG_COMMAND={0}".format(swig))
- cmds.append(thisDir)
- print("Running command {0}".format(" ".join(cmds)), file=sys.stderr)
- retcode = Popen(cmds, cwd=buildDir, env=env).wait()
- if (retcode != 0):
- raise RuntimeError("failed to configure the ConsensusCore2 with CMake!")
- verbose = env.get("VERBOSE", None)
- cmds = ["make"]
- if verbose is not None:
- cmds.append("VERBOSE=1")
- print("Running command {0}".format(" ".join(cmds)), file=sys.stderr)
- retcode = Popen(cmds, cwd=buildDir).wait()
- if (retcode != 0):
- raise RuntimeError("failed to compile or link ConsensusCore2!")
+ cmake = CMake(env)
+ cmake.add_definition_from_env("Boost_INCLUDE_DIRS")
+ cmake.add_definition_from_env("PYTHON_INCLUDE_DIRS")
+ cmake.add_definition_from_env("SWIG_COMMAND")
+ cmake.add_definition("PYTHON_SWIG", "1")
+ cmake.add_definition("PYTHON_EXECUTABLE", sys.executable)
+ cmake.set_build_type("RelWithDebInfo")
+ cmake.set_generator("Ninja" if which("ninja", env) else "Default")
+ cmake(thisDir)
finally:
rmtree(buildDir)
diff --git a/src/Coverage.cpp b/src/Coverage.cpp
index f0057b8..eefdf74 100644
--- a/src/Coverage.cpp
+++ b/src/Coverage.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/Evaluator.cpp b/src/Evaluator.cpp
index a3508a1..98e8d59 100644
--- a/src/Evaluator.cpp
+++ b/src/Evaluator.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <cmath>
#include <memory>
@@ -64,7 +98,6 @@ Evaluator& Evaluator::operator=(Evaluator&& eval)
}
Evaluator::~Evaluator() {}
-
size_t Evaluator::Length() const
{
if (impl_) return impl_->recursor_->tpl_->Length();
@@ -78,7 +111,6 @@ StrandEnum Evaluator::Strand() const
}
Evaluator::operator bool() const { return state_ == EvaluatorState::VALID; }
-
std::string Evaluator::ReadName() const
{
if (impl_) {
@@ -129,7 +161,6 @@ bool Evaluator::ApplyMutations(std::vector<Mutation>* muts)
}
EvaluatorState Evaluator::Status() const { return state_; }
-
void Evaluator::Release()
{
state_ = EvaluatorState::DISABLED;
@@ -137,7 +168,8 @@ void Evaluator::Release()
}
// TODO: this is no longer a simple "invariants check" function---a CheckInvariants function
-// should be const and only do asserts on whether the state is valid. Rename or refactor this. The state in this class
+// should be const and only do asserts on whether the state is valid. Rename or refactor this. The
+// state in this class
// is a bit too complex for my taste.
void Evaluator::CheckInvariants()
{
diff --git a/src/EvaluatorImpl.cpp b/src/EvaluatorImpl.cpp
index a5ea083..e93aaf8 100644
--- a/src/EvaluatorImpl.cpp
+++ b/src/EvaluatorImpl.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <cmath>
@@ -59,11 +93,7 @@ EvaluatorImpl::EvaluatorImpl(std::unique_ptr<AbstractTemplate>&& tpl, const Mapp
recursor_->FillAlphaBeta(alpha_, beta_);
}
-std::string EvaluatorImpl::ReadName() const
-{
- return recursor_->read_.Name;
-}
-
+std::string EvaluatorImpl::ReadName() const { return recursor_->read_.Name; }
double EvaluatorImpl::LL(const Mutation& mut_)
{
// apply the virtual mutation
diff --git a/src/EvaluatorImpl.h b/src/EvaluatorImpl.h
index 394a01f..e64c332 100644
--- a/src/EvaluatorImpl.h
+++ b/src/EvaluatorImpl.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
@@ -38,9 +72,10 @@ private:
void Recalculate();
private:
- std::unique_ptr<AbstractRecursor> recursor_; // TODO: does this need to be a pointer? is it always non-null?
- // are we making it a UP just so we can do a fwd decl and still have
- // RAII semantics?
+ std::unique_ptr<AbstractRecursor>
+ recursor_; // TODO: does this need to be a pointer? is it always non-null?
+ // are we making it a UP just so we can do a fwd decl and still have
+ // RAII semantics?
ScaledMatrix alpha_;
ScaledMatrix beta_;
ScaledMatrix extendBuffer_;
diff --git a/src/Integrator.cpp b/src/Integrator.cpp
index b46c614..6a5be4e 100644
--- a/src/Integrator.cpp
+++ b/src/Integrator.cpp
@@ -1,6 +1,41 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <cassert>
#include <cmath>
+#include <limits>
#include <utility>
#include <pacbio/consensus/Integrator.h>
@@ -11,6 +46,8 @@
namespace PacBio {
namespace Consensus {
+constexpr double NaN = -std::numeric_limits<double>::quiet_NaN();
+
std::set<std::string> SupportedChemistries() { return ModelFactory::SupportedChemistries(); }
IntegratorConfig::IntegratorConfig(const double minZScore, const double scoreDiff)
: MinZScore{minZScore}, ScoreDiff{scoreDiff}
@@ -80,6 +117,10 @@ std::vector<double> AbstractIntegrator::LLs(const Mutation& fwdMut)
lls.push_back(eval.LL(fwdMut));
else if (eval.Strand() == StrandEnum::REVERSE)
lls.push_back(eval.LL(revMut));
+ else {
+ // inactive reads get a strand of "unmapped"
+ lls.push_back(NaN);
+ }
}
return lls;
}
@@ -88,7 +129,10 @@ std::vector<double> AbstractIntegrator::LLs() const
{
std::vector<double> lls;
for (auto& eval : evals_) {
- lls.push_back(eval.LL());
+ if (eval)
+ lls.push_back(eval.LL());
+ else
+ lls.push_back(NaN);
}
return lls;
}
@@ -132,6 +176,20 @@ std::vector<double> AbstractIntegrator::ZScores() const
}
return results;
}
+std::vector<std::pair<double, double>> AbstractIntegrator::NormalParameters() const
+{
+ std::vector<std::pair<double, double>> results;
+ results.reserve(evals_.size());
+ for (const auto& eval : evals_) {
+ if (eval) {
+ results.emplace_back(eval.NormalParameters());
+ } else {
+ auto nan = std::numeric_limits<double>::quiet_NaN();
+ results.emplace_back(std::pair<double, double>(nan, nan));
+ }
+ }
+ return results;
+}
Mutation AbstractIntegrator::ReverseComplement(const Mutation& mut) const
{
@@ -171,9 +229,9 @@ AddReadResult MonoMolecularIntegrator::AddRead(const MappedRead& read)
else if (read.Strand == StrandEnum::REVERSE)
return AbstractIntegrator::AddRead(
- std::unique_ptr<AbstractTemplate>(
- new VirtualTemplate(revTpl_, TemplateLength() - read.TemplateEnd,
- TemplateLength() - read.TemplateStart, read.PinEnd, read.PinStart)),
+ std::unique_ptr<AbstractTemplate>(new VirtualTemplate(
+ revTpl_, TemplateLength() - read.TemplateEnd, TemplateLength() - read.TemplateStart,
+ read.PinEnd, read.PinStart)),
read);
throw std::invalid_argument("read is unmapped!");
diff --git a/src/ModelConfig.cpp b/src/ModelConfig.cpp
index 186c6ae..c5f74a0 100644
--- a/src/ModelConfig.cpp
+++ b/src/ModelConfig.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <cassert>
#include <cstdint>
diff --git a/src/ModelFactory.cpp b/src/ModelFactory.cpp
index 71bedde..40ebd68 100644
--- a/src/ModelFactory.cpp
+++ b/src/ModelFactory.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <stdexcept>
#include <utility>
diff --git a/src/ModelFactory.h b/src/ModelFactory.h
index f404d55..3ec3656 100644
--- a/src/ModelFactory.h
+++ b/src/ModelFactory.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
diff --git a/src/Mutation.cpp b/src/Mutation.cpp
index 5675f07..d29daf7 100644
--- a/src/Mutation.cpp
+++ b/src/Mutation.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <algorithm>
#include <cassert>
diff --git a/src/Polish.cpp b/src/Polish.cpp
index 6e4f46a..3a5ec5b 100644
--- a/src/Polish.cpp
+++ b/src/Polish.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <algorithm>
#include <cmath>
@@ -15,6 +49,8 @@
#include <pacbio/consensus/Integrator.h>
#include <pacbio/consensus/Polish.h>
+using namespace std;
+
namespace PacBio {
namespace Consensus {
@@ -26,7 +62,7 @@ PolishConfig::PolishConfig(const size_t iterations, const size_t separation,
{
}
-void Mutations(std::vector<Mutation>* muts, const AbstractIntegrator& ai, const size_t start,
+void Mutations(vector<Mutation>* muts, const AbstractIntegrator& ai, const size_t start,
const size_t end)
{
constexpr auto bases = "ACGT";
@@ -65,27 +101,27 @@ void Mutations(std::vector<Mutation>* muts, const AbstractIntegrator& ai, const
if (bases[j] != last) muts->emplace_back(Mutation(MutationType::INSERTION, end, bases[j]));
}
-std::vector<Mutation> Mutations(const AbstractIntegrator& ai, const size_t start, const size_t end)
+vector<Mutation> Mutations(const AbstractIntegrator& ai, const size_t start, const size_t end)
{
- std::vector<Mutation> muts;
+ vector<Mutation> muts;
Mutations(&muts, ai, start, end);
return muts;
}
-std::vector<Mutation> Mutations(const AbstractIntegrator& ai)
+vector<Mutation> Mutations(const AbstractIntegrator& ai)
{
return Mutations(ai, 0, ai.TemplateLength());
}
-std::vector<Mutation> BestMutations(std::list<ScoredMutation>* scoredMuts, const size_t separation)
+vector<Mutation> BestMutations(list<ScoredMutation>* scoredMuts, const size_t separation)
{
- std::vector<Mutation> result;
+ vector<Mutation> result;
// TODO handle 0-separation correctly
- if (separation == 0) throw std::invalid_argument("nonzero separation required");
+ if (separation == 0) throw invalid_argument("nonzero separation required");
while (!scoredMuts->empty()) {
- const auto& mut = *std::max_element(scoredMuts->begin(), scoredMuts->end(),
+ const auto& mut = *max_element(scoredMuts->begin(), scoredMuts->end(),
ScoredMutation::ScoreComparer);
result.emplace_back(mut);
@@ -100,24 +136,24 @@ std::vector<Mutation> BestMutations(std::list<ScoredMutation>* scoredMuts, const
return result;
}
-std::vector<Mutation> NearbyMutations(std::vector<Mutation>* applied,
- std::vector<Mutation>* centers, const AbstractIntegrator& ai,
+vector<Mutation> NearbyMutations(vector<Mutation>* applied,
+ vector<Mutation>* centers, const AbstractIntegrator& ai,
const size_t neighborhood)
{
const size_t len = ai.TemplateLength();
- const auto clamp = [len](const int i) { return std::max(0, std::min<int>(len, i)); };
+ const auto clamp = [len](const int i) { return max(0, min<int>(len, i)); };
- std::vector<Mutation> result;
+ vector<Mutation> result;
if (centers->empty()) return result;
- std::sort(applied->begin(), applied->end(), Mutation::SiteComparer);
- std::sort(centers->begin(), centers->end(), Mutation::SiteComparer);
+ sort(applied->begin(), applied->end(), Mutation::SiteComparer);
+ sort(centers->begin(), centers->end(), Mutation::SiteComparer);
const auto mutRange = [clamp, neighborhood](const Mutation& mut, const int diff) {
const int start = diff + mut.Start() - neighborhood;
const int end = diff + mut.End() + neighborhood;
- return std::pair<size_t, size_t>(clamp(start), clamp(end));
+ return pair<size_t, size_t>(clamp(start), clamp(end));
};
// find the ranges
@@ -128,7 +164,7 @@ std::vector<Mutation> NearbyMutations(std::vector<Mutation>* applied,
for (; ait != applied->cend() && ait->End() <= cit->Start(); ++ait)
lengthDiff += ait->LengthDiff();
- std::vector<std::pair<size_t, size_t>> ranges = {mutRange(*cit, lengthDiff)};
+ vector<pair<size_t, size_t>> ranges = {mutRange(*cit, lengthDiff)};
size_t currEnd = ranges.back().second;
// increment to the next centerpoint and continue
@@ -138,13 +174,13 @@ std::vector<Mutation> NearbyMutations(std::vector<Mutation>* applied,
for (; ait != applied->cend() && ait->End() <= cit->Start(); ++ait)
lengthDiff += ait->LengthDiff();
- std::tie(nextStart, nextEnd) = mutRange(*cit, lengthDiff);
+ tie(nextStart, nextEnd) = mutRange(*cit, lengthDiff);
// if the next range touches the last one, just extend the last one
if (nextStart <= currEnd)
ranges.back().second = nextEnd;
else {
- ranges.emplace_back(std::make_pair(nextStart, nextEnd));
+ ranges.emplace_back(make_pair(nextStart, nextEnd));
currEnd = nextEnd;
}
}
@@ -155,12 +191,12 @@ std::vector<Mutation> NearbyMutations(std::vector<Mutation>* applied,
return result;
}
-std::tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& cfg)
+tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConfig& cfg)
{
- std::vector<Mutation> muts = Mutations(*ai);
- std::hash<std::string> hashFn;
+ vector<Mutation> muts = Mutations(*ai);
+ hash<string> hashFn;
size_t oldTpl = hashFn(*ai);
- std::set<size_t> history = {oldTpl};
+ set<size_t> history = {oldTpl};
size_t nTested = 0;
size_t nApplied = 0;
@@ -168,7 +204,7 @@ std::tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConf
// find the best mutations given our parameters
{
const double LL = ai->LL();
- std::list<ScoredMutation> scoredMuts;
+ list<ScoredMutation> scoredMuts;
for (const auto& mut : muts) {
const double ll = ai->LL(mut);
@@ -181,18 +217,25 @@ std::tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConf
}
// convergence!!
- if (muts.empty()) return std::make_tuple(true, nTested, nApplied);
+ if (muts.empty()) return make_tuple(true, nTested, nApplied);
const size_t newTpl = hashFn(ApplyMutations(*ai, &muts));
if (history.find(newTpl) != history.end()) {
- // cyclic behavior detected! apply just the single best mutation
+ /* Cyclic behavior guard - Dave A. found some edge cases where the
+ template was mutating back to an earlier version. This is a bad
+ and should be rare. He found that by applying the single best
+ mutation you could avoid the loop. (That is if adding Muts X + Y
+ made removing muts X + Y beneficial, then you can break that
+ inifinite loop by just applying X or Y, as presumably this removes
+ the interaction between them that leads to the cycling behavior.
+ This step is just a heuristic work around that was found. */
ai->ApplyMutation(muts.front());
oldTpl = hashFn(*ai);
++nApplied;
// get the mutations for the next round
- std::vector<Mutation> applied = {muts.front()};
+ vector<Mutation> applied = {muts.front()};
muts = NearbyMutations(&applied, &muts, *ai, cfg.MutationNeighborhood);
} else {
ai->ApplyMutations(&muts);
@@ -207,22 +250,31 @@ std::tuple<bool, size_t, size_t> Polish(AbstractIntegrator* ai, const PolishConf
history.insert(oldTpl);
}
- return std::make_tuple(false, nTested, nApplied);
+ return make_tuple(false, nTested, nApplied);
}
+namespace { // anonymous
+
int ProbabilityToQV(double probability)
{
if (probability < 0.0 || probability > 1.0)
- throw std::invalid_argument("invalid value: probability not in [0,1]");
+ throw invalid_argument("invalid value: probability not in [0,1]");
else if (probability == 0.0)
- probability = std::numeric_limits<double>::min();
+ probability = numeric_limits<double>::min();
return static_cast<int>(round(-10.0 * log10(probability)));
}
-std::vector<int> ConsensusQVs(AbstractIntegrator& ai)
+inline int ScoreSumToQV(const double scoreSum)
+{
+ return ProbabilityToQV(1.0 - 1.0 / (1.0 + scoreSum));
+}
+} // anonymous namespace
+
+vector<int> ConsensusQualities(AbstractIntegrator& ai)
{
- std::vector<int> result;
+ vector<int> quals;
+ quals.reserve(ai.TemplateLength());
const double LL = ai.LL();
for (size_t i = 0; i < ai.TemplateLength(); ++i) {
double scoreSum = 0.0;
@@ -231,12 +283,49 @@ std::vector<int> ConsensusQVs(AbstractIntegrator& ai)
if (m.Start() > i) continue;
// TODO (lhepler): this is dumb, but untestable mutations,
// aka insertions at ends, cause all sorts of weird issues
- double score = ai.LL(m) - LL;
- if (score < 0.0) scoreSum += exp(score);
+ const double score = ai.LL(m) - LL;
+ // this really should never happen
+ if (score >= 0.0) continue;
+ scoreSum += exp(score);
}
- result.push_back(ProbabilityToQV(1.0 - 1.0 / (1.0 + scoreSum)));
+ quals.emplace_back(ScoreSumToQV(scoreSum));
}
- return result;
+ return quals;
+}
+
+QualityValues ConsensusQVs(AbstractIntegrator& ai)
+{
+ const size_t len = ai.TemplateLength();
+ vector<int> quals, delQVs, insQVs, subQVs;
+ quals.reserve(len);
+ delQVs.reserve(len);
+ insQVs.reserve(len);
+ subQVs.reserve(len);
+ const double LL = ai.LL();
+ for (size_t i = 0; i < len; ++i) {
+ double qualScoreSum = 0.0, delScoreSum = 0.0, insScoreSum = 0.0, subScoreSum = 0.0;
+ for (const auto& m : Mutations(ai, i, i + 1)) {
+ // skip mutations that start beyond the current site (e.g. trailing insertions)
+ if (m.Start() > i) continue;
+ const double score = ai.LL(m) - LL;
+ // this really should never happen
+ if (score >= 0.0) continue;
+ const double expScore = exp(score);
+ qualScoreSum += expScore;
+ if (m.IsDeletion())
+ delScoreSum += expScore;
+ else if (m.IsInsertion())
+ insScoreSum += expScore;
+ else if (m.IsSubstitution())
+ subScoreSum += expScore;
+ }
+ quals.emplace_back(ScoreSumToQV(qualScoreSum));
+ delQVs.emplace_back(ScoreSumToQV(delScoreSum));
+ insQVs.emplace_back(ScoreSumToQV(insScoreSum));
+ subQVs.emplace_back(ScoreSumToQV(subScoreSum));
+ }
+ // TODO(lhepler): discuss InsQV being len + 1 to capture trailing insertion
+ return QualityValues{move(quals), move(delQVs), move(insQVs), move(subQVs)};
}
} // namespace Consensus
diff --git a/src/Read.cpp b/src/Read.cpp
index d8718e6..5f8415f 100644
--- a/src/Read.cpp
+++ b/src/Read.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <cassert>
@@ -13,6 +47,21 @@ SNR::SNR(const std::vector<double>& snrs) : A(snrs[0]), C(snrs[1]), G(snrs[2]),
assert(snrs.size() == 4);
}
+namespace {
+ double clamp(double val, double lo, double hi)
+ {
+ return std::min(std::max(val, lo), hi);
+ }
+} // namespace
+
+SNR ClampSNR(const SNR& val, const SNR& lo, const SNR& hi)
+{
+ return SNR(clamp(val.A, lo.A, hi.A),
+ clamp(val.C, lo.C, hi.C),
+ clamp(val.G, lo.G, hi.G),
+ clamp(val.T, lo.T, hi.T));
+}
+
Read::Read(const std::string& name, const std::string& seq, const std::vector<uint8_t>& ipd,
const std::vector<uint8_t>& pw, const SNR& snr, const std::string& model)
: Name{name}, Seq{seq}, IPD{ipd}, PulseWidth{pw}, SignalToNoise{snr}, Model{model}
diff --git a/src/Recursor.cpp b/src/Recursor.cpp
index 2ed0419..dcd47a6 100644
--- a/src/Recursor.cpp
+++ b/src/Recursor.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include "Recursor.h"
diff --git a/src/Recursor.h b/src/Recursor.h
index 2cb8a20..63fe26d 100644
--- a/src/Recursor.h
+++ b/src/Recursor.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -189,7 +189,7 @@ void Recursor<Derived>::FillAlpha(const M& guide, M& alpha) const
// Initial condition, we always start with a match
alpha.StartEditingColumn(0, 0, 1);
alpha.Set(0, 0, 1.0);
- alpha.FinishEditingColumn(0, 0, 1);
+ alpha.FinishEditingColumn<false>(0, 0, 1);
// End initial conditions
size_t hintBeginRow = 1, hintEndRow = 1;
@@ -281,7 +281,7 @@ void Recursor<Derived>::FillAlpha(const M& guide, M& alpha) const
hintBeginRow = i;
// Don't rescale until we finish updating the hint.
- alpha.FinishEditingColumn(j, beginRow, endRow);
+ alpha.FinishEditingColumn<true>(j, beginRow, endRow, maxScore);
}
/* Now fill out the probability in the last pinned position.
@@ -297,7 +297,7 @@ void Recursor<Derived>::FillAlpha(const M& guide, M& alpha) const
Derived::EmissionPr(MoveType::MATCH, emissions_[I - 1], prevTplBase, currTplBase);
alpha.StartEditingColumn(J, I, I + 1);
alpha.Set(I, J, likelihood);
- alpha.FinishEditingColumn(J, I, I + 1);
+ alpha.FinishEditingColumn<false>(J, I, I + 1);
}
}
@@ -313,7 +313,7 @@ void Recursor<Derived>::FillBeta(const M& guide, M& beta) const
// Setup initial condition, at the end we are one
beta.StartEditingColumn(J, I, I + 1);
beta.Set(I, J, 1.0);
- beta.FinishEditingColumn(J, I, I + 1);
+ beta.FinishEditingColumn<false>(J, I, I + 1);
// Totally arbitray decision here...
size_t hintBeginRow = I, hintEndRow = I;
@@ -393,7 +393,7 @@ void Recursor<Derived>::FillBeta(const M& guide, M& beta) const
hintEndRow = i;
// Don't rescale until we update the hints
- beta.FinishEditingColumn(j, beginRow, endRow);
+ beta.FinishEditingColumn<true>(j, beginRow, endRow, maxScore);
}
/* Now to fill the top row which must be a match
@@ -404,7 +404,7 @@ void Recursor<Derived>::FillBeta(const M& guide, M& beta) const
auto match_emission_prob =
Derived::EmissionPr(MoveType::MATCH, emissions_[0], kDefaultBase, (*tpl_)[0].Idx);
beta.Set(0, 0, match_emission_prob * beta(1, 1));
- beta.FinishEditingColumn(0, 0, 1);
+ beta.FinishEditingColumn<false>(0, 0, 1);
}
}
@@ -499,7 +499,7 @@ void Recursor<Derived>::ExtendAlpha(const M& alpha, size_t beginColumn, M& ext,
int i;
double score = 0.0;
-
+ double max_score = score;
// Grab values that will be useful for the whole column
auto currTplParams = (*tpl_)[j - 1];
auto currTplBase = currTplParams.Idx;
@@ -554,9 +554,10 @@ void Recursor<Derived>::ExtendAlpha(const M& alpha, size_t beginColumn, M& ext,
score = Combine(score, thisMoveScore);
}
ext.Set(i, extCol, score);
+ if (score > max_score) max_score = score;
}
assert(i == endRow);
- ext.FinishEditingColumn(extCol, beginRow, endRow);
+ ext.FinishEditingColumn<true>(extCol, beginRow, endRow, max_score);
}
}
@@ -628,6 +629,7 @@ void Recursor<Derived>::ExtendBeta(const M& beta, size_t lastColumn, M& ext, int
TemplatePosition currTplParams = kDefaultTplPos;
if (jp > 0) currTplParams = (*tpl_)[jp - 1];
+ double max_score = 0.0;
for (int i = endRow - 1; i >= beginRow; i--) {
uint8_t nextReadEm = 4; // 'N'
@@ -680,8 +682,9 @@ void Recursor<Derived>::ExtendBeta(const M& beta, size_t lastColumn, M& ext, int
}
ext.Set(i, extCol, score);
+ if (score > max_score) max_score = score;
}
- ext.FinishEditingColumn(extCol, beginRow, endRow);
+ ext.FinishEditingColumn<true>(extCol, beginRow, endRow, max_score);
}
// fill out the (0, 0) entry of the matrix
@@ -691,7 +694,7 @@ void Recursor<Derived>::ExtendBeta(const M& beta, size_t lastColumn, M& ext, int
const double match_emission_prob =
Derived::EmissionPr(MoveType::MATCH, emissions_[0], kDefaultBase, (*tpl_)[0].Idx);
ext.Set(0, 0, match_trans_prob * match_emission_prob);
- ext.FinishEditingColumn(0, 0, 1);
+ ext.FinishEditingColumn<false>(0, 0, 1);
}
}
diff --git a/src/Sequence.cpp b/src/Sequence.cpp
index 514743c..0c0d2a7 100644
--- a/src/Sequence.cpp
+++ b/src/Sequence.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <stdexcept>
#include <string>
diff --git a/src/Template.cpp b/src/Template.cpp
index f90f2a0..a457b0f 100644
--- a/src/Template.cpp
+++ b/src/Template.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <cassert>
#include <cmath>
@@ -133,28 +167,22 @@ size_t AbstractTemplate::TrueLength() const { return end_ - start_; }
*/
std::pair<double, double> AbstractTemplate::SiteNormalParameters(const size_t i) const
{
- // TODO (ndelaney): This probably needs to be matched to the specific model being used.
- // std::log(1.0/3);
- constexpr double lgThird = -1.0986122886681098;
- const uint8_t prev = (i == 0) ? 0 : (*this)[i - 1].Idx; // default base : A
const auto params = (*this)[i];
-
- // get the substitution rate here:
- const double eps = SubstitutionRate(prev, params.Idx);
-
+ // TODO: This is a bit unsafe, we should understand context and have this conversion in one
+ // place (or really just move directly to using .Idx without bit shifting
+ const uint8_t prev = (i == 0) ? 0 : (*this)[i - 1].Idx; // default base : A
+ const uint8_t curr = params.Idx;
+
const double p_m = params.Match, l_m = std::log(p_m), l2_m = l_m * l_m;
const double p_d = params.Deletion, l_d = std::log(p_d), l2_d = l_d * l_d;
const double p_b = params.Branch, l_b = std::log(p_b), l2_b = l_b * l_b;
const double p_s = params.Stick, l_s = std::log(p_s), l2_s = l_s * l_s;
-
- const double lgeps = std::log(eps);
- const double lg1minusEps = std::log(1.0 - eps);
-
+
// First moment expectations (zero terms used for clarity)
- const double E_M = (1.0 - eps) * lg1minusEps + eps * (lgThird + lgeps);
+ const double E_M = ExpectedLogLikelihoodForMatchEmission(prev, curr, false);
const double E_D = 0.0;
- const double E_B = 0.0;
- const double E_S = lgThird;
+ const double E_B = ExpectedLogLikelihoodForBranchEmission(prev, curr, false);
+ const double E_S = ExpectedLogLikelihoodForStickEmission(prev, curr, false);
// Calculate first moment
const double E_MD = (l_m + E_M) * p_m / (p_m + p_d) + (l_d + E_D) * p_d / (p_m + p_d);
@@ -164,12 +192,13 @@ std::pair<double, double> AbstractTemplate::SiteNormalParameters(const size_t i)
// Calculate second momment
// Key expansion used repeatedly here: (A + B)^2 = A^2 + 2AB + B^2
- const double E2_M = (1.0 - eps) * pow(lg1minusEps, 2.0) + eps * pow(lgThird + lgeps, 2.0);
+ const double E2_M = ExpectedLogLikelihoodForMatchEmission(prev, curr, true);
+ const double E2_S = ExpectedLogLikelihoodForStickEmission(prev, curr, true);
+ const double E2_B = ExpectedLogLikelihoodForBranchEmission(prev, curr, true);
const double E2_MD =
(l2_m + 2 * l_m * E_M + E2_M) * p_m / (p_m + p_d) + l2_d * p_d / (p_m + p_d);
- const double E2_S = pow(lgThird, 2.0);
const double E2_I =
- l2_b * p_b / (p_b + p_s) + (l2_s + 2 * E_S * l_s + E2_S) * p_s / (p_b + p_s);
+ (l2_b + 2 * E_B * l_b + E2_B) * p_b / (p_b + p_s) + (l2_s + 2 * E_S * l_s + E2_S) * p_s / (p_b + p_s);
const double E2_BS = E2_I * (p_s + p_b) / (p_m + p_d);
const double moment2 = E2_BS + 2 * E_BS * E_MD + E2_MD;
const double var = moment2 - mean * mean;
diff --git a/src/align/AffineAlignment.cpp b/src/align/AffineAlignment.cpp
index f390e22..e3e9f60 100644
--- a/src/align/AffineAlignment.cpp
+++ b/src/align/AffineAlignment.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/align/AlignConfig.cpp b/src/align/AlignConfig.cpp
index da35c4b..47b4222 100644
--- a/src/align/AlignConfig.cpp
+++ b/src/align/AlignConfig.cpp
@@ -1,10 +1,4 @@
-/*
-
- *
- * Created on: Feb 12, 2015
- * Author: dalexander
- */
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/align/LinearAlignment.cpp b/src/align/LinearAlignment.cpp
index d0750b5..b9568ff 100644
--- a/src/align/LinearAlignment.cpp
+++ b/src/align/LinearAlignment.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/align/PairwiseAlignment.cpp b/src/align/PairwiseAlignment.cpp
index ceaf665..cbb2ad6 100644
--- a/src/align/PairwiseAlignment.cpp
+++ b/src/align/PairwiseAlignment.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/matrix/ScaledMatrix.cpp b/src/matrix/ScaledMatrix.cpp
index b3362b2..159575c 100644
--- a/src/matrix/ScaledMatrix.cpp
+++ b/src/matrix/ScaledMatrix.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <algorithm>
#include <cmath>
diff --git a/src/matrix/ScaledMatrix.h b/src/matrix/ScaledMatrix.h
index bea0f46..1f1c1ca 100644
--- a/src/matrix/ScaledMatrix.h
+++ b/src/matrix/ScaledMatrix.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
@@ -34,7 +68,8 @@ public: // nullability
static const ScaledMatrix& Null();
public: // information about entries filled by column
- void FinishEditingColumn(int j, int usedBegin, int usedEnd);
+ template <bool maxProvided>
+ void FinishEditingColumn(int j, int usedBegin, int usedEnd, double max_val = 0.0);
public: // Scaling and normalization
double GetLogScale(int j) const;
@@ -54,12 +89,16 @@ inline const ScaledMatrix& ScaledMatrix::Null()
return *nullObj;
}
-inline void ScaledMatrix::FinishEditingColumn(const int j, const int usedBegin, const int usedEnd)
+template <bool maxProvided>
+inline void ScaledMatrix::FinishEditingColumn(const int j, const int usedBegin, const int usedEnd,
+ double max_val)
{
// get the constant to scale by
- double c = 0.0;
- for (int i = usedBegin; i < usedEnd; ++i) {
- c = std::max(c, SparseMatrix::Get(i, j));
+ if (!maxProvided) {
+ max_val = 0.0;
+ for (int i = usedBegin; i < usedEnd; ++i) {
+ max_val = std::max(max_val, SparseMatrix::Get(i, j));
+ }
}
// cumsum stuff
@@ -70,11 +109,11 @@ inline void ScaledMatrix::FinishEditingColumn(const int j, const int usedBegin,
last = logScalars_[j + 1];
// set it
- if (c != 0.0 && c != 1.0) {
+ if (max_val != 0.0 && max_val != 1.0) {
for (int i = usedBegin; i < usedEnd; ++i) {
- SparseMatrix::Set(i, j, SparseMatrix::Get(i, j) / c);
+ SparseMatrix::Set(i, j, SparseMatrix::Get(i, j) / max_val);
}
- logScalars_[j] = last + std::log(c);
+ logScalars_[j] = last + std::log(max_val);
} else {
logScalars_[j] = last;
}
diff --git a/src/matrix/SparseMatrix.cpp b/src/matrix/SparseMatrix.cpp
index b052a74..65e3d79 100644
--- a/src/matrix/SparseMatrix.cpp
+++ b/src/matrix/SparseMatrix.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/matrix/SparseMatrix.h b/src/matrix/SparseMatrix.h
index f31eb16..14a5b07 100644
--- a/src/matrix/SparseMatrix.h
+++ b/src/matrix/SparseMatrix.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/matrix/SparseVector.h b/src/matrix/SparseVector.h
index 83c4f9c..6b7442a 100644
--- a/src/matrix/SparseVector.h
+++ b/src/matrix/SparseVector.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/models/P6C4NoCovModel.cpp b/src/models/P6C4NoCovModel.cpp
index c826665..9a8db3b 100644
--- a/src/models/P6C4NoCovModel.cpp
+++ b/src/models/P6C4NoCovModel.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <cassert>
#include <cmath>
@@ -27,8 +61,9 @@ public:
std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
const MappedRead& mr, double scoreDiff) const;
std::vector<TemplatePosition> Populate(const std::string& tpl) const;
- double SubstitutionRate(uint8_t prev, uint8_t curr) const;
-
+ double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const;
+ double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const;
+ double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const;
private:
SNR snr_;
};
@@ -84,7 +119,14 @@ double P6C4NoCovParams[4][2][3][4] = {
{4.21031404956015, -0.347546363361823, 0.0293839179303896, -0.000893802212450644},
{2.33143889851302, -0.586068444099136, 0.040044954697795, -0.000957298861394191}}}};
-P6C4NoCovModel::P6C4NoCovModel(const SNR& snr) : snr_(snr) {}
+
+// For P6-C4 we cap SNR at 20.0 (19.0 for C); as the training set only went that
+// high; extrapolation beyond this cap goes haywire because of the higher-order
+// terms in the regression model. See bug 31423.
+P6C4NoCovModel::P6C4NoCovModel(const SNR& snr)
+ : snr_(ClampSNR(snr, SNR(0,0,0,0), SNR(20,19,20,20)))
+{}
+
std::vector<TemplatePosition> P6C4NoCovModel::Populate(const std::string& tpl) const
{
std::vector<TemplatePosition> result;
@@ -137,7 +179,33 @@ std::unique_ptr<AbstractRecursor> P6C4NoCovModel::CreateRecursor(
new P6C4NoCovRecursor(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
}
-double P6C4NoCovModel::SubstitutionRate(uint8_t prev, uint8_t curr) const { return kEps; }
+
+double P6C4NoCovModel::ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ double probMatch = 1.0 - kEps;
+ const double lgThird = -std::log(3.0);
+ const double lgMatch = std::log(probMatch);
+ const double probMismatch = kEps;
+ const double lgMismatch = lgThird + std::log(probMismatch);
+ if (!secondMoment) {
+ return probMatch * lgMatch + probMismatch * lgMismatch;
+ } else {
+ return probMatch * pow(lgMatch, 2.0) + probMismatch * pow(lgMismatch, 2.0);
+ }
+}
+
+double P6C4NoCovModel::ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ return 0.0;
+}
+
+double P6C4NoCovModel::ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr, bool secondMoment) const {
+ const double lgThird = -std::log(3.0);
+ if(!secondMoment) {
+ return lgThird;
+ } else {
+ return pow(lgThird, 2.0);
+ }
+}
+
P6C4NoCovRecursor::P6C4NoCovRecursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
double scoreDiff)
: Recursor<P6C4NoCovRecursor>(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
diff --git a/src/models/SP1C1BetaNoCovModel.cpp b/src/models/SP1C1BetaNoCovModel.cpp
deleted file mode 100644
index b48dbe0..0000000
--- a/src/models/SP1C1BetaNoCovModel.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-
-#include <cassert>
-#include <cmath>
-#include <memory>
-#include <stdexcept>
-
-#include <pacbio/consensus/ModelConfig.h>
-#include <pacbio/consensus/Read.h>
-
-#include "../ModelFactory.h"
-#include "../Recursor.h"
-
-namespace PacBio {
-namespace Consensus {
-namespace {
-
-class SP1C1BetaNoCovModel : public ModelConfig
-{
- REGISTER_MODEL(SP1C1BetaNoCovModel);
-
-public:
- static std::set<std::string> Names() { return {"S/P1-C1/beta"}; }
- SP1C1BetaNoCovModel(const SNR& snr);
- std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
- const MappedRead& mr, double scoreDiff) const;
- std::vector<TemplatePosition> Populate(const std::string& tpl) const;
- double SubstitutionRate(uint8_t prev, uint8_t curr) const;
-
-private:
- SNR snr_;
-};
-
-REGISTER_MODEL_IMPL(SP1C1BetaNoCovModel);
-
-class SP1C1BetaNoCovRecursor : public Recursor<SP1C1BetaNoCovRecursor>
-{
-public:
- SP1C1BetaNoCovRecursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
- double scoreDiff);
- static inline std::vector<uint8_t> EncodeRead(const MappedRead& read);
- static inline double EmissionPr(MoveType move, uint8_t emission, uint8_t prev, uint8_t curr);
- virtual double UndoCounterWeights(size_t nEmissions) const;
-};
-
-double emissionPmf[3][8][4] = {{
- // matchPmf
- {0.980417570, 0.011537479, 0.005804964, 0.002239987}, // AA
- {0.026122324, 0.972937583, 0.000367796, 0.000572296}, // CC
- {0.002544283, 0.002239375, 0.962042375, 0.033173967}, // GG
- {0.000509814, 0.001489097, 0.094228328, 0.903772761}, // TT
- {0.979840156, 0.012582917, 0.005185205, 0.002391722}, // NA
- {0.015528755, 0.984439781, 7.91000E-07, 3.07000E-05}, // NC
- {0.002667013, 0.002095727, 0.961571053, 0.033666207}, // NG
- {0.000506358, 0.001057035, 0.116124340, 0.882312267} // NT
- },
- {
- // branchPmf
- {1, 0, 0, 0}, // AA
- {0, 1, 0, 0}, // CC
- {0, 0, 1, 0}, // GG
- {0, 0, 0, 1}, // TT
- {1, 0, 0, 0}, // NA
- {0, 1, 0, 0}, // NC
- {0, 0, 1, 0}, // NG
- {0, 0, 0, 1} // NT
- },
- {
- // stickPmf
- {0.000000000, 0.254503401, 0.574809968, 0.170686631}, // AA
- {0.399446202, 0.000000000, 0.510664061, 0.089889737}, // CC
- {0.505214805, 0.188597323, 0.000000000, 0.306187872}, // GG
- {0.361855644, 0.132870306, 0.505274050, 0.000000000}, // TT
- {0.000000000, 0.210676350, 0.615161689, 0.174161960}, // NA
- {0.357451562, 0.000000000, 0.473482915, 0.169065523}, // NC
- {0.577147745, 0.169785817, 0.000000000, 0.253066438}, // NG
- {0.446834358, 0.144605809, 0.408559833, 0.000000000} // NT
- }};
-
-double SP1C1BetaNoCovParams[8][4] = {
- // Match, Branch, Stick, Delete
- {0.888913751, 0.021169653, 0.034937054, 0.054979542}, // AA
- {0.835822697, 0.036126801, 0.091992041, 0.036058461}, // CC
- {0.886427657, 0.022596867, 0.039619893, 0.051355584}, // GG
- {0.821252207, 0.072798639, 0.068161389, 0.037787765}, // TT
- {0.857630366, 0.072058988, 0.036435296, 0.033875351}, // NA
- {0.846000625, 0.032981179, 0.076759732, 0.044258463}, // NC
- {0.881462348, 0.042444137, 0.039293952, 0.036799562}, // NG
- {0.879087800, 0.022178294, 0.057073518, 0.041660389} // NT
-};
-
-SP1C1BetaNoCovModel::SP1C1BetaNoCovModel(const SNR& snr) : snr_(snr) {}
-std::vector<TemplatePosition> SP1C1BetaNoCovModel::Populate(const std::string& tpl) const
-{
- std::vector<TemplatePosition> result;
-
- if (tpl.empty()) return result;
-
- uint8_t prev = detail::TranslationTable[static_cast<uint8_t>(tpl[0])];
- if (prev > 3) throw std::invalid_argument("invalid character in sequence!");
-
- for (size_t i = 1; i < tpl.size(); ++i) {
- const uint8_t curr = detail::TranslationTable[static_cast<uint8_t>(tpl[i])];
- if (curr > 3) throw std::invalid_argument("invalid character in sequence!");
- const bool hpAdd = tpl[i - 1] == tpl[i] ? 0 : 4;
- const auto params = SP1C1BetaNoCovParams[curr + hpAdd];
- result.emplace_back(TemplatePosition{
- tpl[i - 1], prev,
- params[0], // match
- params[1], // branch
- params[2], // stick
- params[3] // deletion
- });
- prev = curr;
- }
-
- result.emplace_back(TemplatePosition{tpl.back(), prev, 1.0, 0.0, 0.0, 0.0});
-
- return result;
-}
-
-std::unique_ptr<AbstractRecursor> SP1C1BetaNoCovModel::CreateRecursor(
- std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const
-{
- return std::unique_ptr<AbstractRecursor>(new SP1C1BetaNoCovRecursor(
- std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
-}
-
-double SP1C1BetaNoCovModel::SubstitutionRate(uint8_t prev, uint8_t curr) const
-{
- const uint8_t hpAdd = prev == curr ? 0 : 4;
- const uint8_t row = curr + hpAdd;
-
- double eps = 0.0;
-
- for (uint8_t em = 0; em < 4; ++em)
- if (em != curr) eps += emissionPmf[0][row][em];
-
- return eps / 3.0;
-}
-
-SP1C1BetaNoCovRecursor::SP1C1BetaNoCovRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
- const MappedRead& mr, double scoreDiff)
- : Recursor<SP1C1BetaNoCovRecursor>(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
- scoreDiff)
-{
-}
-
-std::vector<uint8_t> SP1C1BetaNoCovRecursor::EncodeRead(const MappedRead& read)
-{
- std::vector<uint8_t> result;
-
- for (const char bp : read.Seq) {
- const uint8_t em = detail::TranslationTable[static_cast<uint8_t>(bp)];
- if (em > 3) throw std::invalid_argument("invalid character in read!");
- result.emplace_back(em);
- }
-
- return result;
-}
-
-double SP1C1BetaNoCovRecursor::EmissionPr(MoveType move, const uint8_t emission, const uint8_t prev,
- const uint8_t curr)
-{
- assert(move != MoveType::DELETION);
- const uint8_t hpAdd = prev == curr ? 0 : 4;
- // Which row do we want?
- const uint8_t row = curr + hpAdd;
- return emissionPmf[static_cast<uint8_t>(move)][row][emission];
-}
-
-double SP1C1BetaNoCovRecursor::UndoCounterWeights(const size_t nEmissions) const { return 0; }
-} // namespace anonymous
-} // namespace Consensus
-} // namespace PacBio
diff --git a/src/models/SP1C1PwModel.cpp b/src/models/SP1C1PwModel.cpp
deleted file mode 100644
index f291597..0000000
--- a/src/models/SP1C1PwModel.cpp
+++ /dev/null
@@ -1,301 +0,0 @@
-
-#include <cassert>
-#include <cmath>
-#include <memory>
-#include <stdexcept>
-
-#include <pacbio/consensus/ModelConfig.h>
-#include <pacbio/consensus/Read.h>
-
-#include "../ModelFactory.h"
-#include "../Recursor.h"
-
-namespace PacBio {
-namespace Consensus {
-namespace {
-
-class SP1C1PwModel : public ModelConfig
-{
- REGISTER_MODEL(SP1C1PwModel);
-
-public:
- static std::set<std::string> Names() { return {"S/P1-C1", "S/P2-C2/prospective-compatible"}; }
- SP1C1PwModel(const SNR& snr);
- std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
- const MappedRead& mr, double scoreDiff) const;
- std::vector<TemplatePosition> Populate(const std::string& tpl) const;
- double SubstitutionRate(uint8_t prev, uint8_t curr) const;
-
-private:
- SNR snr_;
- double ctxTrans_[16][4];
-};
-
-REGISTER_MODEL_IMPL(SP1C1PwModel);
-
-// TODO(lhepler) comments regarding the CRTP
-class SP1C1PwRecursor : public Recursor<SP1C1PwRecursor>
-{
-public:
- SP1C1PwRecursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
- double scoreDiff);
- static inline std::vector<uint8_t> EncodeRead(const MappedRead& read);
- static inline double EmissionPr(MoveType move, uint8_t emission, uint8_t prev, uint8_t curr);
- virtual double UndoCounterWeights(size_t nEmissions) const;
-};
-
-double emissionPmf[3][16][12] = {
- {// matchPmf
- { 0.050180496, 0.000810831714, 0.000718815911, 2.41969643e-05, 0.0649534816, 0.000599364712, 0.0024105932, 2.79498377e-05, 0.872718151, 0.0038260229, 0.00356427001, 0.00014363133},
- { 0.00441418155, 0.0111340355, 0.000436100959, 0.000207054124, 0.0143053103, 0.0330608924, 0.00035144973, 0.000107614316, 0.0203264843, 0.914289415, 0.000516920114, 0.000820927935},
- { 0.000830704872, 4.01083601e-05, 0.0207832475, 0.00204041375, 0.00140918852, 1.59627818e-05, 0.0709485116, 0.00228655717, 0.000615064928, 0.000191122171, 0.890515556, 0.0102920349},
- { 0.000280390731, 0.000147856337, 0.00871749656, 0.0143671713, 0.000348528243, 3.09242691e-05, 0.0307957392, 0.0420922984, 0.000253817904, 0.000821599329, 0.0766740254, 0.82544661},
- { 0.0204497033, 0.0006522629, 0.00111751863, 6.88171386e-05, 0.0568321653, 0.000563436817, 0.0028379739, 2.22809772e-05, 0.910472162, 0.00273295039, 0.00398092872, 0.000246686918},
- { 0.0439247398, 0.00955997748, 0.000115883831, 0.000180023452, 0.0119201337, 0.0301506871, 0.000146989241, 0.000212785158, 0.0173905735, 0.886083207, 2.74319201e-05, 0.000259452999},
- { 0.000939303575, 5.20125284e-05, 0.00980339201, 0.000907334466, 0.000463702535, 1.79401825e-05, 0.0324806442, 0.00114187083, 0.00142047181, 0.000124132578, 0.944142586, 0.00848256955},
- { 0.000260915592, 0.00013834241, 0.0095663394, 0.016165433, 0.000260906504, 7.5920278e-05, 0.0322444797, 0.0433540894, 0.000653449354, 0.000422752406, 0.0760714376, 0.820755173},
- { 0.00956039032, 0.000314573628, 0.000370884177, 0.000117770504, 0.028518209, 0.000273935529, 0.000824887956, 3.66790527e-05, 0.953577007, 0.00398330966, 0.00222602028, 0.000169234775},
- { 0.00183055368, 0.00555526733, 7.32920566e-05, 0.000100825459, 0.00667577618, 0.0171484968, 0.000133252926, 0.000100846723, 0.0132914572, 0.954655196, 0.000133282582, 0.000280749525},
- { 0.0372218972, 3.13057624e-05, 0.00747417722, 0.000778927608, 0.000580843383, 9.37154184e-06, 0.0252638791, 0.00087834799, 0.00133003939, 0.000108090582, 0.919709258, 0.0065856977},
- { 7.34021416e-05, 2.11289101e-05, 0.00397571235, 0.00794336018, 0.000182826564, 4.28660053e-05, 0.0160848709, 0.0249312114, 0.000361781181, 0.000965491677, 0.0544096637, 0.890978452},
- { 0.0215150154, 0.000790146036, 0.00100949721, 0.000245739353, 0.0540046078, 0.000482950702, 0.00234925379, 3.35561393e-05, 0.911572324, 0.0044349381, 0.00338886125, 0.000137495886},
- { 0.00212710775, 0.00564615806, 0.000108665095, 8.3124319e-05, 0.00711710252, 0.0182314512, 0.000129199329, 0.00014006181, 0.0128209537, 0.953382861, 0.000111983946, 7.416168e-05},
- { 0.000714475349, 2.89513287e-05, 0.00993106299, 0.000963756992, 0.000625068525, 1.2084195e-05, 0.033994039, 0.0010809747, 0.000985106579, 0.000119201837, 0.944114814, 0.00740766497},
- { 0.0295954394, 0.000308518611, 0.00698927228, 0.00712426088, 0.00293094089, 0.000811966057, 0.0172195287, 0.0213957973, 0.0268789678, 0.0263604224, 0.0801385692, 0.780226789}},
-
- {// branchPmf
- { 0.367580288, 0.00016153169, 0.00016153169, 0.00016153169, 0.177171801, 0.00016153169, 0.00016153169, 0.00016153169, 0.452986467, 0.00016153169, 0.00016153169, 0.00016153169},
- { 0.000630798853, 0.12174078, 0.000630798853, 0.000630798853, 0.000630798853, 0.109459833, 0.000630798853, 0.000630798853, 0.000630798853, 0.759968203, 0.000630798853, 0.000630798853},
- { 0.00018211471, 0.00018211471, 0.436215655, 0.00018211471, 0.00018211471, 0.00018211471, 0.190416017, 0.00018211471, 0.00018211471, 0.00018211471, 0.370818723, 0.00018211471},
- { 0.00162252415, 0.00162252415, 0.00162252415, 0.30683009, 0.00162252415, 0.00162252415, 0.00162252415, 0.187965419, 0.00162252415, 0.00162252415, 0.00162252415, 0.482489153},
- { 0.22285451, 5.86536116e-05, 5.86536116e-05, 5.86536116e-05, 0.135841077, 5.86536116e-05, 5.86536116e-05, 5.86536116e-05, 0.640483263, 5.86536116e-05, 5.86536116e-05, 5.86536116e-05},
- { 0.000109110324, 0.0357894162, 0.000108808428, 0.000108808428, 0.000109086495, 0.0502780024, 0.000108808428, 0.000108808428, 0.000109274002, 0.912408218, 0.000108808428, 0.000108808428},
- { 0.00010725769, 0.00010725769, 0.344987237, 0.00010725769, 0.00010725769, 0.00010725769, 0.152744912, 0.00010725769, 0.00010725769, 0.00010725769, 0.500766243, 0.00010725769},
- { 0.00112670696, 0.00112670696, 0.00112670696, 0.211892718, 0.00112670696, 0.00112670696, 0.00112670696, 0.180261578, 0.00112670696, 0.00112670696, 0.00112670696, 0.592071807},
- { 0.181226885, 7.780819e-05, 7.780819e-05, 7.780819e-05, 0.102628527, 7.780819e-05, 7.780819e-05, 7.780819e-05, 0.715055273, 7.780819e-05, 7.780819e-05, 7.780819e-05},
- { 0.000232046724, 0.0678561269, 0.000232046724, 0.000232046724, 0.000232046724, 0.0826105386, 0.000232046724, 0.000232046724, 0.000232046724, 0.84628468, 0.000232046724, 0.000232046724},
- { 0.000152600774, 0.000152481727, 0.36057732, 0.000152481727, 0.000152671928, 0.000152481727, 0.161807588, 0.000152481727, 0.000161273012, 0.000152481727, 0.475471247, 0.000152481727},
- { 0.000452709553, 0.000452709553, 0.000452709553, 0.113042477, 0.000452709553, 0.000452709553, 0.000452709553, 0.0809680645, 0.000452709553, 0.000452709553, 0.000452709553, 0.799651525},
- { 0.16435014, 7.85392159e-05, 7.85392159e-05, 7.85392159e-05, 0.110607776, 7.85392159e-05, 7.85392159e-05, 7.85392159e-05, 0.723942535, 7.85392159e-05, 7.85392159e-05, 7.85392159e-05},
- { 0.000141875712, 0.0438774381, 0.000141875712, 0.000141875712, 0.000141875712, 0.0505647953, 0.000141875712, 0.000141875712, 0.000141875712, 0.903571507, 0.000141875712, 0.000141875712},
- { 8.49950399e-05, 8.49950399e-05, 0.255828845, 8.49950399e-05, 8.49950399e-05, 8.49950399e-05, 0.130922383, 8.49950399e-05, 8.49950399e-05, 8.49950399e-05, 0.612058841, 8.49950399e-05},
- { 7.34349915e-05, 5.19983486e-05, 5.19983486e-05, 0.0492213576, 5.20027988e-05, 5.19983486e-05, 5.19983486e-05, 0.0699666584, 5.36325917e-05, 5.19983486e-05, 5.19983486e-05, 0.880060932}},
-
- {// stickPmf
- { 0.000144273271, 0.031852115, 0.495294841, 0.038100898, 0.000144273271, 0.017212738, 0.122314015, 0.0167614466, 0.000144273271, 0.0753697914, 0.146214029, 0.0557259401},
- { 0.117962224, 6.59168851e-05, 0.282951648, 0.0192644475, 0.046904862, 6.59168851e-05, 0.116789946, 0.0191866887, 0.259151536, 6.59168851e-05, 0.0740004182, 0.0632608948},
- { 0.4239512, 0.0174098914, 0.000292385129, 0.157401908, 0.0812378375, 0.00996979079, 0.000292385129, 0.0478440513, 0.103765732, 0.0570054455, 0.000292385129, 0.0990750621},
- { 0.333696134, 0.0215995597, 0.257269536, 0.000137635044, 0.0854483376, 0.0098556168, 0.091392627, 0.000137635044, 0.0897605776, 0.0379132189, 0.0719633124, 0.000137635044},
- { 7.54578173e-05, 0.0166332203, 0.455649421, 0.0471517145, 7.54578173e-05, 0.00858907211, 0.128185013, 0.00844390932, 7.54578173e-05, 0.211699135, 0.088201127, 0.0348437253},
- { 0.112105594, 4.52902665e-05, 0.378558635, 0.0324997303, 0.0595842193, 4.26416883e-05, 0.195528879, 0.0131811878, 0.0831252123, 4.13209117e-05, 0.0934087306, 0.0316826901},
- { 0.340631586, 0.027496545, 0.000100648024, 0.0361256743, 0.114163109, 0.0172271722, 0.000100648024, 0.0151484488, 0.128820614, 0.256463889, 0.000100648024, 0.0631177767},
- { 0.249316656, 0.016756361, 0.179589221, 0.000102326946, 0.0850404612, 0.0131422017, 0.0646193548, 0.000102326946, 0.0825249453, 0.254973517, 0.053218666, 0.000102326946},
- { 0.00014006723, 0.0184767984, 0.541111101, 0.0356574565, 0.00014006723, 0.00737734189, 0.13451092, 0.0101765966, 0.00014006723, 0.0553982525, 0.124870347, 0.0713006475},
- { 0.089968542, 2.75232823e-05, 0.323766297, 0.0218632014, 0.0336158929, 2.75232823e-05, 0.215967877, 0.0116012507, 0.0329421856, 2.75232823e-05, 0.225971868, 0.0440826989},
- { 0.353010758, 0.028056778, 0.0141109415, 0.0541686994, 0.148416621, 0.0162924849, 0.000331214987, 0.0257340348, 0.132119269, 0.0676408297, 0.000260259152, 0.158619367},
- { 0.301161856, 0.0178563507, 0.274811467, 0.000154134305, 0.0974458666, 0.00739013642, 0.108102879, 0.000154134305, 0.0675909548, 0.0332511353, 0.09115628, 0.000154134305},
- { 0.000113490293, 0.0147487269, 0.305843505, 0.02221454, 0.000113490293, 0.0081304956, 0.0847889811, 0.0193666968, 0.000113490293, 0.0674361727, 0.184462101, 0.292100858},
- { 0.106375744, 5.18033726e-05, 0.247279422, 0.0217993491, 0.0454227851, 5.18033726e-05, 0.0926635587, 0.0197616719, 0.0520259221, 5.18033726e-05, 0.108375744, 0.305881377},
- { 0.281147231, 0.0141902129, 0.000105605946, 0.0328466806, 0.0817292326, 0.00930067567, 0.000105605946, 0.0288624596, 0.0768269567, 0.0393685594, 0.000105605946, 0.434883144},
- { 0.199657941, 0.0174846089, 0.257321115, 0.000317504884, 0.0573206577, 0.00455372773, 0.165110771, 9.56945913e-05, 0.0324444703, 0.0257359099, 0.23944167, 0.00010809618}}};
-
-double transProbs[16][3][4] = {
- // Fit for context: AA
- {
- { 4.64180060259476, -3.9536165441315, 0.629346482632806, -0.0333195939644658 },
- { 6.95397186355503, -4.81640615254612, 0.73957810252931, -0.0380629903397578 },
- { 2.0125609286762, -1.64230680477304, 0.188043347538493, -0.00800025350740117 }
- },
- // Fit for context: AC
- {
- { -7.96643693768338, 2.45203940450617, -0.535064344077792, 0.0358830223158447 },
- { -8.7665502183788, 3.11090386888253, -0.502837045360252, 0.0268719645928884 },
- { 2.55241343397313, -2.61388872007515, 0.400198378643623, -0.0214289578490057 }
- },
- // Fit for context: AG
- {
- { -8.95609979609381, 2.76776188942349, -0.448430889583276, 0.0238526474666025 },
- { 8.70083117131101, -6.14855240263022, 1.01305377841628, -0.0562353867193715 },
- { 3.85673355874382, -2.37323430285549, 0.253639486232327, -0.00792265363283865 }
- },
- // Fit for context: AT
- {
- { 12.1647336238122, -7.9776855494273, 1.17170998397018, -0.057603362646467 },
- { 5.00290648719487, -4.16621645092003, 0.696297873836792, -0.0391611545344868 },
- { -4.01995319742413, 0.523828889049741, -0.103132759467356, 0.00520451145808958 }
- },
- // Fit for context: CA
- {
- { -2.51102087348086, 0.204483340825312, -0.0649203448574841, 0.0049040915345933 },
- { 1.40048802441362, -2.24748845336743, 0.39759031823371, -0.0232098660636963 },
- { 6.25379124637344, -3.74840757828474, 0.503914277108986, -0.023498322774112 }
- },
- // Fit for context: CC
- {
- { -8.45842695849197, 3.04649166320213, -0.537865339629986, 0.0303837875540666 },
- { -5.3848460306973, 1.68210750534009, -0.267218567372589, 0.0138107213099974 },
- { -6.92818052574026, 2.48746042364403, -0.454449677350371, 0.0258638165622547 }
- },
- // Fit for context: CG
- {
- { -1.47733518453213, -0.60964075694068, 0.0706287595409263, -0.00243871775381204 },
- { -5.64639770150871, 1.63363359198718, -0.296877718476959, 0.0160551081644542 },
- { 1.32933319237265, -1.76650070845658, 0.229267885604329, -0.0108217271792876 }
- },
- // Fit for context: CT
- {
- { -1.89298348233174, -1.05169004992836, 0.0785852195682609, 0.000648287464643558 },
- { 0.409528582048115, -1.53434275700689, 0.250882766974551, -0.0141427303266602 },
- { -0.814049625216239, -1.07671818027045, 0.171246935298451, -0.00990748002576864 }
- },
- // Fit for context: GA
- {
- { 0.201195999563248, -1.46936914407633, 0.251926058146855, -0.0144336483641249 },
- { 1.15692566801376, -2.37338445578781, 0.422803587943207, -0.0249314131424101 },
- { -0.865159714636933, -0.530146528415917, -0.00126633169748894, 0.0023281565006292 }
- },
- // Fit for context: GC
- {
- { -4.20369476733155, -0.403208361824981, 0.12053240324456, -0.0078924717222251 },
- { -7.07309386117761, 2.32339953661694, -0.344692279433874, 0.0170692678385147 },
- { -3.29462555451758, -0.0449183331796563, 0.0158592774136627, -0.00218563581174302 }
- },
- // Fit for context: GG
- {
- { -6.618759868799, 1.77630213513116, -0.301917469878687, 0.0162987892702458 },
- { 6.46016885698086, -4.60291330456813, 0.71394376036697, -0.038926481203748 },
- { 4.1641434837661, -2.90948798940317, 0.413100412855066, -0.0198119106419827 }
- },
- // Fit for context: GT
- {
- { 5.32979814815732, -4.36105624112514, 0.652808796272787, -0.0326266821355755 },
- { -1.7029081461207, -0.499477389809844, 0.0573393510996656, -0.00310784895624791 },
- { 7.11595788176173, -5.08972674986682, 0.801101205804288, -0.0414439829669328 }
- },
- // Fit for context: TA
- {
- { -4.74271554330721, 1.28373596500549, -0.229225891578021, 0.0133403409911494 },
- { 2.22374687297304, -2.38792067161617, 0.373288541637948, -0.0191516834711714 },
- { 3.66108514125149, -2.59128954758056, 0.337444749378416, -0.0160370039528165 }
- },
- // Fit for context: TC
- {
- { 2.04612192357648, -2.52151599689975, 0.379682953937167, -0.0179838619408694 },
- { -1.31673402810369, -0.562278453209614, 0.107691140486616, -0.00666343387997336 },
- { -4.94003805413098, 1.01280215159965, -0.19017869162516, 0.0108150675409149 }
- },
- // Fit for context: TG
- {
- { 4.79424434531782, -4.02399375007546, 0.682218296797831, -0.0376455420429643 },
- { 5.75033593156235, -4.10145870919526, 0.636217309073862, -0.033566881903926 },
- { -0.267152607541904, -0.732160305934965, 0.029848264522247, 0.0010401530454877 }
- },
- // Fit for context: TT
- {
- { -5.68426500741552, 1.35382620376114, -0.177669648153777, 0.0069662442934688 },
- { -1.02037267928727, -0.844797584296236, 0.130687255561928, -0.00722481027088629 },
- { -0.376565549274208, -1.21268955791849, 0.178640505929895, -0.00928721253355987 }
- }
-
-};
-
-SP1C1PwModel::SP1C1PwModel(const SNR& snr) : snr_(snr)
-{
- const double snr1 = snr_.A, snr2 = snr1 * snr1, snr3 = snr2 * snr1;
- for (int ctx = 0; ctx < 16; ++ctx) {
- double sum = 1.0;
- ctxTrans_[ctx][0] = 1.0;
- for (size_t j = 0; j < 3; ++j) {
- double xb = transProbs[ctx][j][0] + snr1 * transProbs[ctx][j][1] +
- snr2 * transProbs[ctx][j][2] + snr3 * transProbs[ctx][j][3];
- xb = std::exp(xb);
- ctxTrans_[ctx][j + 1] = xb;
- sum += xb;
- }
- for (size_t j = 0; j < 4; ++j)
- ctxTrans_[ctx][j] /= sum;
- }
-}
-
-std::unique_ptr<AbstractRecursor> SP1C1PwModel::CreateRecursor(
- std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const
-{
- return std::unique_ptr<AbstractRecursor>(
- new SP1C1PwRecursor(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
-}
-
-std::vector<TemplatePosition> SP1C1PwModel::Populate(const std::string& tpl) const
-{
- std::vector<TemplatePosition> result;
-
- if (tpl.empty()) return result;
-
- result.reserve(tpl.size());
-
- // Calculate probabilities in all 16 Contexts
- uint8_t prev = detail::TranslationTable[static_cast<uint8_t>(tpl[0])];
- if (prev > 3) throw std::invalid_argument("invalid character in template!");
-
- for (size_t i = 1; i < tpl.size(); ++i) {
- const uint8_t curr = detail::TranslationTable[static_cast<uint8_t>(tpl[i])];
- if (curr > 3) throw std::invalid_argument("invalid character in template!");
- const auto row = (prev << 2) | curr;
- const auto params = ctxTrans_[row];
- result.emplace_back(TemplatePosition{
- tpl[i - 1], prev,
- params[0], // match
- params[1], // branch
- params[2], // stick
- params[3] // deletion
- });
- prev = curr;
- }
- result.emplace_back(TemplatePosition{tpl.back(), prev, 1.0, 0.0, 0.0, 0.0});
-
- return result;
-}
-
-double SP1C1PwModel::SubstitutionRate(uint8_t prev, uint8_t curr) const
-{
- const auto row = (prev << 2) | curr;
- double eps = 0.0;
- for (uint8_t pw = 0; pw < 3; ++pw)
- for (uint8_t bp = 0; bp < 4; ++bp)
- if (bp != curr)
- eps += emissionPmf[static_cast<uint8_t>(MoveType::MATCH)][row][(pw << 2) | bp];
- return eps / (3 * 4);
-}
-
-SP1C1PwRecursor::SP1C1PwRecursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
- double scoreDiff)
- : Recursor<SP1C1PwRecursor>(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff)
-{
-}
-
-std::vector<uint8_t> SP1C1PwRecursor::EncodeRead(const MappedRead& read)
-{
- std::vector<uint8_t> result;
-
- for (size_t i = 0; i < read.Length(); ++i) {
- uint8_t pw = std::max(2, std::min(0, read.PulseWidth[i] - 1));
- uint8_t bp = detail::TranslationTable[static_cast<uint8_t>(read.Seq[i])];
- if (bp > 3) throw std::invalid_argument("invalid character in read!");
- uint8_t em = (pw << 2) | bp;
- if (em > 11) throw std::runtime_error("read encoding error!");
- result.emplace_back(em);
- }
-
- return result;
-}
-
-double SP1C1PwRecursor::EmissionPr(MoveType move, uint8_t emission, uint8_t prev, uint8_t curr)
-{
- assert(move != MoveType::DELETION);
- const auto row = (prev << 2) | curr;
- return emissionPmf[static_cast<uint8_t>(move)][row][emission];
-}
-
-double SP1C1PwRecursor::UndoCounterWeights(const size_t nEmissions) const { return 0; }
-} // namespace anonymous
-} // namespace Consensus
-} // namespace PacBio
diff --git a/src/models/S_P1C1Beta_Model.cpp b/src/models/S_P1C1Beta_Model.cpp
new file mode 100644
index 0000000..6327a56
--- /dev/null
+++ b/src/models/S_P1C1Beta_Model.cpp
@@ -0,0 +1,239 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <cassert>
+#include <cmath>
+#include <memory>
+#include <stdexcept>
+
+#include <pacbio/consensus/ModelConfig.h>
+#include <pacbio/consensus/Read.h>
+
+#include "../ModelFactory.h"
+#include "../Recursor.h"
+
+namespace PacBio {
+namespace Consensus {
+namespace {
+
+class S_P1C1Beta_Model : public ModelConfig
+{
+ REGISTER_MODEL(S_P1C1Beta_Model);
+
+public:
+ static std::set<std::string> Names() { return {"S/P1-C1/beta"}; }
+ S_P1C1Beta_Model(const SNR& snr);
+ std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
+ const MappedRead& mr, double scoreDiff) const;
+ std::vector<TemplatePosition> Populate(const std::string& tpl) const;
+ double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+ double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+ double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+
+private:
+ SNR snr_;
+};
+
+REGISTER_MODEL_IMPL(S_P1C1Beta_Model);
+
+class S_P1C1Beta_Recursor : public Recursor<S_P1C1Beta_Recursor>
+{
+public:
+ S_P1C1Beta_Recursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
+ double scoreDiff);
+ static inline std::vector<uint8_t> EncodeRead(const MappedRead& read);
+ static inline double EmissionPr(MoveType move, uint8_t emission, uint8_t prev, uint8_t curr);
+ virtual double UndoCounterWeights(size_t nEmissions) const;
+};
+
+constexpr double emissionPmf[3][8][4] = {
+ {
+ // matchPmf
+ {0.980417570, 0.011537479, 0.005804964, 0.002239987}, // AA
+ {0.026122324, 0.972937583, 0.000367796, 0.000572296}, // CC
+ {0.002544283, 0.002239375, 0.962042375, 0.033173967}, // GG
+ {0.000509814, 0.001489097, 0.094228328, 0.903772761}, // TT
+ {0.979840156, 0.012582917, 0.005185205, 0.002391722}, // NA
+ {0.015528755, 0.984439781, 7.91000E-07, 3.07000E-05}, // NC
+ {0.002667013, 0.002095727, 0.961571053, 0.033666207}, // NG
+ {0.000506358, 0.001057035, 0.116124340, 0.882312267} // NT
+ },
+ {
+ // branchPmf
+ {1, 0, 0, 0}, // AA
+ {0, 1, 0, 0}, // CC
+ {0, 0, 1, 0}, // GG
+ {0, 0, 0, 1}, // TT
+ {1, 0, 0, 0}, // NA
+ {0, 1, 0, 0}, // NC
+ {0, 0, 1, 0}, // NG
+ {0, 0, 0, 1} // NT
+ },
+ {
+ // stickPmf
+ {0.000000000, 0.254503401, 0.574809968, 0.170686631}, // AA
+ {0.399446202, 0.000000000, 0.510664061, 0.089889737}, // CC
+ {0.505214805, 0.188597323, 0.000000000, 0.306187872}, // GG
+ {0.361855644, 0.132870306, 0.505274050, 0.000000000}, // TT
+ {0.000000000, 0.210676350, 0.615161689, 0.174161960}, // NA
+ {0.357451562, 0.000000000, 0.473482915, 0.169065523}, // NC
+ {0.577147745, 0.169785817, 0.000000000, 0.253066438}, // NG
+ {0.446834358, 0.144605809, 0.408559833, 0.000000000} // NT
+ }};
+
+constexpr double transProbs[8][4] = {
+ // Match, Branch, Stick, Delete
+ {0.888913751, 0.021169653, 0.034937054, 0.054979542}, // AA
+ {0.835822697, 0.036126801, 0.091992041, 0.036058461}, // CC
+ {0.886427657, 0.022596867, 0.039619893, 0.051355584}, // GG
+ {0.821252207, 0.072798639, 0.068161389, 0.037787765}, // TT
+ {0.857630366, 0.072058988, 0.036435296, 0.033875351}, // NA
+ {0.846000625, 0.032981179, 0.076759732, 0.044258463}, // NC
+ {0.881462348, 0.042444137, 0.039293952, 0.036799562}, // NG
+ {0.879087800, 0.022178294, 0.057073518, 0.041660389} // NT
+};
+
+S_P1C1Beta_Model::S_P1C1Beta_Model(const SNR& snr) : snr_(snr) {}
+std::vector<TemplatePosition> S_P1C1Beta_Model::Populate(const std::string& tpl) const
+{
+ std::vector<TemplatePosition> result;
+
+ if (tpl.empty()) return result;
+
+ uint8_t prev = detail::TranslationTable[static_cast<uint8_t>(tpl[0])];
+ if (prev > 3) throw std::invalid_argument("invalid character in sequence!");
+
+ for (size_t i = 1; i < tpl.size(); ++i) {
+ const uint8_t curr = detail::TranslationTable[static_cast<uint8_t>(tpl[i])];
+ if (curr > 3) throw std::invalid_argument("invalid character in sequence!");
+ const bool hpAdd = tpl[i - 1] == tpl[i] ? 0 : 4;
+ const auto params = transProbs[curr + hpAdd];
+ result.emplace_back(TemplatePosition{
+ tpl[i - 1], prev,
+ params[0], // match
+ params[1], // branch
+ params[2], // stick
+ params[3] // deletion
+ });
+ prev = curr;
+ }
+
+ result.emplace_back(TemplatePosition{tpl.back(), prev, 1.0, 0.0, 0.0, 0.0});
+
+ return result;
+}
+
+std::unique_ptr<AbstractRecursor> S_P1C1Beta_Model::CreateRecursor(
+ std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const
+{
+ return std::unique_ptr<AbstractRecursor>(new S_P1C1Beta_Recursor(
+ std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
+}
+
+inline double CalculateExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t prev,
+ const uint8_t curr,
+ const bool secondMoment)
+{
+ const uint8_t hpAdd = prev == curr ? 0 : 4;
+ const uint8_t row = curr + hpAdd;
+ double expectedLL = 0;
+ for (size_t i = 0; i < 4; i++) {
+ double curProb = emissionPmf[index][row][i];
+ double lgCurProb = std::log(curProb);
+ if (!secondMoment) {
+ expectedLL += curProb * lgCurProb;
+ } else {
+ expectedLL += curProb * pow(lgCurProb, 2.0);
+ }
+ }
+ return expectedLL;
+}
+
+double S_P1C1Beta_Model::ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return CalculateExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::MATCH), prev,
+ curr, secondMoment);
+}
+double S_P1C1Beta_Model::ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return CalculateExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::STICK), prev,
+ curr, secondMoment);
+}
+double S_P1C1Beta_Model::ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return CalculateExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::BRANCH), prev,
+ curr, secondMoment);
+}
+
+S_P1C1Beta_Recursor::S_P1C1Beta_Recursor(std::unique_ptr<AbstractTemplate>&& tpl,
+ const MappedRead& mr, double scoreDiff)
+ : Recursor<S_P1C1Beta_Recursor>(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
+ scoreDiff)
+{
+}
+
+std::vector<uint8_t> S_P1C1Beta_Recursor::EncodeRead(const MappedRead& read)
+{
+ std::vector<uint8_t> result;
+
+ for (const char bp : read.Seq) {
+ const uint8_t em = detail::TranslationTable[static_cast<uint8_t>(bp)];
+ if (em > 3) throw std::invalid_argument("invalid character in read!");
+ result.emplace_back(em);
+ }
+
+ return result;
+}
+
+double S_P1C1Beta_Recursor::EmissionPr(MoveType move, const uint8_t emission, const uint8_t prev,
+ const uint8_t curr)
+{
+ assert(move != MoveType::DELETION);
+ const uint8_t hpAdd = prev == curr ? 0 : 4;
+ // Which row do we want?
+ const uint8_t row = curr + hpAdd;
+ return emissionPmf[static_cast<uint8_t>(move)][row][emission];
+}
+
+double S_P1C1Beta_Recursor::UndoCounterWeights(const size_t nEmissions) const { return 0; }
+} // namespace anonymous
+} // namespace Consensus
+} // namespace PacBio
diff --git a/src/models/S_P1C1v1_Model.cpp b/src/models/S_P1C1v1_Model.cpp
new file mode 100644
index 0000000..8717e90
--- /dev/null
+++ b/src/models/S_P1C1v1_Model.cpp
@@ -0,0 +1,423 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <cassert>
+#include <cmath>
+#include <memory>
+#include <stdexcept>
+
+#include <pacbio/consensus/ModelConfig.h>
+#include <pacbio/consensus/Read.h>
+
+#include "../ModelFactory.h"
+#include "../Recursor.h"
+
+namespace PacBio {
+namespace Consensus {
+namespace {
+
+constexpr double kCounterWeight = 15.0;
+constexpr size_t OUTCOME_NUMBER = 12;
+constexpr size_t CONTEXT_NUMBER = 16;
+
+class S_P1C1v1_Model : public ModelConfig
+{
+ REGISTER_MODEL(S_P1C1v1_Model);
+
+public:
+ static std::set<std::string> Names() { return {"S/P1-C1.1"}; }
+ S_P1C1v1_Model(const SNR& snr);
+ std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
+ const MappedRead& mr, double scoreDiff) const;
+ std::vector<TemplatePosition> Populate(const std::string& tpl) const;
+ double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+ double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+ double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+
+private:
+ SNR snr_;
+ double ctxTrans_[CONTEXT_NUMBER][4];
+ double cachedEmissionExpectations_[CONTEXT_NUMBER][3][2];
+ double ExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t prev,
+ const uint8_t curr, const bool secondMoment) const;
+};
+
+REGISTER_MODEL_IMPL(S_P1C1v1_Model);
+
+// TODO(lhepler) comments regarding the CRTP
+class S_P1C1v1_Recursor : public Recursor<S_P1C1v1_Recursor>
+{
+public:
+ S_P1C1v1_Recursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
+ double scoreDiff);
+ static inline std::vector<uint8_t> EncodeRead(const MappedRead& read);
+ static inline double EmissionPr(MoveType move, uint8_t emission, uint8_t prev, uint8_t curr);
+ virtual double UndoCounterWeights(size_t nEmissions) const;
+};
+
+constexpr double emissionPmf[3][CONTEXT_NUMBER][OUTCOME_NUMBER] = {
+ {// matchPmf
+ {0.0516695237, 0.000856759021, 0.000805765869, 6.40098033e-05, 0.0735681929, 0.000490647727,
+ 0.00171763848, 1.66588276e-05, 0.864949369, 0.00298262543, 0.0027926093, 6.28389773e-05},
+ {0.00543436007, 0.011889156, 0.000101241451, 0.000115879948, 0.0161576112, 0.0379073447,
+ 0.00013138862, 0.000162963331, 0.0238359857, 0.903407615, 0.000310425863, 0.00051517498},
+ {0.00078327569, 8.9195486e-05, 0.0213823858, 0.00233242884, 0.00122583139, 2.26787869e-05,
+ 0.0797753558, 0.0023406575, 0.000536349203, 0.000218984243, 0.881882162, 0.00937678742},
+ {0.000153647232, 0.00010627987, 0.0103924584, 0.0168897121, 0.000528420929, 0.000114998697,
+ 0.034445993, 0.0461495153, 0.000202642175, 0.000688915405, 0.0797339416, 0.810568344},
+ {0.0217684866, 0.000615574217, 0.00123796174, 0.000194115893, 0.0630331228, 0.000312092976,
+ 0.00217859654, 1.70111886e-05, 0.905859233, 0.00150065174, 0.0030195303, 0.00023882051},
+ {0.043036651, 0.0109154406, 0.000137917521, 6.63118913e-05, 0.0135411051, 0.0336696383,
+ 3.68321175e-05, 9.7192835e-05, 0.0198015123, 0.878552904, 1.95554178e-05, 9.54805479e-05},
+ {0.00018454615, 1.51427373e-05, 0.00855522704, 0.00108940771, 0.000328562866, 1.41858457e-05,
+ 0.0341491789, 0.00128003844, 0.00124774351, 0.000256778116, 0.944704089, 0.00815078658},
+ {0.000244636834, 5.95364998e-05, 0.0103025634, 0.0176534311, 0.000417091732, 3.89027835e-05,
+ 0.0347137702, 0.0495173978, 0.000658139356, 0.000481549284, 0.0798548882, 0.80602469},
+ {0.0120953705, 0.000297201253, 0.000336831573, 0.000109810239, 0.0339973245, 0.000295424749,
+ 0.000821327226, 7.0554379e-05, 0.947447668, 0.00298212225, 0.00115060424, 0.000366663193},
+ {0.00202072785, 0.0061102332, 0.000138730316, 8.82424379e-05, 0.00783346136, 0.0193899073,
+ 0.000128803697, 5.1853045e-05, 0.0145084431, 0.948899725, 0.000308586587, 0.00049984933},
+ {0.0362076945, 3.82567383e-05, 0.00699047085, 0.000843806016, 0.00025900091, 2.74289133e-05,
+ 0.0275479876, 0.00133679409, 0.00089127495, 0.000135756189, 0.918742407, 0.00694931573},
+ {0.000148143946, 5.09628138e-05, 0.00492091587, 0.00825067123, 3.34491844e-05, 1.52215475e-05,
+ 0.0185009947, 0.0282098475, 0.000211194, 0.000843725462, 0.0534485418, 0.885335084},
+ {0.0233179774, 0.000994145372, 0.000594429081, 3.30759279e-05, 0.061000414, 0.00042148067,
+ 0.00179973978, 7.98899e-05, 0.906095769, 0.00277286156, 0.00269906625, 0.000153458766},
+ {0.00252578813, 0.0058253842, 2.66040065e-05, 8.55149972e-05, 0.00783559799, 0.020356316,
+ 0.000125999422, 9.67021798e-05, 0.0145272461, 0.948105104, 0.000231890377, 0.000228741056},
+ {0.000344291837, 1.71744233e-05, 0.0093318888, 0.00101109441, 0.000518176918, 2.49752238e-05,
+ 0.0364757971, 0.00118973194, 0.00131474596, 0.000128603181, 0.942187364, 0.00743159244},
+ {0.0286354873, 0.000348696974, 0.00669477573, 0.00817040337, 0.00263533574, 0.000832163375,
+ 0.0187454532, 0.0241172554, 0.0272027554, 0.0261244204, 0.0786962737, 0.777776291}},
+
+ {// branchPmf
+ {0.337545783, 0.000189833088, 0.000189833088, 0.000189833088, 0.167897832, 0.000189833088,
+ 0.000189833088, 0.000189833088, 0.491898721, 0.000189833088, 0.000189833088, 0.000189833088},
+ {0.000455211501, 0.10721842, 0.000455211501, 0.000455211501, 0.000455211501, 0.0880290507,
+ 0.000455211501, 0.000455211501, 0.000455211501, 0.798379568, 0.000455211501, 0.000455211501},
+ {0.000139076929, 0.000139076929, 0.277526154, 0.000139076929, 0.000139076929, 0.000139076929,
+ 0.164738707, 0.000139076929, 0.000139076929, 0.000139076929, 0.555788062, 0.000139076929},
+ {0.000340988677, 0.000340988677, 0.000340988677, 0.107667327, 0.000340988677, 0.000340988677,
+ 0.000340988677, 0.11481587, 0.000340988677, 0.000340988677, 0.000340988677, 0.772742962},
+ {0.15792522, 4.71456125e-05, 4.71456125e-05, 4.71456125e-05, 0.141752834, 4.71456125e-05,
+ 4.71456125e-05, 4.71456125e-05, 0.699661908, 4.71456125e-05, 4.71456125e-05, 4.71456125e-05},
+ {8.48749074e-05, 0.0356794691, 8.47792483e-05, 8.47792483e-05, 8.50080149e-05, 0.0463288295,
+ 8.47792483e-05, 8.47792483e-05, 8.85882375e-05, 0.916800659, 8.47792483e-05, 8.47792483e-05},
+ {7.09844065e-05, 7.09844065e-05, 0.228163146, 7.09844065e-05, 7.09844065e-05, 7.09844065e-05,
+ 0.125258165, 7.09844065e-05, 7.09844065e-05, 7.09844065e-05, 0.645584908, 7.09844065e-05},
+ {0.000294417195, 0.000294417195, 0.000294417195, 0.0762864785, 0.000294417195, 0.000294417195,
+ 0.000294417195, 0.115945928, 0.000294417195, 0.000294417195, 0.000294417195, 0.803645753},
+ {0.116533833, 5.79322675e-05, 5.79322675e-05, 5.79322675e-05, 0.0942835568, 5.79322675e-05,
+ 5.79322675e-05, 5.79322675e-05, 0.788371558, 5.79322675e-05, 5.79322675e-05, 5.79322675e-05},
+ {0.000156684083, 0.0527691682, 0.000156684083, 0.000156684083, 0.000156684083, 0.0520229677,
+ 0.000156684083, 0.000156684083, 0.000156684083, 0.893014287, 0.000156684083, 0.000156684083},
+ {0.000167018487, 0.000165322717, 0.312421562, 0.000165322717, 0.000165401501, 0.000165322717,
+ 0.133726114, 0.000165322717, 0.000165734487, 0.000165322717, 0.55153562, 0.000165322717},
+ {0.000203462057, 0.000203462057, 0.000203462057, 0.0471982179, 0.000203462057, 0.000203462057,
+ 0.000203462057, 0.049201974, 0.000203462057, 0.000203462057, 0.000203462057, 0.900751339},
+ {0.119585426, 6.14072028e-05, 6.14072028e-05, 6.14072028e-05, 0.116129123, 6.14072028e-05,
+ 6.14072028e-05, 6.14072028e-05, 0.76342575, 6.14072028e-05, 6.14072028e-05, 6.14072028e-05},
+ {0.000110720498, 0.0349410744, 0.000110720498, 0.000110720498, 0.000110720498, 0.0453942421,
+ 0.000110720498, 0.000110720498, 0.000110720498, 0.918114596, 0.000110720498, 0.000110720498},
+ {7.09455464e-05, 7.09455464e-05, 0.186570455, 7.09455464e-05, 7.09455464e-05, 7.09455464e-05,
+ 0.11383208, 7.09455464e-05, 7.09455464e-05, 7.09455464e-05, 0.698604227, 7.09455464e-05},
+ {5.44400066e-05, 4.58781747e-05, 4.58781747e-05, 0.0513058841, 4.58791482e-05, 4.58781747e-05,
+ 4.58781747e-05, 0.081923538, 4.95222541e-05, 4.58781747e-05, 4.58781747e-05, 0.866116077}},
+
+ {// stickPmf
+ {0.000154038684, 0.0291154091, 0.423444453, 0.0352058932, 0.000154038684, 0.0176086145,
+ 0.149856392, 0.0151919119, 0.000154038684, 0.0916984378, 0.168808535, 0.067838043},
+ {0.13839578, 9.92440537e-05, 0.329722566, 0.0318555909, 0.0371088704, 9.92440537e-05,
+ 0.148457457, 0.0195393534, 0.112987247, 9.92440537e-05, 0.103583418, 0.0775557653},
+ {0.39242986, 0.0361688498, 0.00033884198, 0.138030777, 0.0355767848, 0.0102554086,
+ 0.00033884198, 0.054319306, 0.0665562733, 0.0785778949, 0.00033884198, 0.185374109},
+ {0.265583401, 0.0323250154, 0.267620652, 0.000144990997, 0.0553401869, 0.0100214732,
+ 0.12586624, 0.000144990997, 0.081077577, 0.0457693156, 0.115236212, 0.000144990997},
+ {8.86069101e-05, 0.0224045604, 0.391863179, 0.0374698777, 8.86069101e-05, 0.014879736,
+ 0.127064528, 0.0148023081, 8.86069101e-05, 0.199043674, 0.127110897, 0.0646523839},
+ {0.132279199, 5.64661147e-05, 0.321806183, 0.0323685244, 0.0725530116, 4.63524839e-05,
+ 0.160579431, 0.0137370025, 0.108005126, 4.69523474e-05, 0.116162938, 0.0421336554},
+ {0.320519663, 0.0239698473, 0.00011228542, 0.0412339441, 0.124873594, 0.0131178903,
+ 0.00011228542, 0.0214946836, 0.139243924, 0.223216618, 0.00011228542, 0.091431553},
+ {0.236440465, 0.0125729978, 0.174982446, 0.000114967022, 0.072742801, 0.0157082833,
+ 0.107504931, 0.000114967022, 0.0812575292, 0.199021401, 0.0988494102, 0.000114967022},
+ {0.000172377278, 0.0252780963, 0.441347929, 0.0431573519, 0.000172377278, 0.0109779693,
+ 0.122083706, 0.01370133, 0.000172377278, 0.112156842, 0.126224513, 0.103693244},
+ {0.11616354, 4.4550521e-05, 0.38730196, 0.0316267288, 0.048597637, 4.4550521e-05, 0.217316542,
+ 0.0147582454, 0.0443979757, 4.4550521e-05, 0.0941246042, 0.0453563631},
+ {0.351437251, 0.0237786623, 0.00123104756, 0.0609100017, 0.132002208, 0.0127583294,
+ 0.000288446433, 0.0203701885, 0.1316581, 0.080542757, 0.000252892079, 0.18353973},
+ {0.298492049, 0.0269851185, 0.29827165, 0.000200099143, 0.102490322, 0.0117983684, 0.121179733,
+ 0.000200099143, 0.0770158895, 0.0380659958, 0.0241000812, 0.000200099143},
+ {0.000141627536, 0.01777018, 0.301881108, 0.0226933622, 0.000141627536, 0.0109104656,
+ 0.114187821, 0.0163453226, 0.000141627536, 0.102595478, 0.208037711, 0.204445531},
+ {0.132859004, 7.16119861e-05, 0.2406106, 0.0231724181, 0.057038948, 7.16119861e-05,
+ 0.0925183577, 0.0141944935, 0.0659598334, 7.16119861e-05, 0.125422694, 0.247650755},
+ {0.355614563, 0.0234102543, 0.000187722927, 0.0564654303, 0.109173536, 0.0154941999,
+ 0.000187722927, 0.0155496293, 0.106690121, 0.0539175558, 0.000187722927, 0.262182928},
+ {0.152511749, 0.0105044023, 0.228700197, 9.13331245e-05, 0.0553648837, 0.00620728656,
+ 0.191510525, 7.33906221e-05, 0.048230451, 0.0444486569, 0.261928846, 7.37812688e-05}}};
+
+constexpr double transProbs[16][3][4] = {
+ // Fit for context: AA
+ {{-11.8402163277545, 4.12000773672109, -0.661725654820908, 0.0337357107347639},
+ {-3.10509518253287, 0.275600076181856, -0.101980967380149, 0.00728037713082447},
+ {8.79817734479091, -4.80619827278369, 0.668699377698361, -0.0323014351665758}},
+ // Fit for context: AC
+ {{3.76105501880563, -3.52354687054574, 0.518155785867385, -0.0259189282231727},
+ {1.14984691288476, -1.86148610164711, 0.282410704404651, -0.0137482001748907},
+ {8.87581132716677, -6.06418672877366, 0.934174708128712, -0.047710831271599}},
+ // Fit for context: AG
+ {{-3.5155637685456, 0.445157302722387, -0.0895426643665516, 0.00489266836530986},
+ {-2.13069399948839, -0.350350414444882, 0.00136462130313272, 0.000872314444289463},
+ {8.18160747215397, -4.75181353430953, 0.663320066629866, -0.0312212296436783}},
+ // Fit for context: AT
+ {{-0.791966407223805, -1.1076552938185, 0.121601533987201, -0.00531175337455805},
+ {-0.881444261440707, -0.731399892071115, 0.05057749339712, 9.31173789284391e-05},
+ {1.41263899033538, -2.13869979708684, 0.29554287522677, -0.0139759898505845}},
+ // Fit for context: CA
+ {{-3.26897975135237, 0.486468075687521, -0.0760370712635174, 0.00388977870825071},
+ {-1.35534562252107, -0.638577072165448, 0.0728422429297584, -0.0017528458186216},
+ {1.44591127557233, -1.35648438299165, 0.109211078325847, -0.00258389758690641}},
+ // Fit for context: CC
+ {{-3.06858725937395, 0.180650646816408, -0.0144155236738455, -0.000689002527617641},
+ {-6.84579191173796, 2.25516178646427, -0.339242808398693, 0.0163235455182541},
+ {0.960878124005925, -1.79437153365345, 0.26563747809023, -0.0132138048710256}},
+ // Fit for context: CG
+ {{-2.94152963474479, 0.229999854263155, -0.0481872379951125, 0.00285101023017463},
+ {0.869169888335733, -1.64839041486039, 0.219451593119437, -0.00974508815102731},
+ {3.06149815060729, -2.70884449310353, 0.353919865598604, -0.0152760395255156}},
+ // Fit for context: CT
+ {{0.713137277702561, -1.70038938696478, 0.195497764255509, -0.006277117941137},
+ {0.547238286104262, -1.18712111898372, 0.10016377902078, 0.000101142585729921},
+ {12.8142966669109, -7.60356150884139, 1.14279549736876, -0.0556464549926148}},
+ // Fit for context: GA
+ {{-4.04796366671756, 0.995050162374295, -0.176460865193268, 0.0098125020081521},
+ {-2.84979586974211, -0.0156903140508224, -0.0289509861158852, 0.00249431280742574},
+ {3.58059555038287, -2.82647199241841, 0.361847788449807, -0.0165539653343998}},
+ // Fit for context: GC
+ {{-0.349844213146088, -1.43550809248254, 0.203825919576521, -0.00916693940279327},
+ {1.63175479730304, -2.09434440700094, 0.344467568084574, -0.0177539882113115},
+ {-0.575918177053267, -1.5981774767604, 0.220590706766665, -0.0105341908671507}},
+ // Fit for context: GG
+ {{-2.7885490831891, 0.0219683824222214, -0.0434630049972039, 0.00405333456781744},
+ {1.88946633862993, -2.07810257475806, 0.259413174779754, -0.0120217530655737},
+ {1.69364885624261, -1.79573141305228, 0.241169169566732, -0.0114709227454174}},
+ // Fit for context: GT
+ {{-3.5677104673935, 0.358289552435854, -0.0789736924123636, 0.0036044786219107},
+ {-6.64088518072203, 2.07297640845549, -0.400256333112317, 0.0235022424593019},
+ {4.80925899732482, -3.74080441862834, 0.510449289512398, -0.0219234011000643}},
+ // Fit for context: TA
+ {{-2.64069119760436, 0.38549684716509, -0.0816358329471849, 0.00533873493972419},
+ {1.9113212271993, -2.61336018392704, 0.462996555125255, -0.0271165824050175},
+ {0.238605182625403, -0.457118176718339, -0.0792840976069514, 0.00934498149921929}},
+ // Fit for context: TC
+ {{-0.392997656904618, -0.971016641524484, 0.103185513110187, -0.00226499422687784},
+ {-5.41259348922946, 1.37988834568899, -0.223433285481649, 0.0121196778796477},
+ {0.756454968716176, -2.18832755353204, 0.294407264252205, -0.0125416902322882}},
+ // Fit for context: TG
+ {{-3.13537076489306, 0.356196913405038, -0.0801078852726234, 0.00556755944444425},
+ {0.0417901722734827, -1.44128792816831, 0.193288926462961, -0.00976454902739811},
+ {2.9482632980712, -2.38567585927352, 0.272478951901359, -0.0102457478965403}},
+ // Fit for context: TT
+ {{-2.23386221240998, -0.150165813792347, 0.0503325945637299, -0.00444264897626409},
+ {1.66643125707819, -2.11840547506457, 0.345196264109573, -0.0190747089705634},
+ {-1.9903454046927, -0.489297962270134, 0.0503913801733079, -0.00174698908180932}}};
+
+inline double CalculateExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t row,
+ const bool secondMoment)
+{
+ double expectedLL = 0;
+ for (size_t i = 0; i < OUTCOME_NUMBER; i++) {
+ double curProb = emissionPmf[index][row][i];
+ double lgCurProb = std::log(curProb);
+ if (!secondMoment) {
+ expectedLL += curProb * lgCurProb;
+ } else {
+ expectedLL += curProb * pow(lgCurProb, 2.0);
+ }
+ }
+ return expectedLL;
+}
+
+double S_P1C1v1_Model::ExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t prev,
+ const uint8_t curr,
+ const bool secondMoment) const
+{
+ const auto row = (prev << 2) | curr;
+ const auto moment = secondMoment ? 1 : 0;
+ return cachedEmissionExpectations_[row][index][moment];
+}
+
+S_P1C1v1_Model::S_P1C1v1_Model(const SNR& snr) : snr_(snr)
+{
+ // Generate cached transistion probabilities
+
+ double snr1 = snr_.A;
+ // Adjust as necessary to clamp to bounds of SNRs observe during training
+ if (snr1 < 4.0) {
+ snr1 = 4.0;
+ } else if (snr1 > 10.65) {
+ snr1 = 10.65;
+ }
+
+ const double snr2 = snr1 * snr1, snr3 = snr2 * snr1;
+ for (int ctx = 0; ctx < CONTEXT_NUMBER; ++ctx) {
+ double sum = 1.0;
+ ctxTrans_[ctx][0] = 1.0;
+ for (size_t j = 0; j < 3; ++j) {
+ double xb = transProbs[ctx][j][0] + snr1 * transProbs[ctx][j][1] +
+ snr2 * transProbs[ctx][j][2] + snr3 * transProbs[ctx][j][3];
+ xb = std::exp(xb);
+ ctxTrans_[ctx][j + 1] = xb;
+ sum += xb;
+ }
+ for (size_t j = 0; j < 4; ++j)
+ ctxTrans_[ctx][j] /= sum;
+ }
+
+ // Generate cached emission expectations
+ // TODO: These are identical for all instances, either we should enrich the model or avoid doing
+ // this in a context dependent way
+ for (int ctx = 0; ctx < CONTEXT_NUMBER; ctx++) {
+ for (int index = 0; index < 3; index++) {
+ cachedEmissionExpectations_[ctx][index][0] =
+ CalculateExpectedLogLikelihoodOfOutcomeRow(index, ctx, false);
+ cachedEmissionExpectations_[ctx][index][1] =
+ CalculateExpectedLogLikelihoodOfOutcomeRow(index, ctx, true);
+ }
+ }
+}
+
+std::unique_ptr<AbstractRecursor> S_P1C1v1_Model::CreateRecursor(
+ std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const
+{
+ return std::unique_ptr<AbstractRecursor>(
+ new S_P1C1v1_Recursor(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
+}
+
+std::vector<TemplatePosition> S_P1C1v1_Model::Populate(const std::string& tpl) const
+{
+ std::vector<TemplatePosition> result;
+
+ if (tpl.empty()) return result;
+
+ result.reserve(tpl.size());
+
+ // Calculate probabilities in all 16 Contexts
+ uint8_t prev = detail::TranslationTable[static_cast<uint8_t>(tpl[0])];
+ if (prev > 3) throw std::invalid_argument("invalid character in template!");
+
+ for (size_t i = 1; i < tpl.size(); ++i) {
+ const uint8_t curr = detail::TranslationTable[static_cast<uint8_t>(tpl[i])];
+ if (curr > 3) throw std::invalid_argument("invalid character in template!");
+ const auto row = (prev << 2) | curr;
+ const auto params = ctxTrans_[row];
+ result.emplace_back(TemplatePosition{
+ tpl[i - 1], prev,
+ params[0], // match
+ params[1], // branch
+ params[2], // stick
+ params[3] // deletion
+ });
+ prev = curr;
+ }
+ result.emplace_back(TemplatePosition{tpl.back(), prev, 1.0, 0.0, 0.0, 0.0});
+
+ return result;
+}
+
+double S_P1C1v1_Model::ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::MATCH), prev, curr,
+ secondMoment);
+}
+double S_P1C1v1_Model::ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::STICK), prev, curr,
+ secondMoment);
+}
+double S_P1C1v1_Model::ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::BRANCH), prev, curr,
+ secondMoment);
+}
+
+S_P1C1v1_Recursor::S_P1C1v1_Recursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
+ double scoreDiff)
+ : Recursor<S_P1C1v1_Recursor>(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
+ scoreDiff)
+{
+}
+
+std::vector<uint8_t> S_P1C1v1_Recursor::EncodeRead(const MappedRead& read)
+{
+ std::vector<uint8_t> result;
+
+ result.reserve(read.Length());
+
+ for (size_t i = 0; i < read.Length(); ++i) {
+ if (read.PulseWidth[i] < 1U) throw std::runtime_error("invalid PulseWidth in read!");
+ const uint8_t pw = std::min(2, read.PulseWidth[i] - 1);
+ const uint8_t bp = detail::TranslationTable[static_cast<uint8_t>(read.Seq[i])];
+ if (bp > 3) throw std::invalid_argument("invalid character in read!");
+ const uint8_t em = (pw << 2) | bp;
+ if (em > 11) throw std::runtime_error("read encoding error!");
+ result.emplace_back(em);
+ }
+
+ return result;
+}
+
+double S_P1C1v1_Recursor::EmissionPr(MoveType move, uint8_t emission, uint8_t prev, uint8_t curr)
+{
+ assert(move != MoveType::DELETION);
+ const auto row = (prev << 2) | curr;
+ return emissionPmf[static_cast<uint8_t>(move)][row][emission] * kCounterWeight;
+}
+
+double S_P1C1v1_Recursor::UndoCounterWeights(const size_t nEmissions) const
+{
+ return -std::log(kCounterWeight) * nEmissions;
+}
+} // namespace anonymous
+} // namespace Consensus
+} // namespace PacBio
diff --git a/src/models/S_P1C1v2_Model.cpp b/src/models/S_P1C1v2_Model.cpp
new file mode 100644
index 0000000..61e16ee
--- /dev/null
+++ b/src/models/S_P1C1v2_Model.cpp
@@ -0,0 +1,422 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Lance Hepler
+
+#include <cassert>
+#include <cmath>
+#include <memory>
+#include <stdexcept>
+
+#include <pacbio/consensus/ModelConfig.h>
+#include <pacbio/consensus/Read.h>
+
+#include "../ModelFactory.h"
+#include "../Recursor.h"
+
+namespace PacBio {
+namespace Consensus {
+namespace {
+
+template <typename T>
+inline T clip(const T val, const T range[2])
+{
+ return std::max(range[0], std::min(val, range[1]));
+}
+
+constexpr double kCounterWeight = 20.0;
+constexpr size_t nOutcomes = 12;
+constexpr size_t nContexts = 16;
+
+class S_P1C1v2_Model : public ModelConfig
+{
+ REGISTER_MODEL(S_P1C1v2_Model);
+
+public:
+ static std::set<std::string> Names() { return {"S/P1-C1.2"}; }
+ S_P1C1v2_Model(const SNR& snr);
+ std::unique_ptr<AbstractRecursor> CreateRecursor(std::unique_ptr<AbstractTemplate>&& tpl,
+ const MappedRead& mr, double scoreDiff) const;
+ std::vector<TemplatePosition> Populate(const std::string& tpl) const;
+ double ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+ double ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+ double ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const;
+
+private:
+ SNR snr_;
+ double ctxTrans_[nContexts][4];
+ double cachedEmissionExpectations_[nContexts][3][2];
+ double ExpectedLogLikelihoodOfOutcomeRow(const size_t move, const uint8_t prev,
+ const uint8_t curr, const bool secondMoment) const;
+};
+
+REGISTER_MODEL_IMPL(S_P1C1v2_Model);
+
+// TODO(lhepler) comments regarding the CRTP
+class S_P1C1v2_Recursor : public Recursor<S_P1C1v2_Recursor>
+{
+public:
+ S_P1C1v2_Recursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
+ double scoreDiff);
+ static inline std::vector<uint8_t> EncodeRead(const MappedRead& read);
+ static inline double EmissionPr(MoveType move, uint8_t emission, uint8_t prev, uint8_t curr);
+ virtual double UndoCounterWeights(size_t nEmissions) const;
+};
+
+constexpr double snrRanges[4][2] = {{4.001438, 9.300400}, // A
+ {7.132999, 18.840239}, // C
+ {4.017619, 9.839173}, // G
+ {5.553696, 15.040482}}; // T
+
+constexpr double emissionPmf[3][nContexts][nOutcomes] = {
+ {// matchPmf
+ {0.0538934269, 0.00145009847, 0.000903540653, 5.61451444e-05, 0.0717385523, 0.00138207722,
+ 0.000983663388, 2.98716179e-05, 0.863881703, 0.00436234248, 0.00108967626, 0.000162001338},
+ {0.00440213807, 0.0125860226, 7.60499349e-05, 2.11752707e-05, 0.0153821856, 0.0427745313,
+ 0.000264941701, 0.000280821504, 0.0178753594, 0.905210325, 0.000105142401, 0.000939700363},
+ {0.000540246421, 0.000218767963, 0.01836689, 0.00340121939, 0.000730234523, 3.4737868e-05,
+ 0.0772714471, 0.00471209957, 0.000843144059, 0.000588970797, 0.870275654, 0.0229348618},
+ {0.000111108679, 0.000203758415, 0.00804596969, 0.019115043, 0.000549249541, 9.80612339e-05,
+ 0.0344882713, 0.051657787, 0.000221063495, 0.000465020805, 0.0610234598, 0.823952168},
+ {0.0202789584, 0.00148903221, 0.000597163411, 0.000259954647, 0.064020036, 0.00104431313,
+ 0.00124614496, 0.00013358629, 0.905761027, 0.0029572048, 0.00200379219, 0.000143327084},
+ {0.0402724632, 0.0128518475, 6.42501609e-05, 2.72583767e-05, 0.0132786761, 0.0373069841,
+ 5.29716119e-05, 9.14398755e-05, 0.0162147808, 0.879564809, 0.000141573402, 5.9945642e-05},
+ {0.000201881601, 4.29369365e-05, 0.00728725582, 0.00149461331, 0.000409273215, 0.000167745802,
+ 0.0346508126, 0.00200385599, 0.000794645188, 0.000287314863, 0.925552349, 0.0270401396},
+ {0.000133065007, 7.1321816e-05, 0.00958536688, 0.0187994622, 0.00028633053, 9.12768574e-05,
+ 0.033314986, 0.0553368214, 0.000384136098, 0.000731762669, 0.0598679875, 0.821318207},
+ {0.0106324794, 0.00067658496, 0.000409726657, 8.44568278e-05, 0.0352580824, 0.000480117964,
+ 0.000394468725, 3.89470939e-05, 0.946170299, 0.00473155911, 0.000572506557, 0.000477970619},
+ {0.00197964334, 0.0054988032, 0.000105200025, 1.84904847e-05, 0.00740934367, 0.0233884094,
+ 0.000111958009, 5.9308563e-05, 0.0117473508, 0.949256624, 0.000233843345, 0.000134414698},
+ {0.03469053, 2.12215021e-05, 0.00596709145, 0.000976029849, 0.000399217726, 0.000137996197,
+ 0.0300521512, 0.00181745646, 0.000422486611, 0.000438039184, 0.901604554, 0.0233980917},
+ {0.000413418726, 3.87097781e-05, 0.0051991451, 0.00888802651, 7.01912149e-05, 1.93669472e-05,
+ 0.0190797047, 0.0304401645, 0.000143986073, 0.00102353111, 0.0393876177, 0.895213831},
+ {0.0206986587, 0.00139920374, 0.000577718694, 6.30728439e-05, 0.0638528341, 0.000787266592,
+ 0.000963775424, 7.07162141e-05, 0.904225376, 0.00531211609, 0.00172552057, 0.000220755728},
+ {0.002398519, 0.00597008512, 6.42789108e-05, 4.3328933e-05, 0.00712968535, 0.0222720582,
+ 0.000281381205, 2.45760734e-05, 0.0109258848, 0.950205967, 9.46913182e-05, 0.000517544635},
+ {0.000304324236, 2.30017401e-05, 0.00799304005, 0.00141347855, 0.000168190169, 1.64084167e-05,
+ 0.0379674798, 0.0021998025, 0.000906751406, 0.000542188155, 0.92718794, 0.0212130746},
+ {0.0280311711, 0.00051862335, 0.00593624703, 0.0083684267, 0.00324521337, 0.000994877985,
+ 0.0173524216, 0.0263269812, 0.0279441, 0.0276693067, 0.0653178223, 0.788237005}},
+ {// branchPmf
+ {0.305613932, 0.000558996368, 0.000558996368, 0.000558996368, 0.144283703, 0.000558996368,
+ 0.000558996368, 0.000558996368, 0.542276416, 0.000558996368, 0.000558996368, 0.000558996368},
+ {0.000912790048, 0.0706793677, 0.000912790048, 0.000912790048, 0.000912790048, 0.0660077837,
+ 0.000912790048, 0.000912790048, 0.000912790048, 0.850533788, 0.000912790048, 0.000912790048},
+ {0.000370148971, 0.000370148971, 0.263701996, 0.000370148971, 0.000370148971, 0.000370148971,
+ 0.136580197, 0.000370148971, 0.000370148971, 0.000370148971, 0.594535721, 0.000370148971},
+ {0.000992671062, 0.000992671062, 0.000992671062, 0.0903402269, 0.000992671062, 0.000992671062,
+ 0.000992671062, 0.116624552, 0.000992671062, 0.000992671062, 0.000992671062, 0.779137827},
+ {0.153053079, 0.000140879113, 0.000140879113, 0.000140879113, 0.129535583, 0.000140879113,
+ 0.000140879113, 0.000140879113, 0.71543903, 0.000140879113, 0.000140879113, 0.000140879113},
+ {0.000273593053, 0.0390475313, 0.000273026027, 0.000273026027, 0.0002731413, 0.0609617098,
+ 0.000273026027, 0.000273026027, 0.00027418863, 0.89616655, 0.000273026027, 0.000273026027},
+ {0.00024935686, 0.00024935686, 0.224134214, 0.00024935686, 0.00024935686, 0.00024935686,
+ 0.119989707, 0.00024935686, 0.00024935686, 0.00024935686, 0.652385082, 0.00024935686},
+ {0.000703357057, 0.000703357057, 0.000703357057, 0.0774320717, 0.000703357057, 0.000703357057,
+ 0.000703357057, 0.0881611725, 0.000703357057, 0.000703357057, 0.000703357057, 0.824559757},
+ {0.120522857, 0.000164103067, 0.000164103067, 0.000164103067, 0.0827988704, 0.000164103067,
+ 0.000164103067, 0.000164103067, 0.79438083, 0.000164103067, 0.000164103067, 0.000164103067},
+ {0.000324650526, 0.0436389616, 0.000324650526, 0.000324650526, 0.000324650526, 0.0610063298,
+ 0.000324650526, 0.000324650526, 0.000324650526, 0.890809601, 0.000324650526, 0.000324650526},
+ {0.00044702111, 0.000447020831, 0.298059864, 0.000447020831, 0.000447537949, 0.000447020831,
+ 0.0757672197, 0.000447020831, 0.000447039189, 0.000447020831, 0.619914089, 0.000447020831},
+ {0.000460428661, 0.000460428661, 0.000460428661, 0.0445651204, 0.000460428661, 0.000460428661,
+ 0.000460428661, 0.0472083803, 0.000460428661, 0.000460428661, 0.000460428661, 0.901780498},
+ {0.129409727, 0.000180205845, 0.000180205845, 0.000180205845, 0.110681588, 0.000180205845,
+ 0.000180205845, 0.000180205845, 0.757385803, 0.000180205845, 0.000180205845, 0.000180205845},
+ {0.000237587758, 0.0323776588, 0.000237587758, 0.000237587758, 0.000237587758, 0.0435983253,
+ 0.000237587758, 0.000237587758, 0.000237587758, 0.920697787, 0.000237587758, 0.000237587758},
+ {0.000188825576, 0.000188825576, 0.166170812, 0.000188825576, 0.000188825576, 0.000188825576,
+ 0.0914469645, 0.000188825576, 0.000188825576, 0.000188825576, 0.739738666, 0.000188825576},
+ {0.000120613319, 0.000119901068, 0.000119901068, 0.0411298523, 0.00011990536, 0.000119901068,
+ 0.000119901068, 0.0701345987, 0.000119982783, 0.000119901068, 0.000119901068, 0.887056136}},
+ {// stickPmf
+ {0.000572145812, 0.0249470981, 0.444128106, 0.0284293982, 0.000572145812, 0.00410024964,
+ 0.126982064, 0.0141199516, 0.000572145812, 0.0811671877, 0.200116148, 0.0714326301},
+ {0.157990255, 0.00032657149, 0.325814692, 0.0288899281, 0.0375283566, 0.00032657149,
+ 0.132842697, 0.0224909505, 0.0879452518, 0.00032657149, 0.115500492, 0.0883848042},
+ {0.358522418, 0.0235568496, 0.000864084339, 0.12651633, 0.052549674, 0.00798327989,
+ 0.000864084339, 0.0533075108, 0.101824542, 0.0657216859, 0.000864084339, 0.203105036},
+ {0.327657509, 0.0162688106, 0.283895663, 0.000488837542, 0.0404076842, 0.00606060934,
+ 0.121371198, 0.000488837542, 0.073572133, 0.0352442565, 0.0916114349, 0.000488837542},
+ {0.000292823503, 0.020723219, 0.422716082, 0.037557393, 0.000292823503, 0.00878801277,
+ 0.124731093, 0.00806155344, 0.000292823503, 0.225808232, 0.102362669, 0.0469091586},
+ {0.138184081, 0.000176275107, 0.353691494, 0.0366734007, 0.0669928396, 0.000160372684,
+ 0.152689688, 0.0113283557, 0.0933947819, 0.000164997607, 0.112044223, 0.0337002216},
+ {0.376363111, 0.0147959899, 0.000406742121, 0.0326187747, 0.124202612, 0.00485744034,
+ 0.000406742121, 0.0134983573, 0.154095152, 0.172573194, 0.000406742121, 0.103741432},
+ {0.262107723, 0.0139625679, 0.198087107, 0.000317282509, 0.0764115908, 0.00637190626,
+ 0.0728789745, 0.000317282509, 0.104982564, 0.168668474, 0.0939908336, 0.000317282509},
+ {0.000568426528, 0.023701166, 0.506335229, 0.0366977326, 0.000568426528, 0.0181752864,
+ 0.100905875, 0.0188386785, 0.000568426528, 0.103148426, 0.104347954, 0.0833022395},
+ {0.122032869, 0.000140837724, 0.364441855, 0.0318879599, 0.0573868815, 0.000140837724,
+ 0.218659155, 0.0182715104, 0.0477966054, 0.000140837724, 0.0787912025, 0.0596052591},
+ {0.396405225, 0.016157559, 0.0022109107, 0.0436250361, 0.107726515, 0.0146690405,
+ 0.00100899286, 0.00886596365, 0.127007802, 0.0802697981, 0.000874635434, 0.196943448},
+ {0.290414877, 0.0177202936, 0.298960183, 0.000526686986, 0.092257921, 0.00700004966,
+ 0.131147972, 0.000526686986, 0.0859777319, 0.0271600613, 0.0451474144, 0.000526686986},
+ {0.000428202504, 0.0191583409, 0.293741471, 0.0182063946, 0.000428202504, 0.0180388612,
+ 0.128372288, 0.0257390091, 0.000428202504, 0.107512898, 0.199359888, 0.186445229},
+ {0.149686594, 0.000207150428, 0.229916753, 0.0144187336, 0.0651607498, 0.000207150428,
+ 0.0690123099, 0.0156771722, 0.0675998749, 0.000207150428, 0.109695096, 0.277175513},
+ {0.403196746, 0.0177901114, 0.000543576149, 0.0343915039, 0.122395417, 0.00275829252,
+ 0.000543576149, 0.0269263943, 0.0864036291, 0.043786384, 0.000543576149, 0.258002912},
+ {0.142268585, 0.0113274953, 0.228643486, 0.000236113134, 0.0585796342, 0.00333756943,
+ 0.175460415, 0.000247031094, 0.0538439511, 0.0417853213, 0.282875612, 0.00024225267}}};
+
+constexpr double transProbs[nContexts][3][4] = {
+ {// AA
+ {-10.2090406, 3.32117257, -0.520117212, 0.0249921413},
+ {8.9793309, -5.3961519, 0.763613361, -0.0364406433},
+ {7.97224482, -3.91644241, 0.485967707, -0.0210642023}},
+ {// AC
+ {-8.67385002, 1.79692678, -0.189152589, 0.00593875207},
+ {-2.06471994, -0.248833169, 0.0239433129, -0.00081901914},
+ {1.9844226, -0.834246008, 0.0238906298, 0.000299399008}},
+ {// AG
+ {-5.57888183, 0.8609919, -0.0601847875, -0.00209638118},
+ {-0.0651352822, -0.37813552, -0.116036072, 0.0118044463},
+ {10.3924646, -4.87649024, 0.583074371, -0.0230668231}},
+ {// AT
+ {-6.05918189, 1.12045586, -0.175515022, 0.0081588429},
+ {-7.72954717, 1.74329164, -0.228908375, 0.00956968799},
+ {0.739498015, -0.820985799, 0.034285628, 0.000308222633}},
+ {// CA
+ {6.71916863, -4.10669176, 0.61911044, -0.0312872449},
+ {-10.7393976, 3.94632964, -0.664301449, 0.0363137581},
+ {0.534509761, -0.614208207, -0.0230577035, 0.00504754227}},
+ {// CC
+ {-11.8487027, 2.18336459, -0.165744338, 0.00381936993},
+ {-5.40511615, 0.582166777, -0.0352022429, 0.000649948647},
+ {3.62707756, -1.60860162, 0.127123969, -0.00333194156}},
+ {// CG
+ {-5.33620523, 1.30599458, -0.227619416, 0.0129127127},
+ {-8.16537748, 2.82193426, -0.53094601, 0.0319445398},
+ {13.0580439, -6.8013044, 0.926568827, -0.0424609205}},
+ {// CT
+ {12.613656, -5.19052298, 0.561451908, -0.0206707933},
+ {-9.73651127, 2.15521846, -0.225290947, 0.00767974576},
+ {7.16886431, -2.96320727, 0.2726381, -0.00837654404}},
+ {// GA
+ {-3.6938331, 0.483533725, -0.0450473465, 0.000160356592},
+ {6.68949621, -4.66399411, 0.677757326, -0.0318630437},
+ {1.33410944, -0.988217134, -0.00422287463, 0.00562702615}},
+ {// GC
+ {2.66843035, -1.62797424, 0.147491681, -0.00444976878},
+ {1.14199306, -0.843025302, 0.0604509646, -0.00133222978},
+ {5.92824303, -2.33995279, 0.175825931, -0.00449046566}},
+ {// GG
+ {-9.53335176, 2.51765873, -0.318510141, 0.0117320354},
+ {-0.524486685, -0.436816275, -0.0926096178, 0.0112705438},
+ {8.88135834, -4.79762534, 0.664134538, -0.0311007622}},
+ {// GT
+ {3.35914891, -1.38173016, 0.0781044221, -0.000763136872},
+ {10.8476498, -4.75169784, 0.525334182, -0.0193477301},
+ {15.5403876, -5.8956994, 0.604296436, -0.0209965825}},
+ {// TA
+ {5.16029404, -3.68434804, 0.621640606, -0.0350676726},
+ {11.8825308, -7.04640014, 1.07909276, -0.0537996376},
+ {-9.20746548, 4.71271573, -0.951489582, 0.0565839306}},
+ {// TC
+ {2.73609906, -1.3992629, 0.120116348, -0.00349409007},
+ {-4.54672948, 0.344140535, -0.0129698225, -0.000227430524},
+ {0.840535522, -1.10879024, 0.0800657995, -0.00206209905}},
+ {// TG
+ {-6.45925912, 1.63128009, -0.226062028, 0.0099607621},
+ {-8.86160161, 3.12245384, -0.583442174, 0.033990528},
+ {3.46514973, -1.91203442, 0.12119972, 0.00103341906}},
+ {// TT
+ {2.70097834, -1.96301816, 0.246186023, -0.00988186677},
+ {1.80568256, -1.56446979, 0.176114086, -0.00665610868},
+ {1.91982573, -1.50191002, 0.153230363, -0.00550115233}}};
+
+inline double CalculateExpectedLogLikelihoodOfOutcomeRow(const int index, const uint8_t row,
+ const bool secondMoment)
+{
+ double expectedLL = 0;
+ for (size_t i = 0; i < nOutcomes; i++) {
+ double curProb = emissionPmf[index][row][i];
+ double lgCurProb = std::log(curProb);
+ if (!secondMoment)
+ expectedLL += curProb * lgCurProb;
+ else
+ expectedLL += curProb * pow(lgCurProb, 2.0);
+ }
+ return expectedLL;
+}
+
+double S_P1C1v2_Model::ExpectedLogLikelihoodOfOutcomeRow(const size_t move, const uint8_t prev,
+ const uint8_t curr,
+ const bool secondMoment) const
+{
+ const auto row = (prev << 2) | curr;
+ const auto moment = secondMoment ? 1 : 0;
+ return cachedEmissionExpectations_[row][move][moment];
+}
+
+S_P1C1v2_Model::S_P1C1v2_Model(const SNR& snr) : snr_(snr)
+{
+ for (size_t ctx = 0; ctx < nContexts; ++ctx) {
+ const uint8_t bp = ctx & 3; // (equivalent to % 4)
+ const double snr1 = clip(snr_[bp], snrRanges[bp]), snr2 = snr1 * snr1, snr3 = snr2 * snr1;
+ double sum = 1.0;
+
+ // cached transitions
+ ctxTrans_[ctx][0] = 1.0;
+ for (size_t j = 0; j < 3; ++j) {
+ double xb = transProbs[ctx][j][0] + snr1 * transProbs[ctx][j][1] +
+ snr2 * transProbs[ctx][j][2] + snr3 * transProbs[ctx][j][3];
+ xb = std::exp(xb);
+ ctxTrans_[ctx][j + 1] = xb;
+ sum += xb;
+ }
+ for (size_t j = 0; j < 4; ++j)
+ ctxTrans_[ctx][j] /= sum;
+
+ // cached expectations
+ for (size_t move = 0; move < 3; ++move) {
+ cachedEmissionExpectations_[ctx][move][0] =
+ CalculateExpectedLogLikelihoodOfOutcomeRow(move, ctx, false);
+ cachedEmissionExpectations_[ctx][move][1] =
+ CalculateExpectedLogLikelihoodOfOutcomeRow(move, ctx, true);
+ }
+ }
+}
+
+std::unique_ptr<AbstractRecursor> S_P1C1v2_Model::CreateRecursor(
+ std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr, double scoreDiff) const
+{
+ return std::unique_ptr<AbstractRecursor>(
+ new S_P1C1v2_Recursor(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr, scoreDiff));
+}
+
+std::vector<TemplatePosition> S_P1C1v2_Model::Populate(const std::string& tpl) const
+{
+ std::vector<TemplatePosition> result;
+
+ if (tpl.empty()) return result;
+
+ result.reserve(tpl.size());
+
+ // calculate transition probabilities
+ uint8_t prev = detail::TranslationTable[static_cast<uint8_t>(tpl[0])];
+ if (prev > 3) throw std::invalid_argument("invalid character in template!");
+
+ for (size_t i = 1; i < tpl.size(); ++i) {
+ const uint8_t curr = detail::TranslationTable[static_cast<uint8_t>(tpl[i])];
+ if (curr > 3) throw std::invalid_argument("invalid character in template!");
+ const auto row = (prev << 2) | curr;
+ const auto params = ctxTrans_[row];
+ result.emplace_back(TemplatePosition{
+ tpl[i - 1], prev,
+ params[0], // match
+ params[1], // branch
+ params[2], // stick
+ params[3] // deletion
+ });
+ prev = curr;
+ }
+ result.emplace_back(TemplatePosition{tpl.back(), prev, 1.0, 0.0, 0.0, 0.0});
+
+ return result;
+}
+
+double S_P1C1v2_Model::ExpectedLogLikelihoodForMatchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::MATCH), prev, curr,
+ secondMoment);
+}
+double S_P1C1v2_Model::ExpectedLogLikelihoodForStickEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::STICK), prev, curr,
+ secondMoment);
+}
+double S_P1C1v2_Model::ExpectedLogLikelihoodForBranchEmission(uint8_t prev, uint8_t curr,
+ bool secondMoment) const
+{
+ return ExpectedLogLikelihoodOfOutcomeRow(static_cast<uint8_t>(MoveType::BRANCH), prev, curr,
+ secondMoment);
+}
+
+S_P1C1v2_Recursor::S_P1C1v2_Recursor(std::unique_ptr<AbstractTemplate>&& tpl, const MappedRead& mr,
+ double scoreDiff)
+ : Recursor<S_P1C1v2_Recursor>(std::forward<std::unique_ptr<AbstractTemplate>>(tpl), mr,
+ scoreDiff)
+{
+}
+
+std::vector<uint8_t> S_P1C1v2_Recursor::EncodeRead(const MappedRead& read)
+{
+ std::vector<uint8_t> result;
+
+ result.reserve(read.Length());
+
+ for (size_t i = 0; i < read.Length(); ++i) {
+ if (read.PulseWidth[i] < 1U) throw std::runtime_error("invalid PulseWidth in read!");
+ const uint8_t pw = std::min(2, read.PulseWidth[i] - 1);
+ const uint8_t bp = detail::TranslationTable[static_cast<uint8_t>(read.Seq[i])];
+ if (bp > 3) throw std::invalid_argument("invalid character in read!");
+ const uint8_t em = (pw << 2) | bp;
+ if (em > 11) throw std::runtime_error("read encoding error!");
+ result.emplace_back(em);
+ }
+
+ return result;
+}
+
+double S_P1C1v2_Recursor::EmissionPr(MoveType move, uint8_t emission, uint8_t prev, uint8_t curr)
+{
+ assert(move != MoveType::DELETION);
+ const auto row = (prev << 2) | curr;
+ return emissionPmf[static_cast<uint8_t>(move)][row][emission] * kCounterWeight;
+}
+
+double S_P1C1v2_Recursor::UndoCounterWeights(const size_t nEmissions) const
+{
+ return -std::log(kCounterWeight) * nEmissions;
+}
+} // namespace anonymous
+} // namespace Consensus
+} // namespace PacBio
diff --git a/src/poa/PoaConsensus.cpp b/src/poa/PoaConsensus.cpp
index 37cba74..f8b914a 100644
--- a/src/poa/PoaConsensus.cpp
+++ b/src/poa/PoaConsensus.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/poa/PoaGraph.cpp b/src/poa/PoaGraph.cpp
index bdd0664..b3e80d1 100644
--- a/src/poa/PoaGraph.cpp
+++ b/src/poa/PoaGraph.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/poa/PoaGraphImpl.cpp b/src/poa/PoaGraphImpl.cpp
index 23fcc4b..83c2c4b 100644
--- a/src/poa/PoaGraphImpl.cpp
+++ b/src/poa/PoaGraphImpl.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <set>
diff --git a/src/poa/PoaGraphImpl.h b/src/poa/PoaGraphImpl.h
index c05b78f..2662f2e 100644
--- a/src/poa/PoaGraphImpl.h
+++ b/src/poa/PoaGraphImpl.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
diff --git a/src/poa/PoaGraphTraversals.cpp b/src/poa/PoaGraphTraversals.cpp
index fe8dcd0..bf91a64 100644
--- a/src/poa/PoaGraphTraversals.cpp
+++ b/src/poa/PoaGraphTraversals.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
diff --git a/src/poa/RangeFinder.cpp b/src/poa/RangeFinder.cpp
index c63b6e3..48afba1 100644
--- a/src/poa/RangeFinder.cpp
+++ b/src/poa/RangeFinder.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include <algorithm>
#include <map>
diff --git a/src/poa/VectorL.h b/src/poa/VectorL.h
index bdf3007..cb24f9e 100644
--- a/src/poa/VectorL.h
+++ b/src/poa/VectorL.h
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#pragma once
diff --git a/swig/ConsensusCore2.i.in b/swig/ConsensusCore2.i.in
index f76469e..717e2ec 100644
--- a/swig/ConsensusCore2.i.in
+++ b/swig/ConsensusCore2.i.in
@@ -44,15 +44,27 @@ namespace std
%template(IntPair) std::pair<int, int>;
%template(LengthPair) std::pair<size_t, size_t>;
+ %apply const pair<double, double>& {pair<double, double>*};
+ %apply const pair<int, int>& {pair<int, int>*};
+ %apply const pair<size_t, size_t>& {pair<size_t, size_t>*};
+
// set types
%template(StringSet) std::set<std::string>;
+ %apply const set<string>& {set<string>*};
+
// vector types
%template(DoubleVector) std::vector<double>;
%template(IntPairVector) std::vector<std::pair<int, int> >;
%template(IntVector) std::vector<int>;
%template(StringVector) std::vector<string>;
%template(Uint8Vector) std::vector<uint8_t>;
+
+ %apply const vector<double>& {vector<double>*};
+ %apply const vector<pair<int, int> >& {vector<pair<int, int> >*};
+ %apply const vector<int>& {vector<int>*};
+ %apply const vector<string>& {vector<string>*};
+ %apply const vector<uint8_t>& {vector<uint8_t>*};
}
%define py_tp_str(cls)
diff --git a/tests/Mutations.cpp b/tests/Mutations.cpp
index c443a94..1e57b8a 100644
--- a/tests/Mutations.cpp
+++ b/tests/Mutations.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include "Mutations.h"
diff --git a/tests/Mutations.h b/tests/Mutations.h
index e3d1027..550242c 100644
--- a/tests/Mutations.h
+++ b/tests/Mutations.h
@@ -1,3 +1,39 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#pragma once
#include <string>
#include <vector>
diff --git a/tests/RandomDNA.cpp b/tests/RandomDNA.cpp
index 559e1ea..ca0963f 100644
--- a/tests/RandomDNA.cpp
+++ b/tests/RandomDNA.cpp
@@ -1,3 +1,37 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
#include "RandomDNA.h"
@@ -17,11 +51,10 @@ string RandomDNA(const size_t n, mt19937* const gen)
std::vector<uint8_t> RandomPW(const size_t n, mt19937* const gen)
{
- std::vector<uint8_t> pws {1, 2, 3};
std::vector<uint8_t> result(n);
- uniform_int_distribution<size_t> rand(0, 3);
-
+ uniform_int_distribution<size_t> rand(1, 4);
for (size_t i = 0; i < n; ++i)
- result[i] = pws[rand(*gen)];
+ result[i] = rand(*gen);
+
return result;
-}
\ No newline at end of file
+}
diff --git a/tests/RandomDNA.h b/tests/RandomDNA.h
index 097cd7f..b9d7185 100644
--- a/tests/RandomDNA.h
+++ b/tests/RandomDNA.h
@@ -1,3 +1,39 @@
+// Copyright (c) 2011-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#pragma once
#include <random>
#include <string>
diff --git a/tests/TestIntegrator.cpp b/tests/TestIntegrator.cpp
index f91b6ff..c714555 100644
--- a/tests/TestIntegrator.cpp
+++ b/tests/TestIntegrator.cpp
@@ -68,8 +68,9 @@ namespace {
const double prec = 0.001; // alpha/beta mismatch tolerance
const SNR snr(10, 7, 5, 11);
-const std::string P6C4 = "P6-C4";
-const std::string SP1C1 = "S/P1-C1";
+const string P6C4 = "P6-C4";
+const string SP1C1 = "S/P1-C1.1";
+const string SP1C1v2 = "S/P1-C1.2";
const string longTpl =
"GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCGTTCTTCTTCGTCAT"
@@ -99,17 +100,17 @@ const string longRead =
"AATTACTATCTCCCGAAAGAATC";
const IntegratorConfig cfg(std::numeric_limits<double>::quiet_NaN());
-Read MkRead(const std::string& seq, const SNR& snr, const std::string& mdl, const std::vector<uint8_t>& pw)
+Read MkRead(const string& seq, const SNR& snr, const string& mdl, const vector<uint8_t>& pw)
{
- std::vector<uint8_t> ipd(0, seq.length());
+ vector<uint8_t> ipd(0, seq.length());
return Read("NA", seq, ipd, pw, snr, mdl);
}
TEST(IntegratorTest, TestLongTemplate)
{
- //TODO: Write a test for a longer molecule
- auto mdl = P6C4;
- std::vector<uint8_t> pw(longTpl.length(), 1);
+ // TODO: Write a test for a longer molecule
+ const string mdl = P6C4;
+ vector<uint8_t> pw(longTpl.length(), 1);
MonoMolecularIntegrator ai(longTpl, cfg, snr, mdl);
EXPECT_EQ(AddReadResult::SUCCESS,
ai.AddRead(MappedRead(MkRead(longRead, snr, mdl, pw), StrandEnum::FORWARD, 0,
@@ -117,39 +118,61 @@ TEST(IntegratorTest, TestLongTemplate)
EXPECT_NEAR(-148.92614949338801011, ai.LL(), prec);
}
+void TestTiming(const string& mdl)
+{
+ const vector<uint8_t> pws(longTpl.length(), 2);
+ const size_t nsamp = 5000;
+ MonoMolecularIntegrator ai(longTpl, cfg, snr, mdl);
+ const auto stime = std::chrono::high_resolution_clock::now();
+ for (size_t i = 0; i < nsamp; ++i)
+ EXPECT_EQ(AddReadResult::SUCCESS,
+ ai.AddRead(MappedRead(MkRead(longRead, snr, mdl, pws), StrandEnum::FORWARD, 0,
+ longTpl.length(), true, true)));
+ const auto etime = std::chrono::high_resolution_clock::now();
+ const auto duration =
+ std::chrono::duration_cast<std::chrono::microseconds>(etime - stime).count();
+ // std::cout << "avg duration: " << duration / nsamp << "us" << std::endl;
+ EXPECT_LT(duration / nsamp, 1500);
+}
+
// disable this test under debug builds (which are not fast enough to pass these timings)
#ifndef NDEBUG
-TEST(IntegratorTest, DISABLED_TestLongTemplateTiming)
+TEST(IntegratorTest, DISABLED_TestLongTemplateTimingP6C4)
#else
-TEST(IntegratorTest, TestLongTemplateTiming)
+TEST(IntegratorTest, TestLongTemplateTimingP6C4)
#endif
{
- const size_t nsamp = 2000;
- const std::vector<uint8_t> pws(longTpl.length(), 2);
- std::vector<string> mdls {P6C4, SP1C1};
- for( auto mdl : mdls) {
- MonoMolecularIntegrator ai(longTpl, cfg, snr, mdl);
- const auto stime = std::chrono::high_resolution_clock::now();
- for (size_t i = 0; i < nsamp; ++i)
- EXPECT_EQ(AddReadResult::SUCCESS,
- ai.AddRead(MappedRead(MkRead(longRead, snr, mdl, pws), StrandEnum::FORWARD, 0,
- longTpl.length(), true, true)));
- const auto etime = std::chrono::high_resolution_clock::now();
- const auto duration =
- std::chrono::duration_cast<std::chrono::microseconds>(etime - stime).count();
- EXPECT_LT(duration / nsamp, 1500);
- }
+ TestTiming(P6C4);
+}
+
+// disable this test under debug builds (which are not fast enough to pass these timings)
+#ifndef NDEBUG
+TEST(IntegratorTest, DISABLED_TestLongTemplateTimingSP1C1)
+#else
+TEST(IntegratorTest, TestLongTemplateTimingSP1C1)
+#endif
+{
+ TestTiming(SP1C1);
+}
+
+// disable this test under debug builds (which are not fast enough to pass these timings)
+#ifndef NDEBUG
+TEST(IntegratorTest, DISABLED_TestLongTemplateTimingSP1C1v2)
+#else
+TEST(IntegratorTest, TestLongTemplateTimingSP1C1v2)
+#endif
+{
+ TestTiming(SP1C1v2);
}
-std::tuple<std::string, StrandEnum> Mutate(const std::string& tpl, const size_t nmut,
- std::mt19937* const gen)
+std::tuple<string, StrandEnum> Mutate(const string& tpl, const size_t nmut, std::mt19937* const gen)
{
string result;
if (nmut == 0)
result = tpl;
else {
- std::vector<Mutation> muts;
+ vector<Mutation> muts;
std::uniform_int_distribution<size_t> rand(0, tpl.length() - 1);
std::set<size_t> sites;
@@ -174,7 +197,7 @@ std::tuple<std::string, StrandEnum> Mutate(const std::string& tpl, const size_t
template <typename F, typename G>
void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeIntegrator,
- const G& addRead, const std::string mdl)
+ const G& addRead, const string& mdl)
{
std::random_device rd;
std::mt19937 gen(rd());
@@ -195,11 +218,11 @@ void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeInt
vector<Mutation> muts{mut};
const string app = ApplyMutations(tpl, &muts); // template with mutation applied
std::tie(read, strand) = Mutate(app, nmut, &gen);
- std::vector<uint8_t> pws = RandomPW(read.length(), &gen);
+ vector<uint8_t> pws = RandomPW(read.length(), &gen);
try {
auto ai1 = makeIntegrator(tpl);
- const auto arr1 = addRead(
- ai1, MappedRead(MkRead(read, snr, mdl, pws), strand, 0, tpl.length(), true, true));
+ const auto arr1 = addRead(ai1, MappedRead(MkRead(read, snr, mdl, pws), strand, 0,
+ tpl.length(), true, true));
EXPECT_EQ(AddReadResult::SUCCESS, arr1);
if (arr1 != AddReadResult::SUCCESS) {
std::cerr << std::endl << "!! alpha/beta mismatch:" << std::endl;
@@ -207,8 +230,8 @@ void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeInt
std::cerr << " " << read.length() << ", " << read << std::endl;
}
auto ai2 = makeIntegrator(app);
- const auto arr2 = addRead(
- ai2, MappedRead(MkRead(read, snr, mdl, pws), strand, 0, app.length(), true, true));
+ const auto arr2 = addRead(ai2, MappedRead(MkRead(read, snr, mdl, pws), strand, 0,
+ app.length(), true, true));
EXPECT_EQ(AddReadResult::SUCCESS, arr2);
if (arr2 != AddReadResult::SUCCESS) {
std::cerr << std::endl << "!! alpha/beta mismatch:" << std::endl;
@@ -237,7 +260,10 @@ void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeInt
std::cerr << " " << tpl.length() << ", " << tpl << std::endl;
std::cerr << " " << app.length() << ", " << app << std::endl;
std::cerr << " " << ai1.TemplateLength() << ", " << string(ai1) << std::endl;
- std::cerr << " " << read.length() << ", " << read << std::endl;
+ std::stringstream result;
+ std::copy(pws.begin(), pws.end(), std::ostream_iterator<int>(result, " "));
+ std::cerr << " " << read.length() << ", " << read << " - " << result.str() << std::endl;
+
++nerror;
}
} catch (const std::exception& e) {
@@ -257,9 +283,11 @@ void MutationEquivalence(const size_t nsamp, const size_t nmut, const F& makeInt
// EXPECT_LT(nerror, ntests / 1000);
}
-
-void MonoEquivalence(std::string mdl) {
- auto makeMono = [mdl](const string& tpl) { return MonoMolecularIntegrator(tpl, cfg, snr, mdl); };
+void MonoEquivalence(const string& mdl)
+{
+ auto makeMono = [mdl](const string& tpl) {
+ return MonoMolecularIntegrator(tpl, cfg, snr, mdl);
+ };
auto monoRead = [](MonoMolecularIntegrator& ai, const MappedRead& mr) {
return ai.AddRead(mr);
};
@@ -267,18 +295,12 @@ void MonoEquivalence(std::string mdl) {
MutationEquivalence(333, 1, makeMono, monoRead, mdl);
MutationEquivalence(334, 0, makeMono, monoRead, mdl);
}
-
-TEST(IntegratorTest, TestMonoMutationEquivalenceP6C4)
-{
- MonoEquivalence(P6C4);
-}
-
-TEST(IntegratorTest, TestMonoMutationEquivalenceSP1C1)
-{
- MonoEquivalence(SP1C1);
-}
-void MultiEquivalence(std::string mdl) {
+TEST(IntegratorTest, TestMonoMutationEquivalenceP6C4) { MonoEquivalence(P6C4); }
+TEST(IntegratorTest, TestMonoMutationEquivalenceSP1C1) { MonoEquivalence(SP1C1); }
+TEST(IntegratorTest, TestMonoMutationEquivalenceSP1C1v2) { MonoEquivalence(SP1C1v2); }
+void MultiEquivalence(const string& mdl)
+{
auto makeMulti = [](const string& tpl) { return MultiMolecularIntegrator(tpl, cfg); };
auto multiRead = [](MultiMolecularIntegrator& ai, const MappedRead& mr) {
return ai.AddRead(mr);
@@ -287,28 +309,20 @@ void MultiEquivalence(std::string mdl) {
MutationEquivalence(333, 1, makeMulti, multiRead, mdl);
MutationEquivalence(334, 0, makeMulti, multiRead, mdl);
}
-
-TEST(IntegratorTest, TestMultiMutationEquivalenceP6C4)
-{
- MultiEquivalence(P6C4);
-}
-
-TEST(IntegratorTest, TestMultiMutationEquivalenceSP)
-{
- MultiEquivalence(SP1C1);
-}
-
+TEST(IntegratorTest, TestMultiMutationEquivalenceP6C4) { MultiEquivalence(P6C4); }
+TEST(IntegratorTest, TestMultiMutationEquivalenceSP1C1) { MultiEquivalence(SP1C1); }
+TEST(IntegratorTest, TestMultiMutationEquivalenceSP1C1v2) { MultiEquivalence(SP1C1v2); }
// TODO(lhepler): test multi/mono equivalence
// TODO(lhepler): test multiple mutation testing mono and multi
TEST(IntegratorTest, TestP6C4NoCovAgainstCSharpModel)
{
const string tpl = "ACGTCGT";
- const std::vector<uint8_t> pw(tpl.length(), 1);
+ const vector<uint8_t> pw(tpl.length(), 1);
auto mdl = P6C4;
MultiMolecularIntegrator ai(tpl, cfg);
-
+
EXPECT_EQ(AddReadResult::SUCCESS,
ai.AddRead(MappedRead(MkRead("ACGTACGT", snr, mdl, pw), StrandEnum::FORWARD, 0,
tpl.length(), true, true)));
diff --git a/tests/TestTemplate.cpp b/tests/TestTemplate.cpp
index 9318fda..c92f42f 100644
--- a/tests/TestTemplate.cpp
+++ b/tests/TestTemplate.cpp
@@ -340,5 +340,19 @@ TEST(TemplateTest, NullTemplate)
EXPECT_EQ(0, virt.Length());
}
}
+
+
+TEST(TemplateTest, P6SiteNormalParameters)
+{
+ const string tpl = "ACGATACATACGATCGA";
+ const SNR snr(10, 7, 5, 11);
+ auto mdl = "P6-C4";
+ Template tester(tpl, ModelFactory::Create(mdl, snr));
+ auto results = tester.NormalParameters();
+
+ EXPECT_EQ(-9.3915588824261888, results.first);
+ EXPECT_EQ(26.883352639390957, results.second);
+}
+
} // namespace anonymous
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/consensuscore2.git
More information about the debian-med-commit
mailing list