[med-svn] [contrafold] 01/02: Imported Upstream version 2.02
Andreas Tille
tille at debian.org
Thu Sep 15 07:25:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository contrafold.
commit 12870e02de51fef3defaaad520359f75521ffbdf
Author: Andreas Tille <tille at debian.org>
Date: Thu Sep 15 09:24:35 2016 +0200
Imported Upstream version 2.02
---
doc/algorithm2e.sty | 2122 +++++++++
doc/contrafold.pdf | Bin 0 -> 663307 bytes
doc/logo.jpg | Bin 0 -> 612876 bytes
doc/macros.sty | 334 ++
doc/manual.pdf | Bin 0 -> 745727 bytes
doc/manual.tex | 836 ++++
doc/supplementary.pdf | Bin 0 -> 175634 bytes
doc/supplementary.tex | 1054 +++++
src/BundleMethod.hpp | 59 +
src/BundleMethod.ipp | 295 ++
src/CGLinear.hpp | 43 +
src/CGLinear.ipp | 131 +
src/CGOptimizationWrapper.hpp | 41 +
src/CGOptimizationWrapper.ipp | 54 +
src/ComputationEngine.hpp | 118 +
src/ComputationEngine.ipp | 588 +++
src/ComputationWrapper.hpp | 70 +
src/ComputationWrapper.ipp | 460 ++
src/Config.hpp | 239 ++
src/Contrafold.cpp | 553 +++
src/DistributedComputation.hpp | 182 +
src/DistributedComputation.ipp | 406 ++
src/EncapsulatedPostScript.cpp | 145 +
src/EncapsulatedPostScript.hpp | 34 +
src/FileDescription.cpp | 81 +
src/FileDescription.hpp | 36 +
src/InferenceEngine.hpp | 301 ++
src/InferenceEngine.ipp | 4759 +++++++++++++++++++++
src/InnerOptimizationWrapper.hpp | 47 +
src/InnerOptimizationWrapper.ipp | 43 +
src/InnerOptimizationWrapperBundleMethod.hpp | 44 +
src/InnerOptimizationWrapperBundleMethod.ipp | 83 +
src/InnerOptimizationWrapperLBFGS.hpp | 45 +
src/InnerOptimizationWrapperLBFGS.ipp | 99 +
src/InnerOptimizationWrapperSubgradientMethod.hpp | 44 +
src/InnerOptimizationWrapperSubgradientMethod.ipp | 85 +
src/InnerOptimizationWrapperViterbi.hpp | 44 +
src/InnerOptimizationWrapperViterbi.ipp | 89 +
src/LBFGS.hpp | 54 +
src/LBFGS.ipp | 207 +
src/LineSearch.hpp | 72 +
src/LineSearch.ipp | 168 +
src/LogSpace.hpp | 280 ++
src/MakeCoords.cpp | 413 ++
src/MakeCoords.hpp | 87 +
src/MakeDefaults.pl | 61 +
src/Makefile | 106 +
src/OptimizationWrapper.hpp | 56 +
src/OptimizationWrapper.ipp | 362 ++
src/Options.cpp | 131 +
src/Options.hpp | 39 +
src/OuterOptimizationWrapper.hpp | 45 +
src/OuterOptimizationWrapper.ipp | 137 +
src/ParameterManager.hpp | 120 +
src/ParameterManager.ipp | 268 ++
src/PlotRNA.cpp | 490 +++
src/SStruct.cpp | 641 +++
src/SStruct.hpp | 99 +
src/ScorePrediction.cpp | 298 ++
src/SparseMatrix.hpp | 135 +
src/SparseMatrix.ipp | 627 +++
src/SubgradientMethod.hpp | 50 +
src/SubgradientMethod.ipp | 226 +
src/Utilities.cpp | 420 ++
src/Utilities.hpp | 216 +
src/Utilities.ipp | 599 +++
src/contrafold.params.complementary | 708 +++
src/contrafold.params.noncomplementary | 708 +++
src/contrafold.params.profile | 708 +++
src/roc_area.pl | 171 +
src/score_directory.pl | 147 +
71 files changed, 22413 insertions(+)
diff --git a/doc/algorithm2e.sty b/doc/algorithm2e.sty
new file mode 100644
index 0000000..e453255
--- /dev/null
+++ b/doc/algorithm2e.sty
@@ -0,0 +1,2122 @@
+% algorithm2e.sty --- style file for algorithms
+%% Copyright 1996-2005 Christophe Fiorio
+%
+% This program may be distributed and/or modified under the
+% conditions of the LaTeX Project Public License, either version 1.2
+% of this license or (at your option) any later version.
+% The latest version of this license is in
+% http://www.latex-project.org/lppl.txt
+% and version 1.2 or later is part of all distributions of LaTeX
+% version 1999/12/01 or later.
+%
+% This program consists of the files algorithm2e.sty and algorithm2e.tex
+%
+% Report bugs and comments to:
+% fiorio at lirmm.fr
+%
+% $Id: algorithm2e.sty,v 3.9 2005/10/04 12:34:52 fiorio Exp $
+%
+% PACKAGES REQUIRED:
+%
+% - float (in contrib/supported/float)
+% - ifthen (in base)
+% - xspace (in packages/tools)
+%
+%%%%%%%%%%%%%%% Release 3.9
+%
+% History:
+%
+% - October 04 2005 - revision 3.9 -
+% * ADD: - \setalcaphskip command which set the horizontal skip before Algorithm: in caption when
+% used in ruled algorithm.
+% * ADD: - SetAlgoInsideSkip command which allows to add an extra vertical space before and after
+% the core of the algorithm (ie: \SetAlgoInsideSkip{bigskip})
+% * CHANGE: - caption, when used with figure option, is no more controlled by algorithm2e package
+% and so follows the exact behaviour of figures. The drawback is that you cannot change
+% the typo with AlTitleFnt or CapFnt. The advantage is that if you use the caption package,
+% it works.
+% * FIX: - problem with numbering line and pdflatex
+% * FIX: - error when algorithm2e package was used with beamer and listings together
+% - February 12 2005 - revision 3.8 -
+% * FIX: - extra line with noend option.
+% - February 10 2005 - revision 3.7 -
+% * ADD: - sidecomment: different macros allowing to put text right after code
+% on the same line. They are defined in the same time comment macros
+% are defined with a star after the macro name. By default comments
+% are right justified but this can be changed with the appropriate option
+% in the macro. Ex:
+% . default: \tcc*{side comment}
+% . same as previous: \tcc*[r]{side comment}
+% . left justify: \tcc*[l]{side comment}
+% . here: \tcc*[h]{side comment} don't put the end of line mark before
+% comment (; by default) and don't end the line.
+% . flushed: \tcc*[f]{side comment} same as the precedent but right
+% justified
+% * ADD: - scright OPTION (default): right justified side comments (side comments
+% are flushed to the right)
+% * ADD: - scleft OPTION: left justified side comments (side comments are
+% put right after the code line)
+% * ADD: - \SetSideCommentLeft acts as scleft option
+% * ADD: - \SetSideCommentRight acts as scright option
+% * ADD: - block like macro side text: all macro defining a block allows now
+% to put text right after key words by putting text into (). Done to
+% be used with sidecomment macros, but all text can be used. Ex:
+% \eIf(\tcc*[f]{then comment}){test}{then text}(else side text){else text}
+% * ADD: - fillcomment OPTION (default): end mark of comment is flushed to the
+% right so comments fill all the width of text
+% * ADD: - nofillcomment OPTION: end mark of comment is put right after the
+% comment
+% * ADD: - \SetNoFillComment acts as nofillcomment option.
+% * ADD: - \SetFillComment acts as fillcomment option.
+% * ADD: - dotocloa OPTION which adds an entry in the toc for the list of
+% algorithms. This option load package tocbibind if not already done
+% and so list of figures and list of tables are also added in the toc.
+% If you want to control which ones of the lists will be added in the
+% toc, please load package tocbibind before package algorithm and give
+% it the options you want.
+% * FIX: - vertical spacing for uif macro with noend option
+% * FIX: - all the compatibility problems between caption and other packages
+% * FIX: - typographical differences between list of algorithms and other lists
+% when in report or book
+%
+% - January 24 2005 - revision 3.6 -
+% * FIX: - vertical spacing and space characters at the beginning or end of
+% comments.
+% line numbers of comments not in the nlsty.
+% Thanks to Arnaud Giersch for his comments and suggestions.
+% * FIX: - Set*Sty macro: the styles defined was not protected and was modified
+% by surrounding context. For example KwTo in a \For{}{} was in bold AND
+% italic instead of just in bold.
+% * FIX: - line number misplacement after \Indp
+%
+% - January 21 2005 - revision 3.5 -
+% * ADD: - hidden numbering of the lines. Lines are auto-numbered but numbers
+% are shown only on lines you specify:
+% * linesnumberedhidden option or \linesnumberedhidden macro activate
+% this functionality.
+% * \showln and \showlnlabel{lab} macros make the number visible on
+% the line. \showlnlabel{lab} allows to set a label for this line.
+% Thanks to Samson de Jager who makes this suggestion and provides the
+% macros.
+% * ADD: - \AlCapFnt and \SetAlCapFnt which allow to have a different font for
+% caption. Works like \AlFnt and \SetAlFnt and by default is the same.
+% * ADD: - \AlCapSkip skip length. This vertical space is added before caption
+% in plain or boxed mode. It allows changing the distance between text
+% and caption.
+% * FIX: - caption compatible with IEEEtran class.
+% * FIX: - some vertical spacing error with \uIf macros (Thanks to Arnaud Giersch)
+% * FIX: - Procedure and Function: lines are also numbered like algorithms
+% * FIX: - CommentSty was not used for Comments
+%
+% - January 10 2005 - revision 3.4 -
+% * FIX: - caption compatible with new release of Beamer class.
+%
+% - June 16 2004 - revision 3.3 -
+% * FIX: - Hyperlink references of Hyperref package works now if compiled with pdflatex
+% and [naturalnames] option of hyperref package is used.
+% * FIX: - algorithm[H] had problem in an list environment - corrected
+% * FIX: - interline was not so regular in nested blocks - corrected
+% * ADD - \Setvlineskip macro which set the vertical skip after the little horizontal
+% rule which closes a block in Vlined mode. By default 0.8ex
+%
+% - June 11 2004 - revision 3.2 - AUTO NUMBERING LINES !!!
+% * ADD: auto numbering of the lines (the so asked and so long awaiting feature)
+% this feature is managed by 3 options and 3 commands:
+% - linesnumbered option: lines of the algo are numbered except for comments and
+% input/output (KwInput and KwInOut)
+% - commentsnumbered option: makes comments be numbered
+% - inoutnumbered option: makes data input/output be numbered
+% - \nllabel{lab} labels the line so you can cite with \ref{lab}
+% - \linesnumbered make the following algorithms having auto-numbered lines
+% - \linesnotnumbered make the following algorithms having no auto-numbered lines
+% * Change: algo2e option renames listofalgorithm in listofalgorithme
+% * FIX: new solution for compatibility with color package, more robust and not tricky.
+% Many thanks to David Carlisle for his advices
+%
+% - June 09 2004 - revision 3.1 -
+% * Change: \SetKwSwitch command defines an additionnal
+% macro \uCase and \Case prints end
+% * Change: now macros SetKw* do a renewcommand if the
+% keyword is already defined. So you can redefine
+% default definition at your own convenience or
+% change your definition without introducing a
+% new macro and changing your text.
+% * ADD: new macro \SetKwIF which do \SetKwIf and
+% \SetKwIfElseIf.The following default definition has been added:
+% \SetKwIF{If}{ElseIf}{Else}{if}{then}{else if}{else}{endif}
+% and so you get the macros;
+% \If \eIf \lIf \uIf \ElseIf \uElseIf \lElseIf \Else
+% \uElse \lElse
+% * ADD: new macro \SetAlgoSkip which allow to fix the
+% vertical skip before and after the algorithms.
+% Default is smallskip, do \SetAlgoSkip{} if you
+% don't want an extra space or \SetAlgoSkip{medskip}
+% or \SetAlgoSkip{bigskip} if you want bigger space.
+% * ADD: macro \SetKwIf defines in addition a new macro
+% \uElse (depending on what name you
+% have given in #2 arg).
+% * ADD: macro \SetKwIfElseIf defines in addition a new macro
+% \uElse and \ugElseIf (depending on what name you
+% have given in #2 and #3 arg).
+% * Change: baseline of algorithm is now top, so two
+% algorithms can be put side by side.
+% * FIX: Compatibility with color package solved. The problem
+% was due to a redefinition of standard macros by color package
+% This solves compatibility problem with other packages
+% as pstcol or colortbl.
+% (notified by Dirk Fressmann, Antti Tarvainen and Koby Crammer)
+% * Fix: extra little shift to the right with boxed style
+% algorithm removed (notified by P. Tanovski)
+% * Fix: algonl option was buggy (notified by Jiaying Shen)
+% * Fix: german and portugues options didn't work due to bad
+% typo (notified by Martin Sievers, Thorsten Vitt
+% and Jeronimo Pellegrini)
+%
+% - February 13 2004 - revision 3.0 -
+% * Major revision which makes the package independent from
+% float.sty, so now
+% - algorithm* works better, in particular can be used in
+% multicols environments
+% - (known bug corrected)
+% [H] works now for all sort of environment but is
+% handled differently for classic environment and star
+% environment (algorithm, figure, procedure and
+% function). For star environment, H acts like for
+% classical figure environment, so it doesn't stay here
+% absolutely.
+% - (known bug corrected)
+% you can use now floatflt package with algorithm
+% package and even with figure option. Beware that if
+% you want to put an algorithm inside a floatingfigure,
+% it cannot be floating, so [H] is required and then
+% figure option should not be used, since standard
+% figure[H] are still floating with LaTeX.
+% * boxruled: a new style added. Possible now since no
+% style no more defined by the float package.
+% * nocaptionofalgo: doesn't print Algorithm #: in the
+% caption for algorithm in ruled or algoruled style.
+% note: this is just documentation of a macro which was
+% already in the package.
+% - December 14 2003 - revision 2.52 -
+% * output message shorter
+% * french keyword macro \PourTous was missing for
+% longend option, it has been added.
+% * TitleofAlgo prints Function or Procedure in
+% corresponding environments.
+%
+% - October 27 2003 - revision 2.51 - Revision submitted to CTAN archive
+% * correction of a minor bug which made the caption in procedure
+% and function environments blank with the pdfscreen package
+% (thanks to Joel Gossens for the notification)
+% * add two internal definition to avoid some errors when
+% used with Hyperref package (Hyperref package need to
+% define new counter macro from existing ones, and
+% don't do it for algorithm2e package, so we do it)
+%
+% - October 17 2003 - revision 2.50 - first revision for CTAN archive
+%
+% * add \AlFnt and \SetAlFnt{font} macros:
+% \AlFnt is used at the beginning of the caption and the
+% body of algorithm in order to define the fonts used
+% for typesetting algorithms. You can use it elsewhere
+% you want to typeset text as algorithm. For example
+% you can do \SetAlFnt{\small\sf} to have algorithms
+% typeset in small sf font. Default is nothing so
+% algorithm is typeset as the text of the document.
+% * add \AlTitleFnt{text} and \SetAlTitleFnt{font} macros:
+% The {Algorithm: } in the caption is typeset with
+% \AlTitleFnt{Algorithm:}. You can use it to have text
+% typeset as {Algorithm:} of captions. Default is
+% textbf.
+% Default can be redefined by \SetAlTitleFnt{font}.
+% * add CommentSty typo for text comment.
+% * add some compatibility with hyperref package (still
+% an error on multiply defined refs but pdf correctly
+% generated)
+% * flush text to left in order to have correct
+% indentation even with class as amsart which center
+% all figures
+% * add german, portugues and czech options for title of
+% algorithms and typo.
+% * add portuguese translation of predefined keywords
+% * add czech translation of some predefined keywords
+%
+% - December 23 2002 - revision 2.40
+% * add some french keyword missing
+% * add function* and procedure* environment like
+% algorithme* environment: print in one column even
+% if twocolumn option is specified for the document.
+% * add a new macro \SetKwComment to define macro which
+% writes comments in the text. First argument is the
+% name of the macro, second is the text put before the
+% comment, third is the text put at the end of the
+% comment.Default are \tcc and \tcp
+% * add new options to change the way algo are numbered:
+% [algopart] algo are numbered within part (counter must exist)
+% [algochapter] algo are numbered within chapter
+% [algosection] algo are numbered within section
+%
+% - March 27 2002 - revision 2.39
+% * Gilles Geeraerts: added the \SetKwIfElseIf to manage
+% if (c)
+% i;
+% else if (c)
+% i;
+% ...
+% else
+% i;
+% end
+% * Also added \gIf \gElsIf \gElse.
+%
+% - January 02 2001 - revision 2.38
+% * bugs related to the caption in procedure and function
+% environment are corrected.
+% * bug related to option noend (extra vertical space added
+% after block command as If or For) is corrected.
+% * czech option language added (thanks to Libor Bus: l.bus at sh.cvut.cz).
+%
+% - October 16 2000 - revision 2.37
+% * option algo2e added: change the name of environment
+% algorithm into algorithm2e. So allow to use the package
+% with some journal style which already define an algorithm
+% environment.
+%
+% - September 13 2000 - revision 2.36
+% * option slide added: require package color
+% * Hack for slide class in order to have correct
+% margins
+%
+% - November 25 1999 - revision 2.35
+% * revision number match RCS number
+% * Thanks to David A. Bader, a new option is added:
+% noend: no end keywords are printed.
+%
+% - November 19 1999 - revision 2.32
+% * minor bug on longend option corrected.
+%
+% - August 26 1999 - revision 2.31
+% * add an option : figure
+% this option makes algorithms be figure and so are numbered
+% as figures, have Figure as caption and are put in
+% the \listoffigures
+%
+% - January 21 1999 - revision 2.3 beta
+% add 2 new environments: procedure and function.
+% These environments works like algorithm environment but:
+% - the ruled (or algoruled) style is imperative.
+% - the caption now writes Procedure name....
+% - the syntax of the \caption command is restricted as
+% follow: you MUST put a name followed by 2 braces like
+% this ``()''. You can put arguments inside the braces and
+% text after. If no argument is given, the braces will be
+% removed in the title.
+% - label now puts the name (the text before the braces in the
+% caption) of the procedure or function as reference (not
+% the number like a classic algorithm environment).
+% There are also two new styles : ProcNameSty and
+% ProcArgSty. These style are by default the same as FuncSty
+% and ArgSty but are used in the caption of a procedure or a
+% function.
+%
+% - November 28 1996 - revision 2.22
+% add a new macro \SetKwInParam{arg1}{arg2}{arg3}:
+% it defines a macro \arg1{name}{arg} which prints name in keyword
+% style followed by arg surrounded by arg2 and arg3. The main
+% application is to a function working as \SetKwInput to be used
+% in the head of the algorithm. For example
+% \SetKwInParam{Func}{(}{)} allows
+% \Func{functionname}{list of arguments} which prints:
+% \KwSty{functionname(}list of arguments\KwSty{)}
+%
+%
+% - November 27 1996 - revision 2.21 :
+% minor bug in length of InOut boxes fixed.
+% add algorithm* environment.
+%
+% - July 12 1996 - revision 2.2 : \SetArg and \SetKwArg macros removed.
+%
+% \SetArg has been removed since it never has been
+% documented.
+% \SetKwArg has been removed since \SetKw can now
+% take an argument in order to be consistent with
+% \SetKwData and \SetKwFunction macros.
+%
+% - July 04 1996 - revision 2.1 : still more LaTeX2e! Minor compatibility break
+%
+% Macros use now \newcommand instead of \def, use of \setlength,
+% \newsavebox, ... and other LaTeX2e specific stuff.
+% The compatibility break:
+% - \SetData becomes \SetKwData to be more consistent. So the old
+% \SetKwData becomes \SetKwInput
+% - old macros \titleofalgo, \Freetitleofalgo and \freetitleofalgo
+% from LaTeX209 version which did print a warning message and call
+% \Titleofalgo in version 2.0 are now removed!
+%
+% - March 13 1996 - revision 2.0: first official major revision.
+%
+%
+%%%%%%%%%%%%%%
+%
+% Known bugs:
+% -----------
+% - no more known bugs... all are corrected!
+%
+%%%%%%%%%%%%%%
+%
+% Package options:
+% ---------------
+% - french, english, german, portugues, czech : for the name of the algorithm, e.g.
+% - boxed, boxruled, ruled, algoruled, plain : layout of the algorithm
+% - algo2e : environment is algorithm2e instead of algorithm
+% and \listofalgorithmes instead of \listofalgorithms
+% - slide : to use when making slides
+% - noline,lined,vlined : how block are designed.
+% - linesnumbered : auto numbering of the algorithm's lines
+% - algopart,algochapter,algosection : algo numbering within part, chapter or section
+% - titlenumbered,titlenotnumbered : numbering of title set by \Titleofalgo
+% - figure : algorithms are figures, numbered as figures, and put in the list of figures.
+% - resetcount, noresetcount : start value of line numbers.
+% - algonl : line numbers preceded by algo number
+% - shortend, longend, noend : short or long end keyword as endif for e.g.
+%
+% defaults are; english,plain,resetcount,titlenotnumbered
+%
+%%%%%%%%%%%%%%
+%
+% Short summary
+% -------------
+%
+% algorithm is an environment for writing algorithms in LaTeX2e.
+% It provides macros that allow you to create different
+% sorts of key words; a set of predefined key words
+% is also given.
+%
+% IT should be used as follows
+%
+% \begin{algorithm}
+% ...
+% ...
+% \end{algorithm}
+%
+%
+% IMPORTANT : each line MUST end with \;
+%
+% Note that if you define macros outside the algorithm environment they
+% are available in the whole document; in particular you can use them
+% inside all algorithms without redefining them.
+%
+% an example:
+%
+% \begin{algorithm}[H]
+% \SetLine
+% \AlgData{this text}
+% \AlgResult{how to write algorithm with \LaTeX2e }
+%
+% initialization\;
+% \While{not at end of this document}{
+% read current section\;
+% \eIf{understand}{
+% go to next section\;
+% current section becomes this one\;
+% }{
+% go back to the beginning of current section\;
+% }
+% }
+% \caption{How to write algorithm}
+% \end{algorithm}
+%
+%
+%%%%%%%%%%%%%% predefined english keywords
+%
+% \AlgData{input}
+% \AlgResult{output}
+% \KwIn{input}
+% \KwOut{output}
+% \KwData{input}
+% \KwResult{output}
+% \Ret{[value]}
+% \KwTo % a simple keyword
+% \Begin{block inside}
+% \If{condition}{Then block} % in a block
+% \uIf{condition}{Then block} % in a block unended
+% \Else{inside Else} % in a block
+% \eIf{condition}{Then Block}{Else block} % in blocks
+% \lIf{condition}{Then text} % on the same line
+% \lElse{Else text} % on the same line
+% \Switch{Condition}{Switch block}
+% \Case{a case}{case block} % in a block
+% \lCase{a case}{case text} % on the same line
+% \Other{otherwise block} % in a block
+% \lOther{otherwise block} % on the same line
+% \For{condition}{text loop} % in a block
+% \lFor{condition}{text} % on the same line
+% \ForEach{condition}{text loop} % in a block
+% \lForEach{condition}{text} % on the same line
+% \Repeat{End condition}{text loop} % in a block
+% \lRepeat{condition}{text} % on the same line
+% \While{condition}{text loop} % in a block
+% \lWhile{condition}{text loop} % on the same line
+%
+%
+%%%%%%%%%%%%%% predefined french keywords
+%
+% \AlgDonnees{input}
+% \AlgRes{output}
+% \Donnees{input}
+% \Res{output}
+% \Retour{[valeur]}
+% \Deb{block inside}
+% \KwA % un mot clef simple
+% \Si{condition}{Bloc du Alors} % Dans un bloc
+% \uSi{condition}{Bloc du Alors} % Dans un bloc non termine
+% \eSi{condition}{Bloc du Alors}{Bloc du Sinon} % Dans un bloc
+% \lSi{condition}{texte du Alors} % sur la meme ligne
+% \lSinon{texte du Sinon} % sur la meme ligne
+% \Suivant{Condition}{Bloc de l'instruction}
+% \Cas{cas}{Bloc de ce cas} % Dans un bloc
+% \lCas{cas}{Bloc de ce cas} % sur la meme ligne
+% \Autres{Bloc de l'alternative} % Dans un bloc
+% \lAutres{Bloc de l'alternative} % sur la meme ligne
+% \Pour{condition}{texte de la boucle} % Dans un bloc
+% \lPour{condition}{texte} % sur la meme ligne
+% \PourCh{condition}{texte de la boucle} % Dans un bloc
+% \lPourCh{condition}{texte} % sur la meme ligne
+% \Repeter{End condition}{texte de la boucle} % Dans un bloc
+% \lRepeter{condition}{texte} % sur la meme ligne
+% \Tq{condition}{texte de la boucle} % Dans un bloc
+% \lTq{condition}{texte de la boucle} % sur la meme ligne
+%
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% for more complete informations you can see algorithm2e.tex
+%
+%
+%%%%%%%%%%%%%%%%%%%%%%%% Identification Part %%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Require the LaTeX2e format, release 1994/12/01 or later.
+\NeedsTeXFormat{LaTeX2e}[1994/12/01]
+% Identify the package: name, release date, version and short description.
+\ProvidesPackage{algorithm2e}[2005/10/04 v3.9 algorithms environments]
+%
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%% Initial Code %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+\@makeother\*% some packages (e.g. color.sty) make `*' a letter; give it back category `other'
+%
+% Definitions of commands which can be redefined by the options of the package.
+% AlgoLine is the line-number counter; it starts at 0.
+\newcounter{AlgoLine}
+\setcounter{AlgoLine}{0}
+% Language-dependent names: empty here, filled in by the language options below.
+\newcommand{\listalgorithmcfname}{}
+\newcommand{\algorithmcfname}{}
+\newcommand{\algocf at typo}{}
+\newcommand{\@algocf at procname}{}
+\newcommand{\@algocf at funcname}{}
+\newcommand{\@algocf at titleofalgoname}{\algorithmcfname}
+\newcommand{\@algocf at algotitleofalgo}{%
+ \renewcommand{\@algocf at titleofalgoname}{\algorithmcfname}}
+\newcommand{\@algocf at proctitleofalgo}{%
+ \renewcommand{\@algocf at titleofalgoname}{\algocf at procname}}
+% NOTE(review): the line above expands \algocf at procname, but the macro created here and set by the language options is \@algocf at procname -- confirm the unprefixed name is defined elsewhere.
+\newcommand{\algocf at style}{plain}
+\newcommand{\@ResetCounterIfNeeded}{}
+\newcommand{\@titleprefix}{}
+% \algocf at numbering{<counter>} records <counter> in \algocf at within.
+\newcommand{\algocf at numbering}[1]{\newcommand{\algocf at within}{#1}}
+% Default set of end keywords (the longend/shortend/noend options select another set).
+\newcommand{\defaultsmacros at algo}{\algocf at defaults@shortend}
+% List identifier and float name; the figure option switches them to lof/figure.
+\newcommand{\algocf at list}{loa}
+\newcommand{\algocf at float}{algocf}
+% Names of the environment and of the list command; the algo2e option changes them.
+\newcommand{\algocf at envname}{algorithm}
+\newcommand{\algocf at listofalgorithms}{listofalgorithms}
+%
+%
+%%%%%%%%%%%%%%%%%%%%%% Declaration of Options %%%%%%%%%%%%%%%%%%%%%%%%%%%
+% ifthen supplies \newboolean, \setboolean and \ifthenelse, used by the options below.
+\RequirePackage{ifthen}
+% OPTION algo2e: the environment becomes algorithm2e and the list command listofalgorithmes.
+\DeclareOption{algo2e}{%
+ \renewcommand{\algocf at envname}{algorithm2e}
+ \renewcommand{\algocf at listofalgorithms}{listofalgorithmes}
+}
+% OPTION slide: raise the algocf at slide flag (off by default).
+\newboolean{algocf at slide}\setboolean{algocf at slide}{false}
+\DeclareOption{slide}{%
+ \setboolean{algocf at slide}{true}%
+}
+% OPTION figure: algorithms use the figure float and the lof list.
+\DeclareOption{figure}{
+\renewcommand{\algocf at list}{lof}
+\renewcommand{\algocf at float}{figure}
+}
+% OPTION english: English names; executed by default via \ExecuteOptions.
+\DeclareOption{english}{%
+\renewcommand{\listalgorithmcfname}{List of Algorithms}%
+\renewcommand{\algorithmcfname}{Algorithm}%
+\renewcommand{\algocf at typo}{}%
+\renewcommand{\@algocf at procname}{Procedure}
+\renewcommand{\@algocf at funcname}{Function}
+}
+% OPTION french: French names; \algocf at typo is a space. The accent is written \'e so the name does not depend on the file encoding.
+\DeclareOption{french}{%
+\renewcommand{\listalgorithmcfname}{Liste des Algorithmes}%
+\renewcommand{\algorithmcfname}{Algorithme}%
+\renewcommand{\algocf at typo}{\ }%
+\renewcommand{\@algocf at procname}{Proc\'edure}
+\renewcommand{\@algocf at funcname}{Fonction}
+}
+% OPTION czech: Czech names for the list, the algorithm, procedures and functions.
+\DeclareOption{czech}{%
+\renewcommand{\listalgorithmcfname}{Seznam algoritm\v{u}}%
+\renewcommand{\algorithmcfname}{Algoritmus}%
+\renewcommand{\algocf at typo}{}%
+\renewcommand{\@algocf at procname}{Procedura}
+\renewcommand{\@algocf at funcname}{Funkce}
+}
+% OPTION german: German names; like french, \algocf at typo is a space.
+\DeclareOption{german}{%
+\renewcommand{\listalgorithmcfname}{Liste der Algorithmen}%
+\renewcommand{\algorithmcfname}{Algorithmus}%
+\renewcommand{\algocf at typo}{\ }%
+\renewcommand{\@algocf at procname}{Prozedur}%
+\renewcommand{\@algocf at funcname}{Funktion}%
+}
+% OPTION portugues (note the spelling of the option name): Portuguese names.
+\DeclareOption{portugues}{%
+\renewcommand{\listalgorithmcfname}{Lista de Algoritmos}%
+\renewcommand{\algorithmcfname}{Algoritmo}%
+\renewcommand{\algocf at typo}{}%
+\renewcommand{\@algocf at procname}{Procedimento}
+\renewcommand{\@algocf at funcname}{Fun\c{c}\~{a}o}
+}
+%
+% OPTIONs plain, boxed, ruled, algoruled & boxruled: each one stores its own name in \algocf at style.
+% \restylealgo{<style>} runs the \algocf at style@<style> macro of the same name.
+\newcommand{\algocf at style@plain}{\renewcommand{\algocf at style}{plain}}
+\newcommand{\algocf at style@boxed}{\renewcommand{\algocf at style}{boxed}}
+\newcommand{\algocf at style@ruled}{\renewcommand{\algocf at style}{ruled}}
+\newcommand{\algocf at style@algoruled}{\renewcommand{\algocf at style}{algoruled}}
+\newcommand{\algocf at style@boxruled}{\renewcommand{\algocf at style}{boxruled}}
+\newcommand{\restylealgo}[1]{\csname algocf at style@#1\endcsname}
+\DeclareOption{plain}{\algocf at style@plain}
+\DeclareOption{boxed}{\algocf at style@boxed}
+\DeclareOption{ruled}{\algocf at style@ruled}
+\DeclareOption{algoruled}{\algocf at style@algoruled}
+\DeclareOption{boxruled}{\algocf at style@boxruled}
+%
+% OPTIONs algopart,algochapter & algosection: hand the enclosing counter name to \algocf at numbering
+%
+\DeclareOption{algopart}{\algocf at numbering{part}} % algorithms numbered within part
+\DeclareOption{algochapter}{\algocf at numbering{chapter}} % algorithms numbered within chapter
+\DeclareOption{algosection}{\algocf at numbering{section}} % algorithms numbered within section
+%
+% OPTIONs resetcount & noresetcount: \@ResetCounterIfNeeded either sets AlgoLine back to 0 or does nothing
+%
+\DeclareOption{resetcount}{\renewcommand{\@ResetCounterIfNeeded}{\setcounter{AlgoLine}{0}}}
+\DeclareOption{noresetcount}{\renewcommand{\@ResetCounterIfNeeded}{}}
+%
+% OPTION linesnumbered: set the flag and make \algocf at linesnumbered install \nl in \everypar
+%
+\newboolean{algocf at linesnumbered}\setboolean{algocf at linesnumbered}{false}
+\newcommand{\algocf at linesnumbered}{\relax}
+\DeclareOption{linesnumbered}{%
+ \setboolean{algocf at linesnumbered}{true}
+ \renewcommand{\algocf at linesnumbered}{\everypar={\nl}}
+}
+%
+% OPTION linesnumberedhidden: same flag, but \everypar only steps AlgoLine (no \nl)
+%
+\DeclareOption{linesnumberedhidden}{%
+ \setboolean{algocf at linesnumbered}{true}
+ \renewcommand{\algocf at linesnumbered}{\everypar{\stepcounter{AlgoLine}}}
+}
+%
+% OPTIONs commentsnumbered & inoutnumbered: two flags, both false unless the option is given
+%
+\newboolean{algocf at commentsnumbered}\setboolean{algocf at commentsnumbered}{false}
+\DeclareOption{commentsnumbered}{\setboolean{algocf at commentsnumbered}{true}}
+\newboolean{algocf at inoutnumbered}\setboolean{algocf at inoutnumbered}{false}
+\DeclareOption{inoutnumbered}{\setboolean{algocf at inoutnumbered}{true}}
+%
+% OPTIONs titlenumbered & titlenotnumbered
+% titlenumbered: the title prefix steps the counter named by \algocf at float (the same one whose value is printed) and shows it after the name.
+\DeclareOption{titlenumbered}{%
+ \renewcommand{\@titleprefix}{%
+ \refstepcounter{\algocf at float}%
+ \AlTitleFnt{\@algocf at titleofalgoname\
+ \expandafter\csname the\algocf at float\endcsname\algocf at typo : }}%
+}
+% titlenotnumbered: the title prefix is the name followed by the colon, with no number.
+\DeclareOption{titlenotnumbered}{\renewcommand{\@titleprefix}{%
+ \AlTitleFnt{\@algocf at titleofalgoname\algocf at typo : }}%
+}
+%
+% OPTIONs lined, vlined & noline: the matching \Set... command is deferred to \begin{document}
+%
+\DeclareOption{lined}{\AtBeginDocument{\SetLine}} % \SetLine
+\DeclareOption{vlined}{\AtBeginDocument{\SetVline}} % \SetVline
+\DeclareOption{noline}{\AtBeginDocument{\SetNoline}} % \SetNoline (default)
+%
+% OPTION algonl
+% line numbers are printed as <value of the float counter>.<AlgoLine>
+%
+\DeclareOption{algonl}{\renewcommand{\theAlgoLine}{\expandafter\csname the\algocf at float\endcsname.\arabic{AlgoLine}}}
+%
+% OPTIONs longend, shortend & noend: choose which set of default end keywords \defaultsmacros at algo selects
+%
+\DeclareOption{longend}{%
+\renewcommand{\defaultsmacros at algo}{\algocf at defaults@longend}}
+\DeclareOption{shortend}{%
+\renewcommand{\defaultsmacros at algo}{\algocf at defaults@shortend}}
+\newboolean{algocf at optnoend}\setboolean{algocf at optnoend}{false}
+\DeclareOption{noend}{%
+ \setboolean{algocf at optnoend}{true}%
+ \renewcommand{\defaultsmacros at algo}{\algocf at defaults@noend}}
+%
+% OPTION dotocloa: flag requesting a table-of-contents entry for the list of algorithms
+%
+\newboolean{algocf at dotocloa}\setboolean{algocf at dotocloa}{false}
+\DeclareOption{dotocloa}{%
+ \setboolean{algocf at dotocloa}{true}
+}
+%
+% OPTIONs fillcomment (default) & nofillcomment: flag algocf at optfillcomment, initially true
+%
+\newboolean{algocf at optfillcomment}\setboolean{algocf at optfillcomment}{true}
+\DeclareOption{nofillcomment}{%
+ \setboolean{algocf at optfillcomment}{false}%
+}
+\DeclareOption{fillcomment}{%
+ \setboolean{algocf at optfillcomment}{true}%
+}
+%
+% OPTIONs scleft & scright (side comments): flag algocf at scleft, false (right justified) by default
+%
+\newboolean{algocf at scleft}\setboolean{algocf at scleft}{false}
+\DeclareOption{scleft}{%
+ \setboolean{algocf at scleft}{true}%
+}
+\DeclareOption{scright}{\setboolean{algocf at scleft}{false}}% default; the name used in the documentation
+% `sright' was the original (misspelt) name of scright; kept so that old documents still load
+\DeclareOption{sright}{\setboolean{algocf at scleft}{false}}%
+%
+%
+%%%%%%%%%%%%%%%%%%%%%%% Execution of Options %%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Defaults are executed first; \ProcessOptions then runs the options actually given.
+\ExecuteOptions{english,plain,resetcount,titlenotnumbered}
+%
+\ProcessOptions
+%
+\@algocf at algotitleofalgo % fix name for \Titleofalgo to \algorithmcfname by default
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%% Package Loading %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% float is not loaded any more (revision 3.0 made the package independent from it):
+%\RequirePackage{float}[2001/11/08]
+% xspace is always loaded; color only when the slide option was given.
+\RequirePackage{xspace}
+%
+\ifthenelse{\boolean{algocf at slide}}{\RequirePackage{color}}{}
+% With dotocloa, \listofalgorithmes is redefined at the end of the package via \tocfile (presumably from tocbibind, loaded below -- confirm).
+
+\AtEndOfPackage{%
+ \ifthenelse{\boolean{algocf at dotocloa}}{%
+ \renewcommand{\listofalgorithmes}{\tocfile{\listalgorithmcfname}{loa}}%
+ }{\relax}
+}
+
+% if loa in toc required, load tocbibind package if not already done (\ifthenelse takes three arguments, so the empty else branch is explicit).
+\ifthenelse{\boolean{algocf at dotocloa}}{%
+ \ifx\@tocextra\undefined%
+ \RequirePackage{tocbibind}
+ \fi%
+}{\relax}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Main Part %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% Package identification: name, date and version are stored in macros and
+% combined into an id string; \typeout then prints a banner containing them
+% together with mailing-list and author information.
+\newcommand{\algocf at name}{algorithm2e}
+\newcommand{\algocf at date}{october 04 2005}
+\newcommand{\algocf at version}{Release 3.9}
+\newcommand{\algocf at id}{\algocf at version\space -- \algocf at date\space --}
+\typeout{********************************************************^^JPackage `\algocf at name'\space\algocf at id^^J%
+ - algorithm2e-announce at lirmm.fr mailing list for announcement about releases^^J%
+ - algorithm2e-discussion at lirmm.fr mailing list for discussion about package^^J%
+ subscribe by emailing sympa at lirmm.fr with 'subscribe <list> <firstname name>'^^J%
+ - Author: Christophe Fiorio (fiorio at lirmm.fr)^^J********************************************************}
+%%
+%%
+%%
+%%
+%%
+%%
+%%%% hyperref compatibility tricks: Hyperref package defines H counters from
+ % standard counters (i.e \theHpage from \thepage) and check some particular
+ % counters of some packages, unfortunately it doesn't do the same for
+ % algorithm2e package but act as Hcounter was defined. To avoid errors we
+ % defined \theHalgocf ourself
+%%%%
+% \@ifundefined{theHalgocf}{\def\theHalgocf{\thealgocf}}{}%
+% \@ifundefined{theHAlgoLine}{\def\theHAlgoLine{\theAlgoLine}}{}%
+% \@ifundefined{theHalgocf}{\def\theHalgocf{\thealgocf}}{}%
+% \@ifundefined{theHAlgoLine}{\def\theHAlgoLine{\thealgocf}}{}%
+% \@ifundefined{toclevel at algocf}{\def\toclevel at algocf{0}}{}%
+%%
+%%
+%%
+% Length registers used by the package.  \skiptotal starts at 0.5em, the same
+% value that is stored in \@defaultskiptotal, and \skipalgocfslide at 1em;
+% the other registers are only allocated here.
+\newcommand{\@defaultskiptotal}{0.5em}%\Setnlskip{0.5em}
+\newskip\skiptotal\skiptotal=0.5em%\Setnlskip{0.5em}
+\newskip\skiprule
+\newskip\skiphlne
+\newskip\skiptext
+\newskip\skiplength
+\newskip\algomargin
+\newskip\skipalgocfslide\skipalgocfslide=1em
+\newdimen\algowidth
+\newdimen\inoutsize
+\newdimen\inoutline
+%
+% \@algoskip (default \smallskip) and \@algoinsideskip (default \relax) each
+% hold a command.  \SetAlgoSkip and \SetAlgoInsideSkip take the NAME of a
+% command, without backslash, and store the corresponding control sequence,
+% e.g. \SetAlgoSkip{medskip}.
+\newcommand{\@algoskip}{\smallskip}%
+\newcommand{\SetAlgoSkip}[1]{\renewcommand{\@algoskip}{\csname#1\endcsname}}%
+\newcommand{\@algoinsideskip}{\relax}%
+\newcommand{\SetAlgoInsideSkip}[1]{\renewcommand{\@algoinsideskip}{\csname#1\endcsname}}%
+%
+% Boxes filled later by the KwInOut-, KwInput- and comment-defining macros.
+\newsavebox{\algocf at inoutbox}
+\newsavebox{\algocf at inputbox}
+%%
+%%
+% Small helpers: an empty macro, a macro expanding to a control space, and
+% \BlankLine which inserts 1ex of vertical space.
+\newcommand{\arg at e}{}
+\newcommand{\arg at space}{\ }
+\newcommand{\BlankLine}{\vskip 1ex}
+%%
+\newcommand{\vespace}{1ex}
+% \SetInd{<before rule>}{<after rule>}: sets \skiprule and \skiptext, and
+% sets \skiplength to their sum plus 0.4pt.
+\newcommand{\SetInd}[2]{%
+\skiprule=#1%
+\skiptext=#2%
+\skiplength=\skiptext\advance\skiplength by \skiprule\advance\skiplength by 0.4pt}
+\SetInd{0.5em}{1em}
+% \algomargin starts as \leftskip plus \parindent (values at load time);
+% \incmargin / \decmargin add or subtract a length.
+\algomargin=\leftskip\advance\algomargin by \parindent
+\newcommand{\incmargin}[1]{\advance\algomargin by #1}
+\newcommand{\decmargin}[1]{\advance\algomargin by -#1}
+% \Setnlskip{<length>}: stores the length both as the new default and in
+% \skiptotal.
+\newcommand{\Setnlskip}[1]{%
+\renewcommand{\@defaultskiptotal}{#1}%
+\setlength{\skiptotal}{#1}}
+\newcommand{\setnlskip}[1]{\Setnlskip{#1}}%kept for compatibility issue
+%%
+% Caption skips: \AlCapSkip is inserted as a \vskip before the caption box
+% and \AlCapHSkip as an \hskip in front of the text of ruled captions.
+% Each register is initialised right after being allocated (the second line
+% used to assign \AlCapSkip again instead of \AlCapHSkip).
+\newskip\AlCapSkip\AlCapSkip=0ex
+\newskip\AlCapHSkip\AlCapHSkip=0ex
+\newcommand{\setalcapskip}[1]{\setlength{\AlCapSkip}{#1}}
+\newcommand{\setalcaphskip}[1]{\setlength{\AlCapHSkip}{#1}}
+\setalcaphskip{.5\algomargin}
+%%
+%%
+% Indentation commands.  \Indentp{<length>} advances \leftskip by <length>;
+% the four fixed-step commands are \Indentp applied to +/-1em and +/-0.5em.
+\newcommand{\Indentp}[1]{\advance\leftskip by #1}
+\newcommand{\Indp}{\Indentp{1em}}
+\newcommand{\Indpp}{\Indentp{0.5em}}
+\newcommand{\Indm}{\Indentp{-1em}}
+\newcommand{\Indmm}{\Indentp{-0.5em}}
+%%
+%%
+%% Line Numbering
+%%
+%%
+% number line style
+% \nlSty{<text>}: style applied to line numbers (bold by default).
+% \Setnlsty{<cmd name>}{<before>}{<after>}: redefines \nlSty so that the
+% number, surrounded by <before> and <after>, is passed to \<cmd name>.
+\newcommand{\nlSty}[1]{\textnormal{\textbf{#1}}}% default definition
+\newcommand{\Setnlsty}[3]{\renewcommand{\nlSty}[1]{\textnormal{\csname#1\endcsname{#2##1#3}}}}
+%
+%
+% \algocf at nlhlabel{<label>}{<text>}: immediately writes a \newlabel entry
+% for <label> to the .aux file, with <text> as the reference text, \thepage
+% as the page and AlgoLine\thealgocfline.\theAlgoLine as the Href field.
+\newcommand{\algocf at nlhlabel}[2]{%
+ \immediate\write\@auxout{%
+ \string\newlabel{#1}{%
+ {#2}% current label
+ {\thepage}% page
+ {}% current label string
+% {AlgoLine\thealgocfline.\theAlgoLine}% current Href
+ {AlgoLine\thealgocfline.\theAlgoLine}% current Href
+ {}%
+ }%
+ }%
+}
+%
+% nl definitions
+%
+% \nl: steps the AlgoLine counter and prints its value, in \scriptsize and
+% styled by \nlSty, to the left of the current line (\llap), followed by a
+% horizontal skip of \skiptotal.
+\newcommand{\nl}{%
+ \@ifundefined{href}{% if not hyperref then do a simple refstepcounter
+ \refstepcounter{AlgoLine}%
+ }{% else if hyperref, do the anchor so 2 lines in two differents algorithms cannot have the same href
+% \stepcounter{AlgoLine}\Hy at raisedlink{\hyper at anchorstart{AlgoLine\thealgocfline.\theAlgoLine}\hyper at anchorend}%
+ \stepcounter{AlgoLine}\Hy at raisedlink{\hyper at anchorstart{AlgoLine\thealgocfline.\theAlgoLine}\hyper at anchorend}%
+ }% now we can do the line numbering
+ \strut\vadjust{\kern-\dp\strutbox\vtop to \dp\strutbox{%
+ \baselineskip\dp\strutbox\vss\llap{\scriptsize{\nlSty{\theAlgoLine}\hskip\skiptotal}}\null}}%
+}%
+% \nllabel{<label>}: labels the current line.  Without hyperref this is a
+% plain \label; with hyperref the label is written by hand with the current
+% value of \theAlgoLine as reference text.
+\newcommand{\nllabel}[1]{%
+ \@ifundefined{href}{\label{#1}}{\algocf at nlhlabel{#1}{\theAlgoLine}}}%
+%
+% \enl: prints a semicolon, steps the AlgoLine counter, prints the number
+% after an \hfill (\rlap) and ends the paragraph.
+\newcommand{\enl}{;%
+ \@ifundefined{href}{% if not hyperref then do a simple refstepcounter
+ \refstepcounter{AlgoLine}%
+ }{% else if hyperref, do the anchor so 2 lines in two differents algorithms cannot have the same href
+% \stepcounter{AlgoLine}\Hy at raisedlink{\hyper at anchorstart{AlgoLine\thealgocfline.\theAlgoLine}\hyper at anchorend}%
+ \stepcounter{AlgoLine}\Hy at raisedlink{\hyper at anchorstart{AlgoLine\thealgocfline.\theAlgoLine}\hyper at anchorend}%
+ }% now we can do the line numbering
+ \hfill\rlap{%
+ \scriptsize{\nlSty{\theAlgoLine}}}\par}
+% \nlset{<text>}: prints <text> in line-number style to the left of the
+% current position (\llap) without touching the AlgoLine counter.
+\newcommand{\nlset}[1]{%
+ \hskip 0pt\llap{%
+ \scriptsize{\nlSty{#1}}\hskip\skiptotal}\ignorespaces}
+%
+% lnl definitions
+%
+% \lnl{<label>}: numbers the line (\nl) and labels it.  Which definition is
+% used is decided once, here, by testing whether \href is defined.
+\@ifundefined{href}{% if not hyperref
+ \newcommand{\lnl}[1]{\nl\label{#1}\ignorespaces}%
+}{% else hyperref
+ \newcommand{\lnl}[1]{\nl\algocf at nlhlabel{#1}{\theAlgoLine}\ignorespaces}%
+}
+%
+% lnlset
+%
+% \lnlset{<label>}{<text>}: prints <text> as line number (\nlset) and makes
+% <label> refer to it.
+% NOTE(review): in the hyperref branch the anchor is named AlgoLine.<text>,
+% whereas the label written by \algocf at nlhlabel carries
+% AlgoLine\thealgocfline.\theAlgoLine as Href -- confirm this is intended.
+\@ifundefined{href}{%
+ \newcommand{\lnlset}[2]{\nlset{#2}\protected at edef\@currentlabel{#2}\label{#1}}%
+}{%else hyperref
+ \newcommand{\lnlset}[2]{\nlset{#2}%
+ \Hy at raisedlink{\hyper at anchorstart{AlgoLine.#2}\hyper at anchorend}\algocf at nlhlabel{#1}{#2}%
+ \ignorespaces%
+ }%
+}
+
+%
+% set char put at end of each line
+%
+% The character printed at the end of a line (a semicolon by default) and the
+% user command to replace it.
+\newcommand{\algocf at endline}{\string;}
+\newcommand{\SetEndCharOfAlgoLine}[1]{\renewcommand{\algocf at endline}{#1}}
+%
+% end of line definition
+%
+% \@endalgoln ends a line: end character followed by \par.
+% \dontprintsemicolon drops the end character, \printsemicolon restores it.
+\newcommand{\@endalgoln}{\algocf at endline\par}% default definition: printsemicolon
+\newcommand{\dontprintsemicolon}{\renewcommand{\@endalgoln}{\par}}
+\newcommand{\printsemicolon}{\renewcommand{\@endalgoln}{\algocf at endline\par}}
+%
+% line numbering
+%
+% Each command below sets the lines-numbered boolean and redefines the hook
+% \algocf at linesnumbered:
+%   \linesnumbered        -> the hook sets \everypar to \nl
+%   \linesnotnumbered     -> the hook is \relax
+%   \linesnumberedhidden  -> the hook sets \everypar to step the counter only
+\newcommand{\linesnumbered}{\setboolean{algocf at linesnumbered}{true}\renewcommand{\algocf at linesnumbered}{\everypar={\nl}}}
+\newcommand{\linesnotnumbered}{%
+ \setboolean{algocf at linesnumbered}{false}%
+ \renewcommand{\algocf at linesnumbered}{\relax}%
+}
+%
+\newcommand{\linesnumberedhidden}{%
+ \setboolean{algocf at linesnumbered}{true}\renewcommand{\algocf at linesnumbered}{\everypar{\stepcounter{AlgoLine}}}}
+\newcommand{\showln}{\nlset{\theAlgoLine}\ignorespaces} % display the line number on this line (without labelling)
+\newcommand{\showlnlabel}[1]{\lnlset{#1}{\theAlgoLine}\ignorespaces} % display the line number and label this line
+%
+%%
+%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% Styling text commands
+%
+% Each \XxxSty{<text>} command typesets <text> in a given style and ends with
+% \unskip.  The matching \SetXxxSty{<cmd name>} takes the NAME of a one-
+% argument command (without backslash) and redefines \XxxSty to use it.
+% \SetAlFnt and \SetAlCapFnt instead store their argument as is.
+\newcommand{\AlTitleFnt}[1]{\textbf{#1}\unskip}% default definition
+\newcommand{\SetAlTitleFnt}[1]{\renewcommand{\AlTitleFnt}[1]{\csname#1\endcsname{##1}\unskip}}%
+\newcommand{\AlFnt}{\relax}% default definition
+\newcommand{\SetAlFnt}[1]{\renewcommand{\AlFnt}{#1}}%
+\newcommand{\AlCapFnt}{\AlFnt{}}% default definition
+\newcommand{\SetAlCapFnt}[1]{\renewcommand{\AlCapFnt}{#1}}%
+\newcommand{\KwSty}[1]{\textnormal{\textbf{#1}}\unskip}% default definition
+\newcommand{\SetKwSty}[1]{\renewcommand{\KwSty}[1]{\textnormal{\csname#1\endcsname{##1}}\unskip}}%
+\newcommand{\ArgSty}[1]{\textnormal{\emph{#1}}\unskip}%\SetArgSty{emph}
+\newcommand{\SetArgSty}[1]{\renewcommand{\ArgSty}[1]{\textnormal{\csname#1\endcsname{##1}}\unskip}}%
+\newcommand{\FuncSty}[1]{\textnormal{\texttt{#1}}\unskip}%\SetFuncSty{texttt}
+\newcommand{\SetFuncSty}[1]{\renewcommand{\FuncSty}[1]{\textnormal{\csname#1\endcsname{##1}}\unskip}}%
+\newcommand{\DataSty}[1]{\textnormal{\textsf{#1}}\unskip}%%\SetDataSty{textsf}
+\newcommand{\SetDataSty}[1]{\renewcommand{\DataSty}[1]{\textnormal{\csname#1\endcsname{##1}}\unskip}}%
+\newcommand{\CommentSty}[1]{\textnormal{\texttt{#1}}\unskip}%%\SetCommentSty{texttt}
+\newcommand{\SetCommentSty}[1]{\renewcommand{\CommentSty}[1]{\textnormal{\csname#1\endcsname{##1}}\unskip}}%
+% \TitleSty{<text>}: style of titles; by default <text> followed by \unskip.
+% \SetTitleSty{<style cmd name>}{<size cmd name>}: redefines \TitleSty so
+% that <text>, preceded by \<size cmd name>, is passed to \<style cmd name>.
+% The trailing \unskip belongs to the redefined \TitleSty, as in the default
+% definition and in the other \Set...Sty commands (it used to sit outside
+% the \renewcommand body and was executed by \SetTitleSty itself).
+\newcommand{\TitleSty}[1]{#1\unskip}%\SetTitleSty{}{}
+\newcommand{\SetTitleSty}[2]{\renewcommand{\TitleSty}[1]{%
+\csname#1\endcsname{\csname#2\endcsname##1}\unskip}}
+%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% Block basic commands
+%
+% Bookkeeping of \skiptotal while entering and leaving nested blocks:
+%   push: add the given length to \skiptotal and \moveright by it
+%   pop:  subtract the given length from \skiptotal
+%   add/subskiptotal: account for the 0.4pt rule width in \skiptotal and \hsize
+\newcommand{\al at push}[1]{\advance\skiptotal by #1\moveright #1}
+\newcommand{\al at pop}[1]{\advance\skiptotal by -#1}
+\newcommand{\al at addskiptotal}{\advance\skiptotal by 0.4pt\advance\hsize by -0.4pt} % 0.4 pt=width of \vrule
+\newcommand{\al at subskiptotal}{\advance\skiptotal by -0.4pt\advance\hsize by 0.4pt} % 0.4 pt=width of \vrule
+%
+% Vertical space added after the block drawn with a vertical line; \Setvlineskip changes it.
+\skiphlne=.8ex%
+\newcommand{\Setvlineskip}[1]{\skiphlne=#1}
+% \V at line{<body>}: typesets <body> as a block with a vertical rule on its
+% left and \Hlne below it, followed by a \vskip of \skiphlne.  <body> is
+% set in a \vtop whose \hsize is reduced by \skiplength and by the rule width.
+\newcommand{\V at line}[1]{% no vskip in between boxes but a strut to separate them,
+ \strut\par\nointerlineskip% then interblock space stay the same whatever is inside it
+ \al at push{\skiprule}% move to the right before the vertical rule
+ \hbox{\vrule%
+ \vtop{\al at push{\skiptext}%move the right after the rule
+ \vtop{\al at addskiptotal\advance\hsize by -\skiplength #1}\Hlne}}\vskip\skiphlne% inside the block
+ \al at pop{\skiprule}%\al at subskiptotal% restore indentation
+ \nointerlineskip}% no vskip after
+%
+% \V at sline{<body>}: same layout as the macro above (vertical rule on the
+% left of <body>) but without \Hlne, without the trailing \vskip and without
+% the final \nointerlineskip.
+\newcommand{\V at sline}[1]{% no vskip in between boxes but a strut to separate them,
+ \strut\par\nointerlineskip% then interblock space stay the same whatever is inside it
+ \al at push{\skiprule}% move to the right before the vertical rule
+ \hbox{\vrule% the vertical rule
+ \vtop{\al at push{\skiptext}%move the right after the rule
+ \vtop{\al at addskiptotal\advance\hsize by -\skiplength #1}}}% inside the block
+ \al at pop{\skiprule}}% restore indentation
+ %\nointerlineskip}% no vskip after
+%
+% Short horizontal rule (0.4pt high, .5em wide).
+\newcommand{\H at lne}{\hrule height 0.4pt depth 0pt width .5em}
+%
+% \No at line{<body>}: typesets <body> as an indented block without any rule;
+% the same pushes by \skiprule and \skiptext are applied and \hsize is reduced
+% by \skiplength.
+\newcommand{\No at line}[1]{% no vskip in between boxes but a strut to separate them,
+ \strut\par\nointerlineskip% then interblock space stay the same whatever is inside it
+ \al at push{\skiprule}%
+ \hbox{%
+ \vtop{\al at push{\skiptext}%
+ \vtop{\advance\hsize by -\skiplength #1}}}% inside the block
+ \al at pop{\skiprule}}%
+ %\nointerlineskip}% no vskip after
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% default=NoLine
+%
+% Default block macros (no line drawn):
+%   \a@@block{<body>}{<end keyword>}  indented body, then the end keyword
+%   \a at group{<body>}                indented body only
+% These are top-level definitions, so their arguments are written #1/#2;
+% the doubled form ##1 is only correct inside the \Set... commands below,
+% where the definition is nested in another macro body.
+\newcommand{\a@@block}[2]{\No at line{#1}\KwSty{#2}\par}
+\newcommand{\a at block}[2]{\a@@block{#1}{#2}} % this to be redefined as a at group in
+ % case of noend option
+\newcommand{\a at group}[1]{\No at line{#1}}
+\newcommand{\Hlne}{}
+%
+%
+% The four commands below select how blocks are drawn by redefining
+% \a@@block, \a at group and \Hlne.
+%
+% \SetNoline: no rule; the end keyword is printed after the block.
+\newcommand{\SetNoline}{%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Noline
+\renewcommand{\a@@block}[2]{\No at line{##1}\KwSty{##2}\strut\par}%
+\renewcommand{\a at group}[1]{\No at line{##1}}
+\renewcommand{\Hlne}{}}
+%
+% \SetVline: vertical rule closed by a short horizontal rule; the end keyword
+% argument is not printed.
+\newcommand{\SetVline}{%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Vline
+\renewcommand{\a@@block}[2]{\V at line{##1}}%
+\renewcommand{\a at group}[1]{\V at sline{##1}\strut\ignorespaces}
+\renewcommand{\Hlne}{\H at lne}}
+%
+% \SetLine: vertical rule, and the end keyword is printed after the block.
+\newcommand{\SetLine}{%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Line
+\renewcommand{\a@@block}[2]{\strut\V at sline{##1}\KwSty{##2}\strut\par}% no skip after a block so garantie at least a line
+\renewcommand{\a at group}[1]{\V at sline{##1}\strut\ignorespaces}
+\renewcommand{\Hlne}{}}
+%
+% \SetNothing: no rule and no end keyword.
+\newcommand{\SetNothing}{%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Nothing
+\renewcommand{\a@@block}[2]{\No at line{##1}\par}%
+%\long
+\renewcommand{\a at group}[1]{\No at line{##1}}
+\renewcommand{\Hlne}{}}
+%
+%%
+%%
+%
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% ``Input :'''s like command
+%
+%%%
+% text staying at the right of the longer keyword of KwInOut commands
+% (text of KwInOut commands are all vertically aligned)
+%
+% \SetKwInOut{<cmd name>}{<keyword>}: defines (or redefines) \<cmd name>{<text>}.
+% The keyword followed by a colon is measured in a box; \inoutsize keeps the
+% largest width seen so far, so that the texts of all such commands line up.
+% Inside <text>, \\ ends the paragraph and sets \parindent to the box width.
+\newcommand{\algocf at newinout}{\par\parindent=\wd\algocf at inoutbox}% to put right indentation after a \\ in the KwInOut
+\newcommand{\SetKwInOut}[2]{%
+ \sbox\algocf at inoutbox{\hbox{\KwSty{#2}\algocf at typo:\ }}%
+ \expandafter\ifx\csname InOutSizeDefined\endcsname\relax% if first time used
+ \newcommand\InOutSizeDefined{}\setlength{\inoutsize}{\wd\algocf at inoutbox}%
+ \else% else keep the larger dimension
+ \ifdim\wd\algocf at inoutbox>\inoutsize\setlength{\inoutsize}{\wd\algocf at inoutbox}\fi%
+ \fi% the dimension of the box is now defined.
+ \@ifundefined{#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+ \expandafter\algocf at mkcmd\csname#1\endcsname[1]{%
+ \ifthenelse{\boolean{algocf at inoutnumbered}}{\relax}{\everypar={\relax}}
+ {\let\\\algocf at newinout\hangindent=\wd\algocf at inoutbox\hangafter=1\parbox[t]{\inoutsize}{\KwSty{#2}\hfill:\mbox{\ }}##1\par}
+ \algocf at linesnumbered% reset the numbering of the lines
+ }}%
+%
+%% allow to ajust the skip size of InOut
+%%
+% \ResetInOut{<keyword>}: sets \inoutsize to the width of <keyword> followed
+% by a colon, whatever its previous value.
+\newcommand{\ResetInOut}[1]{%
+ \sbox\algocf at inoutbox{\hbox{\KwSty{#1}\algocf at typo:\ }}%
+ \setlength{\inoutsize}{\wd\algocf at inoutbox}%
+ }
+%
+%
+%%%
+% text staying at the right of the keyword.
+%
+% \SetKwInput{<cmd name>}{<keyword>}: defines (or redefines) \<cmd name>{<text>},
+% which prints the keyword and a colon followed by <text>, with a hanging
+% indent equal to the width of the keyword box.  Unlike \SetKwInOut, the
+% width is that of this keyword only.
+\newcommand{\algocf at newinput}{\par\parindent=\wd\algocf at inputbox}% to put right indentation after a \\ in the KwInput
+\newcommand{\SetKwInput}[2]{%
+ \@ifundefined{#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+ \expandafter\algocf at mkcmd\csname#1\endcsname[1]{%
+ \sbox\algocf at inputbox{\hbox{\KwSty{#2}\algocf at typo: }}%
+ \ifthenelse{\boolean{algocf at inoutnumbered}}{\relax}{\everypar={\relax}}%
+ {\let\\\algocf at newinput\hangindent=\wd\algocf at inputbox\hangafter=1\unhbox\algocf at inputbox##1\par}%
+ \algocf at linesnumbered% reset the numbering of the lines
+ }}%
+% \SetKwData{<cmd name>}{<text>}: defines (or redefines) \<cmd name>.
+% Followed by a braced argument it prints <text>(<arg>), otherwise <text>
+% followed by \xspace; <text> is set in data style, <arg> in argument style.
+\newcommand{\SetKwData}[2]{%
+ \@ifundefined{#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+ \expandafter\algocf at mkcmd\csname @#1\endcsname[1]{\DataSty{#2(}\ArgSty{##1}\DataSty{)}}%
+ \expandafter\algocf at mkcmd\csname#1\endcsname{%
+ \@ifnextchar\bgroup{\csname @#1\endcsname}{\DataSty{#2}\xspace}}%
+ }
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% Comments macros
+%
+%%%%
+% comment in the text, first argument is the name of the macro, second is
+% the text put before the comment, third is the text put at the end of the
+% comment.
+%
+% first side comment justification
+% User switches for the two comment booleans (side-comment justification
+% and filling of comments).
+\newcommand{\SetSideCommentLeft}{\setboolean{algocf at scleft}{true}}
+\newcommand{\SetSideCommentRight}{\setboolean{algocf at scleft}{false}}
+\newcommand{\SetNoFillComment}{\setboolean{algocf at optfillcomment}{false}}
+\newcommand{\SetFillComment}{\setboolean{algocf at optfillcomment}{true}}
+%
+% next comment and side comment
+%
+% Helpers for plain comments.  The end mark is \relax until a comment
+% command redefines it; the fill macro is \hfill or \relax depending on the
+% fill boolean; the start macro prints the comment-start box with a hanging
+% indent of its width.
+\newcommand{\algocf at endmarkcomment}{\relax}%
+\newcommand{\algocf at fillcomment}{%
+ \ifthenelse{\boolean{algocf at optfillcomment}}{\hfill}{\relax}}%
+%
+\newcommand{\algocf at startcomment}{%
+ \hangindent=\wd\algocf at inputbox\hangafter=1\usebox\algocf at inputbox}%
+\newcommand{\algocf at endcomment}{\algocf at fillcomment\algocf at endmarkcomment\ignorespaces\par}%
+\newcommand{\algocf at endstartcomment}{\algocf at endcomment\algocf at startcomment\ignorespaces}%
+%
+% Helpers for side comments.  The "alt" boolean suppresses the \par after
+% the comment; the left/right fill macros insert the fill on the side
+% selected by the scleft boolean.
+\newboolean{algocf at sidecomment}%
+\newboolean{algocf at altsidecomment}\setboolean{algocf at altsidecomment}{false}%
+\newcommand{\algocf at scpar}{\ifthenelse{\boolean{algocf at altsidecomment}}{\relax}{\par}}%
+\newcommand{\algocf at sclfill}{\ifthenelse{\boolean{algocf at scleft}}{\algocf at fillcomment}{\relax}}%
+\newcommand{\algocf at scrfill}{\ifthenelse{\boolean{algocf at scleft}}{\relax}{\hfill}}
+\newcommand{\algocf at startsidecomment}{\usebox\algocf at inputbox}%
+\newcommand{\algocf at endsidecomment}{\algocf at endmarkcomment\algocf at scpar}%
+\newcommand{\algocf at endstartsidecomment}{%
+ \algocf at sclfill\algocf at endsidecomment%
+ \algocf at scrfill\algocf at startsidecomment\ignorespaces}%
+%
+% \SetKwComment{<cmd name>}{<start>}{<end>}: defines (or redefines)
+%   \<cmd name>{<text>}          a comment on its own line(s)
+%   \<cmd name>*{<text>}         a side comment
+%   \<cmd name>*[<c>]{<text>}    a side comment with a one-letter option:
+%        h = "alt" mode, left      f = "alt" mode, right
+%        l = normal mode, left     r = normal mode, right
+% In "alt" mode the end-of-line character is not printed before the comment
+% and no \par is issued after it.  The option only applies to that comment:
+% the previous justification is saved before and restored after it.
+% In both forms <start> is put in a box in comment style and <end> is
+% printed after <text>; inside <text>, \\ closes the comment and opens a
+% new one.
+\newcommand{\SetKwComment}[3]{%
+ % newcommand or renewcommand ?
+ \@ifundefined{#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+ %%% comment definition
+ \expandafter\algocf at mkcmd\csname algocf@#1\endcsname[1]{%
+ \sbox\algocf at inputbox{\CommentSty{\hbox{#2}}}%
+ \ifthenelse{\boolean{algocf at commentsnumbered}}{\relax}{\everypar={\relax}}%
+ {\renewcommand{\algocf at endmarkcomment}{#3}%
+ \let\\\algocf at endstartcomment%
+ \algocf at startcomment\CommentSty{%
+ \strut\ignorespaces##1\strut\algocf at fillcomment#3}\par}%
+ \algocf at linesnumbered% reset the numbering of the lines
+ }%
+ %%% side comment definitions
+ % option or not?
+ \expandafter\algocf at mkcmd\csname algocf@#1 at star\endcsname{%
+ \@ifnextchar [{\csname algocf@#1 at staropt\endcsname}{\csname algocf@#1 at sidecomment\endcsname}%
+ }%
+ % manage option
+ \expandafter\def\csname algocf@#1 at staropt\endcsname[##1]##2{%
+ \ifthenelse{\boolean{algocf at scleft}}{\setboolean{algocf at sidecomment}{true}}{\setboolean{algocf at sidecomment}{false}}%
+ \ifx##1h\setboolean{algocf at altsidecomment}{true}\SetSideCommentLeft\fi%
+ \ifx##1f\setboolean{algocf at altsidecomment}{true}\SetSideCommentRight\fi%
+ \ifx##1l\setboolean{algocf at altsidecomment}{false}\SetSideCommentLeft\fi%
+ \ifx##1r\setboolean{algocf at altsidecomment}{false}\SetSideCommentRight\fi%
+ \csname algocf@#1 at sidecomment\endcsname{##2}% call sidecomment
+ \ifthenelse{\boolean{algocf at sidecomment}}{\setboolean{algocf at scleft}{true}}{\setboolean{algocf at scleft}{false}}%
+ \setboolean{algocf at altsidecomment}{false}%
+ }%
+ % side comment
+ \expandafter\algocf at mkcmd\csname algocf@#1 at sidecomment\endcsname[1]{%
+ \sbox\algocf at inputbox{\CommentSty{\hbox{#2}}}%
+ \ifthenelse{\boolean{algocf at commentsnumbered}}{\relax}{\everypar={\relax}}%
+ {%
+ \renewcommand{\algocf at endmarkcomment}{#3}%
+ \let\\\algocf at endstartsidecomment%
+ % here is the comment
+ \ifthenelse{\boolean{algocf at altsidecomment}}{\relax}{\algocf at endline\ }%
+ \algocf at scrfill\algocf at startsidecomment\CommentSty{%
+ \strut\ignorespaces##1\strut\algocf at sclfill#3}\algocf at scpar%
+ }%
+ \algocf at linesnumbered% reset the numbering of the lines
+ }
+ \expandafter\algocf at mkcmd\csname#1\endcsname{\@ifstar{\csname algocf@#1 at star\endcsname}{\csname algocf@#1\endcsname}}
+}%
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% Kw
+%
+% \SetKw{<cmd name>}{<text>}: defines (or redefines) \<cmd name>.
+% Followed by a braced argument it prints <text> in keyword style, a space
+% and the argument in argument style; otherwise <text> followed by \xspace.
+\newcommand{\SetKw}[2]{%
+ \@ifundefined{#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+ \expandafter\algocf at mkcmd\csname @#1\endcsname[1]{\KwSty{#2} \ArgSty{##1}}%
+ \expandafter\algocf at mkcmd\csname#1\endcsname{%
+ \@ifnextchar\bgroup{\csname @#1\endcsname}{\KwSty{#2}\xspace}}%
+ }
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% KwFunction
+%
+% \SetKwFunction{<cmd name>}{<text>}: defines (or redefines) \<cmd name>.
+% Followed by a braced argument it prints <text>(<arg>), with <text> and the
+% parentheses in function style and <arg> in argument style; otherwise it
+% prints <text> followed by \xspace.
+\newcommand{\SetKwFunction}[2]{%
+ \@ifundefined{#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+ \expandafter\algocf at mkcmd\csname @#1\endcsname[1]{\FuncSty{#2(}\ArgSty{##1}\FuncSty{)}}%
+ \expandafter\algocf at mkcmd\csname#1\endcsname{%
+ \@ifnextchar\bgroup{\csname @#1\endcsname}{\FuncSty{#2}\xspace}}%
+}
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% KwBlock
+%
+% \SetKwBlock{<cmd name>}{<begin keyword>}{<end keyword>}: defines
+%   \<cmd name>{<body>}
+%   \<cmd name>(<side text>){<body>}
+% Both print the begin keyword, the body as a group and the end keyword.
+% A parenthesised text following the command is printed after the end
+% keyword; otherwise the paragraph is simply ended.
+\newcommand{\SetKwBlock}[3]{%
+\@ifundefined{algocf@#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#1\endcsname{ %Begin
+ \@ifnextchar({\csname algocf@#1opt\endcsname}{\csname algocf@#1\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#1opt\endcsname(##1)##2{% \Begin(){}
+ \KwSty{#2} ##1\a at group{##2}\KwSty{#3}%
+ \@ifnextchar({\csname algocf@#1end\endcsname}{\par}}%
+% without side text at the beginning
+\expandafter\algocf at mkcmd\csname algocf@#1\endcsname[1]{% \Begin{}
+ \KwSty{#2}\a at group{##1}\KwSty{#3}\@ifnextchar({\csname algocf@#1end\endcsname}{\par}}%
+% side text at the end
+\expandafter\def\csname algocf@#1end\endcsname(##1){% \Begin{}
+ \ ##1\par}%
+}
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% For Switch
+%
+% \SetKwSwitch takes three command names and five keywords:
+%   #1 = name of the switch command      #2 = name of the case command
+%   #3 = name of the default-case command
+%   #4 = keyword printed before the switch condition
+%   #5 = keyword printed after the switch condition
+%   #6 = keyword printed before each case condition
+%   #7 = keyword of the default case     #8 = end keyword
+% It defines \#1, \#2, \u#2, \l#2, \#3 and \l#3.  Every command accepts an
+% optional side text in parentheses right after its name.  The u-variant
+% prints its body as a group without end keyword; the l-variants print their
+% body on the same line.
+\newcommand{\SetKwSwitch}[8]{% #1=\Switch #2=\Case #3=\Other #4=switch #5=do #6=case #7=otherwise #8=endsw
+%%%% Switch
+\@ifundefined{algocf@#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#1\endcsname{ %Switch
+ \@ifnextchar({\csname algocf@#1opt\endcsname}{\csname algocf@#1\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#1opt\endcsname(##1)##2##3{% \Switch(){}{}
+ \KwSty{#4} \ArgSty{##2} \KwSty{#5} ##1\a at block{##3}{#8}}%
+% without side text
+\expandafter\algocf at mkcmd\csname algocf@#1\endcsname[2]{% \Switch{}{}
+ \KwSty{#4} \ArgSty{##1} \KwSty{#5}\a at block{##2}{#8}}%
+% side text at the end
+% NOTE(review): the macro defined next has no content apart from a blank
+% line and is not referenced by the other macros defined in this command.
+\expandafter\def\csname algocf@#1end\endcsname(##1){% \Switch{}{}()
+
+}
+
+%%%% Case
+\@ifundefined{algocf@#2}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#2\endcsname{ %Case
+ \@ifnextchar({\csname algocf@#2opt\endcsname}{\csname algocf@#2\endcsname}}
+\expandafter\def\csname u#2\endcsname{ %uCase
+ \@ifnextchar({\csname algocf at u#2opt\endcsname}{\csname algocf at u#2\endcsname}}
+\expandafter\def\csname l#2\endcsname{ %lCase
+ \@ifnextchar({\csname algocf at l#2opt\endcsname}{\csname algocf at l#2\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#2opt\endcsname(##1)##2##3{% \Case(){}{}
+ \KwSty{#6} \ArgSty{##2} ##1\a at block{##3}{#8}}%
+\expandafter\def\csname algocf at u#2opt\endcsname(##1)##2##3{% \uCase(){}{}
+ \KwSty{#6} \ArgSty{##2} ##1\a at group{##3}}%
+\expandafter\def\csname algocf at l#2opt\endcsname(##1)##2##3{% \lCase(){}{}
+ \KwSty{#6} \ArgSty{##2} ##3\algocf at endline\ ##1\par}%
+% without side text
+\expandafter\algocf at mkcmd\csname algocf@#2\endcsname[2]{% \Case{}{}
+ \KwSty{#6} \ArgSty{##1}\a at block{##2}{#8}}%
+\expandafter\algocf at mkcmd\csname algocf at u#2\endcsname[2]{% \uCase{}{}
+ \KwSty{#6} \ArgSty{##1}\a at group{##2}}%
+\expandafter\algocf at mkcmd\csname algocf at l#2\endcsname[2]{% \lCase{}{}
+ \KwSty{#6} \ArgSty{##1} ##2}%
+%%%% Other
+\@ifundefined{algocf@#3}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#3\endcsname{ %Other
+ \@ifnextchar({\csname algocf@#3opt\endcsname}{\csname algocf@#3\endcsname}}
+\expandafter\def\csname l#3\endcsname{ %lOther
+ \@ifnextchar({\csname algocf at l#3opt\endcsname}{\csname algocf at l#3\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#3opt\endcsname(##1)##2{% \Other(){}
+ \KwSty{#7} ##1\a at block{##2}{#8}}%
+\expandafter\def\csname algocf at l#3opt\endcsname(##1)##2{% \lOther(){}
+ \KwSty{#7} ##2\algocf at endline\ ##1\par}%
+% without side text
+\expandafter\algocf at mkcmd\csname algocf@#3\endcsname[1]{% default
+ \KwSty{#7}\a at block{##1}{#8}}%
+\expandafter\algocf at mkcmd\csname algocf at l#3\endcsname[1]{% ldefault
+ \KwSty{#7} ##1}%
+}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% If macros
+%
+% \SetKwIF takes three command names and five keywords (see the legend on
+% the next line) and defines, each with an optional side text in parentheses
+% right after the command name:
+%   \#1{<cond>}{<body>}            if ... then <body> <end keyword>
+%   \e#1{<cond>}{<then>}{<else>}   if ... then <then> else <else> <end keyword>
+%   \l#1, \u#1                     if on one line / if without end keyword
+%   \#2, \l#2, \u#2                the same three forms for else-if
+%   \#3, \l#3, \u#3                the same three forms for else
+% The u-forms typeset their body as a group without the end keyword.
+\newcommand{\SetKwIF}[8]{% #1=\If #2=\ElseIf #3=\Else #4=if #5=then #6=elseif #7=else #8=endif
+%
+% common text
+\@ifundefined{#1 at ifthen}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+\expandafter\algocf at mkcmd\csname #1 at ifthen\endcsname[1]{%
+ \KwSty{#4} \ArgSty{##1} \KwSty{#5}}%
+\expandafter\algocf at mkcmd\csname #1 at endif\endcsname[1]{\a at block{##1}{#8}}%
+\expandafter\algocf at mkcmd\csname #1 at noend\endcsname[1]{\a at group{##1}}%
+\expandafter\algocf at mkcmd\csname #1 at else\endcsname[1]{\a at group{##1}\KwSty{#7}}%
+\@ifundefined{#2 at elseif}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+\expandafter\algocf at mkcmd\csname #2 at elseif\endcsname[1]{%
+ \KwSty{#6} \ArgSty{##1} \KwSty{#5}}%
+\@ifundefined{#3 at else}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+\expandafter\algocf at mkcmd\csname #3 at else\endcsname{\KwSty{#7}}%
+%%%% If then { } endif
+%
+\@ifundefined{algocf@#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#1\endcsname{%
+ \@ifnextchar({\csname algocf@#1opt\endcsname}{\csname algocf@#1\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#1opt\endcsname(##1)##2##3{% \If(){}{}
+ \csname #1 at ifthen\endcsname{##2} ##1\csname #1 at endif\endcsname{##3}}%
+% without side text
+\expandafter\algocf at mkcmd\csname algocf@#1\endcsname[2]{% \If{}{}
+ \csname #1 at ifthen\endcsname{##1}\csname #1 at endif\endcsname{##2}}%
+%
+%%%% If then {} else {} endif
+%
+% side text or not?
+\expandafter\def\csname e#1\endcsname{%
+ \@ifnextchar({\csname algocf at e#1opt\endcsname}{\csname algocf at e#1optif\endcsname}}
+% with side text after if
+\expandafter\def\csname algocf at e#1opt\endcsname(##1)##2##3{% \eIf()
+ \csname #1 at ifthen\endcsname{##2} ##1\csname #1 at else\endcsname{##3}%
+ \csname algocf at e#1opte\endcsname}
+% without side text after if
+\expandafter\def\csname algocf at e#1optif\endcsname##1##2{% \eIf()
+ \csname #1 at ifthen\endcsname{##1}\csname #1 at else\endcsname{##2}%
+ \csname algocf at e#1opte\endcsname}%
+% side text after else or not ?
+\expandafter\def\csname algocf at e#1opte\endcsname{%
+ \@ifnextchar({\csname algocf at e#1optopt\endcsname}{\csname algocf at e#1\endcsname}}
+% else with a side text
+\expandafter\def\csname algocf at e#1optopt\endcsname(##1)##2{%
+ ##1\csname #1 at endif\endcsname{##2}}
+% else without side text
+\expandafter\algocf at mkcmd\csname algocf at e#1\endcsname[1]{%
+ \csname #1 at endif\endcsname{##1}}
+%
+%%%% If then
+%
+% side text or not?
+\expandafter\def\csname l#1\endcsname{% lif
+ \@ifnextchar({\csname algocf at l#1opt\endcsname}{\csname algocf at l#1\endcsname}}
+\expandafter\def\csname u#1\endcsname{% uif
+ \@ifnextchar({\csname algocf at u#1opt\endcsname}{\csname algocf at u#1\endcsname}}
+% with side text
+\expandafter\def\csname algocf at l#1opt\endcsname(##1)##2##3{% \lIf(){}{}
+ \csname #1 at ifthen\endcsname{##2} ##3\algocf at endline\ ##1\par}%
+\expandafter\def\csname algocf at u#1opt\endcsname(##1)##2##3{% \uIf(){}{}
+ \csname #1 at ifthen\endcsname{##2} ##1\csname#1 at noend\endcsname{##3}}%
+% without side text
+\expandafter\algocf at mkcmd\csname algocf at l#1\endcsname[2]{% \lIf{}{}
+ \csname #1 at ifthen\endcsname{##1} ##2}%
+\expandafter\algocf at mkcmd\csname algocf at u#1\endcsname[2]{% \uIf{}{}
+ \csname #1 at ifthen\endcsname{##1}\csname#1 at noend\endcsname{##2}}%
+%
+%%%% ElseIf {} endif
+%
+\@ifundefined{algocf@#2}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#2\endcsname{% ElseIf
+ \@ifnextchar({\csname algocf@#2opt\endcsname}{\csname algocf@#2\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#2opt\endcsname(##1)##2##3{% \ElseIf(){}{}
+ \csname #2 at elseif\endcsname{##2} ##1\csname #1 at endif\endcsname{##3}}
+% without side text
+\expandafter\algocf at mkcmd\csname algocf@#2\endcsname[2]{% \ElseIf{}{}
+ \csname #2 at elseif\endcsname{##1}\csname #1 at endif\endcsname{##2}}
+%
+%%%% ElseIf
+%
+% side text or not?
+\expandafter\def\csname l#2\endcsname{% lElseIf
+ \@ifnextchar({\csname algocf at l#2opt\endcsname}{\csname algocf at l#2\endcsname}}
+\expandafter\def\csname u#2\endcsname{% uElseIf
+ \@ifnextchar({\csname algocf at u#2opt\endcsname}{\csname algocf at u#2\endcsname}}
+% with side text
+\expandafter\def\csname algocf at l#2opt\endcsname(##1)##2##3{% \lElseIf(){}{}
+ \csname #2 at elseif\endcsname{##2} ##3\algocf at endline\ ##1\par}
+\expandafter\def\csname algocf at u#2opt\endcsname(##1)##2##3{% \uElseIf(){}{}
+ \csname #2 at elseif\endcsname{##2} ##1\csname #1 at noend\endcsname{##3}}
+% without side text
+\expandafter\algocf at mkcmd\csname algocf at l#2\endcsname[2]{% \lElseIf{}{}
+ \csname #2 at elseif\endcsname{##1} ##2}%
+\expandafter\algocf at mkcmd\csname algocf at u#2\endcsname[2]{% \uElseIf{}{}
+ \csname #2 at elseif\endcsname{##1}\csname #1 at noend\endcsname{##2}}
+%
+%%%% Else {} endif
+%
+\@ifundefined{algocf@#3}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#3\endcsname{% Else
+ \@ifnextchar({\csname algocf@#3opt\endcsname}{\csname algocf@#3\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#3opt\endcsname(##1)##2{% \Else(){}
+ \csname #3 at else\endcsname\ ##1\csname #1 at endif\endcsname{##2}}
+% without side text
+\expandafter\algocf at mkcmd\csname algocf@#3\endcsname[1]{% \Else{}
+ \csname #3 at else\endcsname\csname #1 at endif\endcsname{##1}}%
+%
+%%%% Else
+%
+% side text or not?
+\expandafter\def\csname l#3\endcsname{% lElse
+ \@ifnextchar({\csname algocf at l#3opt\endcsname}{\csname algocf at l#3\endcsname}}
+\expandafter\def\csname u#3\endcsname{% uElse
+ \@ifnextchar({\csname algocf at u#3opt\endcsname}{\csname algocf at u#3\endcsname}}
+% with side text
+\expandafter\def\csname algocf at l#3opt\endcsname(##1)##2{% \lElse(){}
+ \csname #3 at else\endcsname\ ##2\algocf at endline\ ##1\par}
+% The u-variant must be stored under its own name (with the u prefix), the
+% name \u#3 dispatches to above; defining it under the name of the plain
+% side-text variant would overwrite \#3(){} and leave \u#3(){} undefined.
+\expandafter\def\csname algocf at u#3opt\endcsname(##1)##2{% \uElse(){}
+ \csname #3 at else\endcsname\ ##1\csname #1 at noend\endcsname{##2}}
+% without side text
+\expandafter\algocf at mkcmd\csname algocf at l#3\endcsname[1]{% \lElse{}
+ \csname #3 at else\endcsname\ ##1}%
+\expandafter\algocf at mkcmd\csname algocf at u#3\endcsname[1]{% \uElse{}
+ \csname #3 at else\endcsname\csname #1 at noend\endcsname{##1}}%
+}
+%
+% old for backward compatibility
+% Deprecated front ends to \SetKwIF; each prints a warning with \typeout.
+% \SetKwIf has no else-if command and keyword, so a dummy name is passed to
+% \SetKwIF in both places.  \SetKwIfElseIf forwards its eight arguments
+% unchanged.
+\newcommand{\SetKwIf}[6]{%
+ \SetKwIF{#1}{cf at dumb}{#2}{#3}{#4}{cf at dumb}{#5}{#6}%
+ \typeout{**** WARNING: SetKwIf deprecated: use SetKwIF instead*****^^J}%
+}%
+\newcommand{\SetKwIfElseIf}[8]{%
+ \SetKwIF{#1}{#2}{#3}{#4}{#5}{#6}{#7}{#8}%
+ \typeout{**** WARNING: SetKwIfElseIf deprecated: use SetKwIF instead*****^^J}%
+}%
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% For macros
+%
+% \SetKwFor{<cmd name>}{<keyword before cond>}{<keyword after cond>}{<end keyword>}
+% defines \<cmd name>{<cond>}{<body>} (body as a block closed by the end
+% keyword) and \l<cmd name>{<cond>}{<body>} (body on the same line).  Both
+% accept an optional side text in parentheses right after the command name.
+\newcommand{\SetKwFor}[4]{%
+\@ifundefined{algocf@#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#1\endcsname{ %For
+ \@ifnextchar({\csname algocf@#1opt\endcsname}{\csname algocf@#1\endcsname}}
+\expandafter\def\csname l#1\endcsname{ %lFor
+ \@ifnextchar({\csname algocf at l#1opt\endcsname}{\csname algocf at l#1\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#1opt\endcsname(##1)##2##3{% \For(){}{}
+ \KwSty{#2} \ArgSty{##2} \KwSty{#3} ##1\a at block{##3}{#4}}%
+\expandafter\def\csname algocf at l#1opt\endcsname(##1)##2##3{% \lFor(){}{}
+ \KwSty{#2} \ArgSty{##2} \KwSty{#3} ##3\algocf at endline\ ##1\par}
+% without side text
+\expandafter\algocf at mkcmd\csname algocf@#1\endcsname[2]{% \For{}{}
+ \KwSty{#2} \ArgSty{##1} \KwSty{#3}\a at block{##2}{#4}}%
+\expandafter\algocf at mkcmd\csname algocf at l#1\endcsname[2]{% \lFor{}{}
+ \KwSty{#2} \ArgSty{##1} \KwSty{#3} ##2}%
+}
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% Repeat macros
+%
+% \SetKwRepeat{<cmd name>}{<begin keyword>}{<end keyword>} defines
+%   \<cmd name>{<cond>}{<body>}    begin keyword, body as a group, then the
+%                                  end keyword followed by <cond>
+%   \l<cmd name>{<cond>}{<body>}   the same on a single line
+% Both accept an optional side text in parentheses right after the command
+% name; the block form also accepts one after its last argument.
+\newcommand{\SetKwRepeat}[3]{%
+\@ifundefined{algocf@#1}{\let\algocf at mkcmd=\newcommand}{\let\algocf at mkcmd=\renewcommand}%
+% side text or not?
+\expandafter\def\csname#1\endcsname{ %Repeat
+ \@ifnextchar({\csname algocf@#1opt\endcsname}{\csname algocf@#1\endcsname}}
+\expandafter\def\csname l#1\endcsname{ %lRepeat
+ \@ifnextchar({\csname algocf at l#1opt\endcsname}{\csname algocf at l#1\endcsname}}
+% with side text
+\expandafter\def\csname algocf@#1opt\endcsname(##1)##2##3{% \Repeat(){}{}
+ \KwSty{#2} ##1\a at group{##3}\KwSty{#3} \ArgSty{##2}%
+ \@ifnextchar({\csname algocf@#1optopt\endcsname}{\@endalgoln}%
+}%
+\expandafter\def\csname algocf@#1optopt\endcsname(##1){% \Repeat(){}{}()
+ ##1\@endalgoln}%
+\expandafter\def\csname algocf at l#1opt\endcsname(##1)##2##3{% \lRepeat(){}{}
+ \KwSty{#2} ##3 \KwSty{#3} \ArgSty{##2}\algocf at endline\ ##1\par}%
+% without side text
+% (the line ending with the condition is terminated by a percent sign, as in
+% the side-text variant above, so that no space is typeset between the
+% condition and the end-of-line character)
+\expandafter\algocf at mkcmd\csname algocf@#1\endcsname[2]{% \Repeat{}{}
+ \KwSty{#2}\a at group{##2}\KwSty{#3} \ArgSty{##1}%
+ \@ifnextchar({\csname algocf@#1optopt\endcsname}{\@endalgoln}%
+}%
+\expandafter\algocf at mkcmd\csname algocf at l#1\endcsname[2]{% \lRepeat{}{}
+ \KwSty{#2} ##2 \KwSty{#3} \ArgSty{##1}}%
+}
+%
+%
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%% Environments definitions %%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%%
+%% Caption management
+%%
+% for the following macros:
+% #1 is given by caption and is equal to fnum at algocf
+% #2 is the text given in argument by the user in the \caption macro
+%
+%%%%% text of caption
+\newcommand{\algocf at captiontext}[2]{#1\algocf at typo: \AlCapFnt{}#2} % text of caption
+%
+%%%%% default caption of algorithm: used if no specific style caption is defined
+\newcommand{\algocf at makecaption}[2]{%
+ \addtolength{\hsize}{\algomargin}%
+ \sbox\@tempboxa{\algocf at captiontext{#1}{#2}}%
+ \ifdim\wd\@tempboxa >\hsize% % if caption is longer than a line
+ \hskip .5\algomargin%
+ \parbox[t]{\hsize}{\algocf at captiontext{#1}{#2}}% then caption is not centered
+ \else%
+ \global\@minipagefalse%
+ \hbox to\hsize{\hfil\box\@tempboxa\hfil}% else caption is centered
+ \fi%
+ \addtolength{\hsize}{-\algomargin}%
+}
+%
+\newsavebox\algocf at capbox
+\newcommand{\algocf at makecaption@plain}[2]{%
+ \global\sbox\algocf at capbox{\algocf at makecaption{#1}{#2}}}%
+\newcommand{\algocf at makecaption@boxed}[2]{%
+ \addtolength{\hsize}{-\algomargin}%
+ \global\sbox\algocf at capbox{\algocf at makecaption{#1}{#2}}
+ \addtolength{\hsize}{\algomargin}%
+ }%
+%
+\newcommand{\algocf at makecaption@algoruled}[2]{\algocf at makecaption@ruled{#1}{#2}}%
+\newcommand{\algocf at makecaption@boxruled}[2]{\algocf at makecaption@ruled{#1}{#2}}%
+\newcommand{\algocf at makecaption@ruled}[2]{%
+ \global\sbox\algocf at capbox{\hskip\AlCapHSkip% .5\algomargin%
+ \parbox[t]{\hsize}{\algocf at captiontext{#1}{#2}}}% then caption is not centered
+}
+%
+\newcommand{\algocf at caption@plain}{\vskip\AlCapSkip\box\algocf at capbox}%
+\newcommand{\algocf at caption@boxed}{\vskip\AlCapSkip\box\algocf at capbox}%
+\newcommand{\algocf at caption@ruled}{\box\algocf at capbox\kern2pt\hrule height.8pt depth0pt\kern2pt}%
+\newcommand{\algocf at caption@algoruled}{\algocf at caption@ruled}%
+\newcommand{\algocf at caption@boxruled}{%
+ \addtolength{\hsize}{-0.8pt}%
+ \hbox to\hsize{%
+ \vrule%\hskip-0.35pt%
+ \vbox{%
+ \hrule\vskip2\lineskip%
+ \hbox to\hsize{\unhbox\algocf at capbox\hfill}\vskip2\lineskip%
+ }%
+ %\hskip-0.35pt%
+ \vrule%
+ }\vskip-2\lineskip\nointerlineskip%
+ \addtolength{\hsize}{0.8pt}%
+}
+%
+%
+%%%% set caption for the environment
+
+% beamer defines its own caption, overriding the LaTeX caption!
+% As we need the LaTeX one, the original definition is reproduced here.
+\long\def\algocf at latexcaption#1[#2]#3{% original definition of caption
+ \par
+ \addcontentsline{\csname ext@#1\endcsname}{#1}%
+ {\protect\numberline{\csname the#1\endcsname}{\ignorespaces #2}}%
+ \begingroup
+ \@parboxrestore
+ \if at minipage
+ \@setminipage
+ \fi
+ \normalsize
+ \@makecaption{\csname fnum@#1\endcsname}{\ignorespaces #3}\par
+ \endgroup%
+}
+
+\ifx\beamer at makecaption\undefined%
+\else% beamer detected
+\ifx\@makecaption\undefined%
+\newcommand{\@makecaption}[2]{\relax}%
+\fi%
+\fi
+
+%
+% More and more packages redefine \@caption instead of just \@makecaption, which breaks algorithm2e
+% captions since they rely on the standard \@caption. So we force the definition of \@caption to be
+% the standard one (the one from LaTeX) inside the algorithm environment.
+%
+\newcommand{\algocf at setcaption}{%
+ \let\algocf at savecaption=\@caption%
+ \let\@caption=\algocf at latexcaption%
+ \let\algocf at oldmakecaption=\@makecaption%
+ \renewcommand{\@makecaption}[2]{%
+ \expandafter\csname algocf at makecaption@\algocf at style\endcsname{##1}{##2}}%
+}
+%
+%%%%% reset caption
+%
+% Since we have forced the LaTeX caption for the algorithm environment, we must go back to the
+% caption used in the text.
+\newcommand{\algocf at resetcaption}{%
+ \let\@caption=\algocf at savecaption%
+ \let\@makecaption=\algocf at oldmakecaption%
+}
+%
+%%%%% nocaptionofalgo and restorecaptionofalgo --
+\newcommand{\nocaptionofalgo}{%
+ \let\@old at algocf@captiontext=\algocf at captiontext%
+ \renewcommand{\algocf at captiontext}[2]{\AlCapFnt{}##2}%
+}
+\newcommand{\restorecaptionofalgo}{%
+ \let\algocf at captiontext=\@old at algocf@captiontext%
+}
+%
+% ---------------------- algocf environment
+%
+\newcounter{algocfline} % new counter to make lines numbers be internally
+\setcounter{algocfline}{0} % different in different algorithms
+%
+\expandafter\ifx\csname algocf at within\endcsname\relax% if \algocf at within doesn't exist
+\newcounter{algocf} % just define a new counter
+\renewcommand\thealgocf{\@arabic\c at algocf} % and the way it is printed
+\else% else
+\newcounter{algocf}[\algocf at within] % counter is numbered within \algocf at within
+\renewcommand\thealgocf{\csname the\algocf at within\endcsname.\@arabic\c at algocf}
+\fi
+%
+\def\fps at algocf{htbp} % default
+\def\ftype at algocf{10} % float type
+\def\ext at algocf{\algocf at list} % loa by default, lof if figure option used
+\def\fnum at algocf{{\AlCapFnt\AlTitleFnt{\algorithmcfname\nobreakspace\thealgocf}}}
+\newenvironment{algocf}% % float environment for algorithms
+ {\@float{algocf}}%
+ {\end at float}
+\newenvironment{algocf*}% % float* environment for algorithms
+ {\@dblfloat{algocf}}
+ {\end at dblfloat}
+
+\ifx\l at chapter\undefined%
+\newcommand\listofalgocfs{ % list of algorithms
+ \section*{\listalgorithmcfname}%
+ \@mkboth{\MakeUppercase\listalgorithmcfname}%
+ {\MakeUppercase\listalgorithmcfname}%
+ \@starttoc{loa}%
+ }
+\else%
+\newcommand\listofalgocfs{%
+ \if at twocolumn
+ \@restonecoltrue\onecolumn
+ \else
+ \@restonecolfalse
+ \fi
+ \chapter*{\listalgorithmcfname}%
+ \@mkboth{\MakeUppercase\listalgorithmcfname}%
+ {\MakeUppercase\listalgorithmcfname}%
+ \@starttoc{loa}%
+ \if at restonecol\twocolumn\fi
+ }
+\fi
+
+
+\newcommand*\l at algocf{\@dottedtocline{1}{1em}{2.3em}}% line of the list
+%
+% ---------------------- algorithm environment
+%
+%%%%%%%
+%%
+%% Algorithm environment definition
+%%
+%%%%%%%
+%%
+%
+\newsavebox\algocf at algoframe
+\def\@algocf at pre@plain{\relax}% action to be done before printing the algo.
+\def\@algocf at post@plain{\relax}% action to be done after printing the algo.
+\def\@algocf at capt@plain{bottom}% where the caption should be localized.
+\def\@algocf at pre@boxed{\noindent\begin{lrbox}{\algocf at algoframe}}
+\def\@algocf at post@boxed{\end{lrbox}\framebox[\hsize]{\box\algocf at algoframe}\par}%
+\def\@algocf at capt@boxed{under}%
+\def\@algocf at pre@ruled{\hrule height.8pt depth0pt\kern2pt}%
+\def\@algocf at post@ruled{\kern2pt\hrule\relax}%
+\def\@algocf at capt@ruled{top}%
+\def\@algocf at pre@algoruled{\hrule height.8pt depth0pt\kern2pt}%
+\def\@algocf at post@algoruled{\kern2pt\hrule\relax}%
+\def\@algocf at capt@algoruled{top}%
+\def\@algocf at pre@boxruled{\noindent\begin{lrbox}{\algocf at algoframe}}%
+\def\@algocf at post@boxruled{\end{lrbox}\framebox[\hsize]{\box\algocf at algoframe}\par}%
+\def\@algocf at capt@boxruled{above}%
+%
+%% before algocf or figure environment
+\newcommand{\@algocf at init@caption}{%
+ \@algocf at algotitleofalgo% fix name for \Titleofalgo to \algorithmcfname
+ \algocf at setcaption% set caption to our caption style
+}%
+\newcommand{\@algocf at init}{%
+ \refstepcounter{algocfline}%
+ \ifthenelse{\boolean{algocf at optnoend}}{%
+ \renewcommand{\a at block}[2]{\a at group{##1}}%
+ }{%
+ \renewcommand{\a at block}[2]{\a@@block{##1}{##2}}%
+ }%
+}
+%% after the end of algocf or figure environment
+\newcommand{\@algocf at term@caption}{%
+ \algocf at resetcaption% restore original caption
+}%
+\newcommand{\@algocf at term}{% called last in the end code of the algorithm and procedure/function environments
+ \setboolean{algocf at algoH}{false}% no H by default
+ \ifthenelse{\boolean{algocf at optnoend}}{% NOTE(review): branches are the reverse of \@algocf at init (optnoend -> \a@@block here) -- confirm intended
+ \renewcommand{\a at block}[2]{\a@@block{##1}{##2}}
+ }{%
+ \renewcommand{\a at block}[2]{\a at group{##1}}%
+ }%
+}
+%
+%%%%%%%%%%%%%%%%%
+%% makethealgo: macro which actually prints the algo in its box
+%%
+\newsavebox\algocf at algobox
+\newcommand{\algocf at makethealgo}{% typeset caption, style pre/post commands and the saved algo box, all inside one \vtop
+ \vtop{%
+ % place the caption above if the style asks for it
+ \ifthenelse{\equal{\csname @algocf at capt@\algocf at style\endcsname}{above}}%
+ {\csname algocf at caption@\algocf at style\endcsname}{}%
+ %
+ % precommand according to the style
+ \csname @algocf at pre@\algocf at style\endcsname%
+ % place the caption at the top (after the precommand) if the style asks for it
+ \ifthenelse{\equal{\csname @algocf at capt@\algocf at style\endcsname}{top}}%
+ {\csname algocf at caption@\algocf at style\endcsname}{}%
+ %
+ \box\algocf at algobox% the algo
+ % place the caption at the bottom (before the postcommand) if the style asks for it
+ \ifthenelse{\equal{\csname @algocf at capt@\algocf at style\endcsname}{bottom}}%
+ {\csname algocf at caption@\algocf at style\endcsname}{}%
+ % postcommand according to the style
+ \csname @algocf at post@\algocf at style\endcsname%
+ % place the caption under (after the postcommand) if the style asks for it
+ \ifthenelse{\equal{\csname @algocf at capt@\algocf at style\endcsname}{under}}
+ {\csname algocf at caption@\algocf at style\endcsname}{}%
+ }%
+}
+%%%%%%%%%%%%%%%%%%%
+%
+%% at the beginning of algocf or figure environment
+\newcommand{\@algocf at start}{%
+ \@algoskip%
+ \begin{lrbox}{\algocf at algobox}%
+ \setlength{\algowidth}{\hsize}%
+ \vbox\bgroup% save all the algo in a box
+ \hbox to\algowidth\bgroup\hbox to \algomargin{\hfill}\vtop\bgroup%
+ \ifthenelse{\boolean{algocf at slide}}{\parskip 0.5ex\color{black}}{}%
+ % initialization
+ \addtolength{\hsize}{-1.5\algomargin}%
+ \let\@mathsemicolon=\;\def\;{\ifmmode\@mathsemicolon\else\@endalgoln\fi}%
+ \raggedright\AlFnt{}%
+ \ifthenelse{\boolean{algocf at slide}}{\incmargin{\skipalgocfslide}}{}%
+ \@algoinsideskip%
+ %
+}
+%
+%% at the end of algocf or figure environment
+\newcommand{\@algocf at finish}{% close the boxes opened by \@algocf at start, print the algo, then restore \hsize, skips and \;
+ \@algoinsideskip%
+ \egroup% end of the vtop which contains all the text
+ \egroup% end of the hbox which contains [margin][vtop]
+ \ifthenelse{\boolean{algocf at slide}}{\decmargin{\skipalgocfslide}}{}%
+ %
+ \egroup% end of the main vbox
+ \end{lrbox}%
+ %\egroup% end of algo box
+ \algocf at makethealgo% print the algo
+ \@algoskip%
+ % restore dimensions and macros changed by \@algocf at start
+ \setlength{\hsize}{\algowidth}%
+ \lineskip\normallineskip\setlength{\skiptotal}{\@defaultskiptotal}%
+ \let\;=\@mathsemicolon%
+ %
+}
+%%%%%%%%%%%%%%%%%%%%
+%% basic definition of the environment algorithm
+%%
+
+\newboolean{algocf at algoH}\setboolean{algocf at algoH}{false}
+\newenvironment{algocf at Here}{\noindent%
+ \def\@captype{algocf}% if not defined, caption exit with an error
+% \hbox\bgroup%
+ \begin{minipage}{\hsize}
+}{%
+ \end{minipage}
+% \egroup%
+}%
+\newenvironment{\algocf at envname}[1][htbp]{%
+ \@algocf at init%
+ \ifthenelse{\equal{\algocf at float}{figure}}%
+ {\begin{figure}[#1]}%
+ {\@algocf at init@caption\ifthenelse{\equal{#1}{H}}%
+ {\setboolean{algocf at algoH}{true}\begin{algocf at Here}}%
+ {\begin{algocf}[#1]}%
+ }%
+ \@algocf at start%
+ \@ResetCounterIfNeeded%
+ \algocf at linesnumbered%
+}{%
+ \@algocf at finish%
+ \ifthenelse{\equal{\algocf at float}{figure}}%
+ {\end{figure}}%
+ {\@algocf at term@caption\ifthenelse{\boolean{algocf at algoH}}%
+ {\end{algocf at Here}}%
+ {\end{algocf}}%
+ }%
+ \@algocf at term
+}
+%%%
+%%% algorithm*
+%%%
+\newenvironment{\algocf at envname*}[1][htbp]{%
+ \@algocf at init%
+ \ifthenelse{\equal{\algocf at float}{figure}}%
+ {\begin{figure*}[#1]}%
+ {\begin{algocf*}[#1]}%
+ \@algocf at start%
+ \@ResetCounterIfNeeded%
+ \algocf at linesnumbered%
+}{
+ \@algocf at finish%
+ \ifthenelse{\equal{\algocf at float}{figure}}%
+ {\end{figure*}}%
+ {\end{algocf*}}%
+ \@algocf at term%
+}
+%
+%%%%%%%%%%%%%%%%%%%%%%%
+%%%
+%
+\expandafter\newcommand\csname\algocf at listofalgorithms\endcsname{%
+ \ifthenelse{\equal{\algocf at float}{figure}}{\listoffigures}{\listofalgocfs}
+}
+%%%
+%%%
+%
+% ---------------------- procedure and function environments
+%
+%
+% -- new style (used in particular in the caption of function and procedure environments)
+%
+\newcommand{\ProcNameSty}[1]{\FuncSty{#1}}%
+\newcommand{\SetProcNameSty}[1]{\renewcommand{\ProcNameSty}[1]{\textnormal{\csname#1\endcsname{##1}}}}
+\newcommand{\ProcArgSty}[1]{\ArgSty{#1}}%
+\newcommand{\SetProcArgSty}[1]{\renewcommand{\ProcArgSty}[1]{\textnormal{\csname#1\endcsname{##1}}}}
+% three macros to extract parts of the caption
+\gdef\algocf at captname#1(#2)#3@{#1} % keep the characters before the first "("
+\gdef\algocf at captparam#1(#2)#3@{#2} % keep the characters between "(" and ")"
+\gdef\algocf at captother#1(#2)#3@{#3} % keep the characters after ")" up to the terminating "@"
+%
+%%% Text of caption for Procedure or Function
+\newcommand{\algocf at captionproctext}[2]{%
+ {\AlCapFnt{}\AlTitleFnt{\algocf at procname} %
+ \ProcNameSty{\algocf at captname #2@}% Name of the procedure in ProcName Style.
+ \ifthenelse{\equal{\algocf at captparam #2@}{\arg at e}}{}% if no argument, write nothing
+ {% else put arguments in ProcArgSty:
+ \ProcNameSty{(}\ProcArgSty{\algocf at captparam #2@}\ProcNameSty{)}%
+ }% endif
+ \algocf at captother #2@%
+ }
+}
+%%%% set caption for the environment
+% unfortunately, makecaption is called with \ignorespaces #3 so
+% we can't do the @currentlabel definition inside \algocf at captionproctext
+\long\def\algocf at caption@proc#1[#2]#3{%
+ \gdef\@currentlabel{\algocf at captname #3@}%
+ \algocf at old@caption{#1}[\algocf at procname\nobreakspace #2]{\ #3}%
+}%
+\newcommand{\algocf at setcaptionproc}{%
+ \let\algocf at oldcaptiontext=\algocf at captiontext%
+ \renewcommand{\algocf at captiontext}[2]{%
+ \algocf at captionproctext{##1}{##2}}%
+ \let\algocf at old@caption=\@caption%
+ \let\@caption=\algocf at caption@proc%
+}
+%%%%% reset caption
+\newcommand{\algocf at resetcaptionproc}{%
+ \let\algocf at captiontext=\algocf at oldcaptiontext%
+ \let\@caption=\algocf at old@caption%
+}
+%
+%
+%%%%% algocf at proc is the generic environment for procedure and function environment.
+%
+\newboolean{algocf at procstar}\setboolean{algocf at procstar}{false}
+\newenvironment{algocf at proc}[1][htbp]{%
+ \@algocf at proctitleofalgo% set Titleofalgo to Procedure: or Function:
+ % accordingly to the environment
+ \let\old at thealgocf=\thealgocf%\renewcommand{\thealgocf}{--}%
+ \algocf at setcaptionproc% set the text of caption to proc
+ \algocf at setcaption% set caption to our caption style
+ \refstepcounter{algocfline}%
+ \ifthenelse{\equal{\algocf at float}{figure}}{%
+ \ifthenelse{\boolean{algocf at procstar}}{\begin{figure*}[#1]}{\begin{figure}[#1]}%
+ }{%
+ \ifthenelse{\boolean{algocf at procstar}}%
+ {\begin{algocf*}[#1]}%
+ {\ifthenelse{\equal{#1}{H}}%
+ {\setboolean{algocf at algoH}{true}\begin{algocf at Here}}%
+ {\begin{algocf}[#1]}%
+ }%
+ }%
+ \@algocf at start%
+ \@ResetCounterIfNeeded%
+ \algocf at linesnumbered%
+}{%
+ \@algocf at finish%
+ \ifthenelse{\equal{\algocf at float}{figure}}{%
+ \ifthenelse{\boolean{algocf at procstar}}{\end{figure*}}{\end{figure}}%
+ }{%
+ \ifthenelse{\boolean{algocf at procstar}}
+ {\end{algocf*}}
+ {\ifthenelse{\boolean{algocf at algoH}}
+ {\end{algocf at Here}}%
+ {\end{algocf}}%
+ }%
+ }%
+ \let\thealgocf=\old at thealgocf%
+ \@algocf at term% restore original caption and H boolean
+ \algocf at resetcaptionproc%
+}
+
+%
+% -- procedure and function environments are defined from algocf at proc environment
+%
+\newenvironment{procedure}[1][htbp]%
+{\setboolean{algocf at procstar}{false}%
+ \newcommand{\algocf at procname}{\@algocf at procname}\begin{algocf at proc}[#1]}%
+{\end{algocf at proc}}
+\newenvironment{function}[1][htbp]%
+{\setboolean{algocf at procstar}{false}%
+ \newcommand{\algocf at procname}{\@algocf at funcname}\begin{algocf at proc}[#1]}%
+{\end{algocf at proc}}
+%
+\newenvironment{procedure*}[1][htbp]%
+{\setboolean{algocf at procstar}{true}%
+ \newcommand{\algocf at procname}{\@algocf at procname}\begin{algocf at proc}[#1]}%
+{\end{algocf at proc}}
+\newenvironment{function*}[1][htbp]%
+{\setboolean{algocf at procstar}{true}%
+ \newcommand{\algocf at procname}{\@algocf at funcname}\begin{algocf at proc}[#1]}%
+{\end{algocf at proc}}
+%
+%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%
+\newcommand{\Titleofalgo}[1]{\@titleprefix\TitleSty{#1}\par\smallskip}
+%
+%
+% ------------------------- Default Definitions
+%
+%%
+%%
+%
+\newcommand{\algocf at defaults@common}{
+%\SetKwInOut{AlgDonnees}{Donn\'ees}\SetKwInOut{AlgRes}{R\'esultat}
+\SetKwInput{Donnees}{Donn\'ees}%
+\SetKwInput{Res}{R\'esultat}%
+\SetKwInput{Entree}{Entr\'ees}%
+\SetKwInput{Sortie}{Sorties}%
+\SetKw{KwA}{\`a}%
+\SetKw{Retour}{retourner}%
+\SetKwBlock{Deb}{d\'ebut}{fin}%
+\SetKwRepeat{Repeter}{r\'ep\'eter}{jusqu'\`a}%
+%
+\SetKwComment{tcc}{/* }{ */}
+\SetKwComment{tcp}{// }{}
+%
+%\SetKwInOut{AlgData}{Data}\SetKwInOut{AlgResult}{Result}
+\SetKwInput{KwIn}{Input}%
+\SetKwInput{KwOut}{Output}%
+\SetKwInput{KwData}{Data}%
+\SetKwInput{KwResult}{Result}%
+\SetKw{KwTo}{to}
+\SetKw{KwRet}{return}%
+\SetKw{Return}{return}%
+\SetKwBlock{Begin}{begin}{end}%
+\SetKwRepeat{Repeat}{repeat}{until}%
+%
+% --- German keywords
+%
+% \SetKwInOut{AlgDaten}{Daten}%AlgData
+% \SetKwInOut{AlgErgebnis}{Ergebnis}%AlgResult
+\SetKwInput{Ein}{Eingabe}%KwIn
+\SetKwInput{Aus}{Ausgabe}%KwOut
+\SetKwInput{Daten}{Daten}%KwData
+\SetKwInput{Ergebnis}{Ergebnis}%KwResult
+\SetKw{Bis}{bis}%KwTo
+\SetKw{KwZurueck}{zur\"uck}%KwRet
+\SetKw{Zurueck}{zur\"uck}%Return
+\SetKwBlock{Beginn}{Beginn}{Ende}%Begin
+\SetKwRepeat{Wiederh}{wiederhole}{bis}%Repeat
+%
+% --- Czech keywords
+%
+% \SetKwInOut{AlgVst}{Vstup}\SetKwInOut{AlgVyst}{V\'{y}stup}
+\SetKwInput{Vst}{Vstup}%
+\SetKwInput{Vyst}{V\'{y}stup}%
+\SetKwInput{Vysl}{V\'{y}sledek}%
+%
+% --- Portuguese keywords
+%
+% \SetKwInOut{AlgDados}{Dados}\SetKwInOut{AlgResultado}{Result.}
+\SetKwInput{Entrada}{Entrada}%
+\SetKwInput{Saida}{Sa\'{i}da}%
+\SetKwInput{Dados}{Dados}%
+\SetKwInput{Resultado}{Resultado}%
+\SetKw{Ate}{at\'{e}}
+\SetKw{KwRetorna}{retorna}%
+\SetKw{Retorna}{retorna}%
+\SetKwBlock{Inicio}{in\'{i}cio}{fim}%
+\SetKwRepeat{Repita}{repita}{at\'{e}}%
+% --- End
+}
+%
+%
+\newcommand{\algocf at defaults@longend}{% default keywords where each construct has its own end keyword; French first
+\algocf at defaults@common
+\SetKwIF{gSi}{gSinonSi}{gSinon}{si}{alors}{sinon si}{sinon}{finsi}%
+\SetKwIF{Si}{SinonSi}{Sinon}{si}{alors}{sinon si}{sinon}{finsi}%
+\SetKwSwitch{Suivant}{Cas}{Autre}{suivant}{faire}{cas o\`u}{autres cas}{fin d'alternative}%
+\SetKwFor{Pour}{pour}{faire}{finpour}%
+\SetKwFor{PourPar}{pour}{faire en parall\`ele}{finpour}%
+\SetKwFor{PourCh}{pour chaque}{faire}{finprch}%
+\SetKwFor{PourTous}{pour tous les}{faire}{finprts}%
+\SetKwFor{Tq}{tant que}{faire}{fintq}%
+% --- English for longend
+\SetKwIF{gIf}{gElsIf}{gElse}{if}{then}{else if}{else}{endif}%
+\SetKwIF{If}{ElseIf}{Else}{if}{then}{else if}{else}{endif}%
+\SetKwSwitch{Switch}{Case}{Other}{switch}{do}{case}{otherwise}{endsw}%
+\SetKwFor{For}{for}{do}{endfor}%
+\SetKwFor{ForPar}{for}{do in parallel}{endfpar}
+\SetKwFor{ForEach}{foreach}{do}{endfch}%
+\SetKwFor{ForAll}{forall the}{do}{endfall}%
+\SetKwFor{While}{while}{do}{endw}%
+%
+% --- German for longend
+%
+\SetKwIF{gWenn}{gSonstWenn}{gSonst}{wenn}{dann}{sonst wenn}{sonst}{Ende-wenn}%gIf
+\SetKwIF{Wenn}{SonstWenn}{Sonst}{wenn}{dann}{sonst wenn}{sonst}{Ende-wenn}%If
+\SetKwSwitch{Unterscheide}{Fall}{Anderes}{unterscheide}{tue}{Fall}{sonst}{Ende-Unt.}%Switch
+\SetKwFor{Fuer}{f\"ur}{tue}{Ende-f\"ur}%For
+\SetKwFor{FuerPar}{f\"ur}{tue gleichzeitig}{Ende-gleichzeitig}%ForPar
+\SetKwFor{FuerJedes}{f\"ur jedes}{tue}{Ende-f\"ur}%ForEach
+\SetKwFor{FuerAlle}{f\"ur alle}{tue}{Ende-f\"ur}%ForAll
+\SetKwFor{Solange}{solange}{tue}{Ende-solange}%While
+%
+% --- Portuguese for longend
+%
+\SetKwIF{gSe}{gSenaoSe}{gSenao}{se}{ent\~{a}o}{sen\~{a}o se}{sen\~{a}o}{fim se}%
+\SetKwIF{Se}{SenaoSe}{Senao}{se}{ent\~{a}o}{sen\~{a}o se}{sen\~{a}o}{fim se}%
+\SetKwSwitch{Selec}{Caso}{Outro}{selecione}{fa\c{c}a}{caso}{sen\~{a}o}{fim selec}%
+\SetKwFor{Para}{para}{fa\c{c}a}{fim para}%
+\SetKwFor{ParaPar}{para}{fa\c{c}a em paralelo}{fim para}
+\SetKwFor{ParaCada}{para cada}{fa\c{c}a}{fim para cada}%
+\SetKwFor{ParaTodo}{para todo}{fa\c{c}a}{fim para todo}%
+\SetKwFor{Enqto}{enquanto}{fa\c{c}a}{fim enqto}%
+}
+%
+%
+\newcommand{\algocf at defaults@shortend}{% default keywords where every construct shares one short end keyword; French first
+\algocf at defaults@common
+\SetKwIF{gSi}{gSinonSi}{gSinon}{si}{alors}{sinon si}{sinon}{fin}%
+\SetKwIF{Si}{SinonSi}{Sinon}{si}{alors}{sinon si}{sinon}{fin}%
+\SetKwSwitch{Suivant}{Cas}{Autre}{suivant}{faire}{cas o\`u}{autres cas}{fin}%
+\SetKwFor{Pour}{pour}{faire}{fin}%
+\SetKwFor{PourPar}{pour}{faire en parall\`ele}{fin}%
+\SetKwFor{PourCh}{pour chaque}{faire}{fin}%
+\SetKwFor{PourTous}{pour tous les}{faire}{fin}%
+\SetKwFor{Tq}{tant que}{faire}{fin}%
+%
+% --- English for shortend
+\SetKwIF{gIf}{gElsIf}{gElse}{if}{then}{else if}{else}{end}%
+\SetKwIF{If}{ElseIf}{Else}{if}{then}{else if}{else}{end}%
+\SetKwSwitch{Switch}{Case}{Other}{switch}{do}{case}{otherwise}{end}%
+\SetKwFor{For}{for}{do}{end}%
+\SetKwFor{ForPar}{for}{do in parallel}{end}
+\SetKwFor{ForEach}{foreach}{do}{end}%
+\SetKwFor{ForAll}{forall}{do}{end}%
+\SetKwFor{While}{while}{do}{end}%
+%
+% --- German for shortend
+%
+\SetKwIF{gWenn}{gSonstWenn}{gSonst}{wenn}{dann}{sonst wenn}{sonst}{Ende}%gIf
+\SetKwIF{Wenn}{SonstWenn}{Sonst}{wenn}{dann}{sonst wenn}{sonst}{Ende}%If
+\SetKwSwitch{Unterscheide}{Fall}{Anderes}{unterscheide}{tue}{Fall}{sonst}{Ende}%Switch; end keyword was empty, unlike every other German shortend construct
+\SetKwFor{Fuer}{f\"ur}{tue}{Ende}%For
+\SetKwFor{FuerPar}{f\"ur}{tue gleichzeitig}{Ende}%ForPar
+\SetKwFor{FuerJedes}{f\"ur jedes}{tue}{Ende}%ForEach
+\SetKwFor{FuerAlle}{f\"ur alle}{tue}{Ende}%ForAll
+\SetKwFor{Solange}{solange}{tue}{Ende}%While
+%
+% --- Portuguese for shortend
+%
+\SetKwIF{gSe}{gSenaoSe}{gSenao}{se}{ent\~{a}o}{sen\~{a}o se}{sen\~{a}o}{fim}%
+\SetKwIF{Se}{SenaoSe}{Senao}{se}{ent\~{a}o}{sen\~{a}o se}{sen\~{a}o}{fim}%
+\SetKwSwitch{Selec}{Caso}{Outro}{selecione}{fa\c{c}a}{caso}{sen\~{a}o}{fim}%
+\SetKwFor{Para}{para}{fa\c{c}a}{fim}%
+\SetKwFor{ParaPar}{para}{fa\c{c}a em paralelo}{fim}
+\SetKwFor{ParaCada}{para cada}{fa\c{c}a}{fim}%
+\SetKwFor{ParaTodo}{para todo}{fa\c{c}a}{fim}%
+\SetKwFor{Enqto}{enquanto}{fa\c{c}a}{fim}%
+}
+%
+%
+\newcommand{\algocf at defaults@noend}{% default keywords with empty end keywords; \a at block is reduced to \a at group; French first
+\renewcommand{\a at block}[2]{\a at group{##1}}
+\algocf at defaults@common
+\SetKwIF{gSi}{gSinonSi}{gSinon}{si}{alors}{sinon si}{sinon}{}%
+\SetKwIF{Si}{SinonSi}{Sinon}{si}{alors}{sinon si}{sinon}{}%
+\SetKwSwitch{Suivant}{Cas}{Autre}{suivant}{faire}{cas o\`u}{autres cas}{}%
+\SetKwFor{Pour}{pour}{faire}{}%
+\SetKwFor{PourPar}{pour}{faire en parall\`ele}{}%
+\SetKwFor{PourCh}{pour chaque}{faire}{}%
+\SetKwFor{PourTous}{pour tous les}{faire}{}%
+\SetKwFor{Tq}{tant que}{faire}{}%
+% --- English for noend. NOTE(review): \SetKwIF{If} registers ElsIf here but ElseIf in the longend/shortend defaults -- confirm which name is intended
+\SetKwIF{gIf}{gElsIf}{gElse}{if}{then}{else if}{else}{}%
+\SetKwIF{If}{ElsIf}{Else}{if}{then}{else if}{else}{}%
+\SetKwSwitch{Switch}{Case}{Other}{switch}{do}{case}{otherwise}{}%
+\SetKwFor{For}{for}{do}{}%
+\SetKwFor{ForPar}{for}{do in parallel}{}
+\SetKwFor{ForEach}{foreach}{do}{}%
+\SetKwFor{ForAll}{forall}{do}{}%
+\SetKwFor{While}{while}{do}{}%
+% --- German for noend
+\SetKwIF{gWenn}{gSonstWenn}{gSonst}{wenn}{dann}{sonst wenn}{sonst}{}%gIf
+\SetKwIF{Wenn}{SonstWenn}{Sonst}{wenn}{dann}{sonst wenn}{sonst}{}%If
+\SetKwSwitch{Unterscheide}{Fall}{Anderes}{unterscheide}{tue}{Fall}{sonst}{}%Switch
+\SetKwFor{Fuer}{f\"ur}{tue}{}%For
+\SetKwFor{FuerPar}{f\"ur}{tue gleichzeitig}{}%ForPar
+\SetKwFor{FuerJedes}{f\"ur jedes}{tue}{}%ForEach
+\SetKwFor{FuerAlle}{f\"ur alle}{tue}{}%ForAll
+\SetKwFor{Solange}{solange}{tue}{}%While
+% --- Portuguese for noend
+\SetKwIF{gSe}{gSenaoSe}{gSenao}{se}{ent\~{a}o}{sen\~{a}o se}{sen\~{a}o}{}%
+\SetKwIF{Se}{SenaoSe}{Senao}{se}{ent\~{a}o}{sen\~{a}o se}{sen\~{a}o}{}%
+\SetKwSwitch{Selec}{Caso}{Outro}{selecione}{fa\c{c}a}{caso}{sen\~{a}o}{}%
+\SetKwFor{Para}{para}{fa\c{c}a}{}%
+\SetKwFor{ParaPar}{para}{fa\c{c}a em paralelo}{}
+\SetKwFor{ParaCada}{para cada}{fa\c{c}a}{}%
+\SetKwFor{ParaTodo}{para todo}{fa\c{c}a}{}%
+\SetKwFor{Enqto}{enquanto}{fa\c{c}a}{}%
+}
+%
+%%
+%%
+%%
+%
+% default macros are:
+\defaultsmacros at algo
+\SetNoline
+%
+%
+%
+%%
+%%%
+%%%% END
diff --git a/doc/contrafold.pdf b/doc/contrafold.pdf
new file mode 100644
index 0000000..d2d53b6
Binary files /dev/null and b/doc/contrafold.pdf differ
diff --git a/doc/logo.jpg b/doc/logo.jpg
new file mode 100644
index 0000000..e1cbee3
Binary files /dev/null and b/doc/logo.jpg differ
diff --git a/doc/macros.sty b/doc/macros.sty
new file mode 100644
index 0000000..327cd79
--- /dev/null
+++ b/doc/macros.sty
@@ -0,0 +1,334 @@
+\RequirePackage{graphicx}
+\RequirePackage{amsmath}
+\RequirePackage{amssymb}
+\RequirePackage{amsthm}
+\RequirePackage{url}
+\RequirePackage{pstricks}
+\RequirePackage{pst-node}
+\RequirePackage{pst-plot}
+
+% Macros which don't take arguments
+
+\newenvironment{itemize-compact}{\begin{itemize}\itemsep=0pt\topsep=0pt\partopsep=0pt\parskip=0pt\parsep=0pt}{\end{itemize}}
+\newenvironment{enumerate-compact}{\begin{enumerate}\itemsep=0pt\topsep=0pt\partopsep=0pt\parskip=0pt\parsep=0pt}{\end{enumerate}}
+\newenvironment{optimization}{\begin{array}{cll}}{\end{array}}
+\newcommand{\minimize}{\displaystyle\operatornamewithlimits{minimize}}
+\newcommand{\maximize}{\displaystyle\operatornamewithlimits{maximize}}
+\newcommand{\subjectto}{\operatorname{subject\ to}}
+\newcommand{\grad}{\nabla}
+
+% Macros which take arguments
+
+\newcommand{\at}[1]{^{({#1})}}
+\newcommand{\parens}[1]{{\left({#1}\right)}}
+\newcommand{\brackets}[1]{{\left[{#1}\right]}}
+\newcommand{\anglebrackets}[1]{{\left\langle{#1}\right\rangle}}
+\newcommand{\inner}[2]{\anglebrackets{{#1},{#2}}}
+\newcommand{\ind}[1]{{\mathbf{I}\set{{#1}}}}
+\newcommand{\zero}{{\mathbf{0}}}
+\newcommand{\one}{{\mathbf{1}}}
+\newcommand{\kronecker}[2]{{\delta_{{#1}{#2}}}}
+\newcommand{\set}[1]{{\left\{{#1}\right\}}}
+\newcommand{\abs}[1]{{\left|{#1}\right|}}
+\newcommand{\reals}{{\mathbb{R}}}
+\newcommand{\complex}{{\mathbb{C}}}
+\newcommand{\norm}[1]{\left|\left|{#1}\right|\right|}
+\newcommand{\deriv}[1]{{\frac{d}{d{#1}}}}
+\newcommand{\pd}[1]{{\frac{\partial}{\partial {#1}}}}
+\newcommand{\pdfrac}[2]{{\frac{\partial {#1}}{\partial {#2}}}}
+\newcommand{\ppd}[1]{{\frac{\partial^2}{\partial {#1}^2}}}
+\newcommand{\pdpd}[2]{{\frac{\partial^2}{\partial {#1}\partial {#2}}}}
+\newcommand{\commentout}[1]{}
+\newcommand{\nullspace}[1]{{\mathcal{N}({#1})}}
+\newcommand{\range}[1]{{\mathcal{R}({#1})}}
+\newcommand{\floor}[1]{{\left\lfloor {#1} \right\rfloor}}
+\newcommand{\ceil}[1]{{\left\lceil {#1} \right\rceil}}
+\newcommand{\remark}[1]{{\red \textbf{[[#1]]}}}
+\newcommand{\draft}[1]{\title{#1 \\ \textbf{\normalsize [[Draft: Do not circulate.]]}}}
+\newcommand{\mathbi}[1]{\textbf{\em #1}}
+
+% Operators
+
+\newcommand{\argmin}{\operatornamewithlimits{arg\,min}}
+\newcommand{\argmax}{\operatornamewithlimits{arg\,max}}
+\newcommand{\rank}{\operatorname{rank}}
+\newcommand{\sign}{\operatorname{sign}}
+\newcommand{\dom}{\operatorname{dom}}
+\newcommand{\diag}{\operatorname{diag}}
+\newcommand{\trace}{\operatorname{tr}}
+
+% The alphabet!
+
+\newcommand{\cA}{{\mathcal{A}}}
+\newcommand{\cB}{{\mathcal{B}}}
+\newcommand{\cC}{{\mathcal{C}}}
+\newcommand{\cD}{{\mathcal{D}}}
+\newcommand{\cE}{{\mathcal{E}}}
+\newcommand{\cF}{{\mathcal{F}}}
+\newcommand{\cG}{{\mathcal{G}}}
+\newcommand{\cH}{{\mathcal{H}}}
+\newcommand{\cI}{{\mathcal{I}}}
+\newcommand{\cJ}{{\mathcal{J}}}
+\newcommand{\cK}{{\mathcal{K}}}
+\newcommand{\cL}{{\mathcal{L}}}
+\newcommand{\cM}{{\mathcal{M}}}
+\newcommand{\cN}{{\mathcal{N}}}
+\newcommand{\cO}{{\mathcal{O}}}
+\newcommand{\cP}{{\mathcal{P}}}
+\newcommand{\cQ}{{\mathcal{Q}}}
+\newcommand{\cR}{{\mathcal{R}}}
+\newcommand{\cS}{{\mathcal{S}}}
+\newcommand{\cT}{{\mathcal{T}}}
+\newcommand{\cU}{{\mathcal{U}}}
+\newcommand{\cV}{{\mathcal{V}}}
+\newcommand{\cW}{{\mathcal{W}}}
+\newcommand{\cX}{{\mathcal{X}}}
+\newcommand{\cY}{{\mathcal{Y}}}
+\newcommand{\cZ}{{\mathcal{Z}}}
+
+\newcommand{\bA}{{\mathbf{A}}}
+\newcommand{\bB}{{\mathbf{B}}}
+\newcommand{\bC}{{\mathbf{C}}}
+\newcommand{\bD}{{\mathbf{D}}}
+\newcommand{\bE}{{\mathbf{E}}}
+\newcommand{\bF}{{\mathbf{F}}}
+\newcommand{\bG}{{\mathbf{G}}}
+\newcommand{\bH}{{\mathbf{H}}}
+\newcommand{\bI}{{\mathbf{I}}}
+\newcommand{\bJ}{{\mathbf{J}}}
+\newcommand{\bK}{{\mathbf{K}}}
+\newcommand{\bL}{{\mathbf{L}}}
+\newcommand{\bM}{{\mathbf{M}}}
+\newcommand{\bN}{{\mathbf{N}}}
+\newcommand{\bO}{{\mathbf{O}}}
+\newcommand{\bP}{{\mathbf{P}}}
+\newcommand{\bQ}{{\mathbf{Q}}}
+\newcommand{\bR}{{\mathbf{R}}}
+\newcommand{\bS}{{\mathbf{S}}}
+\newcommand{\bT}{{\mathbf{T}}}
+\newcommand{\bU}{{\mathbf{U}}}
+\newcommand{\bV}{{\mathbf{V}}}
+\newcommand{\bW}{{\mathbf{W}}}
+\newcommand{\bX}{{\mathbf{X}}}
+\newcommand{\bY}{{\mathbf{Y}}}
+\newcommand{\bZ}{{\mathbf{Z}}}
+
+\newcommand{\bbA}{{\mathbb{A}}}
+\newcommand{\bbB}{{\mathbb{B}}}
+\newcommand{\bbC}{{\mathbb{C}}}
+\newcommand{\bbD}{{\mathbb{D}}}
+\newcommand{\bbE}{{\mathbb{E}}}
+\newcommand{\bbF}{{\mathbb{F}}}
+\newcommand{\bbG}{{\mathbb{G}}}
+\newcommand{\bbH}{{\mathbb{H}}}
+\newcommand{\bbI}{{\mathbb{I}}}
+\newcommand{\bbJ}{{\mathbb{J}}}
+\newcommand{\bbK}{{\mathbb{K}}}
+\newcommand{\bbL}{{\mathbb{L}}}
+\newcommand{\bbM}{{\mathbb{M}}}
+\newcommand{\bbN}{{\mathbb{N}}}
+\newcommand{\bbO}{{\mathbb{O}}}
+\newcommand{\bbP}{{\mathbb{P}}}
+\newcommand{\bbQ}{{\mathbb{Q}}}
+\newcommand{\bbR}{{\mathbb{R}}}
+\newcommand{\bbS}{{\mathbb{S}}}
+\newcommand{\bbT}{{\mathbb{T}}}
+\newcommand{\bbU}{{\mathbb{U}}}
+\newcommand{\bbV}{{\mathbb{V}}}
+\newcommand{\bbW}{{\mathbb{W}}}
+\newcommand{\bbX}{{\mathbb{X}}}
+\newcommand{\bbY}{{\mathbb{Y}}}
+\newcommand{\bbZ}{{\mathbb{Z}}}
+
+\newcommand{\fA}{{\mathfrak{A}}}
+\newcommand{\fB}{{\mathfrak{B}}}
+\newcommand{\fC}{{\mathfrak{C}}}
+\newcommand{\fD}{{\mathfrak{D}}}
+\newcommand{\fE}{{\mathfrak{E}}}
+\newcommand{\fF}{{\mathfrak{F}}}
+\newcommand{\fG}{{\mathfrak{G}}}
+\newcommand{\fH}{{\mathfrak{H}}}
+\newcommand{\fI}{{\mathfrak{I}}}
+\newcommand{\fJ}{{\mathfrak{J}}}
+\newcommand{\fK}{{\mathfrak{K}}}
+\newcommand{\fL}{{\mathfrak{L}}}
+\newcommand{\fM}{{\mathfrak{M}}}
+\newcommand{\fN}{{\mathfrak{N}}}
+\newcommand{\fO}{{\mathfrak{O}}}
+\newcommand{\fP}{{\mathfrak{P}}}
+\newcommand{\fQ}{{\mathfrak{Q}}}
+\newcommand{\fR}{{\mathfrak{R}}}
+\newcommand{\fS}{{\mathfrak{S}}}
+\newcommand{\fT}{{\mathfrak{T}}}
+\newcommand{\fU}{{\mathfrak{U}}}
+\newcommand{\fV}{{\mathfrak{V}}}
+\newcommand{\fW}{{\mathfrak{W}}}
+\newcommand{\fX}{{\mathfrak{X}}}
+\newcommand{\fY}{{\mathfrak{Y}}}
+\newcommand{\fZ}{{\mathfrak{Z}}}
+
+\newcommand{\vA}{{\vec{A}}}
+\newcommand{\vB}{{\vec{B}}}
+\newcommand{\vC}{{\vec{C}}}
+\newcommand{\vD}{{\vec{D}}}
+\newcommand{\vE}{{\vec{E}}}
+\newcommand{\vF}{{\vec{F}}}
+\newcommand{\vG}{{\vec{G}}}
+\newcommand{\vH}{{\vec{H}}}
+\newcommand{\vI}{{\vec{I}}}
+\newcommand{\vJ}{{\vec{J}}}
+\newcommand{\vK}{{\vec{K}}}
+\newcommand{\vL}{{\vec{L}}}
+\newcommand{\vM}{{\vec{M}}}
+\newcommand{\vN}{{\vec{N}}}
+\newcommand{\vO}{{\vec{O}}}
+\newcommand{\vP}{{\vec{P}}}
+\newcommand{\vQ}{{\vec{Q}}}
+\newcommand{\vR}{{\vec{R}}}
+\newcommand{\vS}{{\vec{S}}}
+\newcommand{\vT}{{\vec{T}}}
+\newcommand{\vU}{{\vec{U}}}
+\newcommand{\vV}{{\vec{V}}}
+\newcommand{\vW}{{\vec{W}}}
+\newcommand{\vX}{{\vec{X}}}
+\newcommand{\vY}{{\vec{Y}}}
+\newcommand{\vZ}{{\vec{Z}}}
+
+\newcommand{\ba}{{\mathbi{a}}}
+\newcommand{\bb}{{\mathbi{b}}}
+\newcommand{\bc}{{\mathbi{c}}}
+\newcommand{\bd}{{\mathbi{d}}}
+\newcommand{\be}{{\mathbi{e}}}
+\newcommand{\boldf}{{\mathbi{f}}}
+\newcommand{\bg}{{\mathbi{g}}}
+\newcommand{\bh}{{\mathbi{h}}}
+\newcommand{\bi}{{\mathbi{i}}}
+\newcommand{\bj}{{\mathbi{j}}}
+\newcommand{\bk}{{\mathbi{k}}}
+\newcommand{\bl}{{\mathbi{l}}}
+\newcommand{\bm}{{\mathbi{m}}}
+\newcommand{\bn}{{\mathbi{n}}}
+\newcommand{\bo}{{\mathbi{o}}}
+\newcommand{\bp}{{\mathbi{p}}}
+\newcommand{\bq}{{\mathbi{q}}}
+\newcommand{\br}{{\mathbi{r}}}
+\newcommand{\bs}{{\mathbi{s}}}
+\newcommand{\bt}{{\mathbi{t}}}
+\newcommand{\bu}{{\mathbi{u}}}
+\newcommand{\bv}{{\mathbi{v}}}
+\newcommand{\bw}{{\mathbi{w}}}
+\newcommand{\bx}{{\mathbi{x}}}
+\newcommand{\by}{{\mathbi{y}}}
+\newcommand{\bz}{{\mathbi{z}}}
+
+\newcommand{\va}{{\vec{a}}}
+\newcommand{\vb}{{\vec{b}}}
+\newcommand{\vc}{{\vec{c}}}
+\newcommand{\vd}{{\vec{d}}}
+\newcommand{\ve}{{\vec{e}}}
+\newcommand{\vf}{{\vec{f}}}
+\newcommand{\vg}{{\vec{g}}}
+\newcommand{\vh}{{\vec{h}}}
+\newcommand{\vi}{{\vec{i}}}
+\newcommand{\vj}{{\vec{j}}}
+\newcommand{\vk}{{\vec{k}}}
+\newcommand{\vl}{{\vec{l}}}
+\newcommand{\vm}{{\vec{m}}}
+\newcommand{\vn}{{\vec{n}}}
+\newcommand{\vo}{{\vec{o}}}
+\newcommand{\vp}{{\vec{p}}}
+\newcommand{\vq}{{\vec{q}}}
+\newcommand{\vr}{{\vec{r}}}
+\newcommand{\vs}{{\vec{s}}}
+\newcommand{\vt}{{\vec{t}}}
+\newcommand{\vu}{{\vec{u}}}
+\newcommand{\vv}{{\vec{v}}}
+\newcommand{\vw}{{\vec{w}}}
+\newcommand{\vx}{{\vec{x}}}
+\newcommand{\vy}{{\vec{y}}}
+\newcommand{\vz}{{\vec{z}}}
+
+\newcommand{\valpha}{{\vec{\alpha}}}
+\newcommand{\vbeta}{{\vec{\beta}}}
+\newcommand{\vgamma}{{\vec{\gamma}}}
+\newcommand{\vdelta}{{\vec{\delta}}}
+\newcommand{\vepsilon}{{\vec{\epsilon}}}
+\newcommand{\vvarepsilon}{{\vec{\varepsilon}}}
+\newcommand{\vzeta}{{\vec{\zeta}}}
+\newcommand{\veta}{{\vec{\eta}}}
+\newcommand{\vtheta}{{\vec{\theta}}}
+\newcommand{\vvartheta}{{\vec{\vartheta}}}
+\newcommand{\viota}{{\vec{\iota}}}
+\newcommand{\vkappa}{{\vec{\kappa}}}
+\newcommand{\vlambda}{{\vec{\lambda}}}
+\newcommand{\vmu}{{\vec{\mu}}}
+\newcommand{\vnu}{{\vec{\nu}}}
+\newcommand{\vxi}{{\vec{\xi}}}
+\newcommand{\vpi}{{\vec{\pi}}}
+\newcommand{\vvarpi}{{\vec{\varpi}}}
+\newcommand{\vrho}{{\vec{\rho}}}
+\newcommand{\vvarrho}{{\vec{\varrho}}}
+\newcommand{\vsigma}{{\vec{\sigma}}}
+\newcommand{\vvarsigma}{{\vec{\varsigma}}}
+\newcommand{\vtau}{{\vec{\tau}}}
+\newcommand{\vupsilon}{{\vec{\upsilon}}}
+\newcommand{\vphi}{{\vec{\phi}}}
+\newcommand{\vvarphi}{{\vec{\varphi}}}
+\newcommand{\vchi}{{\vec{\chi}}}
+\newcommand{\vpsi}{{\vec{\psi}}}
+\newcommand{\vomega}{{\vec{\omega}}}
+
+\newcommand{\balpha}{{\boldsymbol{\alpha}}}
+\newcommand{\bbeta}{{\boldsymbol{\beta}}}
+\newcommand{\bgamma}{{\boldsymbol{\gamma}}}
+\newcommand{\bdelta}{{\boldsymbol{\delta}}}
+\newcommand{\bepsilon}{{\boldsymbol{\epsilon}}}
+\newcommand{\bvarepsilon}{{\boldsymbol{\varepsilon}}}
+\newcommand{\bzeta}{{\boldsymbol{\zeta}}}
+\newcommand{\boldeta}{{\boldsymbol{\eta}}}
+\newcommand{\btheta}{{\boldsymbol{\theta}}}
+\newcommand{\bvartheta}{{\boldsymbol{\vartheta}}}
+\newcommand{\biota}{{\boldsymbol{\iota}}}
+\newcommand{\bkappa}{{\boldsymbol{\kappa}}}
+\newcommand{\blambda}{{\boldsymbol{\lambda}}}
+\newcommand{\bmu}{{\boldsymbol{\mu}}}
+\newcommand{\bnu}{{\boldsymbol{\nu}}}
+\newcommand{\bxi}{{\boldsymbol{\xi}}}
+\newcommand{\bpi}{{\boldsymbol{\pi}}}
+\newcommand{\bvarpi}{{\boldsymbol{\varpi}}}
+\newcommand{\brho}{{\boldsymbol{\rho}}}
+\newcommand{\bvarrho}{{\boldsymbol{\varrho}}}
+\newcommand{\bsigma}{{\boldsymbol{\sigma}}}
+\newcommand{\bvarsigma}{{\boldsymbol{\varsigma}}}
+\newcommand{\btau}{{\boldsymbol{\tau}}}
+\newcommand{\bupsilon}{{\boldsymbol{\upsilon}}}
+\newcommand{\bphi}{{\boldsymbol{\phi}}}
+\newcommand{\bvarphi}{{\boldsymbol{\varphi}}}
+\newcommand{\bchi}{{\boldsymbol{\chi}}}
+\newcommand{\bpsi}{{\boldsymbol{\psi}}}
+\newcommand{\bomega}{{\boldsymbol{\omega}}}
+
+\newcommand{\vGamma}{{\vec{\Gamma}}}
+\newcommand{\vDelta}{{\vec{\Delta}}}
+\newcommand{\vTheta}{{\vec{\Theta}}}
+\newcommand{\vLambda}{{\vec{\Lambda}}}
+\newcommand{\vXi}{{\vec{\Xi}}}
+\newcommand{\vPi}{{\vec{\Pi}}}
+\newcommand{\vSigma}{{\vec{\Sigma}}}
+\newcommand{\vUpsilon}{{\vec{\Upsilon}}}
+\newcommand{\vPhi}{{\vec{\Phi}}}
+\newcommand{\vPsi}{{\vec{\Psi}}}
+\newcommand{\vOmega}{{\vec{\Omega}}}
+
+\newcommand{\bGamma}{{\boldsymbol{\Gamma}}}
+\newcommand{\bDelta}{{\boldsymbol{\Delta}}}
+\newcommand{\bTheta}{{\boldsymbol{\Theta}}}
+\newcommand{\bLambda}{{\boldsymbol{\Lambda}}}
+\newcommand{\bXi}{{\boldsymbol{\Xi}}}
+\newcommand{\bPi}{{\boldsymbol{\Pi}}}
+\newcommand{\bSigma}{{\boldsymbol{\Sigma}}}
+\newcommand{\bUpsilon}{{\boldsymbol{\Upsilon}}}
+\newcommand{\bPhi}{{\boldsymbol{\Phi}}}
+\newcommand{\bPsi}{{\boldsymbol{\Psi}}}
+\newcommand{\bOmega}{{\boldsymbol{\Omega}}}
+
diff --git a/doc/manual.pdf b/doc/manual.pdf
new file mode 100644
index 0000000..63ad29c
Binary files /dev/null and b/doc/manual.pdf differ
diff --git a/doc/manual.tex b/doc/manual.tex
new file mode 100644
index 0000000..b916402
--- /dev/null
+++ b/doc/manual.tex
@@ -0,0 +1,836 @@
+\documentclass{article}
+\usepackage{macros,palatino,fancyhdr,lastpage}
+
+\lhead{\textbf{CONTRAfold 2.02 User Manual}}
+\chead{}
+\rhead{\thepage\ of \pageref{LastPage}}
+\lfoot{}
+\cfoot{}
+\rfoot{}
+
+\begin{document}
+
+ \begin{center}
+ \includegraphics[height=10.0cm]{logo.jpg}
+ \end{center}
+ \rule{5in}{0.15cm}
+ \begin{center}
+ \textbf{\Huge CONTRAfold 2.02} \\
+ \end{center}
+ \begin{center}
+ \textbf{\LARGE User Manual} \\
+ \end{center}
+ \vskip 1.0cm
+ \begin{center}
+ (Last modified: August 14, 2008)
+ \end{center}
+
+ \newpage
+
+ \pagestyle{fancy}
+ \setcounter{page}{1}
+
+ \ \vskip 1.0cm
+
+ \tableofcontents
+
+ \newpage
+ \section{Description}
+
+ CONTRAfold is a novel algorithm for the prediction of RNA secondary
+ structure based on conditional log-linear models (CLLMs). Unlike
+ previous secondary structure prediction programs, CONTRAfold is the
+ first fully probabilistic algorithm to achieve state-of-the-art
+ accuracy in RNA secondary structure prediction.
+
+ The CONTRAfold program was developed by Chuong Do at Stanford
+ University in collaboration with Daniel Woods and Serafim Batzoglou.
+ The source code for CONTRAfold is available for download from
+ \begin{center}
+ \emph{http://contra.stanford.edu/contrafold/}
+ \end{center}
+ under the BSD license. The CONTRAfold logo was designed by Marina
+ Sirota.
+
+ Any comments or suggestions regarding the program should be sent
+ to Chuong Do (\emph{chuongdo at cs.stanford.edu}).
+
+ \newpage
+ \section{License (BSD)}
+
+ \noindent
+ Copyright \copyright\ 2006, Chuong Do \\
+ All rights reserved.\\
+ \\
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ \begin{itemize}
+ \item
+ Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ \item
+ Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ \item
+ Neither the name of Stanford University nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+ \end{itemize}
+
+ \noindent
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ \newpage
+ \section{Installation}
+
+ At the moment, CONTRAfold is only available for Unix-based systems
+ (e.g., Linux). We will be porting CONTRAfold to other architectures
+ and making the binaries available.
+
+ \subsection{*nix installation}
+
+ To compile CONTRAfold from the source code (for a *nix machine):
+ \begin{enumerate}
+ \item
+ Download the latest version of the CONTRAfold source code from
+ \begin{center}
+ \emph{http://contra.stanford.edu/contrafold/download.html}
+ \end{center}
+ \item
+ Decompress the archive:
+ \begin{verbatim}
+ $ tar zxvf contrafold_v#_##.tar.gz\end{verbatim}
+ where the \#'s are replaced with the appropriate version
+ numbers for the tar.gz you want to install. This will create
+ a subdirectory called \texttt{contrafold} inside of the current
+ directory.
+ \item
+ Change to the \texttt{contrafold/src} subdirectory and compile the
+ program.
+ \begin{verbatim}
+ $ cd contrafold/src
+ $ make clean
+ $ make\end{verbatim}
+ \end{enumerate}
+ Now, your installation is complete!
+
+ \newpage
+ \section{Supported file formats}
+
+ In this section, we describe the input and output file formats supported by
+ the CONTRAfold program.
+
+ \subsection{Input file formats}
+ \label{sec:input-general}
+
+ CONTRAfold accepts input files which either contain only RNA
+ sequences or contain both sequences and (partial) structural
+ annotations.
+
+ For the file formats that support specification of (partial)
+ structural annotations (in particular, FASTA and BPSEQ), the
+ provided structures must obey the following properties:
+ \begin{enumerate}
+ \item Each position in the RNA sequence is marked as either
+ unpaired, paired to some specific nucleotide, or unknown.
+ \item If position $i$ is marked as pairing with position $j$, then
+ position $j$ must be marked as pairing with position $i$.
+ \item The (partial) structures specified must not have pseudoknots.
+ \item A position $i$ cannot be marked as pairing unless its specific
+ base-pairing partner has been specified.
+ \end{enumerate}
+ These structural annotations are generally ignored when performing
+ predictions, unless the \texttt{--constraints} flag is specified on
+ the command-line. These structural annotations are required for training CONTRAfold.
+
+ The three specific input file formats supported by
+ CONTRAfold are plain text, FASTA and BPSEQ. We describe each of
+ these formats in turn.
+
+ \subsubsection{Plain text format}
+ \label{sec:plain}
+
+ A plain text format file consists of one or more lines containing
+ RNA sequence data. Each of these lines may contain the letters `A',
+ `C', `G', `T', `U', or `N' in either upper or lower case (the output
+ of the program will retain the case of the input). Any T's are
+ automatically converted to U's. Any other letters are automatically
+ converted to N's. All whitespace (space, tab, newline) is ignored.
+ N's are treated as masked sequence positions which are ignored
+ during all calculations (i.e., any scoring terms involving an N will
+ be skipped). Other non-whitespace characters are not permitted.
+ Plain text files cannot contain any secondary structural annotation.
+
+ For example, the following is a valid plain text file:
+ \begin{verbatim}
+ NACGACAGUGUAUCACUAGUAcuuA
+ GUAUGUACUAUC
+
+ AGUAGUUGUUGUAGUUC\end{verbatim}
+ Note that the blank third line will be ignored, and the initial `N'
+ character will be treated as a placeholder character which appears
+ in the output folded RNA but makes no contribution to the computations.
+
+ \subsubsection{FASTA format}
+ \label{sec:fasta}
+
+ A FASTA format file consists of:
+ \begin{enumerate}
+ \item A \textbf{single header line} beginning with the character `$>$' followed by
+ a text description of the RNA sequence. Note that the description
+ must fit on the same line as the `$>$' character.
+ \item One or more lines containing \textbf{RNA sequence data}. Each
+ of these lines may contain the letters `A', `C', `G', `T', `U' or
+ `N' in either upper or lower case (the output of the program will
+ retain the case of the input). Any T's are automatically
+ converted to U's. Any other letters are automatically converted
+ to N's. All whitespace (space, tab, newline) is ignored. N's are
+ treated as masked sequence positions which are ignored during all
+ calculations (i.e., any scoring terms involving an N will be
+ skipped). Other non-whitespace characters are not permitted.
+ \item (Optional) A \textbf{structural annotation} for the sequence provided above.
+ The structural annotation requires:
+ \begin{enumerate}
+ \item A \textbf{single header line} beginning
+ with the character `$>$' followed by a description (any text after the
+ description is ignored)
+ \item One or more lines of \textbf{parenthesized structural
+ annotation}. These lines provide a structural annotation for
+ each nucleotide in the RNA sequence using a sequence of `(',
+ `)', `.', and `?' characters. A nucleotide annotated with `('
+ pairs with the nucleotide annotated with the matching `)'. A
+ `.' character indicates that the corresponding nucleotide is
+ unpaired. Finally, a `?' indicates a position for which the
+ proper matching (either paired or unpaired) is unknown. Observe
+ that the parentheses in the input file must be well-balanced,
+ i.e., for each left parenthesis, the corresponding pairing
+ position must be marked with a right parenthesis (not a `?'),
+ and vice versa. Since CONTRAfold generates only
+ non-pseudoknotted structure predictions, the proper pairing will
+ always be unambiguous.
+ \end{enumerate}
+ \end{enumerate}
+
+ For example, the following is a valid FASTA file:
+ \begin{verbatim}
+ >sequence
+ acggagaGUGUUGAU
+ CUGUGUGUUACUACU
+ caucuguaguucuag
+ uugua\end{verbatim}
+
+ Similarly, the following is a valid FASTA file with a structural annotation:
+ \begin{verbatim}
+ >sequence
+ acguuggcu
+ >structure
+ (??(..).)\end{verbatim}
+
+ But the following is not (starts with the wrong header character):
+ \begin{verbatim}
+ # sequence
+ ATGACGGT\end{verbatim}
+
+ Also, the following file is not valid (because the parenthesized structure is not
+ properly balanced):
+ \begin{verbatim}
+ >sequence
+ acguuggcu
+ >structure
+ (..(..).?\end{verbatim}
+
+ Finally, the following file is not valid (because the structural information header is
+ missing):
+ \begin{verbatim}
+ >sequence
+ acguuggcu
+ (??(..).)\end{verbatim}
+
+ \subsubsection{BPSEQ format}
+ \label{sec:bpseq}
+
+ A BPSEQ format file is used for describing a single RNA sequence and its annotated
+ secondary structure. This file format contains exactly one line for each nucleotide in
+ an RNA sequence. The $i$th line of the file contains three items
+ separated by single spaces:
+ \begin{enumerate}
+ \item The integer $i$ (with $i=1$ representing the first nucleotide).
+ \item The $i$th character of the RNA sequence (which may be `A',
+ `C', `G', `T', `U', or `N' in either upper or lower case; the
+ output of the program will retain the case of the input; any T's
+ are automatically converted to U's; any other letters are
+ automatically converted to N's). N's are treated as masked
+ sequence positions which are ignored during all calculations
+ (i.e., any scoring terms involving an N will be skipped).
+ \item The index of the character to which the $i$th character base
+ pairs, if known. If the character is known to be unpaired, then 0
+ appears here. If it is unknown whether this character base-pairs,
+ then a -1 appears here. Note if the BPSEQ file specifies that
+ character $i$ base-pairs with character $j$, then it must also
+ specify that character $j$ base-pairs with character $i$.
+ \end{enumerate}
+
+ For example, the following is a BPSEQ format file:
+ \begin{verbatim}
+ 1 A 7
+ 2 G -1
+ 3 U -1
+ 4 C 0
+ 5 c -1
+ 6 c -1
+ 7 u 1\end{verbatim}
+ in which it is known that the first and last positions base pair,
+ and the middle position does not base pair. However, the folding of
+ the other positions is unknown.
+
+ However, the following is not a valid BPSEQ format file:
+ \begin{verbatim}
+ 2 G -1
+ 3 U -1
+ 1 A 7
+ 4 C 0
+ 5 C -1
+ 6 C -1
+ 7 U 1\end{verbatim}
+ since all nucleotides in the file must appear in order.
+
+ Finally, the following is also not a valid BPSEQ format file:
+ \begin{verbatim}
+ 1 A 7
+ 2 G -1
+ 3 U -1
+ 4 C 0
+ 5 c -1
+ 6 c -1
+ 7 u -1\end{verbatim}
+ since the first position is specified as pairing with the last
+ position, but not vice versa.
+
+ \subsection{Output formats}
+
+ The results of a CONTRAfold secondary structure prediction are given
+ in either FASTA, BPSEQ, or posteriors format. We describe each
+ of these in detail.
+
+ \subsubsection{FASTA format}
+
+ The FASTA output format is identical to the FASTA input format (see
+ Section~\ref{sec:fasta}) with structures. Since CONTRAfold provides
+ predictions for the pairing or non-pairing of every single
+ nucleotide, no ?'s will appear in the output.
+
+ The output will always consist of exactly four lines, where the
+ first and third lines are FASTA headers for the sequence and
+ structure, respectively, the second line specifies the sequence
+ data, and the fourth line specifies the parenthesized structure.
+ If a FASTA file is provided as input, then the header in the FASTA input
+ file will be used as the first line header in the output file; otherwise,
+ the (relative) path to the input file is used as the header.
+ The FASTA header for the structure will always be ``structure.''
+ Since CONTRAfold generates only non-pseudoknotted structure predictions,
+ the proper pairing will always be unambiguous.
+
+ For example, the following parenthesized structure is a completion of the
+ valid BPSEQ file from Section~\ref{sec:bpseq}, assuming that the input
+ is stored in the file \texttt{data/input}.
+ \begin{verbatim}
+ >data/input
+ AGUCccu
+ >structure
+ ((...))\end{verbatim}
+
+ \subsubsection{BPSEQ format}
+
+ The BPSEQ output format is identical to the BPSEQ input format (see
+ Section~\ref{sec:bpseq}). Since CONTRAfold provides predictions for
+ the pairing or non-pairing of every single nucleotide, no -1's will
+ appear in the output.
+
+ \subsubsection{Posteriors format}
+ \label{sec:posteriors}
+
+ The posteriors output format is distinct from the BPSEQ and FASTA
+ formats in that it does \emph{not} provide a single prediction of RNA
+ secondary structure. Instead, it provides a sparse representation of the
+ base pairing posterior probabilities for pairs of letters in the RNA
+ sequence. Specifically, the $i$th line contains
+ \begin{enumerate}
+ \item The integer $i$.
+ \item The $i$th character of the RNA sequence.
+ \item A space-separated list of base-pairing probabilities of the form
+ $j$:$p_{ij}$, where $j>i$ is the index of the nucleotide to which the $i$th
+ nucleotide might pair, and $p_{ij}$ is the probability that this
+ base pairing occurs.
+ \end{enumerate}
+ For example, the following is a posteriors format output:
+ \begin{verbatim}
+ 1 A 7:0.035 9:0.10
+ 2 G 6:0.036 8:0.11
+ 3 U
+ 4 C
+ 5 C
+ 6 C
+ 7 U
+ 8 C
+ 9 A\end{verbatim}
+ In the above, we see that nucleotide 2 has an 11\% probability of pairing
+ to nucleotide 8. Note that each pairing probability is reported only once
+ (i.e., on the $i$th line, we show only the pairing probabilities to nucleotides
+ $j > i$ which appear \emph{after} the $i$th position in the RNA sequence).
+
+ \newpage
+ \section{Usage}
+ \label{sec:usage}
+
+ CONTRAfold has two modes of operation: prediction mode and training
+ mode.
+ \begin{itemize}
+ \item In ``prediction'' mode, CONTRAfold folds new RNA
+ sequences using either the default parameters or a CONTRAfold-format
+ parameter file.
+ \item In ``training'' mode, CONTRAfold learns new parameters from
+ training data consisting of RNA sequences with pre-existing structural annotations.
+ \end{itemize}
+ Most users of this software will likely only ever need to use
+ CONTRAfold's prediction functionality. The optimization procedures
+ used in the training algorithm are fairly computationally expensive;
+ for this purpose, the CONTRAfold program is designed to support
+ automatic training in a parallel computing environment via MPI
+ (Message Passing Interface).
+
+ \subsection{Prediction mode}
+
+ In prediction mode, CONTRAfold predicts the secondary structure of one
+ or more unfolded input RNA sequences, and prints the result to either
+ the console or output files. The basic syntax for running CONTRAfold
+ in prediction mode is
+ \begin{verbatim}
+ $ ./contrafold predict [OPTIONS] INFILE(s)\end{verbatim}
+
+ \subsubsection{A single input file}
+
+ For single sequence prediction, CONTRAfold generates FASTA
+ output (see Section~\ref{sec:fasta}) to the console (i.e., stdout) by default.
+
+ For example, suppose the file ``seq.fasta'' contains a FASTA
+ formatted sequence to be folded. Then the command
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta\end{verbatim}
+ will fold the sequence and display the results to the console in
+ FASTA format.
+
+ CONTRAfold can also write parenthesized FASTA, BPSEQ, or posteriors formatted
+ output to an output file. To write FASTA output to a file,
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --parens seq.parens\end{verbatim}
+ To write BPSEQ output to a file,
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --bpseq seq.bpseq\end{verbatim}
+ To write all posterior pairing probabilities greater than
+ 0.001 to a file,
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --posteriors \
+ 0.001 seq.posteriors\end{verbatim}
+ Note that here, the backslash character is used to denote that a
+ command-line is broken over several lines; it is not necessary if you
+ type everything on a single line.
+
+ Finally, it is also possible to obtain multiple different types of output
+ simultaneously. For example, the command
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --parens \
+ seq.parens --bpseq seq.bpseq --posteriors \
+ 0.001 seq.posteriors\end{verbatim}
+ will generate three different output files simultaneously.
+
+ \subsubsection{Multiple input files}
+
+ For multiple input files, CONTRAfold generates FASTA
+ output (see Section~\ref{sec:fasta}) to the console by default.
+ The output is presented in the order of the input files on the
+ command-line. Using console output is not allowed when MPI is
+ enabled, or when certain other options are selected; in general,
+ we recommend the usage of explicitly specified output files or
+ subdirectories when dealing with multiple input files (see below).
+
+ CONTRAfold can also write FASTA, BPSEQ, or posteriors formatted
+ output to several output files. In particular, CONTRAfold creates
+ a subdirectory (whose name is specified by the user) in which to store
+ the results, and writes each prediction to a file in that subdirectory
+ of the same name as the original file being processed.
+
+ For example, suppose that the files ``seq1.fasta'' and ``seq2.fasta'' each
+ contain a FASTA formatted sequence to be folded. Then the command
+ \begin{verbatim}
+ $ ./contrafold predict seq1.fasta seq2.fasta \
+ --parens output\end{verbatim}
+ will create a subdirectory called \texttt{output} and will place the results
+ in the files \texttt{output/seq1.fasta} and \texttt{output/seq2.fasta}.
+
+ Alternatively,
+ \begin{verbatim}
+ $ ./contrafold predict seq1.fasta seq2.fasta \
+ --bpseq output\end{verbatim}
+ and
+ \begin{verbatim}
+ $ ./contrafold predict seq1.fasta seq2.fasta \
+ --posteriors 0.001 output\end{verbatim}
+ generate BPSEQ and posteriors formatted outputs instead.
+
+ Observe that if multiple input files have the same base name, then
+ overwriting of output may occur. For example, if the input files list
+ contains two different files called \texttt{seq/input} and \texttt{input},
+ the output subdirectory will contain only a single file called \texttt{input}.
+
+ Finally, you may also generate multiple types of output simultaneously,
+ as before. Remember, however, to use different output subdirectory names
+ for each. The command
+ \begin{verbatim}
+ $ ./contrafold predict seq1.fasta seq2.fasta --parens \
+ parens_output --bpseq bpseq_output --posteriors \
+ 0.001 posteriors_output\end{verbatim}
+ generates three different output subdirectories (parens\_output,
+ bpseq\_output, and posteriors\_output) each containing two files
+ (seq1.fasta, seq2.fasta).
+
+ \subsubsection{Optional arguments}
+
+ CONTRAfold accepts a number of optional arguments, which alter the default behavior
+ of the program. To use any of these options, simply pass the option to the CONTRAfold
+ program on the command line. For example,
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --viterbi \
+ --noncomplementary\end{verbatim}
+ The optional arguments include:
+ \begin{description}
+ \item \texttt{--gamma $\gamma$} \\
+ \\
+ This option sets the sensitivity/specificity tradeoff parameter for the maximum
+ expected accuracy decoding algorithm. In particular, consider a scoring
+ system in which each nucleotide which is correctly base paired gets a
+ score of $\gamma$, and each nucleotide which is correctly not base paired gets
+ a score of 1. Then, CONTRAfold finds the folding of the input sequence with
+ maximum \emph{expected accuracy} with respect to this scoring system.
+
+ Intuitively,
+ \begin{itemize}
+ \item If $\gamma > 1$, the parsing algorithm emphasizes sensitivity.
+ \item If $0 \le \gamma \le 1$, the parsing algorithm emphasizes specificity.
+ \end{itemize}
+ In addition, if the user specifies any value of $\gamma < 0$, then CONTRAfold
+ tries trade-off parameters of $2^k$ for $k \in \set{-5,-4,\ldots,10}$, and
+ generates one output file for each trade-off parameter. Note that this
+ must be used in conjunction with either
+ \texttt{--parens}, \texttt{--bpseq}, or \texttt{--posteriors} in order to
+ allow for writing to output files.
+
+ For example, the command
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --gamma 100000\end{verbatim}
+ runs the maximum expected accuracy algorithm, placing almost all emphasis on sensitivity
+ (i.e., on predicting correct base pairs).
+
+ The naming of output files used by CONTRAfold when $\gamma < 0$ follows a somewhat
+ different convention from normal. Running
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --gamma -1 \
+ --bpseq output\end{verbatim}
+ will create the files
+ \begin{verbatim}
+ output/output.gamma=0.031250
+ output/output.gamma=0.062500
+ ...
+ output/output.gamma=1024.000000\end{verbatim}
+ For multiple input files,
+ \begin{verbatim}
+ $ ./contrafold predict seq1.fasta seq2.fasta \
+ --gamma -1 --bpseq output\end{verbatim}
+ will generate
+ \begin{verbatim}
+ output/output.gamma=0.031250/seq1.fasta
+ output/output.gamma=0.031250/seq2.fasta
+ ...
+ output/output.gamma=1024.000000/seq1.fasta
+ output/output.gamma=1024.000000/seq2.fasta\end{verbatim}
+
+ As before, multiple types of output (parens, BPSEQ, posteriors) may
+ be requested simultaneously.
+
+ \item \texttt{--viterbi} \\
+ \\
+ This option uses the Viterbi algorithm to compute structures rather than the
+ maximum expected accuracy (posterior decoding) algorithm. The structures generated
+ by the Viterbi option tend to be of slightly lower accuracy than posterior decoding,
+ so this option is not enabled by default.
+ \item \texttt{--noncomplementary} \\
+ \\
+ This option uses a folding model that allows non-\texttt{AU}/\texttt{CG}/\texttt{GU}
+ pairings in the CONTRAfold output. This option is slower and generally slightly less
+ accurate than the default option of allowing only ``canonical'' base-pairings.
+ \item \texttt{--constraints} \\
+ \\
+ This option requires the use of BPSEQ format input files.
+ By default, any base pairings that are included in the BPSEQ file
+ above are ignored. However, if the \texttt{--constraints} flag is used,
+ then any base pairings in an input BPSEQ file are treated as
+ constraints on the allowed structures. In particular,
+ \begin{enumerate}
+ \item A nucleotide mapping to a positive index $i$ is constrained to
+ base-pair with nucleotide $i$.
+ \item A nucleotide mapping to 0 is constrained to be unpaired.
+ \item A nucleotide mapping to -1 is unconstrained.
+ \end{enumerate}
+ For example, given the following input BPSEQ file:
+ \begin{verbatim}
+ 1 A -1
+ 2 C -1
+ 3 G -1
+ 4 U 7
+ 5 U 0
+ 6 C 0
+ 7 G 4
+ 8 C -1
+ 9 G -1
+ 10 U -1\end{verbatim}
+ and the \texttt{--constraints} flag, then CONTRAfold will assume that
+ positions 4 and 7 are constrained to be base-pairing, while positions
+ 5 and 6 are constrained to be unpaired. The base-pairing of the
+ remaining positions is decided by CONTRAfold. The constraints must follow
+ the restrictions described in Section~\ref{sec:input-general}.
+ \item \texttt{--params PARAMSFILE} \\
+ \\
+ This option uses a trained CONTRAfold parameter file instead of the
+ default program parameters. The format of the parameter file should
+ be the same as the \texttt{contrafold.params.complementary} file in the CONTRAfold
+ source code; each line contains a single parameter name and a parameter
+ value.
+ \item \texttt{--version} \\
+ \\
+ Display the program version number.
+ \item \texttt{--verbose} \\
+ \\
+ Show detailed console output.
+ \item \texttt{--partition} \\
+ \\
+ Compute the log partition function for the input sequence. This option
+ may be used in conjunction with the \texttt{--constraints} option in order
+ to determine the CONTRAfold ``energy'' of a given RNA secondary structure
+ specified in a BPSEQ file. For example, to compute the energy of a
+ Viterbi parse generated via
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --viterbi \
+ --bpseq seq.bpseq\end{verbatim}
+ we can simply run
+ \begin{verbatim}
+ $ ./contrafold predict seq.bpseq --constraints \
+ --partition\end{verbatim}
+ Some quick notes regarding the partition function:
+ \begin{itemize}
+ \item
+ When used in conjunction with partial constraints (i.e., only some of the
+ mappings in the input BPSEQ file are -1's; see above), then this option
+ computes the log of the summed unnormalized probabilities for all structures consistent
+ with the partial constraints.
+ \item
+ In order to compute the log of the summed \emph{probabilities} (which are
+ normalized as opposed to the quantities mentioned above), you must also
+ run
+ \begin{verbatim}
+ $ ./contrafold predict seq.bpseq --partition\end{verbatim}
+ and subtract this log partition value from the previous log partition
+ value described above. Note that this quantity will always be greater than
+ or equal to the log-partition above, implying that the log of the summed
+ probabilities is necessarily non-positive (which makes sense as probabilities
+ are at most 1).
+ \end{itemize}
+ \end{description}
+
+ \subsection{Training mode}
+
+ In training mode, CONTRAfold infers a parameter set using RNA sequences
+ with known (or partially known) secondary structures in BPSEQ format. By
+ default, CONTRAfold uses the L-BFGS algorithm for optimization.
+
+ For example, suppose \texttt{input/*.bpseq} refers to a collection of 100 files
+ which represent sequences with known structures. Calling
+ \begin{verbatim}
+ $ ./contrafold train input/*.bpseq\end{verbatim}
+ instructs CONTRAfold to learn parameters for predicting all structures
+ in
+ \begin{verbatim}
+ input/*.bpseq\end{verbatim}
+ without using any regularization. The learned parameters
+ after each iteration of the optimization algorithm are stored in
+ \begin{verbatim}
+ optimize.params.iter1
+ optimize.params.iter2
+ ...\end{verbatim}
+ in the current directory. The final parameters are stored in
+ \begin{verbatim}
+ optimize.params.final\end{verbatim}
+ and a log file describing the optimization is stored in
+ \begin{verbatim}
+ optimize.log
+ \end{verbatim}
+ \emph{In general, running CONTRAfold without regularization is almost always
+ a bad idea because of overfitting}. There are currently two ways to
+ use regularization that are supported in the CONTRAfold program:
+ \begin{enumerate}
+ \item
+ Regularization may be \emph{manually specified}. The current build of
+ CONTRAfold uses 15 regularization hyperparameters, each of which is
+ used for some subset of the parameters. To specify a single value shared
+ between all of the regularization hyperparameters manually, one can use the \texttt{--regularize} flag.
+ For example,
+ \begin{verbatim}
+ $ ./contrafold train --regularize 1 input/*.bpseq\end{verbatim}
+ uses a regularization constant of 1 for each hyperparameter. In
+ general, we recommend that you do not perform training yourself
+ unless you know what you are doing; also do not hesitate to ask us.
+ \item
+ The recommended usage is to use CONTRAfold's holdout cross-validation
+ procedure to \emph{automatically select} regularization constants.
+ To reserve a fraction $p$ of the training data as a holdout set, run
+ CONTRAfold with the \texttt{--holdout $p$} flag.
+
+ For example, to reserve $1/4^\text{th}$ of the training set for holdout
+ cross-validation, use
+ \begin{verbatim}
+ $ ./contrafold train --holdout 0.25 \
+ input/*.bpseq\end{verbatim}
+ Note that the \texttt{--holdout} and \texttt{--regularize} flags should
+ not be used simultaneously.
+ \end{enumerate}
+
+ \newpage
+ \section{Visualization of folded RNAs}
+
+ Besides the main program, the CONTRAfold package contains some additional
+ tools for visualization of folded RNAs:
+ \begin{itemize}
+ \item \texttt{make\_coords}: generates a set of coordinates for plotting
+ a CONTRAfold BPSEQ file.
+ \item \texttt{plot\_rna}: converts a set of coordinates and a BPSEQ file
+ into a viewable PNG.
+ \end{itemize}
+ In the following subsections, we describe the installation and use of
+ these two tools for RNA visualization.
+
+ \subsection{Installation}
+
+ Currently, only UNIX installation is supported.
+
+ \subsubsection{*nix installation}
+
+ To compile CONTRAfold visualization tools from the source code
+ (for a *nix machine):
+ \begin{enumerate}
+ \item
+ Install the \texttt{libgd} graphics development library, available
+ from
+ \begin{center}
+ \textit{http://www.boutell.com/gd/}
+ \end{center}
+ \item
+ Install the \texttt{libpng} PNG image library, available from
+ \begin{center}
+ \textit{http://www.libpng.org/pub/png/libpng.html}
+ \end{center}
+ \item
+ Compile the visualization tools:
+ \begin{verbatim}
+ $ make viz\end{verbatim}
+ \end{enumerate}
+
+ \subsection{Usage}
+
+ Given an input FASTA file, generating an image of the predicted
+ CONTRAfold structure involves three steps:
+ \begin{enumerate}
+ \item
+ Generate a secondary structure prediction in BPSEQ format:
+ \begin{verbatim}
+ $ ./contrafold predict seq.fasta --bpseq \
+ output.bpseq\end{verbatim}
+ \item
+ Run the \texttt{make\_coords} program to generate an RNA layout:
+ \begin{verbatim}
+ $ ./make_coords output.bpseq output.coords\end{verbatim}
+ The resulting coordinates are placed in the \texttt{output.coords}
+ file.
+ \item
+ Run the \texttt{plot\_rna} program to convert the
+ layout into a PNG image:
+ \begin{verbatim}
+ $ ./plot_rna output.bpseq output.coords \
+ --png output.png\end{verbatim}
+ The resulting PNG is placed in the \texttt{output.png} file and
+ can be viewed with a web browser such as Mozilla Firefox.
+ Alternatively, EPS format output is also available:
+ \begin{verbatim}
+ $ ./plot_rna output.bpseq output.coords \
+ --eps output.eps\end{verbatim}
+ \end{enumerate}
+
+ \subsection{Additional options}
+
+ The \texttt{plot\_rna} program has a couple of options which you can
+ use to control the generated PNG files:
+ \begin{description}
+ \item \texttt{--posteriors $posteriorsfile$} \\
+ \\
+ If a CONTRAfold posteriors file is also available, then
+ using the above option will generate a PNG file in which
+ the letter of each RNA nucleotide is colored according
+ to posterior probability confidence. Black letters indicate
+ high confidence structure whereas lighter gray letters indicate
+ lower confidence structure.
+ \item \texttt{--title "$title$"} \\
+ \\
+ This option allows the user to annotate the generated
+ RNA image with a title. Note that the title string should
+ be surrounded with double quotation marks so as to ensure
+ that it is interpreted as a single argument to the program.
+ \end{description}
+
+ In general, the CONTRAfold visualization tools generate RNA layouts which
+ tend to be visually pleasing. The layout algorithm uses a simple
+ deterministic layout rule, followed by a gradient-based optimization
+ procedure. This type of procedure is not guaranteed to generate
+ non-overlapping layouts for all RNA structures; in practice, however,
+ the visualization tools can provide reasonable visualizations for
+ a large range of RNA structures.
+
+ \newpage
+ \section{Citing CONTRAfold}
+
+ If you use CONTRAfold in your work, please cite:
+ \begin{quote}
+ Do, C.B., Woods, D.A., and Batzoglou, S. (2006) CONTRAfold: RNA
+ secondary structure prediction without physics-based models.
+ \emph{Bioinformatics}, 22(14): e90-e98.
+ \end{quote}
+
+ \noindent
+ Other relevant references include:
+ \begin{quote}
+ Do, C.B., Foo, C.-S., Ng, A.Y. (2007) Efficient multiple
+ hyperparameter learning for log-linear models. In \emph{Advances
+ in Neural Information Processing Systems} 20.
+ \end{quote}
+
+\end{document}
diff --git a/doc/supplementary.pdf b/doc/supplementary.pdf
new file mode 100644
index 0000000..8cfad34
Binary files /dev/null and b/doc/supplementary.pdf differ
diff --git a/doc/supplementary.tex b/doc/supplementary.tex
new file mode 100644
index 0000000..0b41d14
--- /dev/null
+++ b/doc/supplementary.tex
@@ -0,0 +1,1054 @@
+\documentclass{article}
+
+\usepackage{macros,fullpage}
+\usepackage[boxed,linesnumbered]{algorithm2e}
+
+\newcommand{\A}{\texttt{A}}
+\newcommand{\U}{\texttt{U}}
+\newcommand{\G}{\texttt{G}}
+\newcommand{\C}{\texttt{C}}
+\newcommand{\N}{\texttt{N}}
+\newcommand{\doouter}{\varphi_\text{do outer}}
+\newcommand{\Adoouter}{\alpha_\text{do outer}}
+\newcommand{\Bdoouter}{\beta_\text{do outer}}
+\newcommand{\outerunpaired}{\phi_\text{outer unpaired}}
+\newcommand{\outerbranch}{\phi_\text{outer branch}}
+\newcommand{\vhairpin}{\varphi_\text{hairpin}}
+\newcommand{\hairpinlength}{\phi_\text{hairpin length}}
+\newcommand{\hairpinbase}{\phi_\text{hairpin base}}
+\newcommand{\hairpinextend}{\phi_\text{hairpin extend}}
+\newcommand{\internallength}{\phi_\text{internal length}}
+\newcommand{\internalfull}{\phi_\text{internal full}}
+\newcommand{\internalasymmetry}{\phi_\text{internal asymmetry}}
+\newcommand{\loopbase}{\phi_\text{loop base}}
+\newcommand{\loopmultiplier}{\phi_\text{loop multiplier}}
+\newcommand{\mismatch}{\phi_\text{terminal mismatch}}
+\newcommand{\multimismatch}{\varphi_\text{multi mismatch}}
+\newcommand{\sbpstackingleft}{\phi_\text{single base pair stacking left}}
+\newcommand{\sbpstackingright}{\phi_\text{single base pair stacking right}}
+\newcommand{\closing}{\phi_\text{terminal closing}}
+\newcommand{\helix}{\varphi_\text{helix}}
+\newcommand{\helixstacking}{\phi_\text{helix stacking}}
+\newcommand{\helixbasepair}{\phi_\text{helix base pair}}
+\newcommand{\helixextend}{\phi_\text{helix extend}}
+\newcommand{\helixclosing}{\phi_\text{helix closing}}
+\newcommand{\helixchange}{\phi_\text{helix change}}
+\newcommand{\helixlength}{\varphi_\text{helix length}}
+\newcommand{\doloop}{\varphi_\text{do loop}}
+\newcommand{\Adoloop}{\alpha_\text{do loop}}
+\newcommand{\Bdoloop}{\beta_\text{do loop}}
+\newcommand{\single}{\varphi_\text{single}}
+\newcommand{\dosingle}{\varphi_\text{do single}}
+\newcommand{\Adosingle}{\alpha_\text{do single}}
+\newcommand{\Bdosingle}{\beta_\text{do single}}
+\newcommand{\multi}{\varphi_\text{multi}}
+\newcommand{\domulti}{\varphi_\text{do multi}}
+\newcommand{\Adomulti}{\alpha_\text{do multi}}
+\newcommand{\Bdomulti}{\beta_\text{do multi}}
+\newcommand{\startmulti}{\varphi_\text{start multi}}
+\newcommand{\Astartmulti}{\alpha_\text{start multi}}
+\newcommand{\Bstartmulti}{\beta_\text{start multi}}
+\newcommand{\multibase}{\phi_\text{multi base}}
+\newcommand{\multiunpaired}{\phi_\text{multi unpaired}}
+\newcommand{\multipaired}{\phi_\text{multi paired}}
+\newcommand{\generic}{\varphi_\text{loop}}
+\newcommand{\bulge}{\varphi_\text{bulge}}
+\newcommand{\bulgelength}{\phi_\text{bulge length}}
+\newcommand{\internal}{\varphi_\text{internal}}
+\newcommand{\dohelix}{\varphi_\text{do helix}}
+\newcommand{\Adohelix}{\alpha_\text{do helix}}
+\newcommand{\Bdohelix}{\beta_\text{do helix}}
+\newcommand{\Endpair}{\varphi_\text{end pair}}
+\newcommand{\AEndpair}{\alpha_\text{end pair}}
+\newcommand{\BEndpair}{\beta_\text{end pair}}
+
+\begin{document}
+
+ \begin{center}
+ \textbf{\large Supplementary Material for \vskip 0.5cm \Large CONTRAfold: RNA Secondary Structure Prediction without Physics-Based Models} \\
+ \vskip 0.5cm
+ Chuong B. Do, Daniel A. Woods, and Serafim Batzoglou \\
+ \vskip 0.5cm
+ Stanford University, Stanford, CA 94305, USA, \\
+ \texttt{\{chuongdo,danwoods,serafim\}@cs.stanford.edu}, \\
+ WWW home page: \texttt{http://contra.stanford.edu/contrafold/}
+ \end{center}
+ \vskip 0.5cm
+
+ \begin{abstract}
+ In this supplementary material, we describe in full the structured
+ conditional log-linear model (structured CLLM) used in the CONTRAfold
+ program. We also provide detailed pseudocode explicitly showing the
+ dynamic programming recurrences needed to reproduce the CONTRAfold
+ algorithm, specifically CONTRAfold version 1.10.
+ \end{abstract}
+
+ \section{Preliminaries}
+
+ Let $\Sigma = \set{\A,\C,\G,\U,\N}$ be an alphabet, and consider
+ some string $x \in \Sigma^L$ of length $L$. In the RNA secondary
+ structure prediction problem, $x$ represents an unfolded RNA string,
+ and $x_i$ refers to the $i$th character of $x$, for $i=1,\ldots,L$.
+ For ease of notation, we say that there are $L+1$
+ \emph{positions} corresponding to $x$---one position at each of the two
+ ends of $x$, and $L-1$ positions between consecutive nucleotides of $x$.
+ We will assign indices ranging from 0 to $L$ for each position. This is
+ illustrated in Figure~\ref{fig:positions}.
+
+ Let $\cY$ be the space of all possible structures of a sequence $x$.
+ Structured conditional log-linear models (structured CLLMs) define the
+ conditional probability of a structure $y \in \cY$ given an input RNA sequence $x$ as
+ \begin{align}
+ P(y \mid x; \bw) &= \frac{\exp (\bw^T \bF(x,y))}{\sum_{y' \in \cY} \exp (\bw^T \bF(x,y'))} \\
+ &= \frac{1}{Z(x)} \cdot \exp (\bw^T \bF(x,y)) \label{cllmdef}
+ \end{align}
+ where $\bF(x,y) \in \reals^n$ is an $n$-dimensional vector of feature counts describing $x$ and $y$,
+ $\bw \in \reals^n$ is an $n$-dimensional vector
+ of parameters, and $Z(x)$ (known as the \emph{partition function} of a sequence $x$) is a normalization
+ constant ensuring that $P(y \mid x; \bw)$ forms a legal probability distribution over the
+ space of possible structures $\cY$. In this representation, the ``weight'' associated with a structure
+ $y$ for a sequence $x$ is $\exp(\bw^T\bF(x,y))$. Because the \emph{logarithm} of the weight is
+ a \emph{linear} function of the features $\bF(x,y)$, this is typically known as the
+ \emph{log-linear} representation of a CRF.
+
+ Now, consider the following reparameterization of \eqref{cllmdef}. For each entry $w_i$ of $\bw$,
+ define $\phi_i = \exp(w_i)$. It follows that \eqref{cllmdef} may be rewritten as
+ \begin{align}
+ P(y \mid x; \bw) &= \frac{1}{Z(x)} \cdot \prod_{i=1}^n \phi_i^{F_i(x,y)}
+ \end{align}
+ where $F_i(x,y)$ is the $i$th component of $\bF(x,y)$. In this alternative representation, the
+ weight associated with a structure $y$ for a sequence $x$ is a product, $\prod_{i=1}^n \phi_i^{F_i(x,y)}$.
+ We refer to this as the \emph{potential} representation of a CRF, where each parameter $\phi_i$ is called
+ a \emph{potential}.
+
+ In Figure~\ref{fig:potentials},
+ we list all of the potentials $\set{\phi_i}$ involved in scoring a structure $y$. Then, in Section~\ref{sec:featurecounts}, we
+ define the feature counts $\set{F_i(x,y)}$ for a sequence $x$ and its structure $y$. Finally, in the remaining sections,
+ we describe the dynamic programming recurrences needed to perform inference using our probabilistic model.
+
+ \begin{figure}[t]
+ \centering
+ \rput{0}(-2.0,-1){\texttt{.}}
+ \rput{0}(-1.8,-1){\texttt{A}}
+ \rput{0}(-1.6,-1){\texttt{.}}
+ \rput{0}(-1.4,-1){\texttt{G}}
+ \rput{0}(-1.2,-1){\texttt{.}}
+ \rput{0}(-1.0,-1){\texttt{A}}
+ \rput{0}(-0.8,-1){\texttt{.}}
+ \rput{0}(-0.6,-1){\texttt{G}}
+ \rput{0}(-0.4,-1){\texttt{.}}
+ \rput{0}(-0.2,-1){\texttt{A}}
+ \rput{0}(0.0,-1){\texttt{.}}
+ \rput{0}(0.2,-1){\texttt{C}}
+ \rput{0}(0.4,-1){\texttt{.}}
+ \rput{0}(0.6,-1){\texttt{U}}
+ \rput{0}(0.8,-1){\texttt{.}}
+ \rput{0}(1.0,-1){\texttt{U}}
+ \rput{0}(1.2,-1){\texttt{.}}
+ \rput{0}(1.4,-1){\texttt{C}}
+ \rput{0}(1.6,-1){\texttt{.}}
+ \rput{0}(1.8,-1){\texttt{U}}
+ \rput{0}(2.0,-1){\texttt{.}}
+ \rput{0}(-2.5,-2){position 0}
+ \rput{0}(2.5,-2){position $L$}
+ \rput{0}(-1,-2.5){position 4}
+ \rput{0}(1,-2.5){position 5}
+ \rput{0}(0,0){nucleotide 5}
+ \psline{->}(0,-0.25)(-0.15,-0.75)
+ \psline{->}(-1,-2.25)(-0.45,-1.25)
+ \psline{->}(1,-2.25)(0.1,-1.25)
+ \psline{->}(-2.5,-1.75)(-2.1,-1.25)
+ \psline{->}(2.5,-1.75)(2.1,-1.25)
+ \vskip 2.85cm
+ \caption{Positions in a sequence of length $L=10$.}
+ \label{fig:positions}
+ \end{figure}
+
+ \begin{figure}
+ \begin{center}
+ \begin{tabular}{lll}
+ $\hairpinbase$ & $\hairpinlength[\cdot]$ & $\helixbasepair(\cdot,\cdot)$ \\
+ $\hairpinextend$ & $\helixchange[\cdot]$ & $\helixclosing(\cdot,\cdot)$ \\
+ $\helixextend$ & $\bulgelength[\cdot]$ & $\sbpstackingleft((\cdot,\cdot),\cdot)$ \\
+ $\multibase$ & $\internallength[\cdot]$ & $\sbpstackingright((\cdot,\cdot),\cdot)$ \\
+ $\multiunpaired$ & $\internalasymmetry[\cdot]$ & $\mismatch((\cdot,\cdot),\cdot,\cdot)$ \\
+ $\multipaired$ & $\internalfull[\cdot][\cdot]$ & $\helixstacking((\cdot,\cdot),(\cdot,\cdot))$ \\
+ \end{tabular}
+ \end{center}
+ \caption{List of all potentials used in the CONTRAfold model.}
+ \label{fig:potentials}
+ \end{figure}
+
+ \section{Basic feature set}
+ \label{sec:featurecounts}
+
+ In this section, we define the feature counts $\set{F_i(x,y)}$ for a sequence
+ $x$ and a structure $y$. One way to do this is to give, for each potential $\phi_i$ shown
+ in Figure~\ref{fig:potentials}, a formula \emph{explicitly} specifying how to compute the corresponding
+ feature $F_i(x,y)$.
+
+ Here, we will instead define feature counts \emph{implicitly} by
+ \begin{enumerate}
+ \item decomposing a secondary structure $y$ into four fundamental
+ types of substructures: hairpins, single-branched loops, helices, and multi-branched loops;
+ \item defining a \emph{factor}\footnote{
+ To be clear, a \emph{factor} is simply a collection of potentials that are associated with
+ the presence of a particular secondary structure subunit in a structure $y$. For example,
+ the factor associated with a hairpin loop is simply the product of the parameter potentials which
+ are involved in ``scoring'' the hairpin loop.
+ } for each type of substructure as a product of potentials from Figure~\ref{fig:potentials};
+ \item defining the product $\prod_{i=1}^n \phi_i^{F_i(x,y)}$ as a product of factors for
+ each substructure in $y$.
+ \end{enumerate}
+ By specifying which potentials are included in the computation of the factor for each type of
+ substructure, we thus define the feature counts $\set{F_i(x,y)}$ implicitly as the \emph{number of times
+ each potential $\phi_i$ is used in the product of factors for a structure $y$}.
+
+ \subsection{Hairpins}
+
+ A hairpin is a loop with only one adjacent base pair, known as
+ its \emph{closing base pair}. For $1 \le i \le j < L$, we say that a
+ hairpin spans positions $i$ to $j$ if
+ $x_i$ and $x_{j+1}$ form the closing base pair (see Figure~\ref{fig:hairpin}).
+ For hairpins, energy-based secondary structure folding algorithms such as
+ Mfold assign free energy increments for each of the following:
+ \begin{itemize-compact}
+ \item energies corresponding to the length of the loop (i.e., a hairpin
+ spanning positions $i$ to $j$ has length $j-i$),
+ \item terminal mismatch stacking energies as a function of the closing base
+ pair $(x_i,x_{j+1})$ and the first unpaired nucleotides in the loop,
+ $x_{i+1}$ and $x_j$,
+ \item bonus free energies for loops containing specific nucleotide sequences, and
+ \item other special cases.
+ \end{itemize-compact}
+ CONTRAfold uses a simplified scoring model for hairpins which ignores
+ the latter two cases. In particular, the factor $\vhairpin(i,j)$ of a hairpin
+ spanning positions $i$ to $j$ is
+ \begin{align}
+ &\vhairpin(i,j) = {} \nonumber \\
+ &\qquad\mismatch((x_i,x_{j+1}),x_{i+1},x_j) \nonumber \\
+ &\qquad{}\cdot\begin{cases}
+ \hairpinlength[j-i] & \text{if $0 \le j-i \le 30$} \\
+ \hairpinbase \cdot \left(\hairpinextend\right)^{\ln(j-i)} & \text{if $j-i > 30$}.
+ \end{cases}
+ \end{align}
+ In the above expression, the first term accounts for
+ terminal mismatches arising from the fact that $(x_i,x_{j+1})$ are paired, but
+ $x_{i+1}$ and $x_j$ are not.\footnote{
+ Here, note that the order of the arguments is important so as to ensure that the
+ parameters are invariant with respect to the orientation of the substructure.
+ For example, we expect the parameter for $\A\G$ stacking on top of $\C\U$ to be
+ the same as the parameter for $\U\C$ stacking on top of $\G\A$.
+ } The second term
+ scores the hairpin based on its length.
+ For loops of length at most 30, potentials are read directly from a table.
+ For longer loops, the factor above directly imitates
+ typical energy-based scoring schemes, which estimate the free energy increment of
+ a loop of length $j-i$ as
+ \begin{align}
+ a + b \cdot \ln (j-i),
+ \end{align}
+ for fixed constants $a$ and $b$. By analogy, we have
+ \begin{align}
+ &\hairpinbase \cdot \left(\hairpinextend\right)^{\ln(j-i)} \nonumber \\
+ &\qquad{} = \exp(\ln(\hairpinbase) + \ln(\hairpinextend) \cdot \ln(j-i)) \\
+ &\qquad{} = \exp(a' + b' \cdot \ln(j-i))
+ \end{align}
+ where
+ \begin{align}
+ a' &= \ln(\hairpinbase) \\
+ b' &= \ln(\hairpinextend).
+ \end{align}
+
+ \begin{figure}[t]
+ \centering
+ \vskip 0.5cm
+ \rput{0}(-2.0,-1){\texttt{.}}
+ \rput{0}(-1.8,-1){\texttt{A}}
+ \rput{0}(-1.6,-1){\texttt{.}}
+ \rput{0}(-1.4,-1){\texttt{G}}
+ \rput{0}(-1.2,-1){\texttt{.}}
+ \rput{0}(-1.0,-1){\texttt{A}}
+ \rput{0}(-1.8,-1.4){\texttt{|}}
+ \rput{0}(-1.4,-1.4){\texttt{|}}
+ \rput{0}(-1.0,-1.4){\texttt{|}}
+ \rput{0}(-2.0,-1.8){\texttt{.}}
+ \rput{0}(-1.8,-1.8){\texttt{U}}
+ \rput{0}(-1.6,-1.8){\texttt{.}}
+ \rput{0}(-1.4,-1.8){\texttt{C}}
+ \rput{0}(-1.2,-1.8){\texttt{.}}
+ \rput{0}(-1.0,-1.8){\texttt{U}}
+ \rput{0}(-0.8,-0.8){\texttt{.}}
+ \rput{0}(-0.8,-2.0){\texttt{.}}
+ \rput{0}(-0.6,-0.7){\texttt{A}}
+ \rput{0}(-0.6,-2.1){\texttt{G}}
+ \rput{0}(-0.4,-0.7){\texttt{.}}
+ \rput{0}(-0.4,-2.2){\texttt{.}}
+ \rput{0}(-0.2,-0.7){\texttt{A}}
+ \rput{0}(-0.2,-2.1){\texttt{U}}
+ \rput{0}(-0.0,-0.8){\texttt{.}}
+ \rput{0}(-0.0,-2.0){\texttt{.}}
+ \rput{0}(0.2,-1.0){\texttt{G}}
+ \rput{0}(0.2,-1.8){\texttt{A}}
+ \rput{0}(0.3,-1.4){\texttt{.}}
+ \psline{->}(-2.5,0)(-1.2,-0.8)
+ \psline{->}(-2.5,-2.8)(-1.2,-2.0)
+ \psline{->}(-0.8, 0.25)(-0.8,-0.65)
+ \psline{->}(-0.8,-3.05)(-0.8,-2.15)
+ \rput{0}(-3,0.25){nucleotide $i$}
+ \rput{0}(-3,-3.05){nucleotide $j+1$}
+ \rput{0}(-0.8,0.5){position $i$}
+ \rput{0}(-0.8,-3.3){position $j$}
+ \vskip 3.5cm
+ \caption{A hairpin loop of length 6 spanning positions $i$ to $j$.}
+ \label{fig:hairpin}
+ \end{figure}
+
+ \subsection{Single-branched loops}
+
+ \begin{figure}[t]
+ \centering
+ \vskip 0.5cm
+ \rput{0}(-2.0,-1){\texttt{.}}
+ \rput{0}(-1.8,-1){\texttt{A}}
+ \rput{0}(-1.6,-1){\texttt{.}}
+ \rput{0}(-1.4,-1){\texttt{G}}
+ \rput{0}(-1.2,-1){\texttt{.}}
+ \rput{0}(-1.0,-1){\texttt{A}}
+ \rput{0}(-1.8,-1.4){\texttt{|}}
+ \rput{0}(-1.4,-1.4){\texttt{|}}
+ \rput{0}(-1.0,-1.4){\texttt{|}}
+ \rput{0}(-2.0,-1.8){\texttt{.}}
+ \rput{0}(-1.8,-1.8){\texttt{U}}
+ \rput{0}(-1.6,-1.8){\texttt{.}}
+ \rput{0}(-1.4,-1.8){\texttt{C}}
+ \rput{0}(-1.2,-1.8){\texttt{.}}
+ \rput{0}(-1.0,-1.8){\texttt{U}}
+ \rput{0}(-0.8,-0.8){\texttt{.}}
+ \rput{0}(-0.7,-2.0){\texttt{.}}
+ \rput{0}(-0.6,-0.7){\texttt{A}}
+% \rput{0}(-0.6,-2.1){\texttt{G}}
+ \rput{0}(-0.4,-0.7){\texttt{.}}
+ \rput{0}(-0.4,-2.0){\texttt{G}}
+ \rput{0}(-0.2,-0.7){\texttt{A}}
+% \rput{0}(-0.2,-2.1){\texttt{U}}
+ \rput{0}(-0.0,-0.8){\texttt{.}}
+ \rput{0}(-0.1,-2.0){\texttt{.}}
+ \rput{0}(0.2,-1.4){\texttt{|}}
+ \rput{0}(0.6,-1.4){\texttt{|}}
+ \rput{0}(1.0,-1.4){\texttt{|}}
+ \rput{0}(0.2,-1.0){\texttt{G}}
+ \rput{0}(0.2,-1.8){\texttt{U}}
+ \rput{0}(0.4,-1.0){\texttt{.}}
+ \rput{0}(0.4,-1.8){\texttt{.}}
+ \rput{0}(0.6,-1.0){\texttt{G}}
+ \rput{0}(0.6,-1.8){\texttt{C}}
+ \rput{0}(0.8,-1.0){\texttt{.}}
+ \rput{0}(0.8,-1.8){\texttt{.}}
+ \rput{0}(1.0,-1.0){\texttt{U}}
+ \rput{0}(1.0,-1.8){\texttt{A}}
+ \rput{0}(1.2,-1.0){\texttt{.}}
+ \rput{0}(1.2,-1.8){\texttt{.}}
+ \rput{0}(1.3,-1.0){\texttt{.}}
+ \rput{0}(1.3,-1.8){\texttt{.}}
+ \rput{0}(1.4,-1.0){\texttt{.}}
+ \rput{0}(1.4,-1.8){\texttt{.}}
+ \psline{->}(-2.5,0)(-1.2,-0.8)
+ \psline{->}(-2.5,-2.8)(-1.2,-2.0)
+ \psline{->}(-1.4, 0.25)(-0.8,-0.65)
+ \psline{->}(-1.4,-3.05)(-0.8,-2.15)
+ \psline{->}(0.6, 0.25)(-0.0,-0.65)
+ \psline{->}(0.6,-3.05)(-0.0,-2.15)
+ \psline{->}(1.7,0)(0.4,-0.8)
+ \psline{->}(1.7,-2.8)(0.4,-2.0)
+ \rput{0}(-3,0.25){nucleotide $i$}
+ \rput{0}(-3,-3.05){nucleotide $j+1$}
+ \rput{0}(-1.4,0.7){position $i$}
+ \rput{0}(-1.4,-3.5){position $j$}
+ \rput{0}(0.8,0.7){position $i'$}
+ \rput{0}(0.8,-3.5){position $j'$}
+ \rput{0}(2.4,0.25){nucleotide $i'+1$}
+ \rput{0}(2.4,-3.05){nucleotide $j'$}
+ \vskip 3.5cm
+ \caption{A single-branched (internal) loop of lengths 2 and 1 spanning positions $i$ to $i'$ and $j'$ to $j$. Here, $\A$-$\U$ is the external closing base
+ pair and $\G$-$\U$ is the internal closing base pair.}
+ \label{fig:single}
+ \end{figure}
+
+ A single-branched loop is a loop which has two adjacent base pairs.
+ The outermost base pair is called the \emph{external closing base pair}
+ whereas the innermost base pair is called the
+ \emph{internal closing base pair}. Suppose $1 \le i \le i'$ and $j' \le j < L$.
+ We say that a single-branched loop spans positions $i$ to $i'$ and $j'$ to $j$ if
+ $x_i$ and $x_{j+1}$ form the external closing base pair and
+ $x_{i'+1}$ and $x_{j'}$ form the internal closing base pair. To ensure
+ that the internal closing base pair is well-defined, we require that $i'+2 \le j'$
+ (see Figure~\ref{fig:single}).
+
+ A single-branched loop for which $i'=i$ and $j=j'$ is called a \emph{stacking pair}.
+ A single-branched loop for which either $i'=i$ or $j=j'$ (but not both) is called a \emph{bulge}.
+ Finally, a single-branched loop for which both $i'>i$ and $j>j'$ is called an $\ell_1 \times \ell_2$ \emph{internal loop},
+ where $\ell_1 = i'-i$ and $\ell_2 = j-j'$.
+ For now, we will treat the problem of only scoring bulges and internal loops; we
+ consider the scoring of stacking pairs separately in the next section.
+
+ Energy-based scoring methods typically score internal loops and bulges by accounting for the
+ following:
+ \begin{itemize-compact}
+ \item energies based on the total loop length, $\ell_1 + \ell_2$,
+ \item energies based on the asymmetry in sizes of each side of the loop, $|\ell_1 - \ell_2|$,
+ \item special corrections for highly asymmetric $1 \times \ell$ (or $\ell \times 1$) loops,
+ \item terminal mismatch stacking energies for the external closing base
+ pair $(x_i,x_{j+1})$ and its adjacent nucleotides in the loop,
+ $x_{i+1}$ and $x_j$,
+ \item terminal mismatch stacking energies for the internal closing base
+ pair $(x_{j'},x_{i'+1})$ and its adjacent nucleotides in the loop,
+ $x_{j'+1}$ and $x_{i'}$, and
+ \item specific free energy increments for $1 \times 1$, $1 \times 2$, and $2 \times 2$ interior loops
+ as a function of their closing base pairs and the nucleotides in the loop.
+ \end{itemize-compact}
+ For computational tractability, many programs such as Mfold limit total loop lengths of
+ single-branched loops to a small constant $c$ (typically, $c=30$).
+
+ In CONTRAfold, the total loop length, loop asymmetry, and terminal mismatch stacking interaction
+ terms are retained. The special corrections for asymmetric interior loops are replaced with a more general
+ two-dimensional table for scoring $\ell_1 \times \ell_2$ interior loops.
+ Finally, the large lookup tables which exhaustively characterize the energies of
+ all $1 \times 1$, $1 \times 2$, and $2 \times 2$ interior loops are omitted.
+
+ Specifically, for all $1 \le i \le i'$ and $j' \le j \le L-1$ such that $i'+2\le j'$
+ and $1 \le i'-i+j-j' \le c$, the factor $\single(i,j,i',j')$ for a bulge or internal loop
+ is given by
+ \begin{align}
+ &\single(i,j,i',j') = \nonumber \\
+ &\qquad
+ \begin{cases}
+ \bulgelength[i'-i+j-j'] & \text{if $i'-i=0$ or $j-j'=0$} \\
+ \internallength[i'-i+j-j'] & \text{if $i'>i$ and $j>j'$} \\
+ \qquad{} \cdot \internalasymmetry[|(i'-i)-(j-j')|] \\
+ \qquad{} \cdot \internalfull[i'-i][j-j']
+ \end{cases} \nonumber \\
+ &\qquad {} \cdot \mismatch((x_i,x_{j+1}),x_{i+1},x_j) \nonumber \\
+ &\qquad {} \cdot \mismatch((x_{j'},x_{i'+1}),x_{j'+1},x_{i'}).
+ \end{align}
+ Like most energy-based methods, we use $c=30$ for computational tractability.
+
+ \subsection{Helices}
+
+ \begin{figure}[t]
+ \centering
+ \vskip 0.5cm
+ \rput{0}(-2.0,-1){\texttt{.}}
+ \rput{0}(-1.8,-1){\texttt{A}}
+ \rput{0}(-1.6,-1){\texttt{.}}
+ \rput{0}(-1.4,-1){\texttt{G}}
+ \rput{0}(-1.2,-1){\texttt{.}}
+ \rput{0}(-1.0,-1){\texttt{A}}
+ \rput{0}(-0.8,-1){\texttt{.}}
+ \rput{0}(-0.6,-1){\texttt{C}}
+ \rput{0}(-0.4,-1){\texttt{.}}
+ \rput{0}(-0.2,-1){\texttt{U}}
+ \rput{0}(-0.0,-1){\texttt{.}}
+ \rput{0}(0.2,-1){\texttt{G}}
+ \rput{0}(-2.0,-1.8){\texttt{.}}
+ \rput{0}(-1.8,-1.8){\texttt{U}}
+ \rput{0}(-1.6,-1.8){\texttt{.}}
+ \rput{0}(-1.4,-1.8){\texttt{C}}
+ \rput{0}(-1.2,-1.8){\texttt{.}}
+ \rput{0}(-1.0,-1.8){\texttt{U}}
+ \rput{0}(-0.8,-1.8){\texttt{.}}
+ \rput{0}(-0.6,-1.8){\texttt{G}}
+ \rput{0}(-0.4,-1.8){\texttt{.}}
+ \rput{0}(-0.2,-1.8){\texttt{A}}
+ \rput{0}(-0.0,-1.8){\texttt{.}}
+ \rput{0}(0.2,-1.8){\texttt{C}}
+ \rput{0}(-1.8,-1.4){\texttt{|}}
+ \rput{0}(-1.4,-1.4){\texttt{|}}
+ \rput{0}(-1.0,-1.4){\texttt{|}}
+ \rput{0}(-0.6,-1.4){\texttt{|}}
+ \rput{0}(-0.2,-1.4){\texttt{|}}
+ \rput{0}(0.2,-1.4){\texttt{|}}
+ \rput{0}(0.4,-0.8){\texttt{.}}
+ \rput{0}(0.4,-2.0){\texttt{.}}
+ \rput{0}(0.6,-0.7){\texttt{A}}
+ \rput{0}(0.6,-2.1){\texttt{G}}
+ \rput{0}(0.8,-0.7){\texttt{.}}
+ \rput{0}(0.8,-2.2){\texttt{.}}
+ \rput{0}(0.9,-0.7){\texttt{.}}
+ \rput{0}(0.9,-2.2){\texttt{.}}
+ \rput{0}(1.0,-0.7){\texttt{.}}
+ \rput{0}(1.0,-2.2){\texttt{.}}
+ \psline{->}(-3.5,0)(-2.2,-0.8)
+ \psline{->}(-3.5,-2.8)(-2.2,-2.0)
+ \psline{->}(-1.8, 0.25)(-1.8,-0.65)
+ \psline{->}(-1.8,-3.05)(-1.8,-2.15)
+ \psline{->}(0.2, 0.25)(0.2,-0.65)
+ \psline{->}(0.2,-3.05)(0.2,-2.15)
+ \rput{0}(-4,0.25){position $i$}
+ \rput{0}(-4,-3.05){position $j$}
+ \rput{0}(-2,0.5){nucleotide $i+1$}
+ \rput{0}(-2,-3.3){nucleotide $j$}
+ \rput{0}(1,0.5){nucleotide $i+\ell$}
+ \rput{0}(1,-3.3){nucleotide $j-\ell+1$}
+ \vskip 3.5cm
+ \caption{A helix of length $\ell=6$ spanning positions $i$ to $j$.}
+ \label{fig:helices}
+ \end{figure}
+
+ A single-branched loop for which $i'=i$ and $j=j'$ is known as
+ a \emph{stacking pair}. A sequence of one or more consecutive
+ stacking pairs is called a \emph{helix} (or stem); informally
+ then, a helix consists of several consecutive nucleotides of an RNA molecule
+ directly base pairing to a set of consecutive nucleotides which appear later
+ in the RNA sequence.
+
+ Now, consider a helix that matches nucleotides $x_{i+1}x_{i+2}\ldots x_{i+\ell}$
+ in a sequence $x$ to nucleotides $x_{j-\ell+1}x_{j-\ell+2}\ldots x_{j}$ which appear
+ later in the sequence. We say that this is a
+ helix of length $\ell$ starting at positions $i$ and $j$. Nucleotides
+ $x_{i+1}$ and $x_j$ form the \emph{external closing base pair} of the helix
+ whereas nucleotides $x_{i+\ell}$ and $x_{j-\ell+1}$ form the
+ \emph{internal closing base pair} (see Figure~\ref{fig:helices}).
+
+ Traditional energy-based methods such as Mfold score helices using
+ \begin{itemize-compact}
+ \item a sum of interaction terms for each stacking pair, and
+ \item penalties for each non-GC terminal closing base pair.
+ \end{itemize-compact}
+ Since stacking pair interaction terms are based on the nearest neighbor model,
+ only Watson-Crick and wobble $\G\U$ base pairs are allowed; other pairings
+ are necessarily treated as small symmetric interior loops.
+
+ CONTRAfold extends traditional energy-based methods by including penalties
+ for all possible closing base pairs (not just the ``canonical'' pairings). CONTRAfold
+ also considers the interaction of every pair of bases in the stem rather than ignoring
+ the non-canonical/non-$\G\U$ base pairs which are not found in the regular nearest
+ neighbor energy rules. Finally, CONTRAfold includes scores for helix lengths, allowing
+ arbitrary scores for helix lengths of at most $d$ (in practice, we set $d=5$), and assigning
+ affine scores for helices of length greater than $d$.
+
+ In particular, for $0 \le i \le i + 2\ell+2 \le j \le L$,
+ the factor $\helix(i,j,\ell)$ for a helix of length $\ell$ starting at
+ $i$ and $j$ is:
+ \begin{align}
+ &\helix(i,j,\ell) = {} \nonumber \\
+ &\qquad \helixclosing(x_{i+1},x_j) \nonumber \\
+ &\qquad{} \cdot \helixclosing(x_{j-\ell+1},x_{i+\ell}) \nonumber \\
+ &\qquad{} \cdot \prod_{k=1}^{\ell}
+ \helixbasepair(x_{i+k},x_{j-k+1}) \nonumber \\
+ &\qquad{} \cdot \prod_{k=1}^{\ell-1}
+ \helixstacking((x_{i+k},x_{j-k+1}),(x_{i+k+1},x_{j-k})) \nonumber \\
+ &\qquad{} \cdot \helixlength(\ell),
+ \end{align}
+ where
+ \begin{align}
+ \helixlength(\ell) =
+ \left(\prod_{i=1}^{\min(d,\ell)} \helixchange[i]\right) \cdot
+ \left(\helixextend\right)^{\max(\ell-d,0)}.
+ \end{align}
+ In this formulation, $\helixclosing(x_i,x_j)$ scores the
+ use of a particular base pair for closing a helix.
+ Similarly, $\helixstacking((x_i,x_j),(x_{i+1},x_{j-1}))$ scores the
+ interaction for stacking $(x_{i+1},x_{j-1})$ on top of $(x_i,x_j)$.
+ Finally, the helix length score $\helixlength(\ell)$ is designed so that
+ the length component of the score for any helix of length $\ell \le d$ is given explicitly as
+ \begin{align}
+ (\helixchange[1]) \cdot (\helixchange[2]) \cdot \ldots \cdot (\helixchange[\ell]),
+ \end{align}
+ and helices of length $\ell > d$ have a correction potential of
+ $\helixextend$ applied for each additional base pair.
+
+ \subsection{Multi-branched loops}
+
+ \begin{figure}[t]
+ \centering
+ \vskip 0.5cm
+ \rput{0}(-2.0,-1){\texttt{.}}
+ \rput{0}(-1.8,-1){\texttt{A}}
+ \rput{0}(-1.6,-1){\texttt{.}}
+ \rput{0}(-1.4,-1){\texttt{G}}
+ \rput{0}(-1.2,-1){\texttt{.}}
+ \rput{0}(-1.0,-1){\texttt{A}}
+ \rput{0}(-1.8,-1.4){\texttt{|}}
+ \rput{0}(-1.4,-1.4){\texttt{|}}
+ \rput{0}(-1.0,-1.4){\texttt{|}}
+ \rput{0}(-2.0,-1.8){\texttt{.}}
+ \rput{0}(-1.8,-1.8){\texttt{U}}
+ \rput{0}(-1.6,-1.8){\texttt{.}}
+ \rput{0}(-1.4,-1.8){\texttt{C}}
+ \rput{0}(-1.2,-1.8){\texttt{.}}
+ \rput{0}(-1.0,-1.8){\texttt{U}}
+%
+ \rput{0}(-0.85,-2.1){\texttt{.}}
+ \rput{0}(-0.65,-2.2){\texttt{A}}
+ \rput{0}(-0.4,-2.3){\texttt{.}}
+ \rput{0}(-0.15,-2.3){\texttt{A}}
+ \rput{90}(0.15,-2.3){\texttt{|}}
+ \rput{0}(0.45,-2.3){\texttt{A}}
+% \rput{0}(0.5,-2.35){\texttt{.}}
+ \rput{0}(0.7,-2.1){\texttt{.}}
+% \rput{0}(0.85,-2){\texttt{.}}
+%
+ \rput{0}(-0.85,-0.8){\texttt{.}}
+ \rput{0}(-0.7,-0.6){\texttt{A}}
+ \rput{0}(-0.5,-0.45){\texttt{.}}
+ \rput{0}(-0.3,-0.35){\texttt{A}}
+ \rput{90}(-0.1,-0.35){\texttt{.}}
+ \rput{0}(0.1,-0.35){\texttt{A}}
+ \rput{0}(0.3,-0.45){\texttt{.}}
+ \rput{0}(0.5,-0.6){\texttt{A}}
+ \rput{0}(0.65,-0.8){\texttt{.}}
+%
+ \rput{0}(2.0,-1){\texttt{.}}
+ \rput{0}(1.9,-1){\texttt{.}}
+ \rput{0}(1.8,-1){\texttt{.}}
+ \rput{0}(1.6,-1){\texttt{A}}
+ \rput{0}(1.4,-1){\texttt{.}}
+ \rput{0}(1.2,-1){\texttt{G}}
+ \rput{0}(1.0,-1){\texttt{.}}
+ \rput{0}(0.8,-1){\texttt{A}}
+ \rput{0}(1.6,-1.4){\texttt{|}}
+ \rput{0}(1.2,-1.4){\texttt{|}}
+ \rput{0}(0.8,-1.4){\texttt{|}}
+ \rput{0}(2.0,-1.8){\texttt{.}}
+ \rput{0}(1.9,-1.8){\texttt{.}}
+ \rput{0}(1.8,-1.8){\texttt{.}}
+ \rput{0}(1.6,-1.8){\texttt{U}}
+ \rput{0}(1.4,-1.8){\texttt{.}}
+ \rput{0}(1.2,-1.8){\texttt{C}}
+ \rput{0}(1.0,-1.8){\texttt{.}}
+ \rput{0}(0.8,-1.8){\texttt{U}}
+%
+ \rput{0}(-0.15,-2.55){\texttt{.}}
+ \rput{0}(0.45,-2.55){\texttt{.}}
+ \rput{0}(-0.15,-2.8){\texttt{A}}
+ \rput{90}(0.15,-2.8){\texttt{|}}
+ \rput{0}(0.45,-2.8){\texttt{A}}
+ \rput{0}(-0.15,-3.05){\texttt{.}}
+ \rput{0}(0.45,-3.05){\texttt{.}}
+ \rput{0}(-0.15,-3.3){\texttt{A}}
+ \rput{90}(0.15,-3.3){\texttt{|}}
+ \rput{0}(0.45,-3.3){\texttt{A}}
+ \rput{0}(-0.15,-3.55){\texttt{.}}
+ \rput{0}(0.45,-3.55){\texttt{.}}
+ \rput{0}(-0.15,-3.65){\texttt{.}}
+ \rput{0}(0.45,-3.65){\texttt{.}}
+ \rput{0}(-0.15,-3.75){\texttt{.}}
+ \rput{0}(0.45,-3.75){\texttt{.}}
+ \psline{->}(-2.3,-2.8)(-1.0,-2.15)
+ \psline{->}(-1.4, 0.25)(-0.9,-0.65)
+ \psline{->}(-1.1,-3.05)(-0.5,-2.4)
+ \psline{->}(1.6, 0.25)(0.8,-0.65)
+ \psline{->}(1.1,-3.05)(0.8,-2.25)
+ \psline{->}(2.2,-2.8)(0.85,-2.2)
+ \rput{0}(-3,-3.05){position $j$}
+ \rput{0}(-1.4,0.7){position $i$}
+ \rput{0}(-1.4,-3.5){position $j_2$}
+ \rput{0}(1.7,0.7){position $i_1$}
+ \rput{0}(1.7,-3.5){position $i_2$}
+ \rput{0}(3,-3.05){position $j_1$}
+ \vskip 4cm
+ \caption{A multi-branched loop spanning positions $i$ to $i_1$, $j_1$ to $i_2$, and $j_2$ to $j$.}
+ \label{fig:multi}
+ \end{figure}
+
+ A multi-branched loop is a loop containing at least three adjacent base pairs.
+ More formally, suppose $i \le i_1 \le j_1 \le i_2 \le j_2 \le \ldots \le i_m \le j_m \le j$
+ where $m \ge 2$ and $i_k + 2 \le j_k$ for $k=1,\ldots,m$. We say that a multibranch
+ loop spans positions $i$ to $i_1$, $j_1$ to $i_2$, \ldots, and $j_m$ to $j$ if
+ nucleotides $(x_i,x_{j+1})$ form the external closing base pair, and $(x_{j_k},x_{i_k+1})$
+ form the internal closing base pairs for $k=1,\ldots,m$ (see Figure~\ref{fig:multi}).
+
+ Let the length $\ell$ of a multi-branched loop be the number of unpaired bases,
+ \begin{align}
+ \ell = i_1-i + j-j_m + \sum_{k=2}^{m} (i_k-j_{k-1}).
+ \end{align}
+ For computational tractability, most programs score multi-branched loops using
+ \begin{itemize-compact}
+ \item energy terms dependent on the length of the loop,
+ \item single base pair stacking energies describing the attachment of each helix to the multi-branched loop, and
+ \item coaxial stacking terms for helices on the multi-branched loop that are separated by at most one unpaired position.
+ \end{itemize-compact}
+
+ CONTRAfold uses a similar scoring scheme for multi-branched loops which ignores coaxial
+ stacking. Specifically, if $1 \le i \le i_1 \le i_1 + 2 \le j_1 \le i_2 \le \ldots \le j \le L-1$,
+ then the factor associated with a multi-branched loop spanning
+ positions $i$ to $i_1$, $j_1$ to $i_2$, \ldots, and $j_m$ to $j$ is
+ \begin{align}
+ &\multi(i,j,i_1,j_1,\ldots,i_m,j_m) = {} \nonumber \\
+ &\qquad\multibase \cdot \left(\multiunpaired\right)^\ell \cdot \left(\multipaired\right)^{m+1} \nonumber \\
+ &\qquad{} \cdot \multimismatch((x_i,x_{j+1}),x_{i+1},x_j) \nonumber \\
+ &\qquad{} \cdot \prod_{k=1}^m \multimismatch((x_{j_k},x_{i_k+1}),x_{j_k+1},x_{i_k}).
+ \end{align}
+ where
+ \begin{align}
+ &\multimismatch((x_i,x_{j+1}),x_{i+1},x_j) = {} \nonumber \\
+ &\qquad\sbpstackingleft((x_i,x_{j+1}),x_{i+1}) \cdot \sbpstackingright((x_i,x_{j+1}),x_j)
+ \end{align}
+ This mirrors the affine energy models typically used for multi-branched loops
+ in energy-based methods.
+
+ \newpage
+ \section{The Viterbi algorithm}
+
+ We now specify the Viterbi algorithm for computing the most likely structure via
+ dynamic programming recurrences. Let $c$ be the maximum length of an internal loop or
+ bulge.
+
+ \subsection{Definitions}
+
+ We define the following factors:
+
+ \begin{itemize}
+ \item $\doouter(i)$, $0 \le i \le L$: the best possible score for folding the
+ substring $x_{i+1} x_{i+2} \cdots x_L$, assuming that the ends of this substring
+ belong to the exterior loop of the RNA.
+ \item $\dohelix(i,j,n)$, $0 \le i \le j \le L$
+ \begin{itemize}
+ \item $0 \le n < d$: the best possible score for folding the substring
+ $x_{i+1} x_{i+2} \cdots x_j$, assuming that exactly $n$ letters on each side of the substring
+ are paired in a helix -- i.e., $(x_i,x_{j+1}), (x_{i-1},x_{j+2}), \ldots, (x_{i-n+1},x_{j+n})$ all form
+ base pairs, but $x_{i-n}$ and $x_{j+n+1}$ do not base pair.
+ \item $n=d$: the best possible score for folding the substring
+ $x_{i+1} x_{i+2} \cdots x_j$, assuming that at least $d$ letters on each side of the substring
+ are paired in a helix -- i.e., $(x_i,x_{j+1}), (x_{i-1},x_{j+2}), \ldots, (x_{i-d+1},x_{j+d})$ all form
+ base pairs, and possibly more.
+ \end{itemize}
+ \item $\domulti(i,j,n)$, $0 \le i \le j \le L$
+ \begin{itemize}
+ \item $0 \le n < 2$: the best possible score for folding the substring
+ $x_{i+1} x_{i+2} \cdots x_j$, assuming that the substring is part of a multibranch loop
+ that contains exactly $n$ adjacent helices besides the exterior helix.
+ \item $n = 2$: the best possible score for folding the substring
+ $x_{i+1} x_{i+2} \cdots x_j$, assuming that the substring is part of a multibranch loop
+ that contains at least 2 adjacent helices besides the exterior helix.
+ \end{itemize}
+ \end{itemize}
+
+ \subsection{Recurrences}
+
+ For each of the factors described in the previous subsection, we now give the appropriate
+ recurrence along with a description of the cases handled by the recurrence.
+
+ \subsubsection{Exterior loop}
+
+ When generating a substring belonging to the exterior loop, there are three cases:
+ \begin{enumerate}
+ \item the substring is of zero length,
+ \item the first base of the substring belongs to the exterior loop,
+ \item the first base belongs to a helix that is adjacent to the exterior loop.
+ \end{enumerate}
+ This gives:
+ \begin{align*}
+ \doouter(i) &= \max \begin{cases}
+ 1 & \text{if $i=L$} \\
+ \outerunpaired \cdot \doouter(i+1) & \text{if $0 \le i < L$} \\
+ \displaystyle \max_{\substack{i' \\ i+2 \le i' \le L}} \left(\outerbranch \cdot \dohelix(i,i',0) \cdot \doouter(i')\right)
+ & \text{if $0 \le i \le L$}.
+ \end{cases}
+ \end{align*}
+ Note that in the last case, we require that $i+2 \le i'$ so as to ensure that
+ the definition of $\doouter(i)$ is not circular (actually, it would suffice to
+ require that $i < i'$; however, the requirement we make here works as well since
+ a helix must contain at least two base pairs).
+
+ \subsubsection{Helix}
+
+ To generate a helix for the substring $x_{i+1} x_{i+2} \cdots x_j$, there are several cases:
+ \begin{enumerate}
+ \item no surrounding positions belong to the helix yet and $(x_{i+1},x_j)$ base pair,
+ \item $n$ surrounding positions belong to the helix (where $0 < n < d$) and $(x_{i+1},x_j)$ base pair,
+ \item at least $d$ surrounding positions belong to the helix and $(x_{i+1},x_j)$ base pair,
+ \item at least one surrounding position belongs to the helix and $x_{i+1} x_{i+2} \cdots x_j$ form a hairpin loop,
+ \item at least one surrounding position belongs to the helix and $x_{i+1} x_{i+2} \cdots x_j$ form the beginning of a single-branched loop,
+ \item at least one surrounding position belongs to the helix and $x_{i+1} x_{i+2} \cdots x_j$ form the beginning of a multi-branched loop.
+ \end{enumerate}
+ This gives:
+ \begin{align*}
+ &\dohelix(i,j,n) = \\
+ &\max \begin{cases}
+ \helixchange[1] \cdot \helixclosing (x_{i+1}, x_j) & \text{if $0\le i < i+2\le j \le L$ and $n=0$} \\
+ \qquad{} \cdot \helixbasepair (x_{i+1}, x_j) \cdot \dohelix(i+1,j-1,1) \\
+ \helixchange[n+1] \cdot \helixstacking((x_{i},x_{j+1}),(x_{i+1},x_{j})) & \text{if $0 < i < i+2\le j < L$ and $0 < n < d$} \\
+ \qquad{} \cdot \helixbasepair (x_{i+1}, x_j) \cdot \dohelix(i+1,j-1,n+1) \\
+ \helixextend \cdot \helixstacking((x_{i},x_{j+1}),(x_{i+1},x_{j})) & \text{if $0<i < i+2\le j<L$ and $n=d$} \\
+ \qquad{} \cdot \helixbasepair (x_{i+1}, x_j) \cdot \dohelix(i+1,j-1,d) \\
+ \helixclosing (x_{j+1}, x_i) \cdot \doloop(i,j) & \text{if $0<i\le j<L$ and $n>0$} \\
+ \end{cases}
+ \end{align*}
+ Here, note that whenever a case depends on $(x_i,x_{j+1})$, we ensure that $0 < i$ and $j < L$. Also,
+ if a case depends on $x_{i+1}$ and $x_j$, we ensure that $i+2 \le j$.
+
+ \subsubsection{Loop}
+
+ To generate a loop for the substring $x_{i+1} x_{i+2} \cdots x_j$, there are several cases:
+ \begin{enumerate}
+ \item $x_{i+1} x_{i+2} \cdots x_j$ form a hairpin loop,
+ \item $x_{i+1} x_{i+2} \cdots x_j$ form the beginning of a single-branched loop,
+ \item $x_{i+1} x_{i+2} \cdots x_j$ form the beginning of a multi-branched loop.
+ \end{enumerate}
+ This gives:
+ \begin{align*}
+ &\doloop(i,j) = \\
+ &\max \begin{cases}
+ \vhairpin(i,j) & \text{if $0<i\le j<L$} \\
+ \displaystyle \max_{\substack{i', j' \\ i \le i' < i'+2 \le j' \le j \\ 1 \le i'-i + j-j' \le c}}
+ \bigl(\single(i,j,i',j') \cdot \dohelix(i',j',0)\bigr) & \text{if $0<i\le j<L$} \\
+ \multibase \cdot \multipaired & \text{if $0<i\le i+2 \le j<L$.} \\
+ \qquad{} \cdot \multimismatch((x_i,x_{j+1}),x_{i+1},x_j) \cdot \domulti(i,j,0)
+ \end{cases}
+ \end{align*}
+ Note that in the case of single-branched
+ loops, $i'+2 \le j'$ since the inner helix must have at least one base pairing, and $1 \le i'-i+j-j' \le c$ to
+ ensure that the loop has length at least 1, but no more than $c$ (for efficiency).
+
+ \subsubsection{Multi-branched loops}
+
+ To generate a multi-branched loop for the substring $x_{i+1} x_{i+2} \cdots x_j$, there are several cases:
+ \begin{enumerate}
+ \item the substring is of length zero and has at least 2 adjacent helices (other than the exterior helix),
+ \item the first letter of the substring is unpaired,
+ \item the first letter of the substring belongs to a helix that is adjacent to the multi-branch loop and fewer than
+ 2 adjacent helices (other than the exterior helix) have been generated already.
+ \item the first letter of the substring belongs to a helix that is adjacent to the multi-branch loop and at least
+ 2 adjacent helices (other than the exterior helix) have been generated already.
+ \end{enumerate}
+ From this, we obtain
+ \begin{align*}
+ &\domulti(i,j,n) = \\
+ &\max \begin{cases}
+ 1 & \text{if $0 \le i=j \le L$ and $n=2$} \\
+ \multiunpaired \cdot \domulti(i+1,j,n) & \text{if $0 \le i < j \le L$ and $0 \le n \le 2$} \\
+ \displaystyle \max_{\substack{j' \\ i+2 \le j' \le j}}
+ \left(
+ \begin{matrix}
+ \multipaired \cdot \multimismatch((x_{j'},x_{i+1}),x_{j'+1},x_i) \\
+ {} \cdot \dohelix(i,j',0) \cdot \domulti(j',j,\min(2,n+1))
+ \end{matrix}
+ \right) & \text{if $0 < i \le j < L$ and $0 \le n \le 2$} \\
+ \end{cases}
+ \end{align*}
+ As before, in the last case, the condition $i+2 \le j'$ ensures that $x_{j'}$ and $x_{i+1}$
+ are valid, and the conditions $0 < i$ and $j < L$ ensure that $x_{j'+1}$ and $x_i$ are valid.
+
+ \newpage
+ \section{The inside algorithm}
+ \label{sec:inside}
+
+ The inside algorithm looks just like Viterbi, with $\max$'s replaced by $\sum$'s. We repeat
+ these recurrences here, for convenience: \\
+ \\
+ \noindent
+ For $0 \le i \le L$,
+ \begin{align*}
+ \Adoouter(i) &= \sum \begin{cases}
+ 1 & \text{if $i=L$} \\
+ \outerunpaired \cdot \Adoouter(i+1) & \text{if $0 \le i < L$} \\
+ \displaystyle \sum_{\substack{i' \\ i+2 \le i' \le L}} \left(\outerbranch \cdot \Adohelix(i,i',0) \cdot \Adoouter(i')\right)
+ & \text{if $0 \le i \le L$}
+ \end{cases}
+ \end{align*}
+
+ \noindent
+ For $0 \le n \le d$ and $0 \le i \le j \le L$,
+ \begin{align*}
+ &\Adohelix(i,j,n) = \\
+ &\sum \begin{cases}
+ \helixchange[1] \cdot \helixclosing (x_{i+1}, x_j) & \text{if $0 \le i < i+2\le j \le L$ and $n=0$} \\
+ \qquad{} \cdot \helixbasepair (x_{i+1}, x_j) \cdot \Adohelix(i+1,j-1,1) \\
+ \helixchange[n+1] \cdot \helixstacking((x_{i},x_{j+1}),(x_{i+1},x_{j})) & \text{if $0 < i < i+2\le j < L$ and $0 < n < d$} \\
+ \qquad{} \cdot \helixbasepair (x_{i+1}, x_j) \cdot \Adohelix(i+1,j-1,n+1) \\
+ \helixextend \cdot \helixstacking((x_{i},x_{j+1}),(x_{i+1},x_{j})) & \text{if $0<i<i+2\le j<L$ and $n=d$} \\
+ \qquad{} \cdot \helixbasepair (x_{i+1}, x_j) \cdot \Adohelix(i+1,j-1,d) \\
+ \helixclosing (x_{j+1}, x_i) \cdot \Adoloop(i,j) & \text{if $0<i\le j<L$ and $n>0$}
+ \end{cases}
+ \end{align*}
+
+ \noindent
+ For $0 \le i \le j \le L$,
+ \begin{align*}
+ &\Adoloop(i,j) = \\
+ &\sum \begin{cases}
+ \vhairpin(i,j) & \text{if $0<i\le j<L$} \\
+ \displaystyle \sum_{\substack{i', j' \\ i \le i' < i'+2 \le j' \le j \\ 1 \le i'-i + j-j' \le c}}
+ \bigl(\single(i,j,i',j') \cdot \Adohelix(i',j',0)\bigr) & \text{if $0<i\le j<L$} \\
+ \multibase \cdot \multipaired & \text{if $0<i\le i+2 \le j<L$.} \\
+ \qquad{} \cdot \multimismatch((x_i,x_{j+1}),x_{i+1},x_j) \cdot \Adomulti(i,j,0)
+ \end{cases}
+ \end{align*}
+
+ \noindent
+ For $0 \le n \le 2$ and $0 \le i \le j \le L$,
+ \begin{align*}
+ &\Adomulti(i,j,n) = \\
+ &\sum \begin{cases}
+ 1 & \text{if $0 \le i=j \le L$ and $n=2$} \\
+ \multiunpaired \cdot \Adomulti(i+1,j,n) & \text{if $0 \le i < j \le L$ and $0 \le n \le 2$} \\
+ \displaystyle \sum_{\substack{j' \\ i+2 \le j' \le j}}
+ \left(
+ \begin{matrix}
+ \multipaired \cdot \multimismatch((x_{j'},x_{i+1}),x_{j'+1},x_i) \\
+ {} \cdot \Adohelix(i,j',0) \cdot \Adomulti(j',j,\min(2,n+1))
+ \end{matrix}
+ \right) & \text{if $0 < i \le j < L$ and $0 \le n \le 2$} \\
+ \end{cases}
+ \end{align*}
+
+ \newpage
+ \section{The outside algorithm}
+ \label{sec:outside}
+
+ The outside algorithm corresponding to the inside algorithm given in the previous section
+ is shown below: \\
+ \\
+ \noindent
+ For $0 \le i \le L$,
+ \begin{align*}
+ \Bdoouter(i) &= \sum \begin{cases}
+ 1 & \text{if $i=0$} \\
+ \outerunpaired \cdot \Bdoouter(i-1) & \text{if $i > 0$} \\
+ \displaystyle \sum_{\substack{i' \\ 0 \le i' \le i'+2 \le i}} \left(\outerbranch \cdot \Adohelix(i',i,0) \cdot \Bdoouter(i')\right)
+ \end{cases}
+ \end{align*}
+
+ \noindent
+ For $0 \le n \le d$ and $0 \le i \le j \le L$,
+ \begin{align*}
+ &\Bdohelix(i,j,n) = \\
+ &\sum \begin{cases}
+ \outerbranch \cdot \Bdoouter(i) \cdot \Adoouter(j) & \text{if $0 \le i < i+2 \le j \le L$ and $n=0$} \\
+ \displaystyle \sum_{\substack{i', j' \\ 0 < i' \le i < j \le j' < L\\ 1 \le i-i' + j'-j \le c}}
+ \left(
+ \begin{matrix}
+ \single(i',j',i,j) \cdot \Bdoloop(i',j')
+ \end{matrix}
+ \right) & \text{if $0 < i < i+2 \le j < L$ and $n = 0$} \\
+ \displaystyle \sum_{n'=0}^1 \sum_{\substack{j' \\ j \le j' < L}}
+ \left(
+ \begin{matrix}
+ \multipaired \cdot \Bdomulti(i,j',n') \\
+ {} \cdot \multimismatch((x_j,x_{i+1}),x_{j+1},x_i) \\
+ {} \cdot \Adomulti(j,j',n'+1)
+ \end{matrix}
+ \right) & \text{if $0 < i \le j < L$ and $n = 0$} \\
+ \displaystyle \sum_{\substack{j' \\ j \le j' < L}}
+ \left(
+ \begin{matrix}
+ \multipaired \cdot \Bdomulti(i,j',2) \\
+ {} \cdot \multimismatch((x_j,x_{i+1}),x_{j+1},x_i) \\
+ {} \cdot \Adomulti(j,j',2)
+ \end{matrix}
+ \right) & \text{if $0 < i \le j < L$ and $n = 0$} \\
+ \helixchange[1] \cdot \helixclosing (x_i, x_{j+1}) & \text{if $0 < i \le j < L$, and $n=1$} \\
+ \qquad{} \cdot \helixbasepair (x_i, x_{j+1}) \cdot \Bdohelix(i-1,j+1,0) \\
+ \helixchange[n] \cdot \helixstacking((x_{i-1},x_{j+2}),(x_{i},x_{j+1})) & \text{if $1 < i \le j < L-1$, and $1 < n \le d$} \\
+ \qquad{} \cdot \helixbasepair (x_i, x_{j+1}) \cdot \Bdohelix(i-1,j+1,n-1) \\
+ \helixextend \cdot \helixstacking((x_{i-1},x_{j+2}),(x_i,x_{j+1})) & \text{if $1 < i \le j < L-1$ and $n=d$} \\
+ \qquad{} \cdot \helixbasepair (x_i, x_{j+1}) \cdot \Bdohelix(i-1,j+1,d)
+ \end{cases}
+ \end{align*}
+
+ \noindent
+ For $0 \le i \le j \le L$,
+ \begin{align*}
+ \Bdoloop(i,j) = \sum_{n'=1}^d \helixclosing(x_{j+1},x_i) \cdot \Bdohelix(i,j,n') \qquad \text{if $0 < i \le j < L$}
+ \end{align*}
+
+ \noindent
+ For $0 \le n \le 2$ and $0 \le i \le j \le L$,
+ \begin{align*}
+ &\Bdomulti(i,j,n) = \\
+ &\sum \begin{cases}
+ \multibase \cdot \multipaired & \text{if $0<i < i+2 \le j<L$ and $n=0$}\\
+ \qquad {} \cdot \multimismatch((x_i,x_{j+1}),x_{i+1},x_j) \cdot \Bdoloop(i,j) \\
+ \multiunpaired \cdot \Bdomulti(i-1,j,n) & \text{if $0 < i \le j \le L$ and $0 \le n \le 2$} \\
+ \displaystyle \sum_{\substack{i' \\ 1 \le i' < i'+2 \le i}}
+ \left(
+ \begin{matrix}
+ \multipaired \cdot \multimismatch((x_i,x_{i'+1}),x_{i+1},x_{i'}) \\
+ {} \cdot \Adohelix(i',i,0) \cdot \Bdomulti(i',j,n-1)
+ \end{matrix}
+ \right) & \text{if $2 < i \le j < L$ and $1 \le n \le 2$} \\
+ \displaystyle \sum_{\substack{i' \\ 1 \le i' < i'+2 \le i}}
+ \left(
+ \begin{matrix}
+ \multipaired \cdot \multimismatch((x_i,x_{i'+1}),x_{i+1},x_{i'}) \\
+ {} \cdot \Adohelix(i',i,0) \cdot \Bdomulti(i',j,2)
+ \end{matrix}
+ \right) & \text{if $2 < i \le j < L$ and $n=2$.}
+ \end{cases}
+ \end{align*}
+
+ \newpage
+ \section{Posterior decoding}
+
+ Given the inside and outside matrices computed in the previous sections, we can now compute the
+ posterior probabilities for paired and unpaired residues. Specifically, the posterior probability
+ $p_{ij}$ that nucleotide $i$ pairs with nucleotide $j$ (where $1 \le i < j \le L$) is given by
+ \begin{align}
+ p_{ij} = \frac{1}{Z(x)} \cdot \sum \begin{cases}
+ \helixchange[1] \cdot \helixclosing(x_i,x_j) & \text{if $1 \le i < j \le L$ and $n=0$}\\
+ \qquad{} \cdot \helixbasepair(x_i,x_j) \cdot \Adohelix(i,j-1,1) \\
+ \qquad{} \cdot \Bdohelix(i-1,j,0) \\
+ \displaystyle\sum_{n=2}^{d} \left(
+ \begin{matrix}
+ \helixchange[n] \cdot \helixstacking((x_{i-1},x_{j+1}),(x_{i},x_j)) \\
+ {} \cdot \helixbasepair (x_{i}, x_j) \cdot \Adohelix(i,j-1,n) \\
+ {} \cdot \Bdohelix(i-1,j,n-1)
+ \end{matrix}
+ \right) & \text{if $1 < i < j < L$} \\
+ \helixextend \cdot \helixstacking((x_{i-1},x_{j+1}),(x_{i},x_{j})) & \text{if $1<i<j<L$} \\
+ \qquad{} \cdot \helixbasepair (x_{i}, x_j) \cdot \Adohelix(i,j-1,d) \\
+ \qquad{} \cdot \Bdohelix(i-1,j,d) \\
+ \end{cases}
+ \end{align}
+ where
+ \begin{align}
+ Z(x) = \Adoouter(0) = \Bdoouter(L).
+ \end{align}
+ Using these posterior probabilities, the posterior decoding algorithm described in the
+ full paper can be used to find the maximum expected accuracy parse.
+
+ \newpage
+ \section{Gradient}
+
+ The gradient of the CONTRAfold conditional log-likelihood objective with respect to the
+ parameters $\bw$ is
+ \begin{align*}
+ \nabla_\bw \ell(\bw : \cD) = \sum_{i=1}^m \left(\bF(x\at{i},y\at{i}) - \bbE_{y' \sim P(y \mid x\at{i}; \bw)} [\bF(x\at{i}, y')]\right),
+ \end{align*}
+ where the expectation is taken with respect to the conditional distribution over structures $y'$ for the sequence $x\at{i}$ given by the
+ current parameters $\bw$. We now describe the construction of a dynamic programming algorithm for computing the expectation
+ $\bbE_{y' \sim P(y \mid x\at{i}; \bw)} [\bF(x\at{i}, y')]$ based on modifying an implementation of the inside
+ recurrences from Section~\ref{sec:inside}.
+
+ First, initialize a vector $\bz \in \reals^n$ to the zero vector. In a typical implementation of the inside algorithm,
+ computing entries of the inside table involves repetitions of statements of the form
+ \begin{align*}
+ \alpha_{a}(i,j) \leftarrow \alpha_{a}(i,j) + (\text{product of some $\phi$'s}) \cdot (\text{product of some $\alpha_{a'}(i',j')$'s}).
+ \end{align*}
+ We will replace each such statement with several statements---one for each $\phi_k$ appearing in the product above.
+ Specifically, for each $\phi_k$ in the product, we will create a statement of the form
+ \begin{align*}
+ z_k \leftarrow z_k + \frac{\beta_{a}(i,j) \cdot (\text{product of some $\phi$'s}) \cdot (\text{product of some $\alpha_{a'}(i',j')$'s})}{Z(x)}
+ \end{align*}
+ where $Z(x) = \Adoouter(0)$. At the end of this modified inside algorithm, then, the vector $\bz$ will contain the desired feature
+ expectations.
+
+ For example, applying the transformation to the rules for the $\Adoouter$ recurrence gives the following:
+ \vskip 0.5cm
+ \begin{algorithm}[H]
+ \SetNoline
+ \SetKw{KwAnd}{and}
+ $\bz \leftarrow \zero$ \\
+ \For{$i \leftarrow 0$ \KwTo $L$}{
+ \If{$i<L$}{
+ $z_\text{outer unpaired} \leftarrow z_\text{outer unpaired} + \Bdoouter(i) \cdot \outerunpaired \cdot \Adoouter(i+1)$ \\
+ }
+ \For{$i' \leftarrow i+2$ \KwTo $L$}{
+ $z_\text{outer branch} \leftarrow z_\text{outer branch} + \Bdoouter(i) \cdot \outerbranch \cdot \Adohelix(i,i',0) \cdot \Adoouter(i')$ \\
+ }
+ }
+ \end{algorithm}
+
+\end{document}
diff --git a/src/BundleMethod.hpp b/src/BundleMethod.hpp
new file mode 100644
index 0000000..22dde59
--- /dev/null
+++ b/src/BundleMethod.hpp
@@ -0,0 +1,59 @@
+//////////////////////////////////////////////////////////////////////
+// BundleMethod.hpp
+//
+// This file contains an implementation of the bundle
+// optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+#ifndef BUNDLEMETHOD_HPP
+#define BUNDLEMETHOD_HPP
+
+#include <vector>
+#include "Utilities.hpp"
+
+#include "utilities/sml.hpp"
+#include "utilities/common.hpp"
+#include "utilities/timer.hpp"
+#include "utilities/configuration.hpp"
+#include "utilities/bmrmexception.hpp"
+#include "solver/bmrminnersolver/bmrminnersolver.hpp"
+#include "solver/bmrminnersolver/l2n2_daifletcherpgm.hpp"
+#include "solver/bmrminnersolver/l2n2_prloqo.hpp"
+#include <fstream>
+#include <sstream>
+using namespace std;
+
+//////////////////////////////////////////////////////////////////////
+// BundleMethod()
+//
+// Implementation of bundle optimization routine.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class BundleMethod
+{
+private:
+    // Settings captured by the constructor.  The declaration order below is
+    // also the order in which the constructor initializes them.
+    const int MAX_ITERATIONS;   // iteration cap handed to the constructor
+    double lambda;              // value handed to the inner solver by Minimize()
+
+public:
+    // Records the iteration cap and lambda (defaults: 1000 and 1).
+    BundleMethod(const int MAX_ITERATIONS = 1000, const double lambda = 1);
+
+    virtual ~BundleMethod() {}
+
+    // Runs the bundle optimization starting from x0; x0 is updated in place
+    // and the objective value reached is returned.
+    RealT Minimize(std::vector<RealT> &x0);
+
+    // Hooks a concrete optimizer must supply.
+
+    // Returns the function value at x.
+    virtual RealT ComputeFunction(const std::vector<RealT> &x) = 0;
+
+    // Writes a subgradient at x into g.
+    virtual void ComputeSubgradient(std::vector<RealT> &g,
+                                    const std::vector<RealT> &x) = 0;
+
+    // Per-iteration progress report.
+    virtual void Report(int iteration,
+                        const std::vector<RealT> &x,
+                        RealT f,
+                        const std::vector<RealT> &g,
+                        RealT norm_bound,
+                        RealT step_size) = 0;
+
+    // Free-form text report.
+    virtual void Report(const std::string &s) = 0;
+};
+
+#include "BundleMethod.ipp"
+
+#endif
+
diff --git a/src/BundleMethod.ipp b/src/BundleMethod.ipp
new file mode 100644
index 0000000..5198737
--- /dev/null
+++ b/src/BundleMethod.ipp
@@ -0,0 +1,295 @@
+//////////////////////////////////////////////////////////////////////
+// BundleMethod.ipp
+//
+// This file contains an implementation of the bundle method
+// optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// BundleMethod::BundleMethod()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+BundleMethod<RealT>::BundleMethod(const int MAX_ITERATIONS, const double lambda)
+    : MAX_ITERATIONS(MAX_ITERATIONS), lambda(lambda)
+{
+    // Nothing further to do: both arguments are copied straight into the
+    // members of the same name.
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// BundleMethod::Minimize()
+//
+// Implementation of bundle methods for optimization
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT BundleMethod<RealT>::Minimize(std::vector<RealT> &x)
+{
+    // Minimizes loss + regularizer with a bundle method.
+    //
+    //   x       in:  starting point
+    //           out: the iterate with the smallest exact objective seen
+    //   returns the smallest exact objective value (loss + regularizer) seen
+    //
+    // Each iteration evaluates the loss and a subgradient at the current
+    // weights via ComputeSubgradient()/ComputeFunction(), hands them to the
+    // inner solver, and stops once the gap between the exact objective and the
+    // solver's approximate objective falls below the tolerances below, or the
+    // iteration cap is hit.
+
+    // Inner solver; the concrete type is selected at compile time.
+    CBMRMInnerSolver* innerSolver; // pointer to inner solver object
+#ifdef DAIFLETCHER
+    cout << "Using daifletcher as inner solver\n";
+    innerSolver = new CL2N2_DaiFletcherPGM(lambda);
+#else
+    cout << "Using loqo as inner solver\n";
+    innerSolver = new CL2N2_prLOQO(lambda);
+#endif
+
+    // Fixed run settings.
+    // NOTE(review): the MAX_ITERATIONS member is not consulted here; the
+    // iteration cap is the local constant maxNumOfIter.
+    const int verbosity = 2;
+    const int convergenceLog = 0;
+    const unsigned int maxNumOfIter = 10000;
+    const double epsilonTol = 1e-4;
+    const double gammaTol = 0.0;
+
+    // Evaluate once up front; the size of g after this call is used as the
+    // problem dimension.
+    std::vector<RealT> g;
+    ComputeSubgradient(g, x);
+    RealT f = ComputeFunction(x);
+
+    Scalar temp;
+    const int col = g.size();
+    const int row = 1;
+    TheMatrix a(row, col, SML::DENSE);      // gradient vector
+    TheMatrix w(row, col, SML::DENSE);      // weight vector
+    TheMatrix w_best(row, col, SML::DENSE); // best weight vector
+    a.Zero();
+    w.Zero();
+    w_best.Zero();
+
+    // Seed the weight vector with the caller's starting point.
+    for (int i = 0; i < col; i++){
+        w.Set(i, x[i]);
+    }
+
+    // Timers (CPU and wall-clock)
+    CTimer totalTime;             // total runtime of the training
+    CTimer innerSolverTime;       // time for inner optimization (e.g., QP or LP)
+    CTimer lossAndGradientTime;   // time for loss and gradient computation
+
+    unsigned int iter = 0;               // iteration count
+    Scalar loss = 0.0;                   // loss function value
+    Scalar exactObjVal = 0.0;            // (exact) objective function value
+    Scalar approxObjVal = -SML::INFTY;   // convex lower-bound (approximate) of objective function value
+    Scalar minExactObjVal = SML::INFTY;  // minimum of all previously evaluated (exact) objective function value
+    Scalar regVal = 0.0;                 // value of the regularizer term e.g., 0.5*w'*w
+    Scalar epsilon = 0.0;                // (duality) gap := exactObjVal - approxObjVal
+    Scalar gamma = 0.0;                  // := minExactObjVal - approxObjVal
+    double innerSolverTol = 1.0;         // optimization tolerance for inner solver
+
+    ofstream lossFp;          // keep loss values
+    ofstream exactObjValFp;   // keep exactObjVal values
+    ofstream approxObjValFp;  // keep approxObjVal values
+    ofstream regValFp;        // keep regVal values
+    ofstream epsilonFp;       // keep epsilon values
+    ofstream gammaFp;         // keep gamma values
+
+    // convergence log files
+    if(convergenceLog)
+    {
+        lossFp.open("loss.dump");
+        exactObjValFp.open("exactobj.dump");
+        approxObjValFp.open("approxobj.dump");
+        regValFp.open("regval.dump");
+        epsilonFp.open("epsilon.dump");
+        gammaFp.open("gamma.dump");
+    }
+
+    // start training
+    totalTime.Start();
+    while(1)
+    {
+        iter++;
+
+        // column generation: copy the current weights (and previous gradient)
+        // out of the matrices, then evaluate subgradient and loss there
+        lossAndGradientTime.Start();
+        for (int i = 0; i < col; i++){
+            w.Get(i, temp);
+            x[i] = temp;
+            a.Get(i, temp);
+            g[i] = temp;
+        }
+
+        ComputeSubgradient(g, x);
+        f = ComputeFunction(x);
+        loss = f;
+
+        // hand the fresh subgradient to the inner solver's matrix format
+        for (int i = 0; i < col; i++){
+            temp = g[i];
+            a.Set(i, temp);
+        }
+        lossAndGradientTime.Stop();
+
+        // update convergence monitor, remembering the best iterate so far
+        regVal = innerSolver->ComputeRegularizerValue(w);
+        exactObjVal = loss + regVal;
+        if (minExactObjVal > exactObjVal){
+            w_best.Assign(w);
+            minExactObjVal = exactObjVal;
+        }
+        epsilon = exactObjVal - approxObjVal;
+        gamma = minExactObjVal - approxObjVal;
+
+        // dump convergence statistics into files
+        if(convergenceLog)
+        {
+            lossFp << loss << endl;
+            exactObjValFp << exactObjVal << endl;
+            approxObjValFp << approxObjVal << endl;
+            regValFp << regVal << endl;
+            epsilonFp << epsilon << endl;
+            gammaFp << gamma << endl;
+        }
+
+        // dump convergence statistics on stdout
+        // (iter is unsigned, hence %u rather than %d)
+        if(verbosity < 1)
+        {
+            printf(".");
+            if(iter%100 == 0)
+                printf("%u",iter);
+            fflush(stdout);
+        }
+        else if(verbosity == 1)
+            printf("#%u eps %.6e loss %.6e reg %.6e\n",iter, epsilon, loss, regVal);
+        else if(verbosity > 1)
+            printf("#%u f %.6e pobj %.6e aobj %.6e eps %.6e gam %.6e loss %.6e reg %.6e\n", iter, minExactObjVal, exactObjVal, approxObjVal, epsilon, gamma, loss, regVal);
+
+        // stopping criteria
+        if((iter >= 2) && ((gamma < gammaTol) || (epsilon < epsilonTol)))
+        {
+            break;
+        }
+        if(iter >= maxNumOfIter)
+        {
+            printf("\nWARNING: program exceeded maximum number of iterations (%u) !\n", maxNumOfIter);
+            break;
+        }
+
+        // adjust inner solver optimization tolerance
+        innerSolverTol = std::min(innerSolverTol, std::max((double)epsilon, (double)epsilonTol));
+        innerSolverTol = std::min(innerSolverTol, std::max((double)gamma, (double)gammaTol));
+        innerSolver->SetTolerance(innerSolverTol*0.5);
+
+        // solve the inner problem; updates w and approxObjVal
+        innerSolverTime.Start();
+        innerSolver->Solve(w, a, loss, approxObjVal);
+        innerSolverTime.Stop();
+    }
+
+    // legends
+    if(verbosity >= 1)
+    {
+        printf("\nLegends::\n");
+        if(verbosity > 1)
+            printf("pobj: primal objective function value\naobj: approximate objective function value\n");
+        printf("eps: epsilon (approximation error) \ngam: lower bound on eps \nloss: loss function value \nreg: regularizer value\n");
+    }
+
+    // report norms of the best weight vector
+    w.Assign(w_best);
+    Scalar norm1 = 0, norm2 = 0, norminf = 0;
+    w.Norm1(norm1);
+    w.Norm2(norm2);
+    w.NormInf(norminf);
+    printf("\n");
+    printf("No. of iterations: %u\n",iter);
+    printf("Primal obj. val.: %.6e\n",exactObjVal);
+    printf("Approx obj. val.: %.6e\n",approxObjVal);
+    printf("Primal - Approx.: %.6e\n",exactObjVal-approxObjVal);
+    printf("Loss: %.6e\n",loss);
+    printf("|w|_1: %.6e\n",norm1);
+    printf("|w|_2: %.6e\n",norm2);
+    printf("|w|_oo: %.6e\n",norminf);
+
+    totalTime.Stop();
+    // end of training
+
+    // display timing profile
+    printf("\nCPU seconds in:\n");
+    printf("1. loss and gradient: %8.2f\n", lossAndGradientTime.CPUTotal());
+    printf("2. solver: %8.2f\n", innerSolverTime.CPUTotal());
+    printf(" Total: %8.2f\n", totalTime.CPUTotal());
+
+    // clean up
+    if(convergenceLog)
+    {
+        lossFp.close();
+        exactObjValFp.close();
+        approxObjValFp.close();
+        regValFp.close();
+        epsilonFp.close();
+        gammaFp.close();
+    }
+
+    // The solver was allocated with new above and is owned by this call.
+    delete innerSolver;
+
+    // Hand the best iterate back to the caller.  The copy must start at
+    // index 0; starting at 1 would leave x[0] holding the last iterate's
+    // value instead of the best one.
+    f = minExactObjVal;
+    for (int i = 0; i < col; i++){
+        w_best.Get(i, temp);
+        x[i] = temp;
+    }
+
+    return f;
+}
+
diff --git a/src/CGLinear.hpp b/src/CGLinear.hpp
new file mode 100644
index 0000000..e8aff29
--- /dev/null
+++ b/src/CGLinear.hpp
@@ -0,0 +1,43 @@
+//////////////////////////////////////////////////////////////////////
+// CGLinear.hpp
+//
+// This file contains an implementation of the conjugate gradient
+// algorithm for solving linear systems.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef CGLINEAR_HPP
+#define CGLINEAR_HPP
+
+#include <vector>
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// CGLinear()
+//
+// Implementation of conjugate gradient for solving linear
+// systems Ax = b, where A is symmetric positive definite.
+//////////////////////////////////////////////////////////////////////
+
+template<class Real>
+class CGLinear {
+public:
+    // Runs conjugate gradient on Ax = b starting from x (updated in place).
+    Real Minimize
+    (
+        const std::vector<Real> &b,               // right hand side
+        std::vector<Real> &x,                     // initial parameter vector
+
+        const int MAX_ITERATIONS = 1000,          // maximum number of iterations to run CG
+        const Real SMALL_STEP_RATIO = 0.001,      // ratio beneath which steps are considered "small"
+        const int MAX_SMALL_STEPS = 5             // maximum number of small steps before we quit
+    );
+
+    virtual ~CGLinear() {}
+
+    // Hooks supplied by the concrete solver.  They are expressed in terms of
+    // the template parameter Real (not a hard-coded double) so that they match
+    // the std::vector<Real> arguments Minimize() passes to them; for
+    // Real = double the signatures are unchanged.
+
+    // Writes the product A*x into Ax.
+    virtual void ComputeAx(std::vector<Real> &Ax, const std::vector<Real> &x) = 0;
+
+    // Per-iteration progress report.
+    virtual void Report(int iteration, const std::vector<Real> &x, Real f, Real step_size) = 0;
+
+    // Free-form text report.
+    virtual void Report(const std::string &s) = 0;
+};
+
+#include "CGLinear.ipp"
+
+#endif
diff --git a/src/CGLinear.ipp b/src/CGLinear.ipp
new file mode 100644
index 0000000..c9ba38e
--- /dev/null
+++ b/src/CGLinear.ipp
@@ -0,0 +1,131 @@
+//////////////////////////////////////////////////////////////////////
+// CGLinear.ipp
+//
+// This file contains an implementation of the conjugate gradient
+// algorithm for solving linear systems.
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// CGLinear()
+//
+// Implementation of conjugate gradient for solving linear
+// systems Ax = b, where A is symmetric positive definite.
+//////////////////////////////////////////////////////////////////////
+
+template<class Real>
+Real CGLinear<Real>::Minimize // returns sqrt(r'r / b'b) evaluated at the best iterate
+(
+ const std::vector<Real> &b, // right hand side
+ std::vector<Real> &x, // initial parameter vector (in), best iterate found (out)
+
+ const int MAX_ITERATIONS, // maximum number of iterations to run CG
+ const Real SMALL_STEP_RATIO, // ratio beneath which steps are considered "small"
+ const int MAX_SMALL_STEPS // maximum number of small steps before we quit
+)
+{
+ std::vector<Real> Ax;
+ ComputeAx(Ax, x);
+ std::vector<Real> r = b - Ax; // residual
+ std::vector<Real> d = r; // search direction
+ Real rTr = DotProduct(r,r);
+ Real f = DotProduct(x, 0.5 * Ax - b); // objective value 0.5 x'Ax - b'x
+
+ Real best_f = f;
+ std::vector<Real> best_x = x;
+ Real best_rTr = rTr;
+
+ int num_consecutive_small_steps = 0;
+ bool progress_made = false;
+
+ // report initial iteration
+
+ Report(0, x, f, 0);
+
+ for (int iteration = 1; iteration <= MAX_ITERATIONS; ++iteration){
+ if (rTr == Real(0)) break; // residual is exactly zero: nothing left to do, and alpha below would be 0/0 (NaN)
+ // compute step size
+
+ std::vector<Real> Ad;
+ ComputeAx(Ad, d);
+ Real alpha = rTr / DotProduct(d,Ad);
+
+ // update x and r
+
+ x += alpha * d;
+
+ // to prevent loss of precision, recompute the residual from scratch
+ // every 10th iteration instead of updating it incrementally
+ if (iteration % 10 == 0)
+ {
+ ComputeAx(Ax, x);
+ r = b - Ax;
+ }
+ else
+ {
+ r -= alpha * Ad;
+ Ax = b - r;
+ }
+
+ // update direction
+
+ Real rpTrp = rTr; // previous r'r
+ rTr = DotProduct(r,r);
+ d = r + (rTr / rpTrp) * d;
+
+ // update function value
+
+ f = DotProduct(x, 0.5 * Ax - b);
+ Report(iteration, x, f, alpha);
+
+ // note if we're making progress slowly (improvement relative to the
+ // best value so far is below SMALL_STEP_RATIO)
+ if ((best_f - f) / Abs(best_f) < SMALL_STEP_RATIO)
+ {
+ num_consecutive_small_steps++;
+ }
+ else
+ {
+ num_consecutive_small_steps = 0;
+ progress_made = true;
+ }
+
+ if (f < best_f)
+ {
+ best_f = f;
+ best_x = x;
+ best_rTr = rTr;
+ }
+
+ // prevent increasing steps: fall back to the residual direction
+
+ if (DotProduct(d, r) < 0)
+ {
+ d = r;
+ }
+
+ // if we're making slow progress
+
+ if (num_consecutive_small_steps == MAX_SMALL_STEPS)
+ {
+ // give us a second chance if we made some
+ // progress since the last restart
+
+ if (progress_made)
+ {
+ progress_made = false;
+ num_consecutive_small_steps = 0;
+ Report("Restart: Too many consecutive small steps");
+ d = r;
+ }
+ else
+ {
+ Report("Termination: Too many consecutive small steps");
+ break;
+ }
+ }
+ }
+
+ x = best_x; // hand back the best iterate, not necessarily the last one
+ return Sqrt(best_rTr / DotProduct(b,b));
+}
+
diff --git a/src/CGOptimizationWrapper.hpp b/src/CGOptimizationWrapper.hpp
new file mode 100644
index 0000000..7b2b66b
--- /dev/null
+++ b/src/CGOptimizationWrapper.hpp
@@ -0,0 +1,41 @@
+//////////////////////////////////////////////////////////////////////
+// CGOptimizationWrapper.hpp
+//
+// Conjugate gradient optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef CGOPTIMIZATIONWRAPPER_HPP
+#define CGOPTIMIZATIONWRAPPER_HPP
+
+#include "OptimizationWrapper.hpp"
+#include "CGLinear.hpp"
+
+template<class RealT>
+class OptimizationWrapper;
+
+//////////////////////////////////////////////////////////////////////
+// class CGOptimizationWrapper
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class CGOptimizationWrapper : public CGLinear<RealT>
+{
+ OptimizationWrapper<RealT> *optimization_wrapper; // raw pointer; gives access to the parameter manager, computation wrapper, options and message printing
+ const std::vector<int> units; // by-value copy of the work-unit list passed to ComputeHessianVectorProduct()
+ const std::vector<RealT> w; // by-value copy of the parameter vector passed to ComputeHessianVectorProduct()
+ const std::vector<RealT> C; // by-value copy of the parameter-group values that ComputeAx() expands and multiplies with x
+
+public:
+ CGOptimizationWrapper(OptimizationWrapper<RealT> *optimizer,
+ const std::vector<int> &units,
+ const std::vector<RealT> &w,
+ const std::vector<RealT> &C);
+ // implementations of the CGLinear callbacks
+ void ComputeAx(std::vector<RealT> &Ax, const std::vector<RealT> &x);
+ void Report(int iteration, const std::vector<RealT> &x, RealT f, RealT step_size);
+ void Report(const std::string &s);
+};
+
+#include "CGOptimizationWrapper.ipp"
+
+#endif
diff --git a/src/CGOptimizationWrapper.ipp b/src/CGOptimizationWrapper.ipp
new file mode 100644
index 0000000..b3273d1
--- /dev/null
+++ b/src/CGOptimizationWrapper.ipp
@@ -0,0 +1,54 @@
+//////////////////////////////////////////////////////////////////////
+// CGOptimizationWrapper.ipp
+//
+// CG optimization code.
+//////////////////////////////////////////////////////////////////////
+
+#include "CGOptimizationWrapper.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// CGOptimizationWrapper<RealT>::CGOptimizationWrapper()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+CGOptimizationWrapper<RealT>::CGOptimizationWrapper(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &w,
+ const std::vector<RealT> &C) :
+ CGLinear<RealT>(), optimization_wrapper(optimization_wrapper), units(units), w(w), C(C) // parameters shadow the members of the same name
+{} // only stores the pointer and initializes units, w and C from the arguments
+
+//////////////////////////////////////////////////////////////////////
+// CGOptimizationWrapper<RealT>::ComputeAx()
+//
+// Compute Hessian-vector product.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void CGOptimizationWrapper<RealT>::ComputeAx(std::vector<RealT> &Ax, const std::vector<RealT> &x)
+{ // sets Ax = (Hessian-vector product at w applied to x) + Ce * x
+ std::vector<RealT> Ce = optimization_wrapper->GetParameterManager().ExpandParameterGroupValues(C); // recomputed on every call; presumably one value per parameter -- confirm
+ Ax = optimization_wrapper->GetComputationWrapper().ComputeHessianVectorProduct(units, w, x, true, optimization_wrapper->GetOptions().GetRealValue("log_base")) + Ce * x;
+}
+
+//////////////////////////////////////////////////////////////////////
+// CGOptimizationWrapper<RealT>::Report()
+//
+// Provide progress report on CG algorithm.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void CGOptimizationWrapper<RealT>::Report(int iteration, const std::vector<RealT> &x, RealT f, RealT step_size)
+{ // formats one progress line and forwards it to the optimization wrapper's PrintMessage()
+ optimization_wrapper->PrintMessage(SPrintF("CG iteration %d: f = %lf, |x| = %lf, step = %lf, efficiency = %lf%%",
+ iteration, double(f), double(Norm(x)), double(step_size), // values are cast to double to match the %lf conversions
+ double(optimization_wrapper->GetComputationEngine().GetEfficiency())));
+}
+
+template<class RealT>
+void CGOptimizationWrapper<RealT>::Report(const std::string &s)
+{ // forwards a free-form CG status message, prefixed with "CG message: ", to PrintMessage()
+ optimization_wrapper->PrintMessage(SPrintF("CG message: %s", s.c_str()));
+}
diff --git a/src/ComputationEngine.hpp b/src/ComputationEngine.hpp
new file mode 100644
index 0000000..1f9c35a
--- /dev/null
+++ b/src/ComputationEngine.hpp
@@ -0,0 +1,118 @@
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine.hpp
+//
+// This class provides an implementation of the DoComputation()
+// routine needed by the DistributedComputation class.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef COMPUTATIONENGINE_HPP
+#define COMPUTATIONENGINE_HPP
+
+#include "Config.hpp"
+#include "Options.hpp"
+#include "LogSpace.hpp"
+#include "Utilities.hpp"
+#include "SparseMatrix.hpp"
+#include "SStruct.hpp"
+#include "InferenceEngine.hpp"
+#include "DistributedComputation.hpp"
+#include "FileDescription.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// struct SharedInfo
+//
+// Class for storing information shared between processing units: the
+// command to execute, the parameter vector w, a second vector v (the
+// direction used for Hessian-vector products), and the settings used
+// during inference (use_nonsmooth, use_loss, gamma, log_base).
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+struct SharedInfo
+{
+ int command; // ProcessingType value that DoComputation() switches on
+ RealT w[SHARED_PARAMETER_SIZE]; // parameter values; SHARED_PARAMETER_SIZE is defined outside this file
+ RealT v[SHARED_PARAMETER_SIZE]; // direction vector read by ComputeHessianVectorProduct()
+ bool use_nonsmooth; // ComputeFunctionAndGradient(): use Viterbi instead of inside/outside
+ bool use_loss; // ComputeFunctionAndGradient(): call UseLoss() (only when HAMMING_LOSS is defined)
+ RealT gamma; // passed to PredictPairingsPosterior()
+ RealT log_base; // w is multiplied by this before LoadValues(); function values are divided by it
+};
+
+//////////////////////////////////////////////////////////////////////
+// enum ProcessingType / struct NonSharedInfo
+//
+// ProcessingType lists the commands a work unit can be asked to run
+// (the value is carried in SharedInfo::command). NonSharedInfo holds
+// the information unique to each processing unit:
+//
+// index = index of the work unit to be processed
+//////////////////////////////////////////////////////////////////////
+
+enum ProcessingType
+{
+ CHECK_PARSABILITY, // handled by CheckParsability()
+ COMPUTE_SOLUTION_NORM_BOUND, // handled by ComputeSolutionNormBound()
+ COMPUTE_GRADIENT_NORM_BOUND, // handled by ComputeGradientNormBound()
+ COMPUTE_LOSS, // handled by ComputeLoss()
+ COMPUTE_FUNCTION, // handled by ComputeFunctionAndGradient() with need_gradient = false
+ COMPUTE_GRADIENT, // handled by ComputeFunctionAndGradient() with need_gradient = true
+ COMPUTE_HV, // handled by ComputeHessianVectorProduct()
+ PREDICT // handled by Predict()
+};
+
+struct NonSharedInfo
+{
+ int index; // position of this work unit in the ComputationEngine's descriptions vector
+};
+
+//////////////////////////////////////////////////////////////////////
+// class ComputationEngine
+//
+// Wrapper class for DistributedComputation.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class ComputationEngine : public DistributedComputation<RealT, SharedInfo<RealT>, NonSharedInfo>
+{
+ const Options &options; // all four members are references: the referenced objects must outlive this engine
+ const std::vector<FileDescription> &descriptions; // one entry per work unit, indexed by NonSharedInfo::index
+ InferenceEngine<RealT> &inference_engine;
+ ParameterManager<RealT> &parameter_manager;
+
+ std::string MakeOutputFilename(const std::string &input_filename,
+ const std::string &output_destination,
+ const bool cross_validation,
+ const RealT gamma) const;
+
+public:
+
+ // constructor, destructor
+ ComputationEngine(const Options &options,
+ const std::vector<FileDescription> &descriptions,
+ InferenceEngine<RealT> &inference_engine,
+ ParameterManager<RealT> &parameter_manager);
+ ~ComputationEngine();
+
+ // routine for performing an individual work unit (dispatches on shared.command)
+ void DoComputation(std::vector<RealT> &result, const SharedInfo<RealT> &shared, const NonSharedInfo &nonshared);
+
+ // methods to act on individual work units; each one overwrites result
+ void CheckParsability(std::vector<RealT> &result, const NonSharedInfo &nonshared);
+ void ComputeSolutionNormBound(std::vector<RealT> &result, const SharedInfo<RealT> &shared, const NonSharedInfo &nonshared);
+ void ComputeGradientNormBound(std::vector<RealT> &result, const NonSharedInfo &nonshared);
+ void ComputeLoss(std::vector<RealT> &result, const SharedInfo<RealT> &shared, const NonSharedInfo &nonshared);
+ void ComputeFunctionAndGradient(std::vector<RealT> &result, const SharedInfo<RealT> &shared, const NonSharedInfo &nonshared, bool need_gradient);
+ void ComputeHessianVectorProduct(std::vector<RealT> &result, const SharedInfo<RealT> &shared, const NonSharedInfo &nonshared);
+ void Predict(std::vector<RealT> &result, const SharedInfo<RealT> &shared, const NonSharedInfo &nonshared);
+
+ // getters
+ const Options &GetOptions() const { return options; }
+ const std::vector<FileDescription> &GetDescriptions() const { return descriptions; }
+ InferenceEngine<RealT> &GetInferenceEngine() { return inference_engine; }
+ ParameterManager<RealT> &GetParameterManager() { return parameter_manager; }
+};
+
+#include "ComputationEngine.ipp"
+
+#endif
diff --git a/src/ComputationEngine.ipp b/src/ComputationEngine.ipp
new file mode 100644
index 0000000..4358512
--- /dev/null
+++ b/src/ComputationEngine.ipp
@@ -0,0 +1,588 @@
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine.ipp
+//////////////////////////////////////////////////////////////////////
+
+#include "ComputationEngine.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::ComputationEngine()
+// ComputationEngine::~ComputationEngine()
+//
+// Constructor and destructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+ComputationEngine<RealT>::ComputationEngine(const Options &options,
+ const std::vector<FileDescription> &descriptions,
+ InferenceEngine<RealT> &inference_engine,
+ ParameterManager<RealT> &parameter_manager) :
+ DistributedComputation<RealT, SharedInfo<RealT>, NonSharedInfo>(options.GetBoolValue("verbose_output")), // base class receives the "verbose_output" option
+ options(options), // the remaining initializers only bind the reference members
+ descriptions(descriptions),
+ inference_engine(inference_engine),
+ parameter_manager(parameter_manager)
+{}
+
+template<class RealT>
+ComputationEngine<RealT>::~ComputationEngine()
+{} // empty body: the destructor performs no work of its own
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::DoComputation()
+//
+// Decide what type of computation needs to be done and then
+// pass the work on to the appropriate routine.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationEngine<RealT>::DoComputation(std::vector<RealT> &result,
+ const SharedInfo<RealT> &shared,
+ const NonSharedInfo &nonshared)
+{
+ switch (shared.command) // an int compared against the ProcessingType enumerators
+ {
+ case CHECK_PARSABILITY:
+ CheckParsability(result, nonshared);
+ break;
+ case COMPUTE_SOLUTION_NORM_BOUND:
+ ComputeSolutionNormBound(result, shared, nonshared);
+ break;
+ case COMPUTE_GRADIENT_NORM_BOUND:
+ ComputeGradientNormBound(result, nonshared);
+ break;
+ case COMPUTE_LOSS:
+ ComputeLoss(result, shared, nonshared);
+ break;
+ case COMPUTE_FUNCTION: // function value only
+ ComputeFunctionAndGradient(result, shared, nonshared, false);
+ break;
+ case COMPUTE_GRADIENT: // gradient entries followed by the function value
+ ComputeFunctionAndGradient(result, shared, nonshared, true);
+ break;
+ case COMPUTE_HV:
+ ComputeHessianVectorProduct(result, shared, nonshared);
+ break;
+ case PREDICT:
+ Predict(result, shared, nonshared);
+ break;
+ default: // any value that is not one of the enumerators handled above
+ Assert(false, "Unknown command type.");
+ break;
+ }
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::CheckParsability()
+//
+// Check to see if a sequence is parsable or not. Return a
+// vector with a "0" in the appropriate spot indicating that a
+// file is not parsable.
+//////////////////////////////////////////////////////////////////////
+
+template <class RealT>
+void ComputationEngine<RealT>::CheckParsability(std::vector<RealT> &result,
+ const NonSharedInfo &nonshared)
+{
+ // load training example
+ const SStruct &sstruct = descriptions[nonshared.index].sstruct;
+ inference_engine.LoadSequence(sstruct);
+
+ // conditional inference: Viterbi with all-zero parameters, constrained to the example's own mapping
+ inference_engine.LoadValues(std::vector<RealT>(parameter_manager.GetNumLogicalParameters()));
+ inference_engine.UseConstraints(sstruct.GetMapping());
+ inference_engine.ComputeViterbi();
+ RealT conditional_score = inference_engine.GetViterbiScore();
+
+ // check for bad parse: result has one slot per description, and only this unit's slot can become 1
+ result.clear();
+ result.resize(descriptions.size());
+ result[nonshared.index] = (conditional_score < RealT(NEG_INF/2) ? 0 : 1); // a score below NEG_INF/2 is treated as "not parsable"
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::ComputeSolutionNormBound()
+//
+// Compute the max entropy and loss possible for an example.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationEngine<RealT>::ComputeSolutionNormBound(std::vector<RealT> &result,
+ const SharedInfo<RealT> &shared,
+ const NonSharedInfo &nonshared)
+{
+ RealT max_entropy = RealT(0);
+ RealT max_loss = RealT(0);
+
+ // load training example
+ const SStruct &sstruct = descriptions[nonshared.index].sstruct;
+ inference_engine.LoadSequence(sstruct);
+
+ // load parameters (all zeros)
+ const std::vector<RealT> w(parameter_manager.GetNumLogicalParameters(), RealT(0));
+ inference_engine.LoadValues(w);
+
+ // perform computation; when SMOOTH_MAX_MARGIN is nonzero the block below runs unconditionally
+#if !SMOOTH_MAX_MARGIN
+ if (!options.GetBoolValue("viterbi_parsing"))
+#endif
+ {
+ inference_engine.ComputeInside();
+ max_entropy += inference_engine.ComputeLogPartitionCoefficient();
+ }
+
+#if defined(HAMMING_LOSS)
+ inference_engine.UseLoss(sstruct.GetMapping(), RealT(HAMMING_LOSS));
+ inference_engine.ComputeViterbi();
+ max_loss += inference_engine.GetViterbiScore();
+#endif
+ // result has one slot per description; only this unit's slot is filled in
+ result.clear();
+ result.resize(descriptions.size());
+ result[nonshared.index] = max_entropy / shared.log_base + max_loss;
+
+ result *= RealT(descriptions[nonshared.index].weight); // scale by the example weight
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::ComputeGradientNormBound()
+//
+// Compute the max L1 norm for the features of an example.
+// Return a vector with this value in the appropriate spot for
+// this example.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationEngine<RealT>::ComputeGradientNormBound(std::vector<RealT> &result,
+ const NonSharedInfo &nonshared)
+{
+ // load training example
+ const SStruct &sstruct = descriptions[nonshared.index].sstruct;
+ inference_engine.LoadSequence(sstruct);
+
+ // load parameters (every parameter set to 1)
+ const std::vector<RealT> w(parameter_manager.GetNumLogicalParameters(), RealT(1));
+ inference_engine.LoadValues(w);
+
+ // perform inference; with all-ones parameters the Viterbi score is used as the max L1 norm
+ inference_engine.ComputeViterbi();
+ const RealT max_L1_norm = inference_engine.GetViterbiScore();
+ // result has one slot per description; only this unit's slot is filled in (no weighting applied here)
+ result.clear();
+ result.resize(descriptions.size());
+ result[nonshared.index] = max_L1_norm;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::ComputeLoss()
+//
+// Return a vector containing a single entry with the loss value.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationEngine<RealT>::ComputeLoss(std::vector<RealT> &result,
+ const SharedInfo<RealT> &shared,
+ const NonSharedInfo &nonshared)
+{
+ // Predicts a structure for example nonshared.index using shared.w, then
+ // scores that prediction; result receives exactly one entry.
+
+ // load training example
+ const SStruct &sstruct = descriptions[nonshared.index].sstruct;
+ inference_engine.LoadSequence(sstruct);
+
+ // load parameters
+ const std::vector<RealT> w(shared.w, shared.w + parameter_manager.GetNumLogicalParameters());
+ inference_engine.LoadValues(w * shared.log_base);
+
+ // perform inference; the prediction lives in a stack-allocated copy of the
+ // example so that it is released on every exit path
+ SStruct solution(sstruct);
+ if (options.GetBoolValue("viterbi_parsing"))
+ {
+ inference_engine.ComputeViterbi();
+ solution.SetMapping(inference_engine.PredictPairingsViterbi());
+ }
+ else
+ {
+ inference_engine.ComputeInside();
+ inference_engine.ComputeOutside();
+ inference_engine.ComputePosterior();
+ solution.SetMapping(inference_engine.PredictPairingsPosterior(shared.gamma));
+ }
+
+ // compute loss: Viterbi score with all-zero parameters, constrained to the prediction
+ if (!shared.use_loss) Error("Must be using loss function in order to compute loss.");
+#if defined(HAMMING_LOSS)
+ inference_engine.UseLoss(sstruct.GetMapping(), shared.log_base * RealT(HAMMING_LOSS));
+#endif
+ inference_engine.LoadValues(std::vector<RealT>(w.size()));
+ inference_engine.UseConstraints(solution.GetMapping());
+ inference_engine.ComputeViterbi();
+
+ result.clear();
+ result.push_back(inference_engine.GetViterbiScore());
+
+ result *= RealT(descriptions[nonshared.index].weight); // scale by the example weight
+ result.back() /= shared.log_base; // undo the log_base scaling applied to the loss above
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::ComputeFunctionAndGradient();
+//
+// Return a vector containing the gradient and function value.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationEngine<RealT>::ComputeFunctionAndGradient(std::vector<RealT> &result,
+ const SharedInfo<RealT> &shared,
+ const NonSharedInfo &nonshared,
+ bool need_gradient)
+{ // on return, result = [gradient entries (only if need_gradient) ..., function value]
+ // load training example
+ const SStruct &sstruct = descriptions[nonshared.index].sstruct;
+ inference_engine.LoadSequence(sstruct);
+
+ // load parameters
+ const std::vector<RealT> w(shared.w, shared.w + parameter_manager.GetNumLogicalParameters());
+ inference_engine.LoadValues(w * shared.log_base);
+#if defined(HAMMING_LOSS)
+ if (shared.use_loss) inference_engine.UseLoss(sstruct.GetMapping(), shared.log_base * RealT(HAMMING_LOSS));
+#endif
+
+ // unconditional inference
+ RealT unconditional_score;
+ std::vector<RealT> unconditional_counts;
+
+ if (shared.use_nonsmooth)
+ {
+ inference_engine.ComputeViterbi();
+ unconditional_score = inference_engine.GetViterbiScore();
+ if (need_gradient) unconditional_counts = inference_engine.ComputeViterbiFeatureCounts();
+ }
+ else
+ {
+ inference_engine.ComputeInside();
+ unconditional_score = inference_engine.ComputeLogPartitionCoefficient();
+ if (need_gradient)
+ {
+ inference_engine.ComputeOutside();
+ unconditional_counts = inference_engine.ComputeFeatureCountExpectations();
+ }
+ }
+
+ // conditional inference (constrained to the example's own mapping)
+ RealT conditional_score;
+ std::vector<RealT> conditional_counts;
+
+ inference_engine.UseConstraints(sstruct.GetMapping());
+ if (shared.use_nonsmooth)
+ {
+ inference_engine.ComputeViterbi();
+ conditional_score = inference_engine.GetViterbiScore();
+ if (need_gradient) conditional_counts = inference_engine.ComputeViterbiFeatureCounts();
+ }
+ else
+ {
+ inference_engine.ComputeInside();
+ conditional_score = inference_engine.ComputeLogPartitionCoefficient();
+ if (need_gradient)
+ {
+ inference_engine.ComputeOutside();
+ conditional_counts = inference_engine.ComputeFeatureCountExpectations();
+ }
+ }
+
+ result.clear();
+
+ // compute subgradient
+ if (need_gradient) result = unconditional_counts - conditional_counts;
+
+ // compute function value
+ Assert(conditional_score <= unconditional_score, "Conditional score cannot exceed unconditional score.");
+ result.push_back(unconditional_score - conditional_score);
+
+ // check for bad parse
+ if (conditional_score < RealT(NEG_INF/2))
+ {
+ std::cerr << "Unexpected bad parse for file: " << descriptions[nonshared.index].input_filename << std::endl;
+ std::fill(result.begin(), result.end(), RealT(0));
+ return;
+ }
+
+ if (NONCONVEX_MULTIPLIER != 0)
+ {
+
+#if STOCHASTIC_GRADIENT
+ if (shared.use_loss) inference_engine.UseLoss(sstruct.GetMapping(), RealT(0));
+
+ // unconditional counts
+ inference_engine.UseMapping(std::vector<int>(sstruct.GetLength() + 1, UNKNOWN));
+ if (shared.use_nonsmooth)
+ {
+ inference_engine.ComputeViterbi();
+ unconditional_score = inference_engine.GetViterbiScore();
+ if (need_gradient) unconditional_counts = inference_engine.ComputeViterbiFeatureCounts();
+ }
+ else
+ {
+ inference_engine.ComputeInside();
+ unconditional_score = inference_engine.ComputeLogPartitionCoefficient();
+ if (need_gradient)
+ {
+ inference_engine.ComputeOutside();
+ unconditional_counts = inference_engine.ComputeFeatureCountExpectations();
+ }
+ }
+
+ // conditional counts (stored in the conditional_* variables so the unconditional pass above is not overwritten)
+ inference_engine.UseMapping(sstruct.GetMapping());
+ if (shared.use_nonsmooth)
+ {
+ inference_engine.ComputeViterbi();
+ conditional_score = inference_engine.GetViterbiScore();
+ if (need_gradient) conditional_counts = inference_engine.ComputeViterbiFeatureCounts();
+ }
+ else
+ {
+ inference_engine.ComputeInside();
+ conditional_score = inference_engine.ComputeLogPartitionCoefficient();
+ if (need_gradient)
+ {
+ inference_engine.ComputeOutside();
+ conditional_counts = inference_engine.ComputeFeatureCountExpectations();
+ }
+ }
+
+ std::vector<RealT> result2;
+
+ // compute subgradient
+ if (need_gradient) result2 = unconditional_counts - conditional_counts;
+
+ // compute function value
+ Assert(conditional_score <= unconditional_score, "Conditional score cannot exceed unconditional score.");
+ result2.push_back(unconditional_score - conditional_score);
+
+ // check for bad parse
+ if (conditional_score < RealT(NEG_INF/2))
+ {
+ std::cerr << "Unexpected bad parse for file: " << descriptions[nonshared.index].input_filename << std::endl;
+ std::fill(result.begin(), result.end(), RealT(0));
+ return;
+ }
+
+ result -= NONCONVEX_MULTIPLIER * result2;
+#endif
+ }
+
+ // avoid precision problems: tiny negative values are clamped to zero, larger ones are fatal
+ if (result.back() < 0)
+ {
+ if (result.back() < -1e-6)
+ {
+ std::cerr << "Encountered negative function value for " << descriptions[nonshared.index].input_filename << ": " << result.back() << std::endl;
+ parameter_manager.WriteToFile(SPrintF("neg_params.%s", GetBaseName(descriptions[nonshared.index].input_filename).c_str()), w);
+ exit(1); // this is an error path, so report failure to the caller (was exit(0))
+ }
+ std::fill(result.begin(), result.end(), RealT(0));
+ return;
+ }
+
+ result *= RealT(descriptions[nonshared.index].weight); // scale by the example weight
+ result.back() /= shared.log_base; // only the function value is divided by log_base
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::ComputeHessianVectorProduct()
+//
+// Return a vector containing Hv.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationEngine<RealT>::ComputeHessianVectorProduct(std::vector<RealT> &result,
+ const SharedInfo<RealT> &shared,
+ const NonSharedInfo &nonshared)
+{
+ // central finite difference of the gradient along shared.v: (g(w + eps*v) - g(w - eps*v)) / (2*eps)
+ const size_t num_parameters = parameter_manager.GetNumLogicalParameters(); // queried once, used by both loops
+
+ if (options.GetBoolValue("viterbi_parsing"))
+ {
+ Error("Should not use Hessian-vector products with Viterbi parsing.");
+ }
+
+ const RealT EPSILON = RealT(1e-8); // NOTE(review): may be too small to perturb w when RealT is float -- confirm
+ SharedInfo<RealT> shared_temp(shared); // full copy so that the caller's shared.w stays untouched
+ std::vector<RealT> result2;
+
+ for (size_t i = 0; i < num_parameters; i++)
+ shared_temp.w[i] = shared.w[i] + EPSILON * shared.v[i];
+ ComputeFunctionAndGradient(result, shared_temp, nonshared, true);
+
+ for (size_t i = 0; i < num_parameters; i++)
+ shared_temp.w[i] = shared.w[i] - EPSILON * shared.v[i];
+ ComputeFunctionAndGradient(result2, shared_temp, nonshared, true);
+ // the last entry (the function-value slot of both results) is differenced as well
+ result = (result - result2) / (RealT(2) * EPSILON);
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::Predict()
+//
+// Predict structure of a single sequence.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationEngine<RealT>::Predict(std::vector<RealT> &result,
+ const SharedInfo<RealT> &shared,
+ const NonSharedInfo &nonshared)
+{
+ result.clear(); // nothing is returned through result; all output goes to files or std::cout
+
+ // load sequence, with constraints if necessary
+ const SStruct &sstruct = descriptions[nonshared.index].sstruct;
+ inference_engine.LoadSequence(sstruct);
+ if (options.GetBoolValue("use_constraints")) inference_engine.UseConstraints(sstruct.GetMapping());
+
+ // load parameters
+ const std::vector<RealT> w(shared.w, shared.w + parameter_manager.GetNumLogicalParameters());
+ inference_engine.LoadValues(w * shared.log_base);
+
+ // perform inference; the prediction lives in a stack-allocated copy of the
+ // input so that it is released on every exit path. With
+ // "partition_function_only" set, only a score is printed and we return early.
+ SStruct solution(sstruct);
+ if (options.GetBoolValue("viterbi_parsing"))
+ {
+ inference_engine.ComputeViterbi();
+ if (options.GetBoolValue("partition_function_only"))
+ {
+ std::cout << "Viterbi score for \"" << descriptions[nonshared.index].input_filename << "\": "
+ << inference_engine.GetViterbiScore() << std::endl;
+ return;
+ }
+ solution.SetMapping(inference_engine.PredictPairingsViterbi());
+ }
+ else
+ {
+ inference_engine.ComputeInside();
+ if (options.GetBoolValue("partition_function_only"))
+ {
+ std::cout << "Log partition coefficient for \"" << descriptions[nonshared.index].input_filename << "\": "
+ << inference_engine.ComputeLogPartitionCoefficient() << std::endl;
+ return;
+ }
+ inference_engine.ComputeOutside();
+ inference_engine.ComputePosterior();
+ solution.SetMapping(inference_engine.PredictPairingsPosterior(shared.gamma));
+ }
+
+ // write output; a negative "gamma" option selects the file name
+ // variant that embeds the gamma value (see MakeOutputFilename)
+ if (options.GetStringValue("output_parens_destination") != "")
+ {
+ const std::string filename = MakeOutputFilename(descriptions[nonshared.index].input_filename,
+ options.GetStringValue("output_parens_destination"),
+ options.GetRealValue("gamma") < 0,
+ shared.gamma);
+ std::ofstream outfile(filename.c_str());
+ if (outfile.fail()) Error("Unable to open output parens file '%s' for writing.", filename.c_str());
+ solution.WriteParens(outfile);
+ outfile.close();
+ }
+
+ if (options.GetStringValue("output_bpseq_destination") != "")
+ {
+ const std::string filename = MakeOutputFilename(descriptions[nonshared.index].input_filename,
+ options.GetStringValue("output_bpseq_destination"),
+ options.GetRealValue("gamma") < 0,
+ shared.gamma);
+ std::ofstream outfile(filename.c_str());
+ if (outfile.fail()) Error("Unable to open output bpseq file '%s' for writing.", filename.c_str());
+ solution.WriteBPSEQ(outfile);
+ outfile.close();
+ }
+
+ if (options.GetStringValue("output_posteriors_destination") != "")
+ {
+ const std::string filename = MakeOutputFilename(descriptions[nonshared.index].input_filename,
+ options.GetStringValue("output_posteriors_destination"),
+ options.GetRealValue("gamma") < 0,
+ shared.gamma);
+ RealT *posterior = inference_engine.GetPosterior(options.GetRealValue("output_posteriors_cutoff"));
+ SparseMatrix<RealT> sparse(posterior, sstruct.GetLength()+1, RealT(0));
+ delete [] posterior; // the raw array is released as soon as the sparse matrix has been built from it
+ std::ofstream outfile(filename.c_str());
+ if (outfile.fail()) Error("Unable to open output posteriors file '%s' for writing.", filename.c_str());
+ sparse.PrintSparseBPSEQ(outfile, sstruct.GetSequences()[0]);
+ outfile.close();
+ }
+
+ // no output destination given at all: print the prediction to standard output
+ if (options.GetStringValue("output_parens_destination") == "" &&
+ options.GetStringValue("output_bpseq_destination") == "" &&
+ options.GetStringValue("output_posteriors_destination") == "")
+ {
+ WriteProgressMessage("");
+ solution.WriteParens(std::cout);
+ }
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationEngine::MakeOutputFilename()
+//
+// Decide on output filename, if any. The arguments are (1) the name
+// of the input file being processed; (2) the supplied output
+// destination; (3) whether a cross-validation style name containing
+// ".gamma=<value>" should be generated; and (4) the gamma value to
+// embed in that name. When more than one input file is being
+// processed, the output destination is treated as a directory and the
+// base name of the input file is appended to it.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::string ComputationEngine<RealT>::MakeOutputFilename(const std::string &input_filename,
+ const std::string &output_destination,
+ const bool cross_validation,
+ const RealT gamma) const
+{
+ if (output_destination == "") return ""; // no destination requested
+
+ const std::string dir_name = GetDirName(output_destination);
+ const std::string base_name = GetBaseName(output_destination);
+ // prefix is "<dir>/" or empty when the destination has no directory part
+ const std::string prefix = (dir_name != "" ? (dir_name + DIR_SEPARATOR_CHAR) : std::string(""));
+
+ // check if output directory required
+ if (descriptions.size() > 1)
+ {
+ if (cross_validation)
+ { // <prefix><base>/<base>.gamma=<gamma>/<input base name>
+ return SPrintF("%s%s%c%s.gamma=%lf%c%s",
+ prefix.c_str(),
+ base_name.c_str(),
+ DIR_SEPARATOR_CHAR,
+ base_name.c_str(),
+ double(gamma),
+ DIR_SEPARATOR_CHAR,
+ GetBaseName(input_filename).c_str());
+ }
+ return SPrintF("%s%s%c%s", // <prefix><base>/<input base name>
+ prefix.c_str(),
+ base_name.c_str(),
+ DIR_SEPARATOR_CHAR,
+ GetBaseName(input_filename).c_str());
+ }
+ // single input file
+ if (cross_validation)
+ { // <prefix><base>/<base>.gamma=<gamma>
+ return SPrintF("%s%s%c%s.gamma=%lf",
+ prefix.c_str(),
+ base_name.c_str(),
+ DIR_SEPARATOR_CHAR,
+ base_name.c_str(),
+ double(gamma));
+ }
+ return SPrintF("%s%s", // <prefix><base>
+ prefix.c_str(),
+ base_name.c_str());
+}
diff --git a/src/ComputationWrapper.hpp b/src/ComputationWrapper.hpp
new file mode 100644
index 0000000..cd52b73
--- /dev/null
+++ b/src/ComputationWrapper.hpp
@@ -0,0 +1,70 @@
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper.hpp
+//
+// This class provides a wrapper around the Computation class that
+// provides a framework for translating basic queries into the format
+// needed by the Computation class. This class also provides caching
+// facilities for preventing redundant computations.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef COMPUTATIONWRAPPER_HPP
+#define COMPUTATIONWRAPPER_HPP
+
+#include "ComputationEngine.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class ComputationWrapper
+//
+// Wrapper class for Computation.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class ComputationWrapper
+{
+    ComputationEngine<RealT> &computation_engine;   // engine that runs each request (held by reference)
+
+    SharedInfo<RealT> shared_info;                  // request-wide settings, refilled before every request
+    std::vector<NonSharedInfo> nonshared_info;      // one entry per work unit, carrying the unit's index
+
+    // the following member variables "cache" the most recent function
+    // and gradient evaluation so that it is not repeated unnecessarily
+    bool cached_toggle_use_nonsmooth;               // use_nonsmooth setting of the cached evaluation
+    bool cached_toggle_use_loss;                    // use_loss setting of the cached evaluation
+    std::vector<int> cached_units;                  // work units of the cached evaluation
+    std::vector<RealT> cached_w;                    // parameters of the cached evaluation; RealT so it compares with w
+    std::vector<RealT> cached_function;             // function value (one element), empty when not cached
+    std::vector<RealT> cached_gradient;             // gradient, empty when not cached; returned as std::vector<RealT>
+
+public:
+
+    // constructor, destructor
+    ComputationWrapper(ComputationEngine<RealT> &computation_engine);
+    ~ComputationWrapper();
+
+    // retrieve list of work units (one index per file description)
+    std::vector<int> GetAllUnits() const;
+
+    // methods to act on vectors of work units (each asserts that it runs on the master node)
+    std::vector<int> FilterNonparsable(const std::vector<int> &units);
+    RealT ComputeSolutionNormBound(const std::vector<int> &units, const std::vector<RealT> &C, RealT log_base);
+    RealT ComputeGradientNormBound(const std::vector<int> &units, const std::vector<RealT> &C, RealT log_base);
+    void Predict(const std::vector<int> &units, const std::vector<RealT> &w, RealT gamma, RealT log_base);
+    RealT ComputeLoss(const std::vector<int> &units, const std::vector<RealT> &w, RealT log_base);
+    RealT ComputeFunction(const std::vector<int> &units, const std::vector<RealT> &w, bool toggle_use_nonsmooth, bool toggle_use_loss, RealT log_base);
+    std::vector<RealT> ComputeGradient(const std::vector<int> &units, const std::vector<RealT> &w, bool toggle_use_nonsmooth, bool toggle_use_loss, RealT log_base);
+    std::vector<RealT> ComputeHessianVectorProduct(const std::vector<int> &units, const std::vector<RealT> &w, const std::vector<RealT> &v, bool toggle_use_loss, RealT log_base);
+
+    // for debugging: compares ComputeGradient() against finite differences of ComputeFunction()
+    void SanityCheckGradient(const std::vector<int> &units, const std::vector<RealT> &w);
+
+    // getters (all forward to the wrapped computation engine)
+    const Options &GetOptions() const { return computation_engine.GetOptions(); }
+    const std::vector<FileDescription> &GetDescriptions() const { return computation_engine.GetDescriptions(); }
+    InferenceEngine<RealT> &GetInferenceEngine() { return computation_engine.GetInferenceEngine(); }
+    ParameterManager<RealT> &GetParameterManager() { return computation_engine.GetParameterManager(); }
+    ComputationEngine<RealT> &GetComputationEngine() { return computation_engine; }
+};
+
+#include "ComputationWrapper.ipp"
+
+#endif
diff --git a/src/ComputationWrapper.ipp b/src/ComputationWrapper.ipp
new file mode 100644
index 0000000..9d2f210
--- /dev/null
+++ b/src/ComputationWrapper.ipp
@@ -0,0 +1,460 @@
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper.cpp
+//////////////////////////////////////////////////////////////////////
+
+#include "ComputationWrapper.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::ComputationWrapper()
+// ComputationWrapper::~ComputationWrapper()
+//
+// Constructor and destructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+ComputationWrapper<RealT>::ComputationWrapper(ComputationEngine<RealT> &computation_engine) :
+    computation_engine(computation_engine), cached_toggle_use_nonsmooth(false), cached_toggle_use_loss(false)
+{}   // toggles get defined values; the empty cached_function/cached_gradient still force the first evaluation
+
+template<class RealT>
+ComputationWrapper<RealT>::~ComputationWrapper()
+{}   // nothing to release: the engine is only referenced
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::GetAllUnits()
+//
+// Return a vector containing the index of every input file.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<int> ComputationWrapper<RealT>::GetAllUnits() const
+{
+    // unit k is simply the k-th file description: result is 0, 1, ..., size-1
+    const size_t num_units = GetDescriptions().size();
+    std::vector<int> all_units(num_units);
+    for (size_t idx = 0; idx < num_units; idx++) all_units[idx] = int(idx);
+    return all_units;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::ComputeSolutionNormBound()
+//
+// Return a bound on the norm for each batch gradient iteration, not
+// including regularization.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT ComputationWrapper<RealT>::ComputeSolutionNormBound(const std::vector<int> &units,
+                                                          const std::vector<RealT> &C,
+                                                          RealT log_base)
+{
+    Assert(computation_engine.IsMasterNode(), "Routine should only be called by master process.");
+
+    // memo of the most recent request; these are function-local statics,
+    // separate from the cached_* data members of the class
+    static std::vector<int> memo_units;
+    static std::vector<RealT> memo_C;
+    static RealT memo_bound = 0;
+
+    // same (units, C) as last time: reuse the stored bound
+    // NOTE(review): log_base is not part of the key -- confirm it never changes between calls
+    if (memo_units == units && memo_C == C) return memo_bound;
+
+    // shared part of the request
+    shared_info.command = COMPUTE_SOLUTION_NORM_BOUND;
+    shared_info.log_base = log_base;
+
+    const size_t num_units = units.size();
+    nonshared_info.resize(num_units);
+    for (size_t k = 0; k < num_units; k++) nonshared_info[k].index = units[k];
+
+    // run the request and turn its output into the bound
+    std::vector<RealT> entropy_plus_loss;
+    computation_engine.DistributeComputation(entropy_plus_loss, shared_info, nonshared_info);
+    const RealT smallest_C = Min(C) + RealT(1e-10);   // small offset keeps the divisor nonzero when Min(C) is 0
+    memo_bound = Sqrt(Sum(entropy_plus_loss) / smallest_C);
+    std::cerr << "Solution norm bound: " << memo_bound << std::endl;
+
+    // remember which request produced this bound
+    memo_units = units;
+    memo_C = C;
+    return memo_bound;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::ComputeGradientNormBound()
+//
+// Return a bound on the norm for each batch gradient iteration, not
+// including regularization.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT ComputationWrapper<RealT>::ComputeGradientNormBound(const std::vector<int> &units,
+                                                          const std::vector<RealT> &C,
+                                                          RealT log_base)
+{
+    Assert(computation_engine.IsMasterNode(), "Routine should only be called by master process.");
+
+    // request the max L1 norm of the feature vectors; an L1 norm is
+    // never smaller than the matching L2 norm, so the max L1 norm
+    // also bounds the max L2 norm
+    shared_info.command = COMPUTE_GRADIENT_NORM_BOUND;
+    shared_info.log_base = log_base;
+
+    const size_t num_units = units.size();
+    nonshared_info.resize(num_units);
+    for (size_t k = 0; k < num_units; k++) nonshared_info[k].index = units[k];
+
+    std::vector<RealT> max_feature_L1_norm;
+    computation_engine.DistributeComputation(max_feature_L1_norm, shared_info, nonshared_info);
+
+    // this call may issue a request of its own (overwriting shared_info
+    // and nonshared_info), so it is made once the request above is done
+    const RealT solution_bound = ComputeSolutionNormBound(units, C, log_base);
+    const RealT gradient_bound = Max(C) * solution_bound + RealT(2) * Sum(max_feature_L1_norm);
+    std::cerr << "Gradient norm bound: " << gradient_bound << std::endl;
+    return gradient_bound;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::FilterNonparsable()
+//
+// Filter a vector of units, removing any units whose supplied
+// structures are not parsable.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<int> ComputationWrapper<RealT>::FilterNonparsable(const std::vector<int> &units)
+{
+    Assert(computation_engine.IsMasterNode(), "Routine should only be called by master process.");
+
+    // ask the engine to check every unit named in the request
+    shared_info.command = CHECK_PARSABILITY;
+
+    const size_t num_units = units.size();
+    nonshared_info.resize(num_units);
+    for (size_t k = 0; k < num_units; k++) nonshared_info[k].index = units[k];
+
+    std::vector<RealT> parsable;
+    computation_engine.DistributeComputation(parsable, shared_info, nonshared_info);
+
+    // the result is looked up by unit id (not by position in "units");
+    // a zero entry marks a unit without a valid parse
+    std::vector<int> kept;
+    for (size_t k = 0; k < num_units; k++)
+    {
+        const int unit = units[k];
+        Assert(unit >= 0 && unit < int(parsable.size()), "Out-of-bounds index.");
+
+        if (!parsable[unit])
+        {
+            std::cerr << "No valid parse for file: " << GetDescriptions()[unit].input_filename << std::endl;
+            continue;
+        }
+        kept.push_back(unit);
+    }
+
+    return kept;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::ComputeLoss()
+//
+// Compute loss function for model over a fixed set of work units
+// using a particular setting of the parameters.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT ComputationWrapper<RealT>::ComputeLoss(const std::vector<int> &units,
+                                             const std::vector<RealT> &w,
+                                             RealT log_base)
+{
+    // Issues a COMPUTE_LOSS request for "units" with parameters "w"
+    // and returns the first element of the result.
+    Assert(computation_engine.IsMasterNode(), "Routine should only be called by master process.");
+    if (int(w.size()) > SHARED_PARAMETER_SIZE) Error("SHARED_PARAMETER_SIZE in Config.hpp too small; increase to at least %d.", int(w.size()));
+
+    // shared part of the request: command, parameters, loss switch
+    shared_info.command = COMPUTE_LOSS;
+    for (size_t i = 0; i < w.size(); i++) shared_info.w[i] = w[i];
+    shared_info.use_loss = true;
+    shared_info.log_base = log_base;
+
+    // per-unit part of the request: the index of each work unit
+    nonshared_info.resize(units.size());
+    for (size_t i = 0; i < units.size(); i++) nonshared_info[i].index = units[i];
+
+    // the loss is read from the first element of the result; check
+    // that it exists instead of indexing a possibly empty vector
+    std::vector<RealT> ret;
+    computation_engine.DistributeComputation(ret, shared_info, nonshared_info);
+    Assert(!ret.empty(), "Unexpected return value size.");
+    return ret[0];
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::ComputeFunction()
+//
+// Compute negative log-likelihood of the model over a fixed set
+// of work units using a particular setting of the parameters.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT ComputationWrapper<RealT>::ComputeFunction(const std::vector<int> &units,
+                                                 const std::vector<RealT> &w,
+                                                 bool toggle_use_nonsmooth,
+                                                 bool toggle_use_loss,
+                                                 RealT log_base)
+{
+#if STOCHASTIC_GRADIENT
+    Error("Should not get here.");
+#endif
+
+    Assert(computation_engine.IsMasterNode(), "Routine should only be called by master process.");
+    if (int(w.size()) > SHARED_PARAMETER_SIZE) Error("SHARED_PARAMETER_SIZE in Config.hpp too small; increase to at least %d.", int(w.size()));
+
+    // a cached value can be reused only when it was produced for this
+    // exact request and has not been discarded in the meantime
+    // NOTE(review): log_base is not part of the comparison -- confirm it is fixed per run
+    const bool cache_hit = (cached_units == units &&
+                            cached_w == w &&
+                            cached_toggle_use_nonsmooth == toggle_use_nonsmooth &&
+                            cached_toggle_use_loss == toggle_use_loss &&
+                            cached_function.size() != 0);
+    if (cache_hit) return cached_function[0];
+
+    // shared part of the request: command, parameters and switches
+    shared_info.command = COMPUTE_FUNCTION;
+    for (size_t k = 0; k < w.size(); k++) shared_info.w[k] = w[k];
+    shared_info.use_nonsmooth = toggle_use_nonsmooth;
+    shared_info.use_loss = toggle_use_loss;
+    shared_info.log_base = log_base;
+
+    // per-unit part of the request: the index of each work unit
+    const size_t num_units = units.size();
+    nonshared_info.resize(num_units);
+    for (size_t k = 0; k < num_units; k++) nonshared_info[k].index = units[k];
+
+    // the engine writes its result straight into the cache, where
+    // exactly one value is expected
+    computation_engine.DistributeComputation(cached_function, shared_info, nonshared_info);
+    Assert(cached_function.size() == 1, "Unexpected return value size.");
+
+    // record which request the cached value belongs to; a gradient
+    // stored for an earlier request no longer matches and is dropped
+    cached_units = units;
+    cached_w = w;
+    cached_toggle_use_nonsmooth = toggle_use_nonsmooth;
+    cached_toggle_use_loss = toggle_use_loss;
+    cached_gradient.clear();
+
+    return cached_function[0];
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::ComputeGradient()
+//
+// Compute gradient of the negative log-likelihood of the model
+// over a fixed set of work units using a particular setting of
+// the parameters.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<RealT> ComputationWrapper<RealT>::ComputeGradient(const std::vector<int> &units,
+                                                              const std::vector<RealT> &w,
+                                                              bool toggle_use_nonsmooth,
+                                                              bool toggle_use_loss,
+                                                              RealT log_base)
+{
+#if STOCHASTIC_GRADIENT
+    Error("Should not get here.");
+#endif
+
+    Assert(computation_engine.IsMasterNode(), "Routine should only be called by master process.");
+    if (int(w.size()) > SHARED_PARAMETER_SIZE) Error("SHARED_PARAMETER_SIZE in Config.hpp too small; increase to at least %d.", int(w.size()));
+
+    // a cached gradient can be reused only when it was produced for
+    // this exact request and has not been discarded in the meantime
+    // NOTE(review): log_base is not part of the comparison -- confirm it is fixed per run
+    const bool cache_hit = (cached_units == units &&
+                            cached_w == w &&
+                            cached_toggle_use_nonsmooth == toggle_use_nonsmooth &&
+                            cached_toggle_use_loss == toggle_use_loss &&
+                            cached_gradient.size() != 0);
+    if (cache_hit) return cached_gradient;
+
+    // shared part of the request: command, parameters and switches
+    shared_info.command = COMPUTE_GRADIENT;
+    for (size_t k = 0; k < w.size(); k++) shared_info.w[k] = w[k];
+    shared_info.use_nonsmooth = toggle_use_nonsmooth;
+    shared_info.use_loss = toggle_use_loss;
+    shared_info.log_base = log_base;
+
+    // per-unit part of the request: the index of each work unit
+    const size_t num_units = units.size();
+    nonshared_info.resize(num_units);
+    for (size_t k = 0; k < num_units; k++) nonshared_info[k].index = units[k];
+
+    // the engine writes its result straight into the cache; it holds
+    // one element more than there are logical parameters
+    computation_engine.DistributeComputation(cached_gradient, shared_info, nonshared_info);
+    Assert(cached_gradient.size() == GetParameterManager().GetNumLogicalParameters() + 1, "Unexpected return value size.");
+
+    // record which request the cached data belongs to
+    cached_units = units;
+    cached_w = w;
+    cached_toggle_use_nonsmooth = toggle_use_nonsmooth;
+    cached_toggle_use_loss = toggle_use_loss;
+    // the trailing element becomes the cached function value and is removed from the gradient
+    cached_function.clear();
+    cached_function.push_back(cached_gradient.back());
+    cached_gradient.pop_back();
+
+    return cached_gradient;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::ComputeHessianVectorProduct()
+//
+// Compute product of the Hessian with an arbitrary vector v.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<RealT> ComputationWrapper<RealT>::ComputeHessianVectorProduct(const std::vector<int> &units,
+                                                                          const std::vector<RealT> &w,
+                                                                          const std::vector<RealT> &v,
+                                                                          bool toggle_use_loss,
+                                                                          RealT log_base)
+{
+    // Issues a COMPUTE_HV request for "units" and returns its result
+    // with the final element removed.
+    //   w -- parameter vector, copied into shared_info.w
+    //   v -- vector to be multiplied by the Hessian, copied into shared_info.v
+    //   toggle_use_loss, log_base -- forwarded unchanged to the request
+    Assert(computation_engine.IsMasterNode(), "Routine should only be called by master process.");
+    if (int(w.size()) > SHARED_PARAMETER_SIZE) Error("SHARED_PARAMETER_SIZE in Config.hpp too small; increase to at least %d.", int(w.size()));
+    // v is subject to the same limit; the message must report v's own size (it used to print w.size())
+    if (int(v.size()) > SHARED_PARAMETER_SIZE) Error("SHARED_PARAMETER_SIZE in Config.hpp too small; increase to at least %d.", int(v.size()));
+    if (GetOptions().GetBoolValue("viterbi_parsing")) Error("Hessian-vector products should not be needed when using Viterbi parsing.");
+
+    // shared part of the request: command, both vectors and switches
+    shared_info.command = COMPUTE_HV;
+    for (size_t i = 0; i < w.size(); i++) shared_info.w[i] = w[i];
+    for (size_t i = 0; i < v.size(); i++) shared_info.v[i] = v[i];
+    shared_info.use_nonsmooth = false;
+    shared_info.use_loss = toggle_use_loss;
+    shared_info.log_base = log_base;
+
+    // per-unit part of the request: the index of each work unit
+    nonshared_info.resize(units.size());
+    for (size_t i = 0; i < units.size(); i++) nonshared_info[i].index = units[i];
+
+    // the result carries one element more than there are logical
+    // parameters; that trailing element is dropped before returning
+    std::vector<RealT> ret;
+    computation_engine.DistributeComputation(ret, shared_info, nonshared_info);
+    Assert(ret.size() == GetParameterManager().GetNumLogicalParameters() + 1, "Unexpected return value size.");
+    ret.pop_back();
+
+    return ret;
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::Predict()
+//
+// Run prediction algorithm on each of the work units.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationWrapper<RealT>::Predict(const std::vector<int> &units,
+                                        const std::vector<RealT> &w,
+                                        RealT gamma,
+                                        RealT log_base)
+{
+    Assert(computation_engine.IsMasterNode(), "Routine should only be called by master process.");
+    if (int(w.size()) > SHARED_PARAMETER_SIZE) Error("SHARED_PARAMETER_SIZE in Config.hpp too small; increase to at least %d.", int(w.size()));
+
+    // optional progress message
+    const bool verbose = GetOptions().GetBoolValue("verbose_output");
+    if (verbose) std::cerr << "Performing predictions with gamma=" << double(gamma) << "..." << std::endl;
+
+    // shared part of the request: command, parameters and gamma
+    shared_info.command = PREDICT;
+    for (size_t k = 0; k < w.size(); k++)
+    {
+        shared_info.w[k] = w[k];
+    }
+    shared_info.gamma = gamma;
+    shared_info.log_base = log_base;
+
+    // per-unit part of the request: the index of each work unit
+    const size_t num_units = units.size();
+    nonshared_info.resize(num_units);
+    for (size_t k = 0; k < num_units; k++) nonshared_info[k].index = units[k];
+
+    // the numerical result is discarded here; the predictions themselves
+    // are presumably written out while the request runs -- TODO confirm
+    std::vector<RealT> unused_result;
+    computation_engine.DistributeComputation(unused_result, shared_info, nonshared_info);
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputationWrapper::SanityCheckGradient()
+//
+// Perform sanity check for the gradient computation.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ComputationWrapper<RealT>::SanityCheckGradient(const std::vector<int> &units,
+                                                    const std::vector<RealT> &x)
+{
+    const int NUM_PER_GROUP = 5;   // reporting limit per parameter group
+    const int ATTEMPTS = 8;        // step sizes tried per parameter: 10^0 down to 10^-7
+
+    std::cerr << "Starting gradient sanity check..." << std::endl;
+
+    // analytic gradient and function value at x (use_nonsmooth off, use_loss on)
+    std::vector<RealT> g = ComputeGradient(units, x, false, true, GetOptions().GetRealValue("log_base"));
+    RealT f = ComputeFunction(units, x, false, true, GetOptions().GetRealValue("log_base"));
+    std::vector<RealT> xp = x;     // working copy, perturbed one coordinate at a time
+
+    // examine each parameter group in turn
+    const std::vector<ParameterGroup> &groups = GetParameterManager().GetParameterGroups();
+    for (size_t k = 0; k < groups.size(); k++)
+    {
+        const ParameterGroup &group = groups[k];
+        int budget = NUM_PER_GROUP;
+
+        std::cerr << "Performing sanity check for parameter group: " << group.name
+                  << " (indices " << group.begin << " to " << group.end << ", limit " << budget << ")" << std::endl;
+
+        for (int i = group.begin; budget && i < group.end; i++)
+        {
+            // forward-difference estimates of g[i], stopping at the first
+            // step size whose estimate agrees with the analytic value
+            // (estimates for untried step sizes keep their initial value of zero)
+            std::vector<RealT> gp(ATTEMPTS);
+            for (int j = 0; j < ATTEMPTS; j++)
+            {
+                RealT EPSILON = Pow(10.0, RealT(-j));
+                xp[i] += EPSILON;
+                gp[j] = (ComputeFunction(units, xp, false, true, GetOptions().GetRealValue("log_base")) - f) / EPSILON;
+                xp[i] = x[i];
+
+                const bool exact_match = (g[i] == gp[j]);
+                if (exact_match || Abs(g[i] - gp[j]) / (Abs(g[i]) + Abs(gp[j])) < 1e-5) break;
+            }
+
+            // nothing is reported (and the limit is not used up) when both
+            // the analytic gradient and the first estimate are exactly zero
+            if (g[i] == 0 && g[i] == gp[0]) continue;
+
+            std::cerr << std::setw(13) << i << std::setw(13) << g[i];
+            for (int j = 0; j < ATTEMPTS; j++) std::cerr << std::setw(13) << gp[j];
+            std::cerr << std::endl;
+            budget--;
+        }
+    }
+
+    std::cerr << "Gradient sanity check complete." << std::endl;
+}
diff --git a/src/Config.hpp b/src/Config.hpp
new file mode 100644
index 0000000..6cd45da
--- /dev/null
+++ b/src/Config.hpp
@@ -0,0 +1,239 @@
+//////////////////////////////////////////////////////////////////////
+// Config.hpp
+//
+// Global configuration file.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef CONFIG_HPP
+#define CONFIG_HPP
+
+#include <string>
+
+#define COMMENT 0
+
+//////////////////////////////////////////////////////////////////////
+// Miscellaneous options
+//////////////////////////////////////////////////////////////////////
+
+// upper bound on the number of logical parameters in the model; the
+// program will fail to operate properly if this value is set too low
+const int SHARED_PARAMETER_SIZE = 5000;
+
+//////////////////////////////////////////////////////////////////////
+// Options related to general inference
+//////////////////////////////////////////////////////////////////////
+
+// show timings for inference routines
+#define SHOW_TIMINGS 0
+
+// use straightforward calculation for FM2 matrix
+#define SIMPLE_FM2 0
+
+// use candidate list optimization for Viterbi parsing
+#define CANDIDATE_LIST 1
+
+// use unrolled computation for single branch loops
+#define FAST_SINGLE_BRANCH_LOOPS 1
+
+// use caching algorithm for fast helix length scores
+#define FAST_HELIX_LENGTHS 1
+
+//////////////////////////////////////////////////////////////////////
+// Options related to training mode configuration
+//////////////////////////////////////////////////////////////////////
+
+#define STOCHASTIC_GRADIENT 0 // when nonzero, ComputationWrapper::ComputeFunction() and ComputeGradient() call Error("Should not get here.")
+
+//////////////////////////////////////////////////////////////////////
+// (A) Options related to max-margin training
+//////////////////////////////////////////////////////////////////////
+
+// the maximum loss DELTA(y,y') allocated to each training example; if
+// this symbol is undefined, then the DELTA(y,y') loss function is not
+// included
+// -- for a straight CRF, this value should be undefined
+// -- for a max-margin model, this value should be set to 1
+// #define HAMMING_LOSS 1
+
+// multiplier used in the iterative convex-concave procedure (CCCP) for
+// improving the solution of a max-margin model
+// -- for a regular max-margin model, this value should be set to 0
+// -- for a standard nonconvex model, this value should be set to 1
+const double NONCONVEX_MULTIPLIER = 0.0;
+
+// number of steps of CCCP; there is no need to change this to 1 in the
+// case that NONCONVEX_MULTIPLIER == 0, as the code will detect this
+// and abort after the first CCCP iteration by default
+const int NUM_CCCP_STEPS = 5;
+
+// use smooth approximation of max-margin algorithm for inference
+// during training
+#define SMOOTH_MAX_MARGIN 0
+
+//////////////////////////////////////////////////////////////////////
+// (B) Regularization type
+//////////////////////////////////////////////////////////////////////
+
+#define SINGLE_HYPERPARAMETER 0
+#define MULTIPLE_HYPERPARAMETERS 1
+#define ARD_HYPERPARAMETERS 0
+
+//////////////////////////////////////////////////////////////////////
+// (C) Options related to regularization hyperparameter estimation
+//////////////////////////////////////////////////////////////////////
+
+// Three possible modes:
+// -- holdout cross-validation via grid search
+// -- holdout cross-validation via gradient-based optimization
+// -- majorization-minimization
+
+#define HYPERPARAMETER_GRID_SEARCH 0
+#define HYPERPARAMETER_GRADIENT_OPTIMIZATION 1
+#define HYPERPARAMETER_MAJORIZATION_MINIMIZATION 0
+
+//////////////////////////////////////////////////////////////////////
+// (C1) Grid-search options
+//////////////////////////////////////////////////////////////////////
+
+// use logloss instead of regular holdout loss for holdout cross-validation
+#define CROSS_VALIDATE_USING_LOGLOSS 1
+
+//////////////////////////////////////////////////////////////////////
+// (C2) Gradient-based optimization options
+//////////////////////////////////////////////////////////////////////
+
+// starting regularization parameter
+const double INITIAL_LOG_C = 5.0;
+
+//////////////////////////////////////////////////////////////////////
+// (C3) Majorization-minimization options
+//////////////////////////////////////////////////////////////////////
+
+// number of iterative relinearization steps when using the
+// majorization-minimization algorithm
+const int NUM_ITERATIVE_RELINEARIZATION_STEPS = 5;
+
+// smoothing used for majorization-minimization algorithm
+const double MM_SMOOTHING = 1.0;
+
+//////////////////////////////////////////////////////////////////////
+// (D) Input type
+//////////////////////////////////////////////////////////////////////
+
+#define PROFILE 0
+
+//////////////////////////////////////////////////////////////////////
+// (E) Used parameter groups
+//////////////////////////////////////////////////////////////////////
+
+/*
+#define PARAMS_BASE_PAIR 1
+#define PARAMS_BASE_PAIR_DIST 1
+#define PARAMS_TERMINAL_MISMATCH 1
+#define PARAMS_HAIRPIN_LENGTH 1
+#define PARAMS_HAIRPIN_3_NUCLEOTIDES 1
+#define PARAMS_HAIRPIN_4_NUCLEOTIDES 1
+#define PARAMS_HELIX_LENGTH 1
+#define PARAMS_ISOLATED_BASE_PAIR 1
+#define PARAMS_INTERNAL_EXPLICIT 1
+#define PARAMS_BULGE_LENGTH 1
+#define PARAMS_INTERNAL_LENGTH 1
+#define PARAMS_INTERNAL_SYMMETRY 1
+#define PARAMS_INTERNAL_ASYMMETRY 1
+#define PARAMS_BULGE_0x1_NUCLEOTIDES 1
+#define PARAMS_BULGE_0x2_NUCLEOTIDES 1
+#define PARAMS_BULGE_0x3_NUCLEOTIDES 1
+#define PARAMS_INTERNAL_1x1_NUCLEOTIDES 1
+#define PARAMS_INTERNAL_1x2_NUCLEOTIDES 1
+#define PARAMS_INTERNAL_2x2_NUCLEOTIDES 1
+#define PARAMS_HELIX_STACKING 1
+#define PARAMS_HELIX_CLOSING 1
+#define PARAMS_MULTI_LENGTH 1
+#define PARAMS_DANGLE 1
+#define PARAMS_EXTERNAL_LENGTH 1
+*/
+
+#define PARAMS_BASE_PAIR 1
+#define PARAMS_BASE_PAIR_DIST 0
+#define PARAMS_TERMINAL_MISMATCH 1
+#define PARAMS_HAIRPIN_LENGTH 1
+#define PARAMS_HAIRPIN_3_NUCLEOTIDES 0
+#define PARAMS_HAIRPIN_4_NUCLEOTIDES 0
+#define PARAMS_HELIX_LENGTH 0
+#define PARAMS_ISOLATED_BASE_PAIR 0
+#define PARAMS_INTERNAL_EXPLICIT 1
+#define PARAMS_BULGE_LENGTH 1
+#define PARAMS_INTERNAL_LENGTH 1
+#define PARAMS_INTERNAL_SYMMETRY 1
+#define PARAMS_INTERNAL_ASYMMETRY 1
+#define PARAMS_BULGE_0x1_NUCLEOTIDES 1
+#define PARAMS_BULGE_0x2_NUCLEOTIDES 0
+#define PARAMS_BULGE_0x3_NUCLEOTIDES 0
+#define PARAMS_INTERNAL_1x1_NUCLEOTIDES 1
+#define PARAMS_INTERNAL_1x2_NUCLEOTIDES 0
+#define PARAMS_INTERNAL_2x2_NUCLEOTIDES 0
+#define PARAMS_HELIX_STACKING 1
+#define PARAMS_HELIX_CLOSING 1
+#define PARAMS_MULTI_LENGTH 1
+#define PARAMS_DANGLE 1
+#define PARAMS_EXTERNAL_LENGTH 1
+
+/*
+#define PARAMS_BASE_PAIR 1
+#define PARAMS_BASE_PAIR_DIST 0
+#define PARAMS_TERMINAL_MISMATCH 0
+#define PARAMS_HAIRPIN_LENGTH 0
+#define PARAMS_HAIRPIN_3_NUCLEOTIDES 0
+#define PARAMS_HAIRPIN_4_NUCLEOTIDES 0
+#define PARAMS_HELIX_LENGTH 0
+#define PARAMS_ISOLATED_BASE_PAIR 0
+#define PARAMS_INTERNAL_EXPLICIT 0
+#define PARAMS_BULGE_LENGTH 0
+#define PARAMS_INTERNAL_LENGTH 0
+#define PARAMS_INTERNAL_SYMMETRY 0
+#define PARAMS_INTERNAL_ASYMMETRY 0
+#define PARAMS_BULGE_0x1_NUCLEOTIDES 0
+#define PARAMS_BULGE_0x2_NUCLEOTIDES 0
+#define PARAMS_BULGE_0x3_NUCLEOTIDES 0
+#define PARAMS_INTERNAL_1x1_NUCLEOTIDES 0
+#define PARAMS_INTERNAL_1x2_NUCLEOTIDES 0
+#define PARAMS_INTERNAL_2x2_NUCLEOTIDES 0
+#define PARAMS_HELIX_STACKING 0
+#define PARAMS_HELIX_CLOSING 0
+#define PARAMS_MULTI_LENGTH 0
+#define PARAMS_DANGLE 0
+#define PARAMS_EXTERNAL_LENGTH 0
+*/
+
+//////////////////////////////////////////////////////////////////////
+// (F) Miscellaneous model constants
+//////////////////////////////////////////////////////////////////////
+
+const int C_MIN_HAIRPIN_LENGTH = 0;
+const int C_MAX_SINGLE_LENGTH = 30;
+
+const int D_MAX_HAIRPIN_LENGTH = 30;
+const int D_MAX_BP_DIST_THRESHOLDS = 10;
+const int D_MAX_BULGE_LENGTH = 30;
+const int D_MAX_INTERNAL_LENGTH = 30;
+const int D_MAX_INTERNAL_SYMMETRIC_LENGTH = 15;
+const int D_MAX_INTERNAL_ASYMMETRY = 28;
+const int D_MAX_INTERNAL_EXPLICIT_LENGTH = 4;
+const int D_MAX_HELIX_LENGTH = 30;
+
+const int BP_DIST_LAST_THRESHOLD = 132; // final (largest) entry of BP_DIST_THRESHOLDS below
+const int BP_DIST_THRESHOLDS[D_MAX_BP_DIST_THRESHOLDS] = { 3, 9, 12, 16, 21, 26, 34, 47, 71, BP_DIST_LAST_THRESHOLD }; // increasing values; initializer count matches D_MAX_BP_DIST_THRESHOLDS (10)
+
+const std::string alphabet = "ACGU"; // allowed symbols -- all other letters ignored
+const int M = 4; // number of alphabet symbols; keep equal to alphabet.size()
+
+const int MAX_DIMENSIONS = 4; // NOTE(review): meaning not evident from this header -- confirm against its users
+
+/////////////////////////////////////////////////////////////////////
+// (G) BMRM stuff
+//////////////////////////////////////////////////////////////////////
+
+#define BMRM_AVAILABLE 0
+// #define DAIFLETCHER
+
+#endif
diff --git a/src/Contrafold.cpp b/src/Contrafold.cpp
new file mode 100644
index 0000000..376f324
--- /dev/null
+++ b/src/Contrafold.cpp
@@ -0,0 +1,553 @@
+/////////////////////////////////////////////////////////////////
+// Contrafold.cpp
+/////////////////////////////////////////////////////////////////
+
+// include files
+#ifdef MULTI
+#include <mpi.h>
+#endif
+#include "Config.hpp"
+#include "Options.hpp"
+#include "Utilities.hpp"
+#include "ComputationWrapper.hpp"
+#include "FileDescription.hpp"
+#include "InferenceEngine.hpp"
+#include "ParameterManager.hpp"
+#include "OptimizationWrapper.hpp"
+
+// constants
+const double GAMMA_DEFAULT = 6;
+const double REGULARIZATION_DEFAULT = 0;
+
+// function prototypes
+void Usage(const Options &options);
+void Version();
+void ParseArguments(int argc, char **argv, Options &options, std::vector<std::string> &filenames);
+void MakeFileDescriptions(const Options &options, const std::vector<std::string> &filenames, std::vector<FileDescription> &descriptions);
+
+template<class RealT>
+void RunGradientSanityCheck(const Options &options, const std::vector<FileDescription> &descriptions);
+
+template<class RealT>
+void RunTrainingMode(const Options &options, const std::vector<FileDescription> &descriptions);
+
+template<class RealT>
+void RunPredictionMode(const Options &options, const std::vector<FileDescription> &descriptions);
+
+// default parameters
+#include "Defaults.ipp"
+
+/////////////////////////////////////////////////////////////////
+// main()
+//
+// Program entry point.  Parses the command line, builds one
+// FileDescription per input file, and then dispatches to exactly
+// one run mode: gradient sanity check, training, or prediction.
+// When compiled with MULTI, MPI is initialized before anything
+// else and finalized after the selected mode returns.
+/////////////////////////////////////////////////////////////////
+
+int main(int argc, char **argv)
+{
+#ifdef MULTI
+    MPI_Init(&argc, &argv);
+#endif
+
+    // collect options and input filenames from the command line
+    Options options;
+    std::vector<std::string> filenames;
+    ParseArguments(argc, argv, options, filenames);
+
+    // build the (sorted) list of input file descriptions
+    std::vector<FileDescription> descriptions;
+    MakeFileDescriptions(options, filenames, descriptions);
+
+    // The sanity-check flag takes precedence over the training flag;
+    // prediction is the fallback.  The sanity check and training are
+    // instantiated with double, prediction with float.
+    if (!options.GetBoolValue("gradient_sanity_check"))
+    {
+        if (options.GetBoolValue("training_mode"))
+            RunTrainingMode<double>(options, descriptions);
+        else
+            RunPredictionMode<float>(options, descriptions);
+    }
+    else
+    {
+        RunGradientSanityCheck<double>(options, descriptions);
+    }
+
+#ifdef MULTI
+    MPI_Finalize();
+#endif
+
+    return 0;
+}
+
+/////////////////////////////////////////////////////////////////
+// Usage()
+//
+// Print the command-line help text to stderr and terminate the
+// process with exit status 0.  This function never returns.
+//
+// options: only the "gamma" real value is read, so that the help
+//          text shows the default GAMMA currently registered.
+//
+// ParseArguments() calls this when no mode argument is given.
+/////////////////////////////////////////////////////////////////
+
+void Usage(const Options &options)
+{
+    std::cerr << std::endl
+              << "Usage: contrafold [predict|train] [OPTION]... INFILE(s)" << std::endl
+              << std::endl
+              << " where [OPTION]... is a list of zero or more optional arguments" << std::endl
+              << " INFILE(s) is the name of the input BPSEQ, plain text, or FASTA file(s)" << std::endl
+              << std::endl
+              << "Miscellaneous arguments:" << std::endl
+              << " --version display program version information" << std::endl
+              << " --verbose show detailed console output" << std::endl
+              << " --logbase LOG_BASE set base of log-sum-exp" << std::endl
+              << " --viterbi use Viterbi instead of posterior decoding for prediction, " << std::endl
+              << " or max-margin instead of log-likelihood for training" << std::endl
+              << " --noncomplementary allow non-{AU,CG,GU} pairs" << std::endl
+              << std::endl
+              << "Additional arguments for 'predict' mode:" << std::endl
+              << " --params FILENAME use particular model parameters" << std::endl
+              << " --constraints use existing constraints (requires BPSEQ or FASTA format input)" << std::endl
+              // typo fix: the help text previously read "sensivity"
+              << " --gamma GAMMA set sensitivity/specificity tradeoff parameter (default: GAMMA=" << options.GetRealValue("gamma") << ")" << std::endl
+              << " if GAMMA > 1, emphasize sensitivity" << std::endl
+              << " if 0 <= GAMMA <= 1, emphasize specificity" << std::endl
+              << " if GAMMA < 0, try tradeoff parameters of 2^k for k = -5,...,10" << std::endl
+              << std::endl
+              << " --parens OUTFILEORDIR write parenthesized output to file or directory" << std::endl
+              << " --bpseq OUTFILEORDIR write BPSEQ output to file or directory" << std::endl
+              << " --posteriors CUTOFF OUTFILEORDIR" << std::endl
+              << " write posterior pairing probabilities to file or directory" << std::endl
+              << " --partition compute the partition function or Viterbi score only" << std::endl
+              << std::endl
+              << "Additional arguments for training (many input files may be specified):" << std::endl
+              << " --sanity perform gradient sanity check" << std::endl
+              << " --holdout F use fraction F of training data for holdout cross-validation" << std::endl
+              << " --regularize C perform BFGS training, using a single regularization coefficient C" << std::endl
+              << std::endl;
+    exit(0);
+}
+
+/////////////////////////////////////////////////////////////////
+// Version()
+//
+// Print the program name, version and author to stderr, then
+// terminate the process with exit status 0 (never returns).
+// The PROFILE build prints the multiple-sequence banner.
+//
+// The version number is kept in step with the release being
+// imported (2.02); the banner previously still said 2.01.
+/////////////////////////////////////////////////////////////////
+
+void Version()
+{
+    // pick the banner for this build flavour
+#if PROFILE
+    const char *const banner = "CONTRAFold(m) version 2.02 - Multiple sequence RNA secondary structure prediction";
+#else
+    const char *const banner = "CONTRAFold version 2.02 - RNA secondary structure prediction";
+#endif
+
+    std::cerr << banner << std::endl << std::endl
+              << "Written by Chuong B. Do" << std::endl;
+    exit(0);
+}
+
+/////////////////////////////////////////////////////////////////
+// ParseArguments()
+//
+// Parse command line parameters.
+//
+// argc, argv: the command line as received by main().  argv[1]
+//             must be the mode word "predict" or "train";
+//             everything from argv[2] on is either an option
+//             (starts with '-') or an input filename.
+// options:    receives a default for every known option, then
+//             the values given on the command line.
+// filenames:  cleared, then filled with the non-option arguments
+//             in command-line order.
+//
+// With no arguments at all, Usage() is called (which exits).
+// Any invalid or inconsistent input is reported through Error().
+// NOTE(review): the code assumes Error() does not return (e.g.
+// argv[++argno] is read right after a bounds check that only
+// calls Error()) -- confirm in Utilities.
+/////////////////////////////////////////////////////////////////
+
+void ParseArguments(int argc,
+ char **argv,
+ Options &options,
+ std::vector<std::string> &filenames)
+{
+ // register default options
+ options.SetBoolValue("training_mode", false);
+
+ options.SetBoolValue("verbose_output", false);
+ options.SetRealValue("log_base", 1.0);
+ options.SetBoolValue("viterbi_parsing", false);
+ options.SetBoolValue("allow_noncomplementary", false);
+
+ options.SetStringValue("parameter_filename", "");
+ options.SetBoolValue("use_constraints", false);
+ options.SetRealValue("gamma", GAMMA_DEFAULT);
+ options.SetStringValue("output_parens_destination", "");
+ options.SetStringValue("output_bpseq_destination", "");
+ options.SetRealValue("output_posteriors_cutoff", 0);
+ options.SetStringValue("output_posteriors_destination", "");
+ options.SetBoolValue("partition_function_only", false);
+
+ options.SetBoolValue("gradient_sanity_check", false);
+ options.SetRealValue("holdout_ratio", 0);
+ options.SetRealValue("regularization_coefficient", REGULARIZATION_DEFAULT);
+
+ // check for sufficient arguments
+ // (defaults are registered first so Usage() can print the default gamma)
+ if (argc < 2) Usage(options);
+ filenames.clear();
+
+ // check for prediction or training mode
+ // (argv[1] must be a mode word, so options such as --version are
+ // only recognized when they follow "predict" or "train")
+ if (!strcmp(argv[1], "train"))
+ options.SetBoolValue("training_mode", true);
+ else
+ if (strcmp(argv[1], "predict"))
+ Error("CONTRAfold must be run in either 'predict' or 'train' mode.");
+
+ // go through remaining arguments
+ for (int argno = 2; argno < argc; argno++)
+ {
+ // parse optional arguments
+ if (argv[argno][0] == '-')
+ {
+ // miscellaneous options
+ if (!strcmp(argv[argno], "--version"))
+ {
+ Version();
+ }
+ else if (!strcmp(argv[argno], "--verbose"))
+ {
+ options.SetBoolValue("verbose_output", true);
+ }
+ else if (!strcmp(argv[argno], "--logbase"))
+ {
+ // options taking a value first check that one more argument
+ // exists, then consume it with ++argno
+ if (argno == argc - 1) Error("Must specify log base LOG_BASE after --logbase.");
+ double value;
+ if (!ConvertToNumber(argv[++argno], value))
+ Error("Unable to parse log base.");
+ if (value <= 0)
+ Error("Log base must be positive.");
+ options.SetRealValue("log_base", value);
+ }
+ else if (!strcmp(argv[argno], "--viterbi"))
+ {
+ options.SetBoolValue("viterbi_parsing", true);
+ }
+ else if (!strcmp(argv[argno], "--noncomplementary"))
+ {
+ options.SetBoolValue("allow_noncomplementary", true);
+ }
+
+ // prediction options
+ else if (!strcmp(argv[argno], "--params"))
+ {
+ if (argno == argc - 1) Error("Must specify FILENAME after --params.");
+ options.SetStringValue("parameter_filename", argv[++argno]);
+ }
+ else if (!strcmp(argv[argno], "--constraints"))
+ {
+ options.SetBoolValue("use_constraints", true);
+ }
+ else if (!strcmp(argv[argno], "--gamma"))
+ {
+ // no range check here: a negative gamma is meaningful (it
+ // selects the 2^k sweep in prediction mode)
+ if (argno == argc - 1) Error("Must specify trade-off parameter GAMMA after --gamma.");
+ double value;
+ if (!ConvertToNumber(argv[++argno], value))
+ Error("Unable to parse value after --gamma.");
+ options.SetRealValue("gamma", value);
+ }
+ else if (!strcmp(argv[argno], "--parens"))
+ {
+ if (argno == argc - 1) Error("Must specify output file or directory name after --parens.");
+ options.SetStringValue("output_parens_destination", argv[++argno]);
+ }
+ else if (!strcmp(argv[argno], "--bpseq"))
+ {
+ if (argno == argc - 1) Error("Must specify output file or directory name after --bpseq.");
+ options.SetStringValue("output_bpseq_destination", argv[++argno]);
+ }
+ else if (!strcmp(argv[argno], "--posteriors"))
+ {
+ // --posteriors consumes two values: CUTOFF, then the destination;
+ // the bounds check is repeated before each one
+ if (argno == argc - 1) Error("Must specify posterior probability threshold CUTOFF after --posteriors.");
+ double value;
+ if (!ConvertToNumber(argv[++argno], value))
+ Error("Unable to parse cutoff value after --posteriors.");
+ options.SetRealValue("output_posteriors_cutoff", value);
+ if (argno == argc - 1) Error("Must specify output file or directory for --posteriors.");
+ options.SetStringValue("output_posteriors_destination", argv[++argno]);
+ }
+ else if (!strcmp(argv[argno], "--partition"))
+ {
+ options.SetBoolValue("partition_function_only", true);
+ }
+
+ // training options
+ else if (!strcmp(argv[argno], "--sanity"))
+ {
+ options.SetBoolValue("gradient_sanity_check", true);
+ }
+ else if (!strcmp(argv[argno], "--holdout"))
+ {
+ if (argno == argc - 1) Error("Must specify holdout ratio F after --holdout.");
+ double value;
+ if (!ConvertToNumber(argv[++argno], value))
+ Error("Unable to parse holdout ratio.");
+ if (value < 0 || value > 1)
+ Error("Holdout ratio must be between 0 and 1.");
+ options.SetRealValue("holdout_ratio", value);
+ }
+ else if (!strcmp(argv[argno], "--regularize"))
+ {
+ if (argno == argc - 1) Error("Must specify regularization parameter C after --regularize.");
+ double value;
+ if (!ConvertToNumber(argv[++argno], value))
+ Error("Unable to parse regularization parameter after --regularize.");
+ if (value < 0)
+ Error("Regularization parameter should not be negative.");
+ options.SetRealValue("regularization_coefficient", value);
+ }
+ else
+ {
+ Error("Unknown option \"%s\" specified. Run program without any arguments to see command-line options.", argv[argno]);
+ }
+
+ }
+ else
+ {
+ // anything not starting with '-' is an input filename
+ filenames.push_back(argv[argno]);
+ }
+ }
+
+ // ensure that at least one input file specified
+ if (filenames.size() == 0)
+ Error("No filenames specified.");
+
+ // check to make sure that arguments make sense
+ // (prediction-only options are detected by comparing against their
+ // defaults, so explicitly passing a default value, e.g. "--gamma 6",
+ // is not flagged in training mode)
+ if (options.GetBoolValue("training_mode"))
+ {
+ if (options.GetStringValue("parameter_filename") != "")
+ Error("Should not specify parameter file for training mode.");
+ if (options.GetBoolValue("use_constraints"))
+ Error("The --constraints flag has no effect in training mode.");
+ if (options.GetRealValue("gamma") != GAMMA_DEFAULT)
+ Error("Gamma parameter should not be specified in training mode.");
+ if (options.GetStringValue("output_parens_destination") != "")
+ Error("The --parens option cannot be used in training mode.");
+ if (options.GetStringValue("output_bpseq_destination") != "")
+ Error("The --bpseq option cannot be used in training mode.");
+ if (options.GetStringValue("output_posteriors_destination") != "" ||
+ options.GetRealValue("output_posteriors_cutoff") != 0)
+ Error("The --posteriors option cannot be used in training mode.");
+ if (options.GetBoolValue("partition_function_only"))
+ Error("The --partition flag cannot be used in training mode.");
+ if (options.GetRealValue("regularization_coefficient") != REGULARIZATION_DEFAULT &&
+ options.GetRealValue("holdout_ratio") > 0)
+ Error("The --holdout and --regularize options cannot be specified simultaneously.");
+ }
+ else
+ {
+ // prediction mode; note that the training-only options
+ // (--sanity, --holdout, --regularize) are not rejected here
+ if (options.GetRealValue("gamma") < 0 &&
+ options.GetStringValue("output_parens_destination") == "" &&
+ options.GetStringValue("output_bpseq_destination") == "" &&
+ options.GetStringValue("output_posteriors_destination") == "")
+ Error("Output directory must be specified when using GAMMA < 0.");
+
+#ifdef MULTI
+ // this multi-file check is only compiled into the MULTI build
+ if (filenames.size() > 1 &&
+ options.GetStringValue("output_parens_destination") == "" &&
+ options.GetStringValue("output_bpseq_destination") == "" &&
+ options.GetStringValue("output_posteriors_destination") == "")
+ Error("Output directory must be specified when performing predictions for multiple input files.");
+#endif
+ if (options.GetBoolValue("viterbi_parsing") &&
+ options.GetStringValue("output_posteriors_destination") != "")
+ Error("The --posteriors option cannot be used with Viterbi parsing.");
+ }
+}
+
+/////////////////////////////////////////////////////////////////
+// MakeFileDescriptions()
+//
+// Replace the contents of descriptions with one FileDescription
+// per entry of filenames, each constructed with the current
+// "allow_noncomplementary" option, and then sort the result with
+// std::sort (i.e. by FileDescription's own operator<).
+/////////////////////////////////////////////////////////////////
+
+void MakeFileDescriptions(const Options &options,
+                          const std::vector<std::string> &filenames,
+                          std::vector<FileDescription> &descriptions)
+{
+    descriptions.clear();
+
+    typedef std::vector<std::string>::const_iterator FilenameIter;
+    for (FilenameIter name = filenames.begin(); name != filenames.end(); ++name)
+    {
+        descriptions.push_back(FileDescription(*name,
+                                               options.GetBoolValue("allow_noncomplementary")));
+    }
+
+    std::sort(descriptions.begin(), descriptions.end());
+}
+
+/////////////////////////////////////////////////////////////////
+// RunGradientSanityCheck()
+//
+// Build the computation stack for the given inputs and, on the
+// master node, run the gradient sanity check over all work units
+// starting from an all-zero parameter vector.  Compute nodes
+// instead serve work requests via RunAsComputeNode().
+/////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void RunGradientSanityCheck(const Options &options,
+                            const std::vector<FileDescription> &descriptions)
+{
+    // How the pieces constructed below fit together:
+    //
+    //   ParameterManager    gives every model parameter a name and
+    //                       keeps track of hyperparameter groups
+    //
+    //   InferenceEngine     does the application-specific
+    //                       (loss-augmented) inference
+    //
+    //   ComputationEngine   issues the dynamic programming calls
+    //                       needed for individual sequences and
+    //                       talks to the distributed computation
+    //                       module
+    //
+    //   ComputationWrapper  high-level interface for running
+    //                       computations on groups of sequences
+    //
+    //   OuterOptimizationWrapper / InnerOptimizationWrapper
+    //                       glue between the computation routines
+    //                       and the optimization routines
+
+    ParameterManager<RealT> params;
+    InferenceEngine<RealT> engine(options.GetBoolValue("allow_noncomplementary"));
+    engine.RegisterParameters(params);
+    ComputationEngine<RealT> compute(options, descriptions, engine, params);
+    ComputationWrapper<RealT> wrapper(compute);
+
+    if (!compute.IsComputeNode())
+    {
+        // master node: check the gradient at w = 0, then release the workers
+        std::vector<RealT> weights(params.GetNumLogicalParameters(), RealT(0));
+        wrapper.SanityCheckGradient(wrapper.GetAllUnits(), weights);
+        compute.StopComputeNodes();
+    }
+    else
+    {
+        // compute node: process work requests until told to stop
+        compute.RunAsComputeNode();
+    }
+}
+
+/////////////////////////////////////////////////////////////////
+// RunTrainingMode()
+//
+// Run CONTRAfold in training mode.  On the master node the
+// parameters are trained from an all-zero start on the units that
+// survive FilterNonparsable(), either with one fixed
+// regularization coefficient for every parameter group or, when a
+// holdout ratio was given, via LearnHyperparameters().  The
+// result is written to "optimize.params.final".  Compute nodes
+// instead serve work requests via RunAsComputeNode().
+/////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void RunTrainingMode(const Options &options,
+                     const std::vector<FileDescription> &descriptions)
+{
+    ParameterManager<RealT> params;
+    InferenceEngine<RealT> engine(options.GetBoolValue("allow_noncomplementary"));
+    engine.RegisterParameters(params);
+    ComputationEngine<RealT> compute(options, descriptions, engine, params);
+    ComputationWrapper<RealT> wrapper(compute);
+
+    if (!compute.IsComputeNode())
+    {
+        // master node: set up the optimization problem
+        std::vector<RealT> weights(params.GetNumLogicalParameters(), RealT(0));
+        std::vector<int> units = wrapper.FilterNonparsable(wrapper.GetAllUnits());
+        OptimizationWrapper<RealT> optimizer(wrapper);
+
+        // Written as the negation of "<= 0" (rather than "> 0") so that
+        // every possible value takes the same branch as before.
+        const bool use_holdout = !(options.GetRealValue("holdout_ratio") <= 0);
+
+        if (use_holdout)
+        {
+            // cross-validation picks the regularization parameters
+            optimizer.LearnHyperparameters(units, weights);
+        }
+        else
+        {
+            // one fixed coefficient shared by all parameter groups
+            std::vector<RealT> regularization_coefficients(params.GetNumParameterGroups(), options.GetRealValue("regularization_coefficient"));
+            optimizer.Train(units, weights, regularization_coefficients);
+        }
+
+        params.WriteToFile("optimize.params.final", weights);
+        compute.StopComputeNodes();
+    }
+    else
+    {
+        // compute node: process work requests until told to stop
+        compute.RunAsComputeNode();
+    }
+}
+
+/////////////////////////////////////////////////////////////////
+// RunPredictionMode()
+//
+// Run CONTRAfold in prediction mode.  On the master node the
+// model parameters are read from the --params file if one was
+// given, otherwise the compiled-in defaults are used.  With
+// gamma >= 0 a single prediction pass is made; with gamma < 0 a
+// pass is made for each gamma = 2^k, k = -5..10.  Output
+// directories are created as needed beforehand.  Compute nodes
+// instead serve work requests via RunAsComputeNode().
+/////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void RunPredictionMode(const Options &options,
+                       const std::vector<FileDescription> &descriptions)
+{
+    ParameterManager<RealT> parameter_manager;
+    InferenceEngine<RealT> inference_engine(options.GetBoolValue("allow_noncomplementary"));
+    inference_engine.RegisterParameters(parameter_manager);
+    ComputationEngine<RealT> computation_engine(options, descriptions, inference_engine, parameter_manager);
+    ComputationWrapper<RealT> computation_wrapper(computation_engine);
+
+    // compute nodes only serve work requests
+    if (computation_engine.IsComputeNode())
+    {
+        computation_engine.RunAsComputeNode();
+        return;
+    }
+
+    // the three optional output destinations; the table fixes the
+    // processing order (parens, bpseq, posteriors)
+    const std::string output_parens_destination = options.GetStringValue("output_parens_destination");
+    const std::string output_bpseq_destination = options.GetStringValue("output_bpseq_destination");
+    const std::string output_posteriors_destination = options.GetStringValue("output_posteriors_destination");
+    const std::string *const destinations[] =
+    {
+        &output_parens_destination,
+        &output_bpseq_destination,
+        &output_posteriors_destination
+    };
+    const size_t num_destinations = sizeof(destinations) / sizeof(destinations[0]);
+
+    // load parameters: compiled-in defaults unless a file was given
+    std::vector<RealT> w;
+
+    if (options.GetStringValue("parameter_filename") == "")
+    {
+#if PROFILE
+        w = GetDefaultProfileValues<RealT>();
+#else
+        if (options.GetBoolValue("allow_noncomplementary"))
+            w = GetDefaultNoncomplementaryValues<RealT>();
+        else
+            w = GetDefaultComplementaryValues<RealT>();
+#endif
+    }
+    else
+    {
+        parameter_manager.ReadFromFile(options.GetStringValue("parameter_filename"), w);
+    }
+
+    // a negative gamma requests a sweep over several gamma values
+    const bool sweep_gamma = options.GetRealValue("gamma") < 0;
+
+    // top-level output directories: always for a sweep, otherwise
+    // only when there is more than one input
+    if (sweep_gamma || descriptions.size() > 1)
+    {
+        for (size_t i = 0; i < num_destinations; i++)
+        {
+            if (!destinations[i]->empty()) MakeDirectory(*destinations[i]);
+        }
+    }
+
+    if (!sweep_gamma)
+    {
+        computation_wrapper.Predict(computation_wrapper.GetAllUnits(), w, options.GetRealValue("gamma"), options.GetRealValue("log_base"));
+    }
+    else
+    {
+        // try gamma = 2^k for k = -5,...,10
+        for (int k = -5; k <= 10; k++)
+        {
+            const double gamma = Pow(2.0, double(k));
+
+            // with several inputs, each gamma gets its own subdirectory
+            // under every requested destination
+            if (descriptions.size() > 1)
+            {
+                for (size_t i = 0; i < num_destinations; i++)
+                {
+                    const std::string &destination = *destinations[i];
+                    if (destination.empty()) continue;
+
+                    MakeDirectory(SPrintF("%s%c%s.gamma=%lf",
+                                          destination.c_str(),
+                                          DIR_SEPARATOR_CHAR,
+                                          GetBaseName(destination).c_str(), gamma));
+                }
+            }
+
+            // perform predictions for this gamma
+            computation_wrapper.Predict(computation_wrapper.GetAllUnits(), w, gamma, options.GetRealValue("log_base"));
+        }
+    }
+
+    computation_engine.StopComputeNodes();
+}
diff --git a/src/DistributedComputation.hpp b/src/DistributedComputation.hpp
new file mode 100644
index 0000000..309681d
--- /dev/null
+++ b/src/DistributedComputation.hpp
@@ -0,0 +1,182 @@
+//////////////////////////////////////////////////////////////////////
+// DistributedComputation.hpp
+//
+// This is a class for performing distributed computation via MPI. In
+// general, suppose you have an indexed family of functions f_i(x) for
+// i belonging to some set S. This class will allow you to run f_i(x)
+// for each i in S efficiently by distributing the work among a
+// collection of processors. The result returned will be
+//
+// sum_i f_i(x)
+//
+// One particular assumption made by the DistributedComputation class
+// is that the results returned will *always* be a vector of doubles.
+// This assumption is made to ensure that each of the "reductions" is
+// efficient.
+//
+// Examples:
+//
+// (1) Computation of the gradient for a batch mode machine learning
+// algorithm. In this case, the set S can represent the set of
+// training examples, each i corresponds to a single training
+// example, and x is a parameter set, and f_i(x) computes the
+// gradient of some error function for the ith training example at
+// the parameters x.
+//
+// (2) Processing a collection of files in some way. Here, each i in
+// S is some file descriptor, and x contains any shared data that
+// must be provided to do the processing. Note that it may be the
+// case that the return value of each f_i is irrelevant, and that
+// the main purpose of the distributed computation is simply to
+// "run" each f_i(x) to ensure that all files are processed.
+//
+// In order to use this class, you must
+//
+// (1) Create a class for storing shared data which is to be
+// "broadcast" to all processors. This class should not contain
+// any pointers, as the data in this class will be distributed to
+// other processors by a direct memcpy(). In the following, we
+// will assume that the name of this class is SharedData.
+//
+// (2) Create a class for storing nonshared data which is to be passed
+// to each processor individually. This class should not contain
+// any pointers, as the data in this class will be distributed to
+// other processors by a direct memcpy(). In the following, we
+// will assume that the name of this class is NonSharedData.
+//
+// (3) Create a subclass of DistributedComputation which implements
+// the method DoComputation(). Essentially, the DoComputation()
+// method implements the functionality for a single f_i(x).
+// Observe that the DoComputation() method takes essentially three
+// arguments,
+//
+// std::vector<double> &result
+// const SharedData &shared_data
+// const NonSharedData &nonshared_data
+//
+// The latter two arguments correspond to the "x" and "i"
+// arguments of each function f_i(x). The first argument is where
+// the results of the computation (if any) should be stored.
+//
+// (4) Call the RunAsComputeNode() from all compute nodes and
+// the DistributeComputation() method from the master node to
+// perform a distributed computation. The arguments to the
+// DistributeComputation() method are:
+//
+// std::vector<double> &result
+// const SharedData &shared_data
+// const std::vector<NonSharedData> &nonshared_data
+//
+// The DistributedComputation class will take care of the details
+// to ensure that the data is shuffled to the compute nodes in an
+// efficient manner. Note that the work units associated with
+// each of the nonshared data entries is allocated in the order
+// supplied in the nonshared_data[] vector. In general, for
+// efficiency, it makes sense to sort the entries of
+// nonshared_data[] in order of decreasing expected time to
+// completion.
+//
+// (5) Call the StopComputeNodes() routine from the master node to
+// ensure that the compute nodes stop running.
+//
+// That's it!
+//////////////////////////////////////////////////////////////////////
+
+#ifndef DISTRIBUTEDCOMPUTATION_HPP
+#define DISTRIBUTEDCOMPUTATION_HPP
+
+#ifdef MULTI
+#include <mpi.h>
+#endif
+
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class DistributedComputationBase
+//
+// Class for distributing computation (see description above).
+//
+// RealT:         element type of the result vector
+// SharedData:    data broadcast to every processor
+// NonSharedData: per-work-unit data
+//
+// Subclasses implement DoComputation() for a single work unit and,
+// in MULTI builds, GetResultMPIDataType() to name the MPI datatype
+// matching RealT.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+class DistributedComputationBase
+{
+    bool toggle_verbose;       // verbosity flag given to the constructor
+    double processing_time;    // starts at 0; NOTE(review): presumably feeds GetEfficiency() -- confirm in .ipp
+    double total_time;         // starts at 0; NOTE(review): presumably feeds GetEfficiency() -- confirm in .ipp
+    int id;                    // node ID; 0 is the master (MPI rank in MULTI builds, otherwise 0)
+    int num_procs;             // number of nodes (MPI communicator size in MULTI builds, otherwise 1)
+
+    // internal use only
+#ifdef MULTI
+    // Uses the standard MPI_Datatype handle rather than Open MPI's
+    // internal "ompi_datatype_t *" so that the code builds against any
+    // MPI implementation; under Open MPI the two are the same type.
+    virtual MPI_Datatype GetResultMPIDataType() = 0;
+#endif
+
+protected:
+
+    // perform individual computations: evaluate one work unit
+    // (nonshared_data) against shared_data and store the outcome in result
+    virtual void DoComputation(std::vector<RealT> &result,
+                               const SharedData &shared_data,
+                               const NonSharedData &nonshared_data) = 0;
+
+public:
+
+    // constructor and destructor
+    DistributedComputationBase(bool toggle_verbose);
+    virtual ~DistributedComputationBase() {}
+
+    // start and stop compute nodes: RunAsComputeNode() is for compute
+    // nodes, StopComputeNodes() is for the master node
+    void RunAsComputeNode();
+    void StopComputeNodes();
+
+    // perform distributed computation (to be called by master node)
+    void DistributeComputation(std::vector<RealT> &result,
+                               const SharedData &shared_data,
+                               const std::vector<NonSharedData> &nonshared_data);
+
+    // some simple routines for dealing with node IDs
+    bool IsComputeNode() const { return id != 0; }
+    bool IsMasterNode() const { return id == 0; }
+    int GetNumNodes() const { return num_procs; }
+    int GetNodeID() const { return id; }
+
+    // query statistics regarding the efficiency of distributed computation
+    double GetEfficiency() const;
+    void ResetEfficiency();
+};
+
+//////////////////////////////////////////////////////////////////////
+// class DistributedComputation
+//
+// Partial specialization of class to particular result data types.
+//
+// The primary template only forwards its constructor argument.  In
+// MULTI builds the float and double specializations additionally
+// supply the MPI datatype for the result vector (MPI_FLOAT and
+// MPI_DOUBLE); the primary template does not, so in MULTI builds
+// only these two result types provide the hook.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+class DistributedComputation : public DistributedComputationBase<RealT, SharedData, NonSharedData>
+{
+public:
+    DistributedComputation(bool toggle_verbose) : DistributedComputationBase<RealT, SharedData, NonSharedData>(toggle_verbose) {}
+};
+
+#ifdef MULTI
+
+// The overrides return the standard MPI_Datatype handle instead of
+// Open MPI's internal "ompi_datatype_t *", so they build against any
+// MPI implementation; under Open MPI the two are the same type.
+
+template<class SharedData, class NonSharedData>
+class DistributedComputation<float, SharedData, NonSharedData> : public DistributedComputationBase<float, SharedData, NonSharedData>
+{
+    MPI_Datatype GetResultMPIDataType() { return MPI_FLOAT; }
+public:
+    DistributedComputation(bool toggle_verbose) : DistributedComputationBase<float, SharedData, NonSharedData>(toggle_verbose) {}
+};
+
+template<class SharedData, class NonSharedData>
+class DistributedComputation<double, SharedData, NonSharedData> : public DistributedComputationBase<double, SharedData, NonSharedData>
+{
+    MPI_Datatype GetResultMPIDataType() { return MPI_DOUBLE; }
+public:
+    DistributedComputation(bool toggle_verbose) : DistributedComputationBase<double, SharedData, NonSharedData>(toggle_verbose) {}
+};
+
+#endif
+
+#include "DistributedComputation.ipp"
+
+#endif
diff --git a/src/DistributedComputation.ipp b/src/DistributedComputation.ipp
new file mode 100644
index 0000000..a590fd5
--- /dev/null
+++ b/src/DistributedComputation.ipp
@@ -0,0 +1,406 @@
+//////////////////////////////////////////////////////////////////////
+// DistributedComputation.ipp
+//
+// Class for performing distributed optimization.
+//////////////////////////////////////////////////////////////////////
+
+#include "DistributedComputation.hpp"
+
+enum CommandType // command codes the master sends (as an int) to compute nodes; handled in RunAsComputeNode()
+{
+ CommandType_LoadSharedData, // node joins an MPI_Bcast to receive the shared data
+ CommandType_DoWork, // node receives one work unit, runs DoComputation, adds to its running result, replies with the elapsed time
+ CommandType_SendResultSize, // node replies with the length of its running result
+ CommandType_SendResult, // node checks the broadcast size, then contributes its running result to an MPI_Reduce sum
+ CommandType_ClearResult, // node empties its running result
+ CommandType_Quit // node returns from RunAsComputeNode()
+};
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputationBase::DistributedComputationBase()
+//
+// Constructor. Performs MPI initializations if MULTI is
+// defined.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+DistributedComputationBase<RealT, SharedData, NonSharedData>::DistributedComputationBase(bool toggle_verbose) :
+ toggle_verbose(toggle_verbose),
+ processing_time(0),
+ total_time(0),
+ id(0),
+ num_procs(1)
+{
+ // the defaults above (rank 0 of 1 process) stay in effect unless MULTI is defined
+#ifdef MULTI
+ MPI_Comm_rank(MPI_COMM_WORLD, &id);
+ MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
+ MPI_Barrier(MPI_COMM_WORLD);
+#endif
+ // only rank 0 announces start-up, and only in verbose mode
+ if (toggle_verbose && id == 0)
+ {
+ WriteProgressMessage("");
+ std::cerr << "Distributed Optimization Library started. Using ";
+ std::cerr << num_procs << " processor(s)." << std::endl;
+ }
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputationBase::RunAsComputeNode()
+//
+// Turn into a compute node and process work requests from the master
+// node until the command to quit is sent. Should only be called
+// if MULTI is defined.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+void DistributedComputationBase<RealT, SharedData, NonSharedData>::RunAsComputeNode()
+{
+ Assert(id != 0, "Routine should not be called by master process.");
+ // everything below is compiled only when MULTI is defined; otherwise nothing further happens
+#ifdef MULTI
+ MPI_Status status;
+ SharedData shared_data;
+ NonSharedData nonshared_data;
+ std::vector<RealT> result; // running sum over every work unit since the last ClearResult
+ std::vector<RealT> partial_result; // output of the most recent DoComputation call
+
+ while (true)
+ {
+ // block until rank 0 sends the next command code
+ int command;
+ MPI_Recv(&command, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
+
+ switch (command) // a code with no case below is skipped and the loop waits for the next one
+ {
+ case CommandType_LoadSharedData:
+ {
+ // get shared data as a raw byte image; NOTE(review): presumably needs SharedData to be trivially copyable -- confirm
+ MPI_Bcast(&shared_data, sizeof(SharedData), MPI_BYTE, 0, MPI_COMM_WORLD);
+ }
+ break;
+
+ case CommandType_DoWork:
+ {
+ // get nonshared data (one work unit), again as raw bytes
+ MPI_Recv(&nonshared_data, sizeof(NonSharedData), MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status);
+
+ // perform and time computation; processing_time holds the start stamp, then the elapsed time
+ processing_time = GetSystemTime();
+ DoComputation(partial_result, shared_data, nonshared_data);
+ processing_time = GetSystemTime() - processing_time;
+
+ // first unit fixes the result length; later units must match it
+ if (result.size() == 0)
+ result.resize(partial_result.size());
+ else if (result.size() != partial_result.size())
+ Error("Encountered return values of different size.");
+
+ // accumulate results
+ result += partial_result;
+
+ // return processing time to main node; this message also tells the master the unit is finished
+ MPI_Send(&processing_time, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
+ }
+ break;
+
+ case CommandType_SendResultSize:
+ {
+ // send result size to main node (0 if this node has done no work since the last clear)
+ int size = result.size();
+ MPI_Send(&size, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
+ }
+ break;
+
+ case CommandType_SendResult:
+ {
+ // receive the expected length from rank 0 and reconcile it with the local result
+ int size;
+ MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+ if (size == 0)
+ Error("Should not call this unless results expected.");
+ else if (result.size() == 0)
+ result.resize(size); // this node did no work: resize() fills with value-initialised elements
+ else if (size != int(result.size()))
+ Error("Return values of different size (%d vs %d).", size, int(result.size()));
+
+ // then contribute to the element-wise sum collected on rank 0 (receive buffer is unused here, hence NULL)
+ MPI_Reduce(&result[0], NULL, result.size(), GetResultMPIDataType(), MPI_SUM, 0, MPI_COMM_WORLD);
+ }
+ break;
+
+ case CommandType_ClearResult:
+ result.clear();
+ break;
+
+ case CommandType_Quit:
+ return;
+ }
+ }
+#endif
+
+}
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputationBase::StopComputeNodes()
+//
+// Closes down MPI connections for compute nodes.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+void DistributedComputationBase<RealT, SharedData, NonSharedData>::StopComputeNodes()
+{
+#ifdef MULTI
+ if (IsMasterNode())
+ {
+ int quit_command = CommandType_Quit; // identical for every node, so set once
+ for (int rank = 1; rank < num_procs; ++rank)
+ {
+ MPI_Send(&quit_command, 1, MPI_INT, rank, 0, MPI_COMM_WORLD);
+ }
+ }
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputationBase::DistributeComputation()
+//
+// Master only. If MULTI is defined, hands work units in index order to
+// idle compute nodes (ranks > 0) and sums their accumulated results into
+// result; otherwise runs every unit in this process and sums as it goes.
+//////////////////////////////////////////////////////////////////////
+
+const int NOT_ALLOCATED = -1; // assignment[] marker: node currently has no work unit
+const int DO_NOT_ALLOCATE = -2; // assignment[] marker: never hand work to this rank (used for the master)
+
+template<class RealT, class SharedData, class NonSharedData>
+void DistributedComputationBase<RealT, SharedData, NonSharedData>::DistributeComputation(std::vector<RealT> &result, // out: element-wise sum over all work units
+ const SharedData &shared_data, // sent once to every compute node
+ const std::vector<NonSharedData> &nonshared_data) // one entry per work unit; must be non-empty
+{
+ Assert(id == 0, "Routine should only be called by master process.");
+ Assert(nonshared_data.size() > 0, "Must submit at least one work description for processing.");
+
+ double starting_time = GetSystemTime();
+ size_t units_complete = 0;
+
+ result.clear();
+
+#ifdef MULTI
+ size_t num_procs_in_use = 1; // includes the master itself, so 1 means no compute node is busy
+ size_t curr_unit = 0; // index of the next work unit to hand out
+ int command;
+ int size;
+
+ MPI_Status status;
+ // with no compute nodes nothing could ever answer the receive in the loop below
+ if (num_procs < 2) Error("Distributed computation requires at least one compute node.");
+ // initialize work assignments
+ std::vector<int> assignment(num_procs, NOT_ALLOCATED);
+ assignment[0] = DO_NOT_ALLOCATE;
+
+ // clear accumulated result on all processors
+ if (toggle_verbose) WriteProgressMessage("Clearing accumulated result on all processors.");
+ command = CommandType_ClearResult;
+ for (int proc = 1; proc < num_procs; proc++)
+ MPI_Send(&command, 1, MPI_INT, proc, 0, MPI_COMM_WORLD);
+
+ // broadcast shared data to all processors (raw bytes; const_cast only because MPI takes a non-const buffer)
+ if (toggle_verbose) WriteProgressMessage("Broadcasting shared data to all processors.");
+ command = CommandType_LoadSharedData;
+ for (int proc = 1; proc < num_procs; proc++)
+ MPI_Send(&command, 1, MPI_INT, proc, 0, MPI_COMM_WORLD);
+ MPI_Bcast(const_cast<SharedData *>(&shared_data), sizeof(SharedData), MPI_BYTE, 0, MPI_COMM_WORLD);
+
+ // while there is work to be done, or a node is still busy
+ if (toggle_verbose) WriteProgressMessage("Sending work units to all processors.");
+ while (num_procs_in_use > 1 || curr_unit < nonshared_data.size())
+ {
+ // allocate the max number of processors possible
+ while (int(num_procs_in_use) < num_procs && curr_unit < nonshared_data.size())
+ {
+ // find free processor, scanning from the lowest rank upwards
+ size_t proc = 0;
+ while (proc < assignment.size() && assignment[proc] != NOT_ALLOCATED) proc++;
+ Assert(proc < assignment.size(), "Expected to find free processor.");
+
+ // send command
+ command = CommandType_DoWork;
+ MPI_Send(&command, 1, MPI_INT, proc, 0, MPI_COMM_WORLD);
+
+ // send nonshared data
+ MPI_Send(const_cast<NonSharedData *>(&nonshared_data[curr_unit]), sizeof(NonSharedData), MPI_BYTE, proc, 0, MPI_COMM_WORLD);
+
+ // update processor allocation table
+ num_procs_in_use++;
+ assignment[proc] = curr_unit;
+ curr_unit++;
+ }
+
+ // write progress message (at most 1 update per second); casts make the arguments match %u / %d
+ double current_time = GetSystemTime();
+ static double prev_reporting_time = 0;
+ if (current_time - prev_reporting_time > 1)
+ {
+ prev_reporting_time = current_time;
+ size_t percent_complete = 100 * units_complete / nonshared_data.size();
+ if (toggle_verbose) WriteProgressMessage(SPrintF("%u/%u work units allocated, %d%% complete.", static_cast<unsigned int>(curr_unit), static_cast<unsigned int>(nonshared_data.size()), static_cast<int>(percent_complete)));
+ }
+
+ // all nodes busy or all work handed out: block until some node reports its processing time for a finished unit
+ MPI_Recv(&current_time, 1, MPI_DOUBLE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
+ Assert(current_time >= 0, "Expected positive time value for acknowledgment of job completion.");
+ processing_time += current_time;
+
+ // update processor allocation table: the reporting node is idle again
+ num_procs_in_use--;
+ assignment[status.MPI_SOURCE] = NOT_ALLOCATED;
+ units_complete++;
+ }
+
+ // get accumulated result size from node 1, which always received unit 0 because the free-node scan starts at the lowest rank
+ if (toggle_verbose) WriteProgressMessage("Computing result size.");
+ command = CommandType_SendResultSize;
+ MPI_Send(&command, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
+ MPI_Recv(&size, 1, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);
+ Assert(size >= 0, "Size should be nonnegative.");
+ result.resize(size);
+
+ // check if result is expected
+ if (size > 0)
+ {
+ // tell all processors to send results
+ if (toggle_verbose) WriteProgressMessage("Requesting results from processors.");
+ command = CommandType_SendResult;
+ for (int proc = 1; proc < num_procs; proc++)
+ MPI_Send(&command, 1, MPI_INT, proc, 0, MPI_COMM_WORLD);
+
+ // every node compares this size against the length of its own result
+ MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+ // element-wise sum onto the master; result was cleared and resized above, so the master's own in-place contribution is all zeros
+ if (toggle_verbose) WriteProgressMessage("Receiving accumulated results from processors.");
+ MPI_Reduce(MPI_IN_PLACE, &result[0], size, GetResultMPIDataType(), MPI_SUM, 0, MPI_COMM_WORLD);
+ }
+
+
+#else
+
+ // retrieve one result at a time, and accumulate
+ std::vector<RealT> partial_result;
+ if (toggle_verbose) WriteProgressMessage("Starting first work unit.");
+ for (size_t j = 0; j < nonshared_data.size(); j++)
+ {
+ DoComputation(partial_result, shared_data, nonshared_data[j]);
+
+ // first unit fixes the result length; later units must match it
+ if (result.size() == 0)
+ result.resize(partial_result.size());
+ else if (result.size() != partial_result.size())
+ Error("Encountered return values of different size.");
+
+ // accumulate results
+ result += partial_result;
+ units_complete++;
+
+ // write progress message (at most 1 update per second); casts make the arguments match %u / %d
+ double current_time = GetSystemTime();
+ static double prev_reporting_time = 0;
+ if (current_time - prev_reporting_time > 1)
+ {
+ prev_reporting_time = current_time;
+ size_t percent_complete = 100 * units_complete / nonshared_data.size();
+ if (toggle_verbose) WriteProgressMessage(SPrintF("%u/%u work units allocated, %d%% complete.", static_cast<unsigned int>(units_complete), static_cast<unsigned int>(nonshared_data.size()), static_cast<int>(percent_complete)));
+ }
+ }
+
+#endif
+
+ if (toggle_verbose) WriteProgressMessage("");
+ total_time += (GetSystemTime() - starting_time);
+}
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputationBase::GetEfficiency()
+//
+// Master only. Returns 100 * (summed node processing time / number of compute nodes) / master elapsed time.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+double DistributedComputationBase<RealT, SharedData, NonSharedData>::GetEfficiency() const
+{
+ Assert(IsMasterNode(), "Should only be called by master node.");
+#ifdef MULTI
+ return 100.0 * (processing_time / (num_procs > 1 ? num_procs - 1 : 1)) / (1e-10 + total_time); // divisor guarded: num_procs == 1 would divide by zero; 1e-10 keeps a zero total_time finite
+#else
+ return 100.0;
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputationBase::ResetEfficiency()
+// Master only. Zeroes both timers that GetEfficiency() is computed from.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+void DistributedComputationBase<RealT, SharedData, NonSharedData>::ResetEfficiency()
+{
+ Assert(IsMasterNode(), "Should only be called by master node.");
+ total_time = 0;
+ processing_time = 0;
+}
+
+/*
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputation::DistributedComputation()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+DistributedComputation<RealT, SharedData, NonSharedData>::DistributedComputation(bool toggle_verbose) :
+ DistributedComputationBase(toggle_verbose)
+{}
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputation::~DistributedComputation()
+//
+// Destructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+DistributedComputation<RealT, SharedData, NonSharedData>::~DistributedComputation()
+{}
+
+//////////////////////////////////////////////////////////////////////
+// DistributedComputation::GetResultMPIDataType()
+//
+// Routine for indicating data type to be transmitted.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT, class SharedData, class NonSharedData>
+int DistributedComputation<RealT, SharedData, NonSharedData>::GetResultMPIDataType() const
+{
+ return 0;
+}
+
+#ifdef MULTI
+
+template<class SharedData, class NonSharedData>
+class DistributedComputation<float, SharedData, NonSharedData> : public DistributedComputationBase<float, SharedData, NonSharedData>
+{
+ int GetResultMPIDataType() const { return MPI_FLOAT; }
+};
+
+template<class SharedData, class NonSharedData>
+class DistributedComputation<double> : public DistributedComputationBase<double, SharedData, NonSharedData>
+{
+ int GetResultMPIDataType() const { return MPI_DOUBLE; }
+};
+
+#endif
+*/
diff --git a/src/EncapsulatedPostScript.cpp b/src/EncapsulatedPostScript.cpp
new file mode 100644
index 0000000..555aec0
--- /dev/null
+++ b/src/EncapsulatedPostScript.cpp
@@ -0,0 +1,145 @@
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript.cpp
+//
+// The routines shown here for creating encapsulated postscript
+// figures were adapted from a modification of PlotRNA by
+// Marc Parisien.
+//////////////////////////////////////////////////////////////////////
+
+#include "EncapsulatedPostScript.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript::EncapsulatedPostScript()
+//
+// Constructor. Write PostScript prolog.
+//////////////////////////////////////////////////////////////////////
+
+EncapsulatedPostScript::EncapsulatedPostScript(std::ostream &out, double image_width, double image_height, int font_size) :
+ out(out), image_width(image_width), image_height(image_height), font_size(font_size), done(false)
+{
+ out << "%!PS-Adobe-3.0 EPSF-3.0" << std::endl // header line declaring the file as EPS
+ << "%%BoundingBox: 0 0 " << int(image_width) << " " << int(image_height) << std::endl // fractional sizes are truncated to int
+ << "1.0 1.0 scale" << std::endl
+ << "0 0 translate" << std::endl
+ << "/Helvetica-Bold findfont" << std::endl // the standard font name is hyphenated; "HelveticaBold" is not a standard font
+ << font_size << " scalefont" << std::endl
+ << "setfont" << std::endl;
+}
+
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript::SetRGBColor()
+//
+// Set current color explicitly.
+//////////////////////////////////////////////////////////////////////
+
+void EncapsulatedPostScript::SetRGBColor(double r, double g, double b)
+{
+ // each channel must be a fraction in [0, 1]
+ Assert(0.0 <= r && r <= 1.0, "Out-of-range.");
+ Assert(0.0 <= g && g <= 1.0, "Out-of-range.");
+ Assert(0.0 <= b && b <= 1.0, "Out-of-range.");
+ out << std::setprecision(3) << std::setiosflags(std::ios::showpoint | std::ios::fixed);
+ out << r << " " << g << " " << b << " setrgbcolor" << std::endl;
+}
+
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript::SetColorBlack()
+//
+// Set current color back to black.
+//////////////////////////////////////////////////////////////////////
+
+void EncapsulatedPostScript::SetColorBlack()
+{
+ out << "0 0 0 setrgbcolor" << '\n' << std::flush; // newline followed by a flush, as std::endl does
+}
+
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript::DrawString()
+//
+// Write a text string. Adapted from:
+//
+// http://www.nipr.ac.jp/~uap-mon/uapm/src.bak/pltSyowaMag_save.c
+//
+// Text alignment:
+// pos_x : x-align 0:left 1:center 2:right
+// pos_y : y-align 0:bottom 1:center 2:top
+//////////////////////////////////////////////////////////////////////
+
+void EncapsulatedPostScript::DrawString(double x, double y, const std::string &s, int pos_x, int pos_y)
+{
+ if (done) Error("EPS file already closed.");
+
+ // divisor applied to the text extent: 2 for centred (1), 1 for the far edge (2), 0 for no shift
+ const int x_divisor = (pos_x == 1) ? 2 : ((pos_x == 2) ? 1 : 0);
+ const int y_divisor = (pos_y == 1) ? 2 : ((pos_y == 2) ? 1 : 0);
+
+ // move to the anchor (y is emitted as image_height - y) and push the text onto the stack
+ out << std::setprecision(3) << std::setiosflags(std::ios::showpoint | std::ios::fixed)
+ << x << " " << image_height - y << " moveto" << std::endl
+ << "(" << s << ")" << std::endl;
+
+ // horizontal alignment: shift left by the string width divided by x_divisor
+ if (x_divisor != 0)
+ {
+ out << "dup stringwidth pop " << x_divisor << " div neg 0 rmoveto" << std::endl;
+ }
+
+ // vertical alignment: take the top of the text outline's bounding box, then shift down by it / y_divisor
+ if (y_divisor != 0)
+ {
+ out << "gsave" << std::endl
+ << "newpath" << std::endl
+ << "0 0 moveto" << std::endl
+ << "(" << s << ") true charpath flattenpath" << std::endl
+ << "pathbbox /charheight exch def pop pop pop" << std::endl
+ << "closepath" << std::endl
+ << "grestore" << std::endl
+ << "0 charheight " << y_divisor << " div neg rmoveto" << std::endl;
+ }
+
+ out << "show" << std::endl;
+}
+
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript::DrawLine()
+//
+// Draw a line from (sx,sy) to (ex,ey) with given width.
+//////////////////////////////////////////////////////////////////////
+
+void EncapsulatedPostScript::DrawLine(double sx, double sy, double ex, double ey, double width)
+{
+ if (done) Error("EPS file already closed.");
+ out << std::setprecision(3) << std::setiosflags(std::ios::showpoint | std::ios::fixed);
+ out << width << " setlinewidth" << std::endl;
+ out << sx << " " << image_height - sy << " moveto " << ex << " " << image_height - ey
+ << " lineto stroke" << std::endl;
+}
+
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript::DrawCircle()
+//
+// Draw a circle at (x,y) with given radius.
+//////////////////////////////////////////////////////////////////////
+
+void EncapsulatedPostScript::DrawCircle(double x, double y, double r)
+{
+ if (done) Error("EPS file already closed.");
+ out << std::setprecision(3) << std::setiosflags(std::ios::showpoint | std::ios::fixed);
+ out << x << " " << image_height - y << " moveto" << std::endl;
+ out << x << " " << image_height - y << " " << r << " 0 360 arc closepath fill" << std::endl;
+}
+
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript::Close()
+//
+// Finish EPS file.
+//////////////////////////////////////////////////////////////////////
+
+void EncapsulatedPostScript::Close()
+{
+ if (done) Error("EPS file already closed."); // closing twice is reported as an error
+ // DSC structuring comments begin with two percent signs; a lone "%EOF" is only an ordinary comment
+ out << "showpage" << std::endl
+ << "%%EOF" << std::endl;
+ done = true; // DrawString/DrawLine/DrawCircle and Close itself check this flag
+}
diff --git a/src/EncapsulatedPostScript.hpp b/src/EncapsulatedPostScript.hpp
new file mode 100644
index 0000000..5563d59
--- /dev/null
+++ b/src/EncapsulatedPostScript.hpp
@@ -0,0 +1,34 @@
+//////////////////////////////////////////////////////////////////////
+// EncapsulatedPostScript.hpp
+//////////////////////////////////////////////////////////////////////
+
+#ifndef ENCAPSULATEDPOSTSCRIPT_HPP
+#define ENCAPSULATEDPOSTSCRIPT_HPP
+
+#include <fstream>
+#include <string>
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class EncapsulatedPostScript
+//////////////////////////////////////////////////////////////////////
+
+class EncapsulatedPostScript // writes PostScript drawing commands for one EPS figure to a caller-supplied stream
+{
+ std::ostream &out; // held by reference: the stream must outlive this object
+ double image_width; // used (truncated to int) for the %%BoundingBox line
+ double image_height; // also used to emit every y coordinate as image_height - y
+ int font_size; // passed to scalefont in the prolog
+ bool done; // set by Close(); the Draw* methods and Close() report an error once it is true
+
+public:
+ EncapsulatedPostScript(std::ostream &out, double image_width, double image_height, int font_size); // writes the prolog immediately
+ void SetRGBColor(double r, double g, double b); // each channel asserted to lie in [0, 1]
+ void SetColorBlack();
+ void DrawString(double x, double y, const std::string &s, int pos_x, int pos_y); // pos_x / pos_y: 0, 1 or 2 alignment codes
+ void DrawLine(double sx, double sy, double ex, double ey, double width);
+ void DrawCircle(double x, double y, double r); // filled circle
+ void Close(); // writes the trailer; may be called only once
+};
+
+#endif
diff --git a/src/FileDescription.cpp b/src/FileDescription.cpp
new file mode 100644
index 0000000..d00736d
--- /dev/null
+++ b/src/FileDescription.cpp
@@ -0,0 +1,81 @@
+//////////////////////////////////////////////////////////////////////
+// FileDescription.cpp
+//////////////////////////////////////////////////////////////////////
+
+#include "FileDescription.hpp"
+#include <climits>
+//////////////////////////////////////////////////////////////////////
+// FileDescription::~FileDescription()
+//
+// Destructor.
+//////////////////////////////////////////////////////////////////////
+
+FileDescription::~FileDescription() // nothing to release by hand: the body is empty and members are destroyed implicitly
+{}
+
+//////////////////////////////////////////////////////////////////////
+// FileDescription::FileDescription()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+FileDescription::FileDescription(const std::string &input_filename, const bool allow_noncomplementary) : // builds sstruct from input_filename
+ sstruct(input_filename),
+ input_filename(input_filename),
+ size(0), // real value assigned below, once the cube has been range-checked
+ weight(1.0)
+{
+ const double cubed_length = Pow(double(sstruct.GetLength()), 3.0); // size estimate: length cubed
+ size = (cubed_length < double(INT_MAX)) ? int(cubed_length) : INT_MAX; // converting an out-of-range double to int is undefined (lengths above 1290 with a 32-bit int), so clamp
+ if (!allow_noncomplementary) sstruct.RemoveNoncomplementaryPairings();
+#if PROFILE
+ if (sstruct.GetNumSequences() == 1)
+ Warning("Using --profile mode with only one input sequence.");
+#else
+ if (sstruct.GetNumSequences() > 1)
+ Warning("Folding multiple input sequences without --profile mode enabled.");
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// FileDescription::FileDescription()
+//
+// Copy constructor.
+//////////////////////////////////////////////////////////////////////
+
+FileDescription::FileDescription(const FileDescription &rhs) : // member-wise copy of all four fields
+ sstruct(rhs.sstruct),
+ input_filename(rhs.input_filename),
+ size(rhs.size),
+ weight(rhs.weight)
+{}
+
+//////////////////////////////////////////////////////////////////////
+// FileDescription::operator=()
+//
+// Assignment operator.
+//////////////////////////////////////////////////////////////////////
+
+FileDescription &FileDescription::operator=(const FileDescription &rhs)
+{
+ // self-assignment: nothing to copy
+ if (this == &rhs) return *this;
+
+ sstruct = rhs.sstruct;
+ input_filename = rhs.input_filename;
+ size = rhs.size;
+ weight = rhs.weight;
+ return *this;
+}
+
+//////////////////////////////////////////////////////////////////////
+// FileDescription::operator<()
+//
+// Comparator used to sort by decreasing size.
+//////////////////////////////////////////////////////////////////////
+
+bool FileDescription::operator<(const FileDescription &rhs) const
+{
+ return rhs.size < size; // inverted on purpose: an ascending sort then lists larger sizes first
+}
+
diff --git a/src/FileDescription.hpp b/src/FileDescription.hpp
new file mode 100644
index 0000000..be80c7f
--- /dev/null
+++ b/src/FileDescription.hpp
@@ -0,0 +1,36 @@
+//////////////////////////////////////////////////////////////////////
+// FileDescription.hpp
+//
+// Contains a description of a file for processing.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef FILEDESCRIPTION_HPP
+#define FILEDESCRIPTION_HPP
+
+#include <string>
+#include "Config.hpp"
+#include "SStruct.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// struct FileDescription
+//////////////////////////////////////////////////////////////////////
+
+struct FileDescription
+{
+ SStruct sstruct; // constructed from input_filename; declared first, so it is initialised before size
+ std::string input_filename; // the name the constructor was given
+ int size; // set by the constructor from sstruct.GetLength() cubed; the sort key of operator<
+ double weight; // the constructor sets this to 1.0
+
+ // constructors, assignment operator, destructor
+ FileDescription(const std::string &input_filename,
+ const bool allow_noncomplementary); // when false, RemoveNoncomplementaryPairings() is applied to sstruct
+ FileDescription(const FileDescription &rhs);
+ FileDescription &operator=(const FileDescription &rhs);
+ ~FileDescription();
+
+ // comparator for sorting by decreasing size (returns size > rhs.size)
+ bool operator<(const FileDescription &rhs) const;
+};
+
+#endif
diff --git a/src/InferenceEngine.hpp b/src/InferenceEngine.hpp
new file mode 100644
index 0000000..8ebcae1
--- /dev/null
+++ b/src/InferenceEngine.hpp
@@ -0,0 +1,301 @@
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine.hpp
+//////////////////////////////////////////////////////////////////////
+
+#ifndef INFERENCEENGINE_HPP
+#define INFERENCEENGINE_HPP
+
+#include <queue>
+#include <vector>
+#include <string>
+#include "Config.hpp"
+#include "SStruct.hpp"
+#include "ParameterManager.hpp"
+#include "Utilities.hpp"
+#include "LogSpace.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class InferenceEngine
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class InferenceEngine // declares Viterbi and inside/outside inference over one loaded sequence; RealT is the numeric type of scores
+{
+ const bool allow_noncomplementary; // fixed at construction
+ unsigned char char_mapping[256];
+ int is_complementary[M+1][M+1];
+ bool cache_initialized;
+ ParameterManager<RealT> *parameter_manager; // NOTE(review): presumably set by RegisterParameters() and not owned -- confirm in InferenceEngine.ipp
+
+ // dimensions
+ int L, SIZE;
+#if PROFILE
+ int N, SIZE2;
+#endif
+
+ // sequence data
+ std::vector<int> s, offset;
+#if PROFILE
+ std::vector<int> A;
+ std::vector<RealT> weights;
+#endif
+ std::vector<int> allow_unpaired_position;
+ std::vector<int> allow_unpaired, allow_paired;
+ std::vector<RealT> loss_unpaired_position;
+ std::vector<RealT> loss_unpaired, loss_paired;
+
+ enum TRACEBACK_TYPE { // the set of codes depends on whether helix-length / isolated-base-pair parameters are compiled in
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+ TB_FN_HAIRPIN,
+ TB_FN_SINGLE,
+ TB_FN_BIFURCATION,
+ TB_FE_STACKING,
+ TB_FE_FN,
+ TB_FC_FN,
+ TB_FC_HELIX,
+ TB_FC_FE,
+#else
+ TB_FC_HAIRPIN,
+ TB_FC_SINGLE,
+ TB_FC_BIFURCATION,
+#endif
+ TB_FM1_PAIRED,
+ TB_FM1_UNPAIRED,
+ TB_FM_BIFURCATION,
+ TB_FM_UNPAIRED,
+ TB_FM_FM1,
+ TB_F5_ZERO,
+ TB_F5_UNPAIRED,
+ TB_F5_BIFURCATION,
+ NUM_TRACEBACK_TYPES
+ };
+
+ // dynamic programming matrices
+ std::vector<int> FCt, F5t, FMt, FM1t; // traceback
+ std::vector<RealT> FCv, F5v, FMv, FM1v; // Viterbi
+ std::vector<RealT> FCi, F5i, FMi, FM1i; // inside
+ std::vector<RealT> FCo, F5o, FMo, FM1o; // outside
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+ std::vector<int> FEt, FNt;
+ std::vector<RealT> FEv, FNv;
+ std::vector<RealT> FEi, FNi;
+ std::vector<RealT> FEo, FNo;
+#endif
+
+ std::vector<RealT> posterior;
+
+ // parameters: the Score*/Count* macros in InferenceEngine.ipp read .first as the score
+ // and add to .second as the count; each group exists only if its PARAMS_* switch is on
+#if PARAMS_BASE_PAIR
+ std::pair<RealT,RealT> score_base_pair[M+1][M+1];
+#endif
+#if PARAMS_BASE_PAIR_DIST
+ std::pair<RealT,RealT> score_base_pair_dist_at_least[D_MAX_BP_DIST_THRESHOLDS];
+ std::pair<RealT,RealT> cache_score_base_pair_dist[BP_DIST_LAST_THRESHOLD+1];
+#endif
+#if PARAMS_TERMINAL_MISMATCH
+ std::pair<RealT,RealT> score_terminal_mismatch[M+1][M+1][M+1][M+1];
+#endif
+#if PARAMS_HAIRPIN_LENGTH
+ std::pair<RealT,RealT> score_hairpin_length_at_least[D_MAX_HAIRPIN_LENGTH+1];
+ std::pair<RealT,RealT> cache_score_hairpin_length[D_MAX_HAIRPIN_LENGTH+1];
+#endif
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+ std::pair<RealT,RealT> score_hairpin_3_nucleotides[M+1][M+1][M+1];
+#endif
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+ std::pair<RealT,RealT> score_hairpin_4_nucleotides[M+1][M+1][M+1][M+1];
+#endif
+#if PARAMS_HELIX_LENGTH
+ std::pair<RealT,RealT> score_helix_length_at_least[D_MAX_HELIX_LENGTH+1];
+ std::pair<RealT,RealT> cache_score_helix_length[D_MAX_HELIX_LENGTH+1];
+#endif
+#if PARAMS_ISOLATED_BASE_PAIR
+ std::pair<RealT,RealT> score_isolated_base_pair;
+#endif
+#if PARAMS_INTERNAL_EXPLICIT
+ std::pair<RealT,RealT> score_internal_explicit[D_MAX_INTERNAL_EXPLICIT_LENGTH+1][D_MAX_INTERNAL_EXPLICIT_LENGTH+1];
+#endif
+#if PARAMS_BULGE_LENGTH
+ std::pair<RealT,RealT> score_bulge_length_at_least[D_MAX_BULGE_LENGTH+1];
+#endif
+#if PARAMS_INTERNAL_LENGTH
+ std::pair<RealT,RealT> score_internal_length_at_least[D_MAX_INTERNAL_LENGTH+1];
+#endif
+#if PARAMS_INTERNAL_SYMMETRY
+ std::pair<RealT,RealT> score_internal_symmetric_length_at_least[D_MAX_INTERNAL_SYMMETRIC_LENGTH+1];
+#endif
+#if PARAMS_INTERNAL_ASYMMETRY
+ std::pair<RealT,RealT> score_internal_asymmetry_at_least[D_MAX_INTERNAL_ASYMMETRY+1];
+#endif
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+ std::pair<RealT,RealT> score_bulge_0x1_nucleotides[M+1];
+ std::pair<RealT,RealT> score_bulge_1x0_nucleotides[M+1];
+#endif
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+ std::pair<RealT,RealT> score_bulge_0x2_nucleotides[M+1][M+1];
+ std::pair<RealT,RealT> score_bulge_2x0_nucleotides[M+1][M+1];
+#endif
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+ std::pair<RealT,RealT> score_bulge_0x3_nucleotides[M+1][M+1][M+1];
+ std::pair<RealT,RealT> score_bulge_3x0_nucleotides[M+1][M+1][M+1];
+#endif
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+ std::pair<RealT,RealT> score_internal_1x1_nucleotides[M+1][M+1];
+#endif
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+ std::pair<RealT,RealT> score_internal_1x2_nucleotides[M+1][M+1][M+1];
+ std::pair<RealT,RealT> score_internal_2x1_nucleotides[M+1][M+1][M+1];
+#endif
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+ std::pair<RealT,RealT> score_internal_2x2_nucleotides[M+1][M+1][M+1][M+1];
+#endif
+#if PARAMS_HELIX_STACKING
+ std::pair<RealT,RealT> score_helix_stacking[M+1][M+1][M+1][M+1];
+#endif
+#if PARAMS_HELIX_CLOSING
+ std::pair<RealT,RealT> score_helix_closing[M+1][M+1];
+#endif
+#if PARAMS_MULTI_LENGTH
+ std::pair<RealT,RealT> score_multi_base;
+ std::pair<RealT,RealT> score_multi_unpaired;
+ std::pair<RealT,RealT> score_multi_paired;
+#endif
+#if PARAMS_DANGLE
+ std::pair<RealT,RealT> score_dangle_left[M+1][M+1][M+1];
+ std::pair<RealT,RealT> score_dangle_right[M+1][M+1][M+1];
+#endif
+#if PARAMS_EXTERNAL_LENGTH
+ std::pair<RealT,RealT> score_external_unpaired;
+ std::pair<RealT,RealT> score_external_paired;
+#endif
+
+#if PROFILE
+
+ // multiple sequence scoring
+#if PARAMS_BASE_PAIR
+ std::vector<std::pair<RealT,RealT> > profile_score_base_pair;
+#endif
+#if PARAMS_TERMINAL_MISMATCH
+ std::vector<std::pair<RealT,RealT> > profile_score_terminal_mismatch;
+#endif
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+ std::vector<std::pair<RealT,RealT> > profile_score_hairpin_3_nucleotides;
+#endif
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+ std::vector<std::pair<RealT,RealT> > profile_score_hairpin_4_nucleotides;
+#endif
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+ std::vector<std::pair<RealT,RealT> > profile_score_bulge_0x1_nucleotides;
+ std::vector<std::pair<RealT,RealT> > profile_score_bulge_1x0_nucleotides;
+#endif
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+ std::vector<std::pair<RealT,RealT> > profile_score_bulge_0x2_nucleotides;
+ std::vector<std::pair<RealT,RealT> > profile_score_bulge_2x0_nucleotides;
+#endif
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+ std::vector<std::pair<RealT,RealT> > profile_score_bulge_0x3_nucleotides;
+ std::vector<std::pair<RealT,RealT> > profile_score_bulge_3x0_nucleotides;
+#endif
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+ std::vector<std::pair<RealT,RealT> > profile_score_internal_1x1_nucleotides;
+#endif
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+ std::vector<std::pair<RealT,RealT> > profile_score_internal_1x2_nucleotides;
+ std::vector<std::pair<RealT,RealT> > profile_score_internal_2x1_nucleotides;
+#endif
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+ std::vector<std::pair<RealT,RealT> > profile_score_internal_2x2_nucleotides;
+#endif
+#if PARAMS_HELIX_STACKING
+ std::vector<std::pair<RealT,RealT> > profile_score_helix_stacking;
+#endif
+#if PARAMS_HELIX_CLOSING
+ std::vector<std::pair<RealT,RealT> > profile_score_helix_closing;
+#endif
+#if PARAMS_DANGLE
+ std::vector<std::pair<RealT,RealT> > profile_score_dangle_left;
+ std::vector<std::pair<RealT,RealT> > profile_score_dangle_right;
+#endif
+
+#endif
+
+ // cache
+ std::pair<RealT,RealT> cache_score_single[C_MAX_SINGLE_LENGTH+1][C_MAX_SINGLE_LENGTH+1];
+ std::vector<std::pair<RealT,RealT> > cache_score_helix_sums;
+
+ void FillScores(typename std::vector<std::pair<RealT, RealT> >::iterator begin, typename std::vector<std::pair<RealT, RealT> >::iterator end, RealT value);
+ void FillCounts(typename std::vector<std::pair<RealT, RealT> >::iterator begin, typename std::vector<std::pair<RealT, RealT> >::iterator end, RealT value);
+ int ComputeRowOffset(int i, int N) const;
+ bool IsComplementary(int i, int j) const;
+
+ RealT ScoreJunctionA(int i, int j) const;
+ RealT ScoreJunctionB(int i, int j) const;
+ RealT ScoreBasePair(int i, int j) const;
+ RealT ScoreHairpin(int i, int j) const;
+ RealT ScoreHelix(int i, int j, int m) const;
+ RealT ScoreSingleNucleotides(int i, int j, int p, int q) const;
+ RealT ScoreSingle(int i, int j, int p, int q) const;
+
+ void CountJunctionA(int i, int j, RealT value);
+ void CountJunctionB(int i, int j, RealT value);
+ void CountBasePair(int i, int j, RealT value);
+ void CountHairpin(int i, int j, RealT value);
+ void CountHelix(int i, int j, int m, RealT value);
+ void CountSingleNucleotides(int i, int j, int p, int q, RealT value);
+ void CountSingle(int i, int j, int p, int q, RealT value);
+
+ int EncodeTraceback(int i, int j) const;
+ std::pair<int,int> DecodeTraceback(int s) const;
+
+ std::vector<RealT> GetCounts();
+ void ClearCounts();
+ void InitializeCache();
+ void FinalizeCounts();
+
+#if PROFILE
+ void ComputeProfileScore(RealT &profile_score, const int *pos, int dimensions, std::pair<RealT,RealT> *table);
+ void ConvertProfileCount(const RealT &profile_score, const int *pos, int dimensions, std::pair<RealT,RealT> *table);
+#endif
+
+public:
+
+ // constructor and destructor
+ InferenceEngine(bool allow_noncomplementary);
+ ~InferenceEngine();
+
+ // register params with the parameter manager (taken by reference)
+ void RegisterParameters(ParameterManager<RealT> &parameter_manager);
+
+ // load sequence
+ void LoadSequence(const SStruct &sstruct);
+
+ // load parameter values
+ void LoadValues(const std::vector<RealT> &values);
+
+ // load loss function
+ void UseLoss(const std::vector<int> &true_mapping, RealT example_loss);
+
+ // use constraints
+ void UseConstraints(const std::vector<int> &true_mapping);
+
+ // Viterbi inference
+ void ComputeViterbi();
+ RealT GetViterbiScore() const;
+ std::vector<int> PredictPairingsViterbi() const;
+ std::vector<RealT> ComputeViterbiFeatureCounts();
+
+ // MEA inference
+ void ComputeInside();
+ RealT ComputeLogPartitionCoefficient() const;
+ void ComputeOutside();
+ std::vector<RealT> ComputeFeatureCountExpectations();
+ void ComputePosterior();
+ std::vector<int> PredictPairingsPosterior(const RealT gamma) const;
+ RealT *GetPosterior(const RealT posterior_cutoff) const; // NOTE(review): ownership of the returned buffer is not visible here -- confirm who frees it
+};
+
+#include "InferenceEngine.ipp"
+
+#endif
diff --git a/src/InferenceEngine.ipp b/src/InferenceEngine.ipp
new file mode 100644
index 0000000..dc035a7
--- /dev/null
+++ b/src/InferenceEngine.ipp
@@ -0,0 +1,4759 @@
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine.ipp
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// Wrapper macros for certain model features.
+//////////////////////////////////////////////////////////////////////
+
+// score for leaving s[i] unpaired
+
+// With HAMMING_LOSS defined the score is read from the per-position
+// loss table; otherwise it is the constant RealT(0).  The macro
+// argument is parenthesized so that any expression may be passed.
+#if defined(HAMMING_LOSS)
+#define ScoreUnpairedPosition(i) (loss_unpaired_position[(i)])
+#else
+#define ScoreUnpairedPosition(i) (RealT(0))
+#endif
+// no count is accumulated for unpaired positions: expands to nothing
+#define CountUnpairedPosition(i,v)
+
+// score for leaving s[i+1...j] unpaired
+//
+// The (i,j) cell is addressed through the upper-triangular row offset
+// table, i.e. offset[i]+j.  Both arguments are parenthesized so that
+// operator precedence inside the caller's expressions cannot change
+// the computed index.
+
+#if defined(HAMMING_LOSS)
+#define ScoreUnpaired(i,j) (loss_unpaired[offset[(i)]+(j)])
+#else
+#define ScoreUnpaired(i,j) (RealT(0))
+#endif
+// no count is accumulated for unpaired ranges: expands to nothing
+#define CountUnpaired(i,j,v)
+
+// score for a base pair which is not part of any helix
+
+// Convention shared by the wrappers below: a Score*() macro reads the
+// .first member of the corresponding score_* pair, and the matching
+// Count*() macro adds its argument to the .second member.  When the
+// controlling PARAMS_* switch is off, the score is RealT(0) and the
+// count macro expands to nothing.
+#if PARAMS_ISOLATED_BASE_PAIR
+#define ScoreIsolated() score_isolated_base_pair.first
+#define CountIsolated(v) { score_isolated_base_pair.second += (v); }
+#else
+#define ScoreIsolated() RealT(0)
+#define CountIsolated(v)
+#endif
+
+// base score for a multi-branch loop
+
+#if PARAMS_MULTI_LENGTH
+#define ScoreMultiBase() score_multi_base.first
+#define CountMultiBase(v) { score_multi_base.second += (v); }
+#else
+#define ScoreMultiBase() RealT(0)
+#define CountMultiBase(v)
+#endif
+
+// score for a base-pair adjacent to a multi-branch loop
+
+#if PARAMS_MULTI_LENGTH
+#define ScoreMultiPaired() score_multi_paired.first
+#define CountMultiPaired(v) { score_multi_paired.second += (v); }
+#else
+#define ScoreMultiPaired() RealT(0)
+#define CountMultiPaired(v)
+#endif
+
+// score for each unpaired position in a multi-branch loop
+
+// The per-position term ScoreUnpairedPosition(i) is always added, even
+// when PARAMS_MULTI_LENGTH is off; likewise the count macro always
+// forwards to CountUnpairedPosition(i,v).
+#if PARAMS_MULTI_LENGTH
+#define ScoreMultiUnpaired(i) (score_multi_unpaired.first + ScoreUnpairedPosition(i))
+#define CountMultiUnpaired(i,v) { score_multi_unpaired.second += (v); CountUnpairedPosition(i,v); }
+#else
+#define ScoreMultiUnpaired(i) (ScoreUnpairedPosition(i))
+#define CountMultiUnpaired(i,v) { CountUnpairedPosition(i,v); }
+#endif
+
+// score for each base-pair adjacent to an external loop
+
+#if PARAMS_EXTERNAL_LENGTH
+#define ScoreExternalPaired() score_external_paired.first
+#define CountExternalPaired(v) { score_external_paired.second += (v); }
+#else
+#define ScoreExternalPaired() RealT(0)
+#define CountExternalPaired(v)
+#endif
+
+// score for each unpaired position in an external loop
+
+// Same structure as the multi-branch variant: the per-position term is
+// included regardless of PARAMS_EXTERNAL_LENGTH.
+#if PARAMS_EXTERNAL_LENGTH
+#define ScoreExternalUnpaired(i) (score_external_unpaired.first + ScoreUnpairedPosition(i))
+#define CountExternalUnpaired(i,v) { score_external_unpaired.second += (v); CountUnpairedPosition(i,v); }
+#else
+#define ScoreExternalUnpaired(i) (ScoreUnpairedPosition(i))
+#define CountExternalUnpaired(i,v) { CountUnpairedPosition(i,v); }
+#endif
+
+// score for a helix stacking pair of the form:
+//
+// | |
+// s[i+1] == s[j-1]
+// | |
+// s[i] ==== s[j]
+// | |
+
+// Three variants:
+//   * PROFILE build: the score/count lives in a flattened
+//     (L+1) x (L+1) table addressed as i*(L+1)+j.
+//   * non-PROFILE build: the table is indexed by the four encoded
+//     nucleotides s[i], s[j], s[i+1], s[j-1].
+//   * feature disabled: score is RealT(0), count expands to nothing.
+//
+// Every use of a macro parameter is parenthesized.  Without that, an
+// argument such as "i+1" would expand to "i+1*(L+1)+j" in the PROFILE
+// variant and silently address the wrong cell.
+#if PARAMS_HELIX_STACKING
+#if PROFILE
+#define ScoreHelixStacking(i,j) profile_score_helix_stacking[(i)*(L+1)+(j)].first
+#define CountHelixStacking(i,j,v) { profile_score_helix_stacking[(i)*(L+1)+(j)].second += (v); }
+#else
+#define ScoreHelixStacking(i,j) score_helix_stacking[s[(i)]][s[(j)]][s[(i)+1]][s[(j)-1]].first
+#define CountHelixStacking(i,j,v) { score_helix_stacking[s[(i)]][s[(j)]][s[(i)+1]][s[(j)-1]].second += (v); }
+#endif
+#else
+#define ScoreHelixStacking(i,j) RealT(0)
+#define CountHelixStacking(i,j,v)
+#endif
+
+//////////////////////////////////////////////////////////////////////
+// UPDATE_MAX()
+//
+// Macro for updating a score/traceback pointer which does not
+// evaluate t unless an update is needed. Make sure that this is
+// used as a stand-alone statement (i.e., not the "if" condition
+// of an if-then-else statement.)
+//////////////////////////////////////////////////////////////////////
+
+// Arguments: bs = best score so far, bt = its traceback value,
+// s = candidate score, t = candidate traceback value.
+// s is evaluated exactly once (into the local "work"); bs and bt must
+// be assignable; t is evaluated only when work > bs.  Ties keep the
+// existing bs/bt because the comparison is strict.
+// NOTE(review): the expansion declares a local named "work", so the
+// arguments must not refer to a caller variable with that name.
+#define UPDATE_MAX(bs,bt,s,t) { RealT work(s); if ((work)>(bs)) { (bs)=(work); (bt)=(t); } }
+
+//////////////////////////////////////////////////////////////////////
+// FillScores()
+// FillCounts()
+//
+// Routines for setting scores and counts quickly.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::FillScores(typename std::vector<std::pair<RealT, RealT> >::iterator begin, typename std::vector<std::pair<RealT, RealT> >::iterator end, RealT value)
+{
+    // Set the score half (.first) of every pair in [begin, end) to
+    // value; the count half (.second) is left untouched.
+    for (; begin != end; ++begin)
+        begin->first = value;
+}
+
+template<class RealT>
+void InferenceEngine<RealT>::FillCounts(typename std::vector<std::pair<RealT,RealT> >::iterator begin, typename std::vector<std::pair<RealT,RealT> >::iterator end, RealT value)
+{
+    // Set the count half (.second) of every pair in [begin, end) to
+    // value; the score half (.first) is left untouched.
+    for (; begin != end; ++begin)
+        begin->second = value;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ComputeRowOffset()
+//
+// Consider an N x N upper triangular matrix whose elements are
+// stored in a one-dimensional flat array using the following
+// row-major indexing scheme:
+//
+// 0 1 2 3 <-- row 0
+// 4 5 6 <-- row 1
+// 7 [8] <-- row 2
+// 9 <-- row 3
+//
+// Assuming 0-based indexing, this function computes offset[i]
+// for the ith row such that offset[i]+j is the index of the
+// (i,j)th element of the upper triangular matrix in the flat
+// array.
+//
+// For example, offset[2] = 5, so the (2,3)th element of the
+// upper triangular matrix (marked in the picture above) can be
+// found at position offset[2]+3 = 5+3 = 8 in the flat array.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+int InferenceEngine<RealT>::ComputeRowOffset(int i, int N) const
+{
+    Assert(i >= 0 && i <= N, "Index out-of-bounds.");
+
+    // Closed form of
+    //     N*(N+1)/2 - (N-i)*(N-i+1)/2 - i
+    // The product i*(2N-i-1) is always even, so the integer division
+    // below is exact.
+    const int twice_offset = i * (2*N - i - 1);
+    return twice_offset / 2;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::IsComplementary()
+//
+// Determine if a pair of positions is considered "complementary."
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+bool InferenceEngine<RealT>::IsComplementary(int i, int j) const
+{
+    Assert(1 <= i && i <= L, "Index out-of-bounds.");
+    Assert(1 <= j && j <= L, "Index out-of-bounds.");
+
+#if PROFILE
+    // Alignment mode: accumulate the sequence weights of the rows whose
+    // letters at columns i and j are complementary, and compare that
+    // fraction of the total weight against min((N-1)/N, 0.5).
+    RealT weight_complementary = 0;
+    RealT weight_all = 0;
+
+    for (int row = 0; row < N; row++)
+    {
+        const int base = row*(L+1);
+        if (is_complementary[A[base+i]][A[base+j]])
+            weight_complementary += weights[row];
+        weight_all += weights[row];
+    }
+
+    const RealT threshold = std::min(RealT(N-1) / RealT(N), RealT(0.5));
+    return weight_complementary / weight_all >= threshold;
+#else
+    // Single-sequence mode: direct table lookup on the encoded letters.
+    return is_complementary[s[i]][s[j]];
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::InferenceEngine()
+//
+// Constructor
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+InferenceEngine<RealT>::InferenceEngine(bool allow_noncomplementary) :
+    allow_noncomplementary(allow_noncomplementary),
+    cache_initialized(false),
+    parameter_manager(NULL),
+    L(0),
+    SIZE(0)
+#if PROFILE
+    , N(0)
+    , SIZE2(0)
+#endif
+
+{
+    // Character -> index table.  Every byte value starts out mapped to
+    // alphabet.size(); then both the lower- and upper-case form of each
+    // alphabet letter is mapped to that letter's position.
+    std::memset(char_mapping, BYTE(alphabet.size()), 256);
+    for (size_t idx = 0; idx < alphabet.size(); idx++)
+    {
+        char_mapping[BYTE(toupper(alphabet[idx]))] = idx;
+        char_mapping[BYTE(tolower(alphabet[idx]))] = idx;
+    }
+
+    // Complementarity table: clear everything first ...
+    for (int a = 0; a <= M; a++)
+    {
+        for (int b = 0; b <= M; b++)
+        {
+            is_complementary[a][b] = 0;
+        }
+    }
+
+    // ... then mark A-U, G-U and C-G in both orientations.
+    static const char pairings[3][2] = { {'A', 'U'}, {'G', 'U'}, {'C', 'G'} };
+    for (int p = 0; p < 3; p++)
+    {
+        const int left = char_mapping[BYTE(pairings[p][0])];
+        const int right = char_mapping[BYTE(pairings[p][1])];
+        is_complementary[left][right] = 1;
+        is_complementary[right][left] = 1;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::~InferenceEngine()
+//
+// Destructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+InferenceEngine<RealT>::~InferenceEngine()
+{
+    // no explicit cleanup is performed here
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::RegisterParameters()
+//
+// Establish a mapping between parameters in the inference
+// engine and parameters in the parameter manager.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::RegisterParameters(ParameterManager<RealT> &parameter_manager)
+{
+    // Purpose: remember the given parameter manager, clear it, and
+    // register the address of every score table entry under a textual
+    // parameter name.  Each feature family is compiled in only when its
+    // PARAMS_* switch is set.
+    //
+    // Conventions used throughout:
+    //   * Nucleotide tables are iterated over 0..M inclusive.  Entries
+    //     with any index equal to M are not registered; they are set to
+    //     the pair (0, 0) instead (presumably M is the code given to
+    //     characters outside the alphabet -- confirm against the
+    //     declaration of M).
+    //   * Where two orientations of the same motif exist, both table
+    //     entries are registered under the lexicographically smaller of
+    //     the two candidate names, so they end up with the same name.
+    //   * With MULTIPLE_HYPERPARAMETERS each family opens its own
+    //     parameter group; with SINGLE_HYPERPARAMETER one group named
+    //     "all_params" is opened up front.
+    char buffer[1000];
+    char buffer2[1000];
+
+    // registering parameters invalidates any previously built cache
+    cache_initialized = false;
+    this->parameter_manager = &parameter_manager;
+    parameter_manager.ClearParameters();
+
+#if SINGLE_HYPERPARAMETER
+    parameter_manager.AddParameterGroup("all_params");
+#endif
+
+#if PARAMS_BASE_PAIR
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("base_pair");
+#endif
+    for (int i = 0; i <= M; i++)
+    {
+        for (int j = 0; j <= M; j++)
+        {
+            if (i == M || j == M)
+            {
+                score_base_pair[i][j] = std::pair<RealT,RealT>(0, 0);
+            }
+            else
+            {
+                // (i,j) and (j,i) get the same name
+                sprintf(buffer, "base_pair_%c%c", alphabet[i], alphabet[j]);
+                sprintf(buffer2, "base_pair_%c%c", alphabet[j], alphabet[i]);
+                if (strcmp(buffer, buffer2) < 0)
+                    parameter_manager.AddParameterMapping(buffer, &score_base_pair[i][j]);
+                else
+                    parameter_manager.AddParameterMapping(buffer2, &score_base_pair[i][j]);
+            }
+        }
+    }
+#endif
+
+#if PARAMS_BASE_PAIR_DIST
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("base_pair_dist_at_least");
+#endif
+    for (int i = 0; i < D_MAX_BP_DIST_THRESHOLDS; i++)
+    {
+        sprintf(buffer, "base_pair_dist_at_least_%d", BP_DIST_THRESHOLDS[i]);
+        parameter_manager.AddParameterMapping(buffer, &score_base_pair_dist_at_least[i]);
+    }
+#endif
+
+#if PARAMS_TERMINAL_MISMATCH
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("terminal_mismatch");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int j1 = 0; j1 <= M; j1++)
+        {
+            for (int i2 = 0; i2 <= M; i2++)
+            {
+                for (int j2 = 0; j2 <= M; j2++)
+                {
+                    if (i1 == M || j1 == M || i2 == M || j2 == M)
+                    {
+                        score_terminal_mismatch[i1][j1][i2][j2] = std::pair<RealT,RealT>(0, 0);
+                    }
+                    else
+                    {
+                        sprintf(buffer, "terminal_mismatch_%c%c%c%c", alphabet[i1], alphabet[j1], alphabet[i2], alphabet[j2]);
+                        parameter_manager.AddParameterMapping(buffer, &score_terminal_mismatch[i1][j1][i2][j2]);
+                    }
+                }
+            }
+        }
+    }
+#endif
+
+#if PARAMS_HAIRPIN_LENGTH
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("hairpin_length_at_least");
+#endif
+    for (int i = 0; i <= D_MAX_HAIRPIN_LENGTH; i++)
+    {
+        sprintf(buffer, "hairpin_length_at_least_%d", i);
+        parameter_manager.AddParameterMapping(buffer, &score_hairpin_length_at_least[i]);
+    }
+#endif
+
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("hairpin_3_nucleotides");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int i2 = 0; i2 <= M; i2++)
+        {
+            for (int i3 = 0; i3 <= M; i3++)
+            {
+                if (i1 == M || i2 == M || i3 == M)
+                {
+                    score_hairpin_3_nucleotides[i1][i2][i3] = std::pair<RealT,RealT>(0, 0);
+                }
+                else
+                {
+                    sprintf(buffer, "hairpin_3_nucleotides_%c%c%c", alphabet[i1], alphabet[i2], alphabet[i3]);
+                    parameter_manager.AddParameterMapping(buffer, &score_hairpin_3_nucleotides[i1][i2][i3]);
+                }
+            }
+        }
+    }
+#endif
+
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("hairpin_4_nucleotides");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int i2 = 0; i2 <= M; i2++)
+        {
+            for (int i3 = 0; i3 <= M; i3++)
+            {
+                for (int i4 = 0; i4 <= M; i4++)
+                {
+                    if (i1 == M || i2 == M || i3 == M || i4 == M)
+                    {
+                        score_hairpin_4_nucleotides[i1][i2][i3][i4] = std::pair<RealT,RealT>(0, 0);
+                    }
+                    else
+                    {
+                        sprintf(buffer, "hairpin_4_nucleotides_%c%c%c%c", alphabet[i1], alphabet[i2], alphabet[i3], alphabet[i4]);
+                        parameter_manager.AddParameterMapping(buffer, &score_hairpin_4_nucleotides[i1][i2][i3][i4]);
+                    }
+                }
+            }
+        }
+    }
+#endif
+
+#if PARAMS_HELIX_LENGTH
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("helix_length_at_least");
+#endif
+    for (int i = 0; i <= D_MAX_HELIX_LENGTH; i++)
+    {
+        // lengths below 3 carry no parameter and are fixed at zero
+        if (i < 3)
+        {
+            score_helix_length_at_least[i] = std::pair<RealT,RealT>(0, 0);
+        }
+        else
+        {
+            sprintf(buffer, "helix_length_at_least_%d", i);
+            parameter_manager.AddParameterMapping(buffer, &score_helix_length_at_least[i]);
+        }
+    }
+#endif
+
+#if PARAMS_ISOLATED_BASE_PAIR
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("isolated_base_pair");
+#endif
+    parameter_manager.AddParameterMapping("isolated_base_pair", &score_isolated_base_pair);
+#endif
+
+#if PARAMS_INTERNAL_EXPLICIT
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("internal_explicit");
+#endif
+    for (int i = 0; i <= D_MAX_INTERNAL_EXPLICIT_LENGTH; i++)
+    {
+        for (int j = 0; j <= D_MAX_INTERNAL_EXPLICIT_LENGTH; j++)
+        {
+            if (i == 0 || j == 0)
+            {
+                score_internal_explicit[i][j] = std::pair<RealT,RealT>(0, 0);
+            }
+            else
+            {
+                // the name uses (min, max), so (i,j) and (j,i) coincide
+                sprintf(buffer, "internal_explicit_%d_%d", std::min(i, j), std::max(i, j));
+                parameter_manager.AddParameterMapping(buffer, &score_internal_explicit[i][j]);
+            }
+        }
+    }
+#endif
+
+#if PARAMS_BULGE_LENGTH
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("bulge_length_at_least");
+#endif
+    for (int i = 0; i <= D_MAX_BULGE_LENGTH; i++)
+    {
+        if (i == 0)
+        {
+            score_bulge_length_at_least[i] = std::pair<RealT,RealT>(0, 0);
+        }
+        else
+        {
+            sprintf(buffer, "bulge_length_at_least_%d", i);
+            parameter_manager.AddParameterMapping(buffer, &score_bulge_length_at_least[i]);
+        }
+    }
+#endif
+
+#if PARAMS_INTERNAL_LENGTH
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("internal_length_at_least");
+#endif
+    for (int i = 0; i <= D_MAX_INTERNAL_LENGTH; i++)
+    {
+        if (i < 2)
+        {
+            score_internal_length_at_least[i] = std::pair<RealT,RealT>(0, 0);
+        }
+        else
+        {
+            sprintf(buffer, "internal_length_at_least_%d", i);
+            parameter_manager.AddParameterMapping(buffer, &score_internal_length_at_least[i]);
+        }
+    }
+#endif
+
+#if PARAMS_INTERNAL_SYMMETRY
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("internal_symmetric_length_at_least");
+#endif
+    for (int i = 0; i <= D_MAX_INTERNAL_SYMMETRIC_LENGTH; i++)
+    {
+        if (i == 0)
+        {
+            score_internal_symmetric_length_at_least[i] = std::pair<RealT,RealT>(0, 0);
+        }
+        else
+        {
+            sprintf(buffer, "internal_symmetric_length_at_least_%d", i);
+            parameter_manager.AddParameterMapping(buffer, &score_internal_symmetric_length_at_least[i]);
+        }
+    }
+#endif
+
+#if PARAMS_INTERNAL_ASYMMETRY
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("internal_asymmetry_at_least");
+#endif
+    for (int i = 0; i <= D_MAX_INTERNAL_ASYMMETRY; i++)
+    {
+        if (i == 0)
+        {
+            score_internal_asymmetry_at_least[i] = std::pair<RealT,RealT>(0, 0);
+        }
+        else
+        {
+            sprintf(buffer, "internal_asymmetry_at_least_%d", i);
+            parameter_manager.AddParameterMapping(buffer, &score_internal_asymmetry_at_least[i]);
+        }
+    }
+#endif
+
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("bulge_0x1_nucleotides");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        if (i1 == M)
+        {
+            score_bulge_0x1_nucleotides[i1] = std::pair<RealT,RealT>(0, 0);
+            score_bulge_1x0_nucleotides[i1] = std::pair<RealT,RealT>(0, 0);
+        }
+        else
+        {
+            // the 0x1 and 1x0 tables are registered under one name
+            sprintf(buffer, "bulge_0x1_nucleotides_%c", alphabet[i1]);
+            parameter_manager.AddParameterMapping(buffer, &score_bulge_0x1_nucleotides[i1]);
+            parameter_manager.AddParameterMapping(buffer, &score_bulge_1x0_nucleotides[i1]);
+        }
+    }
+#endif
+
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("bulge_0x2_nucleotides");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int i2 = 0; i2 <= M; i2++)
+        {
+            if (i1 == M || i2 == M)
+            {
+                score_bulge_0x2_nucleotides[i1][i2] = std::pair<RealT,RealT>(0, 0);
+                score_bulge_2x0_nucleotides[i1][i2] = std::pair<RealT,RealT>(0, 0);
+            }
+            else
+            {
+                sprintf(buffer, "bulge_0x2_nucleotides_%c%c", alphabet[i1], alphabet[i2]);
+                parameter_manager.AddParameterMapping(buffer, &score_bulge_0x2_nucleotides[i1][i2]);
+                parameter_manager.AddParameterMapping(buffer, &score_bulge_2x0_nucleotides[i1][i2]);
+            }
+        }
+    }
+#endif
+
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("bulge_0x3_nucleotides");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int i2 = 0; i2 <= M; i2++)
+        {
+            for (int i3 = 0; i3 <= M; i3++)
+            {
+                // All three indices must be tested.  If i3 == M were let
+                // through, the name would be built from alphabet[M] and
+                // the entry registered as a parameter instead of being
+                // zeroed like every other table does for index M.
+                if (i1 == M || i2 == M || i3 == M)
+                {
+                    score_bulge_0x3_nucleotides[i1][i2][i3] = std::pair<RealT,RealT>(0, 0);
+                    score_bulge_3x0_nucleotides[i1][i2][i3] = std::pair<RealT,RealT>(0, 0);
+                }
+                else
+                {
+                    sprintf(buffer, "bulge_0x3_nucleotides_%c%c%c", alphabet[i1], alphabet[i2], alphabet[i3]);
+                    parameter_manager.AddParameterMapping(buffer, &score_bulge_0x3_nucleotides[i1][i2][i3]);
+                    parameter_manager.AddParameterMapping(buffer, &score_bulge_3x0_nucleotides[i1][i2][i3]);
+                }
+            }
+        }
+    }
+#endif
+
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("internal_1x1_nucleotides");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int i2 = 0; i2 <= M; i2++)
+        {
+            if (i1 == M || i2 == M)
+            {
+                score_internal_1x1_nucleotides[i1][i2] = std::pair<RealT,RealT>(0, 0);
+            }
+            else
+            {
+                // (i1,i2) and (i2,i1) get the same name
+                sprintf(buffer, "internal_1x1_nucleotides_%c%c", alphabet[i1], alphabet[i2]);
+                sprintf(buffer2, "internal_1x1_nucleotides_%c%c", alphabet[i2], alphabet[i1]);
+                if (strcmp(buffer, buffer2) < 0)
+                    parameter_manager.AddParameterMapping(buffer, &score_internal_1x1_nucleotides[i1][i2]);
+                else
+                    parameter_manager.AddParameterMapping(buffer2, &score_internal_1x1_nucleotides[i1][i2]);
+            }
+        }
+    }
+#endif
+
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("internal_1x2_nucleotides");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int i2 = 0; i2 <= M; i2++)
+        {
+            for (int i3 = 0; i3 <= M; i3++)
+            {
+                if (i1 == M || i2 == M || i3 == M)
+                {
+                    // Zero both tables, as the bulge sections do;
+                    // otherwise the 2x1 entries with an index of M would
+                    // never be given a value in this function.
+                    score_internal_1x2_nucleotides[i1][i2][i3] = std::pair<RealT,RealT>(0, 0);
+                    score_internal_2x1_nucleotides[i1][i2][i3] = std::pair<RealT,RealT>(0, 0);
+                }
+                else
+                {
+                    // unlike the bulges, 1x2 and 2x1 use distinct names
+                    sprintf(buffer, "internal_1x2_nucleotides_%c%c%c", alphabet[i1], alphabet[i2], alphabet[i3]);
+                    parameter_manager.AddParameterMapping(buffer, &score_internal_1x2_nucleotides[i1][i2][i3]);
+                    sprintf(buffer, "internal_2x1_nucleotides_%c%c%c", alphabet[i1], alphabet[i2], alphabet[i3]);
+                    parameter_manager.AddParameterMapping(buffer, &score_internal_2x1_nucleotides[i1][i2][i3]);
+                }
+            }
+        }
+    }
+#endif
+
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("internal_2x2_nucleotides");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int i2 = 0; i2 <= M; i2++)
+        {
+            for (int i3 = 0; i3 <= M; i3++)
+            {
+                for (int i4 = 0; i4 <= M; i4++)
+                {
+                    if (i1 == M || i2 == M || i3 == M || i4 == M)
+                    {
+                        score_internal_2x2_nucleotides[i1][i2][i3][i4] = std::pair<RealT,RealT>(0, 0);
+                    }
+                    else
+                    {
+                        // (i1,i2,i3,i4) and (i3,i4,i1,i2) get the same name
+                        sprintf(buffer, "internal_2x2_nucleotides_%c%c%c%c", alphabet[i1], alphabet[i2], alphabet[i3], alphabet[i4]);
+                        sprintf(buffer2, "internal_2x2_nucleotides_%c%c%c%c", alphabet[i3], alphabet[i4], alphabet[i1], alphabet[i2]);
+                        if (strcmp(buffer, buffer2) < 0)
+                            parameter_manager.AddParameterMapping(buffer, &score_internal_2x2_nucleotides[i1][i2][i3][i4]);
+                        else
+                            parameter_manager.AddParameterMapping(buffer2, &score_internal_2x2_nucleotides[i1][i2][i3][i4]);
+                    }
+                }
+            }
+        }
+    }
+#endif
+
+#if PARAMS_HELIX_STACKING
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("helix_stacking");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int j1 = 0; j1 <= M; j1++)
+        {
+            for (int i2 = 0; i2 <= M; i2++)
+            {
+                for (int j2 = 0; j2 <= M; j2++)
+                {
+                    if (i1 == M || j1 == M || i2 == M || j2 == M)
+                    {
+                        score_helix_stacking[i1][j1][i2][j2] = std::pair<RealT,RealT>(0, 0);
+                    }
+                    else
+                    {
+                        // (i1,j1,i2,j2) and (j2,i2,j1,i1) get the same name
+                        sprintf(buffer, "helix_stacking_%c%c%c%c", alphabet[i1], alphabet[j1], alphabet[i2], alphabet[j2]);
+                        sprintf(buffer2, "helix_stacking_%c%c%c%c", alphabet[j2], alphabet[i2], alphabet[j1], alphabet[i1]);
+                        if (strcmp(buffer, buffer2) < 0)
+                            parameter_manager.AddParameterMapping(buffer, &score_helix_stacking[i1][j1][i2][j2]);
+                        else
+                            parameter_manager.AddParameterMapping(buffer2, &score_helix_stacking[i1][j1][i2][j2]);
+                    }
+                }
+            }
+        }
+    }
+#endif
+
+#if PARAMS_HELIX_CLOSING
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("helix_closing");
+#endif
+    for (int i = 0; i <= M; i++)
+    {
+        for (int j = 0; j <= M; j++)
+        {
+            if (i == M || j == M)
+            {
+                score_helix_closing[i][j] = std::pair<RealT,RealT>(0, 0);
+            }
+            else
+            {
+                sprintf(buffer, "helix_closing_%c%c", alphabet[i], alphabet[j]);
+                parameter_manager.AddParameterMapping(buffer, &score_helix_closing[i][j]);
+            }
+        }
+    }
+#endif
+
+#if PARAMS_MULTI_LENGTH
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("multi_length");
+#endif
+    parameter_manager.AddParameterMapping("multi_base", &score_multi_base);
+    parameter_manager.AddParameterMapping("multi_unpaired", &score_multi_unpaired);
+    parameter_manager.AddParameterMapping("multi_paired", &score_multi_paired);
+#endif
+
+#if PARAMS_DANGLE
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("dangle");
+#endif
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int j1 = 0; j1 <= M; j1++)
+        {
+            for (int i2 = 0; i2 <= M; i2++)
+            {
+                if (i1 == M || j1 == M || i2 == M)
+                {
+                    score_dangle_left[i1][j1][i2] = std::pair<RealT,RealT>(0, 0);
+                }
+                else
+                {
+                    sprintf(buffer, "dangle_left_%c%c%c", alphabet[i1], alphabet[j1], alphabet[i2]);
+                    parameter_manager.AddParameterMapping(buffer, &score_dangle_left[i1][j1][i2]);
+                }
+            }
+        }
+    }
+
+    for (int i1 = 0; i1 <= M; i1++)
+    {
+        for (int j1 = 0; j1 <= M; j1++)
+        {
+            for (int j2 = 0; j2 <= M; j2++)
+            {
+                if (i1 == M || j1 == M || j2 == M)
+                {
+                    score_dangle_right[i1][j1][j2] = std::pair<RealT,RealT>(0, 0);
+                }
+                else
+                {
+                    sprintf(buffer, "dangle_right_%c%c%c", alphabet[i1], alphabet[j1], alphabet[j2]);
+                    parameter_manager.AddParameterMapping(buffer, &score_dangle_right[i1][j1][j2]);
+                }
+            }
+        }
+    }
+#endif
+
+#if PARAMS_EXTERNAL_LENGTH
+#if MULTIPLE_HYPERPARAMETERS
+    parameter_manager.AddParameterGroup("external_length");
+#endif
+    parameter_manager.AddParameterMapping("external_unpaired", &score_external_unpaired);
+    parameter_manager.AddParameterMapping("external_paired", &score_external_paired);
+#endif
+
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::LoadSequence()
+//
+// Load an RNA sequence.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::LoadSequence(const SStruct &sstruct)
+{
+    // make InitializeCache() rebuild its tables on next use
+    cache_initialized = false;
+
+    // dimensions: L letters (positions 1..L), SIZE cells in the
+    // upper-triangular tables over positions 0..L
+    L = sstruct.GetLength();
+    SIZE = (L+1)*(L+2) / 2;
+#if PROFILE
+    N = sstruct.GetNumSequences();
+    SIZE2 = (L+1)*(L+1);
+#endif
+
+    // storage for the encoded sequence(s)
+    s.resize(L+1);
+#if PROFILE
+    A.resize(N*(L+1));
+    weights.resize(N);
+#endif
+
+    // storage for the row offsets, constraint and loss tables
+    offset.resize(L+1);
+    allow_unpaired_position.resize(L+1);
+    allow_unpaired.resize(SIZE);
+    allow_paired.resize(SIZE);
+    loss_unpaired_position.resize(L+1);
+    loss_unpaired.resize(SIZE);
+    loss_paired.resize(SIZE);
+
+#if PROFILE
+
+    // per-position profile tables are emptied and re-sized so that no
+    // values from a previous sequence remain
+#if PARAMS_BASE_PAIR
+    profile_score_base_pair.clear();
+    profile_score_base_pair.resize(SIZE2);
+#endif
+#if PARAMS_TERMINAL_MISMATCH
+    profile_score_terminal_mismatch.clear();
+    profile_score_terminal_mismatch.resize(SIZE2);
+#endif
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+    profile_score_hairpin_3_nucleotides.clear();
+    profile_score_hairpin_3_nucleotides.resize(L+1);
+#endif
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+    profile_score_hairpin_4_nucleotides.clear();
+    profile_score_hairpin_4_nucleotides.resize(L+1);
+#endif
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+    profile_score_bulge_0x1_nucleotides.clear();
+    profile_score_bulge_0x1_nucleotides.resize(L+1);
+    profile_score_bulge_1x0_nucleotides.clear();
+    profile_score_bulge_1x0_nucleotides.resize(L+1);
+#endif
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+    profile_score_bulge_0x2_nucleotides.clear();
+    profile_score_bulge_0x2_nucleotides.resize(L+1);
+    profile_score_bulge_2x0_nucleotides.clear();
+    profile_score_bulge_2x0_nucleotides.resize(L+1);
+#endif
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+    profile_score_bulge_0x3_nucleotides.clear();
+    profile_score_bulge_0x3_nucleotides.resize(L+1);
+    profile_score_bulge_3x0_nucleotides.clear();
+    profile_score_bulge_3x0_nucleotides.resize(L+1);
+#endif
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+    profile_score_internal_1x1_nucleotides.clear();
+    profile_score_internal_1x1_nucleotides.resize(SIZE2);
+#endif
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+    profile_score_internal_1x2_nucleotides.clear();
+    profile_score_internal_1x2_nucleotides.resize(SIZE2);
+    profile_score_internal_2x1_nucleotides.clear();
+    profile_score_internal_2x1_nucleotides.resize(SIZE2);
+#endif
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+    profile_score_internal_2x2_nucleotides.clear();
+    profile_score_internal_2x2_nucleotides.resize(SIZE2);
+#endif
+#if PARAMS_HELIX_STACKING
+    profile_score_helix_stacking.clear();
+    profile_score_helix_stacking.resize(SIZE2);
+#endif
+#if PARAMS_HELIX_CLOSING
+    profile_score_helix_closing.clear();
+    profile_score_helix_closing.resize(SIZE2);
+#endif
+#if PARAMS_DANGLE
+    profile_score_dangle_left.clear();
+    profile_score_dangle_left.resize(SIZE2);
+    profile_score_dangle_right.clear();
+    profile_score_dangle_right.resize(SIZE2);
+#endif
+
+#endif
+
+#if FAST_HELIX_LENGTHS
+    cache_score_helix_sums.clear();
+    cache_score_helix_sums.resize((2*L+1)*L);
+#endif
+
+    // encode the first sequence; slot 0 holds the out-of-alphabet code
+    // and the letters are read from string positions 1..L
+    const std::string &sequence = sstruct.GetSequences()[0];
+    s[0] = BYTE(alphabet.size());
+    for (int pos = 1; pos <= L; pos++)
+        s[pos] = char_mapping[BYTE(sequence[pos])];
+
+#if PROFILE
+    // encode every aligned sequence the same way, one row of L+1
+    // entries per sequence
+    const std::vector<std::string> &alignment = sstruct.GetSequences();
+    for (int row = 0; row < N; row++)
+    {
+        const int base = row*(L+1);
+        A[base+0] = BYTE(alphabet.size());
+        for (int pos = 1; pos <= L; pos++)
+            A[base+pos] = char_mapping[BYTE(alignment[row][pos])];
+    }
+
+    weights = ConvertVector<RealT>(sstruct.ComputePositionBasedSequenceWeights());
+#endif
+
+    // row offsets of the upper-triangular layout; by default every
+    // position may be unpaired and carries zero loss
+    for (int row = 0; row <= L; row++)
+    {
+        offset[row] = ComputeRowOffset(row,L+1);
+        allow_unpaired_position[row] = 1;
+        loss_unpaired_position[row] = RealT(0);
+    }
+
+    // by default every range may be unpaired and every pair is
+    // allowed, all with zero loss
+    for (int cell = 0; cell < SIZE; cell++)
+    {
+        allow_unpaired[cell] = 1;
+        allow_paired[cell] = 1;
+        loss_unpaired[cell] = RealT(0);
+        loss_paired[cell] = RealT(0);
+    }
+
+    // position 0 (the non-letter) never pairs, and no position pairs
+    // with itself
+    for (int pos = 0; pos <= L; pos++)
+    {
+        allow_paired[offset[0]+pos] = 0;
+        allow_paired[offset[pos]+pos] = 0;
+    }
+
+    // nothing more to do when non-complementary pairs are permitted
+    if (allow_noncomplementary) return;
+
+    // otherwise forbid every pair of non-complementary letters
+    for (int left = 1; left <= L; left++)
+    {
+        for (int right = left+1; right <= L; right++)
+        {
+            if (IsComplementary(left,right)) continue;
+            allow_paired[offset[left]+right] = 0;
+        }
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::InitializeCache()
+//
+// Initialize scoring cache prior to inference.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::InitializeCache()
+{
+ if (cache_initialized) return;
+ cache_initialized = true;
+
+ // initialize length and distance scoring
+#if PARAMS_BASE_PAIR_DIST
+ for (int j = 0; j <= BP_DIST_LAST_THRESHOLD; j++)
+ cache_score_base_pair_dist[j].first = RealT(0);
+ for (int i = 0; i < D_MAX_BP_DIST_THRESHOLDS; i++)
+ for (int j = BP_DIST_THRESHOLDS[i]; j <= BP_DIST_LAST_THRESHOLD; j++)
+ cache_score_base_pair_dist[j].first += score_base_pair_dist_at_least[i].first;
+#endif
+
+#if PARAMS_HAIRPIN_LENGTH
+ cache_score_hairpin_length[0].first = score_hairpin_length_at_least[0].first;
+ for (int i = 1; i <= D_MAX_HAIRPIN_LENGTH; i++)
+ cache_score_hairpin_length[i].first = cache_score_hairpin_length[i-1].first + score_hairpin_length_at_least[i].first;
+#endif
+
+#if PARAMS_HELIX_LENGTH
+ cache_score_helix_length[0].first = score_helix_length_at_least[0].first;
+ for (int i = 1; i <= D_MAX_HELIX_LENGTH; i++)
+ cache_score_helix_length[i].first = cache_score_helix_length[i-1].first + score_helix_length_at_least[i].first;
+#endif
+
+#if PARAMS_BULGE_LENGTH
+ RealT temp_cache_score_bulge_length[D_MAX_BULGE_LENGTH+1];
+ temp_cache_score_bulge_length[0] = score_bulge_length_at_least[0].first;
+ for (int i = 1; i <= D_MAX_BULGE_LENGTH; i++)
+ temp_cache_score_bulge_length[i] = temp_cache_score_bulge_length[i-1] + score_bulge_length_at_least[i].first;
+#endif
+
+#if PARAMS_INTERNAL_LENGTH
+ RealT temp_cache_score_internal_length[D_MAX_INTERNAL_LENGTH+1];
+ temp_cache_score_internal_length[0] = score_internal_length_at_least[0].first;
+ for (int i = 1; i <= D_MAX_INTERNAL_LENGTH; i++)
+ temp_cache_score_internal_length[i] = temp_cache_score_internal_length[i-1] + score_internal_length_at_least[i].first;
+#endif
+
+#if PARAMS_INTERNAL_SYMMETRY
+ RealT temp_cache_score_internal_symmetric_length[D_MAX_INTERNAL_SYMMETRIC_LENGTH+1];
+ temp_cache_score_internal_symmetric_length[0] = score_internal_symmetric_length_at_least[0].first;
+ for (int i = 1; i <= D_MAX_INTERNAL_SYMMETRIC_LENGTH; i++)
+ temp_cache_score_internal_symmetric_length[i] = temp_cache_score_internal_symmetric_length[i-1] + score_internal_symmetric_length_at_least[i].first;
+#endif
+
+#if PARAMS_INTERNAL_ASYMMETRY
+ RealT temp_cache_score_internal_asymmetry[D_MAX_INTERNAL_ASYMMETRY+1];
+ temp_cache_score_internal_asymmetry[0] = score_internal_asymmetry_at_least[0].first;
+ for (int i = 1; i <= D_MAX_INTERNAL_ASYMMETRY; i++)
+ temp_cache_score_internal_asymmetry[i] = temp_cache_score_internal_asymmetry[i-1] + score_internal_asymmetry_at_least[i].first;
+#endif
+
+ // precompute score for single-branch loops of length l1 and l2
+ for (int l1 = 0; l1 <= C_MAX_SINGLE_LENGTH; l1++)
+ {
+ for (int l2 = 0; l1+l2 <= C_MAX_SINGLE_LENGTH; l2++)
+ {
+ cache_score_single[l1][l2].first = RealT(0);
+
+ // skip over stacking pairs
+ if (l1 == 0 && l2 == 0) continue;
+
+ // consider bulge loops
+ if (l1 == 0 || l2 == 0)
+ {
+#if PARAMS_BULGE_LENGTH
+ cache_score_single[l1][l2].first += temp_cache_score_bulge_length[std::min(D_MAX_BULGE_LENGTH, l1+l2)];
+#endif
+ }
+
+ // consider internal loops
+ else
+ {
+#if PARAMS_INTERNAL_EXPLICIT
+ if (l1 <= D_MAX_INTERNAL_EXPLICIT_LENGTH && l2 <= D_MAX_INTERNAL_EXPLICIT_LENGTH)
+ cache_score_single[l1][l2].first += score_internal_explicit[l1][l2].first;
+#endif
+#if PARAMS_INTERNAL_LENGTH
+ cache_score_single[l1][l2].first += temp_cache_score_internal_length[std::min(D_MAX_INTERNAL_LENGTH, l1+l2)];
+#endif
+#if PARAMS_INTERNAL_SYMMETRY
+ if (l1 == l2)
+ cache_score_single[l1][l2].first += temp_cache_score_internal_symmetric_length[std::min(D_MAX_INTERNAL_SYMMETRIC_LENGTH, l1)];
+#endif
+#if PARAMS_INTERNAL_ASYMMETRY
+ cache_score_single[l1][l2].first += temp_cache_score_internal_asymmetry[std::min(D_MAX_INTERNAL_ASYMMETRY, Abs(l1-l2))];
+#endif
+ }
+ }
+ }
+
+#if PROFILE
+ // initialize counts for profile scoring
+ for (int i = 0; i <= L; i++)
+ {
+ for (int j = 0; j <= L; j++)
+ {
+#if PARAMS_BASE_PAIR
+ {
+ const int pos[2] = {i, j};
+ ComputeProfileScore(profile_score_base_pair[i*(L+1)+j].first, pos, 2, reinterpret_cast<std::pair<RealT,RealT> *>(score_base_pair));
+ }
+#endif
+#if PARAMS_TERMINAL_MISMATCH
+ {
+ const int pos[4] = {i, j+1, i+1, j};
+ ComputeProfileScore(profile_score_terminal_mismatch[i*(L+1)+j].first, pos, 4, reinterpret_cast<std::pair<RealT,RealT> *>(score_terminal_mismatch));
+ }
+#endif
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+ if (j == 0)
+ {
+ const int pos[3] = {i+1, i+2, i+3};
+ ComputeProfileScore(profile_score_hairpin_3_nucleotides[i].first, pos, 3, reinterpret_cast<std::pair<RealT,RealT> *>(score_hairpin_3_nucleotides));
+ }
+#endif
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+ if (j == 0)
+ {
+ const int pos[4] = {i+1, i+2, i+3, i+4};
+ ComputeProfileScore(profile_score_hairpin_4_nucleotides[i].first, pos, 4, reinterpret_cast<std::pair<RealT,RealT> *>(score_hairpin_4_nucleotides));
+ }
+#endif
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+ if (i == 0)
+ {
+ const int pos[1] = {j};
+ ComputeProfileScore(profile_score_bulge_0x1_nucleotides[j].first, pos, 1, reinterpret_cast<std::pair<RealT,RealT> *>(score_bulge_0x1_nucleotides));
+ }
+ if (j == 0)
+ {
+ const int pos[1] = {i+1};
+ ComputeProfileScore(profile_score_bulge_1x0_nucleotides[i].first, pos, 1, reinterpret_cast<std::pair<RealT,RealT> *>(score_bulge_1x0_nucleotides));
+ }
+#endif
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+ if (i == 0)
+ {
+ const int pos[2] = {j-1, j};
+ ComputeProfileScore(profile_score_bulge_0x2_nucleotides[j].first, pos, 2, reinterpret_cast<std::pair<RealT,RealT> *>(score_bulge_0x2_nucleotides));
+ }
+ if (j == 0)
+ {
+ const int pos[2] = {i+1, i+2};
+ ComputeProfileScore(profile_score_bulge_2x0_nucleotides[i].first, pos, 2, reinterpret_cast<std::pair<RealT,RealT> *>(score_bulge_2x0_nucleotides));
+ }
+#endif
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+ if (i == 0)
+ {
+ const int pos[3] = {j-2, j-1, j};
+ ComputeProfileScore(profile_score_bulge_0x3_nucleotides[j].first, pos, 3, reinterpret_cast<std::pair<RealT,RealT> *>(score_bulge_0x3_nucleotides));
+ }
+ if (j == 0)
+ {
+ const int pos[3] = {i+1, i+2, i+3};
+ ComputeProfileScore(profile_score_bulge_3x0_nucleotides[i].first, pos, 3, reinterpret_cast<std::pair<RealT,RealT> *>(score_bulge_3x0_nucleotides));
+ }
+#endif
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+ {
+ const int pos[2] = {i+1, j};
+ ComputeProfileScore(profile_score_internal_1x1_nucleotides[i*(L+1)+j].first, pos, 2, reinterpret_cast<std::pair<RealT,RealT> *>(score_internal_1x1_nucleotides));
+ }
+#endif
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+ {
+ const int pos[3] = {i+1, j-1, j};
+ ComputeProfileScore(profile_score_internal_1x2_nucleotides[i*(L+1)+j].first, pos, 3, reinterpret_cast<std::pair<RealT,RealT> *>(score_internal_1x2_nucleotides));
+ }
+ {
+ const int pos[3] = {i+1, i+2, j};
+ ComputeProfileScore(profile_score_internal_2x1_nucleotides[i*(L+1)+j].first, pos, 3, reinterpret_cast<std::pair<RealT,RealT> *>(score_internal_2x1_nucleotides));
+ }
+#endif
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+ {
+ const int pos[4] = {i+1, i+2, j-1, j};
+ ComputeProfileScore(profile_score_internal_2x2_nucleotides[i*(L+1)+j].first, pos, 4, reinterpret_cast<std::pair<RealT,RealT> *>(score_internal_2x2_nucleotides));
+ }
+#endif
+#if PARAMS_HELIX_STACKING
+ {
+ const int pos[4] = {i, j, i+1, j-1};
+ ComputeProfileScore(profile_score_helix_stacking[i*(L+1)+j].first, pos, 4, reinterpret_cast<std::pair<RealT,RealT> *>(score_helix_stacking));
+ }
+#endif
+#if PARAMS_HELIX_CLOSING
+ {
+ const int pos[2] = {i, j+1};
+ ComputeProfileScore(profile_score_helix_closing[i*(L+1)+j].first, pos, 2, reinterpret_cast<std::pair<RealT,RealT> *>(score_helix_closing));
+ }
+#endif
+#if PARAMS_DANGLE
+ {
+ const int pos[3] = {i, j+1, i+1};
+ ComputeProfileScore(profile_score_dangle_left[i*(L+1)+j].first, pos, 3, reinterpret_cast<std::pair<RealT,RealT> *>(score_dangle_left));
+ }
+ {
+ const int pos[3] = {i, j+1, j};
+ ComputeProfileScore(profile_score_dangle_right[i*(L+1)+j].first, pos, 3, reinterpret_cast<std::pair<RealT,RealT> *>(score_dangle_right));
+ }
+#endif
+ }
+ }
+
+#endif
+
+#if FAST_HELIX_LENGTHS
+ // precompute helix partial sums
+ FillScores(cache_score_helix_sums.begin(), cache_score_helix_sums.end(), RealT(0));
+ for (int i = L; i >= 1; i--)
+ {
+ for (int j = i+3; j <= L; j++)
+ {
+ cache_score_helix_sums[(i+j)*L+j-i].first = cache_score_helix_sums[(i+j)*L+j-i-2].first;
+ if (allow_paired[offset[i+1]+j-1])
+ {
+ cache_score_helix_sums[(i+j)*L+j-i].first += ScoreBasePair(i+1,j-1);
+ if (allow_paired[offset[i]+j])
+ cache_score_helix_sums[(i+j)*L+j-i].first += ScoreHelixStacking(i,j);
+ }
+ }
+ }
+#endif
+
+}
+
+#if PROFILE
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ComputeProfileScore()
+//
+// Compute profile score for a single location.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::ComputeProfileScore(RealT &profile_score, const int *pos, int dimensions, std::pair<RealT,RealT> *table)
+{
+ profile_score = 0; // the output is overwritten, not accumulated into
+
+ // consider all N sequences; the letters of sequence k are read starting at A[k*(L+1)]
+ for (int k = 0; k < N; k++)
+ {
+ bool valid = true;
+ int index = 0; // flattened offset into table, extended by one letter per dimension
+ int *seq = &A[k*(L+1)];
+
+ // extract letters of the pattern for the current sequence, stopping at the first invalid one
+ for (int d = 0; valid && d < dimensions; d++)
+ {
+ if (pos[d] < 1 || pos[d] > L) // position outside 1..L: sequence k contributes nothing
+ valid = false;
+ else
+ {
+ BYTE c = seq[pos[d]];
+ if (c == BYTE(alphabet.size())) // code alphabet.size() is rejected (presumably a gap/unknown letter -- TODO confirm)
+ valid = false;
+ else
+ index = index * (M+1) + c; // mixed-radix step with base M+1
+ }
+ }
+
+ // add the pattern's score (table[index].first), scaled by weights[k], to the output
+ if (valid) profile_score += weights[k] * table[index].first;
+ }
+}
+
+#endif
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::LoadValues()
+//
+// Load parameter values.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::LoadValues(const std::vector<RealT> &values)
+{
+ if (values.size() != parameter_manager->GetNumLogicalParameters()) Error("Parameter size mismatch.");
+ // exactly one value per logical parameter is required (checked above); loading marks the cache as not initialized
+ cache_initialized = false;
+ for (size_t i = 0; i < values.size(); i++)
+ {
+ std::vector<std::pair<RealT,RealT> *> physical_parameters = parameter_manager->GetPhysicalParameters(i);
+ for (size_t j = 0; j < physical_parameters.size(); j++)
+ physical_parameters[j]->first = values[i]; // every physical parameter of logical parameter i receives the same value
+ }
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::GetCounts()
+//
+// Return counts.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<RealT> InferenceEngine<RealT>::GetCounts()
+{
+ std::vector<RealT> counts(parameter_manager->GetNumLogicalParameters()); // value-initialized, so every entry starts at zero
+
+ // sum the counts (.second) of all physical parameters belonging to each logical parameter
+ for (size_t i = 0; i < parameter_manager->GetNumLogicalParameters(); i++)
+ {
+ std::vector<std::pair<RealT,RealT> *> physical_parameters = parameter_manager->GetPhysicalParameters(i);
+ for (size_t j = 0; j < physical_parameters.size(); j++)
+ counts[i] += physical_parameters[j]->second;
+ }
+
+ return counts;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ClearCounts()
+//
+// Set all counts to zero.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::ClearCounts()
+{
+ // clear counts (.second) for the physical parameters of every logical parameter
+ for (size_t i = 0; i < parameter_manager->GetNumLogicalParameters(); i++)
+ {
+ std::vector<std::pair<RealT,RealT> *> physical_parameters = parameter_manager->GetPhysicalParameters(i);
+ for (size_t j = 0; j < physical_parameters.size(); j++)
+ physical_parameters[j]->second = RealT(0);
+ }
+
+ // clear counts for cache (only the .second members; cached scores in .first are left alone)
+#if PARAMS_BASE_PAIR_DIST
+ for (int i = 0; i <= BP_DIST_LAST_THRESHOLD; i++)
+ cache_score_base_pair_dist[i].second = RealT(0);
+#endif
+
+#if PARAMS_HAIRPIN_LENGTH
+ for (int i = 0; i <= D_MAX_HAIRPIN_LENGTH; i++)
+ cache_score_hairpin_length[i].second = RealT(0);
+#endif
+
+#if PARAMS_HELIX_LENGTH
+ for (int i = 0; i <= D_MAX_HELIX_LENGTH; i++)
+ cache_score_helix_length[i].second = RealT(0);
+#endif
+ // single-branch loop cache: every (l1,l2) entry is cleared, not only those with l1+l2 <= C_MAX_SINGLE_LENGTH
+ for (int l1 = 0; l1 <= C_MAX_SINGLE_LENGTH; l1++)
+ for (int l2 = 0; l2 <= C_MAX_SINGLE_LENGTH; l2++)
+ cache_score_single[l1][l2].second = RealT(0);
+
+#if FAST_HELIX_LENGTHS
+ FillCounts(cache_score_helix_sums.begin(), cache_score_helix_sums.end(), RealT(0));
+#endif
+
+ // clear counts for profiles (one FillCounts call per profile table that is compiled in)
+#if PROFILE
+
+#if PARAMS_BASE_PAIR
+ FillCounts(profile_score_base_pair.begin(), profile_score_base_pair.end(), RealT(0));
+#endif
+#if PARAMS_TERMINAL_MISMATCH
+ FillCounts(profile_score_terminal_mismatch.begin(), profile_score_terminal_mismatch.end(), RealT(0));
+#endif
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+ FillCounts(profile_score_hairpin_3_nucleotides.begin(), profile_score_hairpin_3_nucleotides.end(), RealT(0));
+#endif
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+ FillCounts(profile_score_hairpin_4_nucleotides.begin(), profile_score_hairpin_4_nucleotides.end(), RealT(0));
+#endif
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+ FillCounts(profile_score_bulge_0x1_nucleotides.begin(), profile_score_bulge_0x1_nucleotides.end(), RealT(0));
+ FillCounts(profile_score_bulge_1x0_nucleotides.begin(), profile_score_bulge_1x0_nucleotides.end(), RealT(0));
+#endif
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+ FillCounts(profile_score_bulge_0x2_nucleotides.begin(), profile_score_bulge_0x2_nucleotides.end(), RealT(0));
+ FillCounts(profile_score_bulge_2x0_nucleotides.begin(), profile_score_bulge_2x0_nucleotides.end(), RealT(0));
+#endif
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+ FillCounts(profile_score_bulge_0x3_nucleotides.begin(), profile_score_bulge_0x3_nucleotides.end(), RealT(0));
+ FillCounts(profile_score_bulge_3x0_nucleotides.begin(), profile_score_bulge_3x0_nucleotides.end(), RealT(0));
+#endif
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+ FillCounts(profile_score_internal_1x1_nucleotides.begin(), profile_score_internal_1x1_nucleotides.end(), RealT(0));
+#endif
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+ FillCounts(profile_score_internal_1x2_nucleotides.begin(), profile_score_internal_1x2_nucleotides.end(), RealT(0));
+ FillCounts(profile_score_internal_2x1_nucleotides.begin(), profile_score_internal_2x1_nucleotides.end(), RealT(0));
+#endif
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+ FillCounts(profile_score_internal_2x2_nucleotides.begin(), profile_score_internal_2x2_nucleotides.end(), RealT(0));
+#endif
+#if PARAMS_HELIX_STACKING
+ FillCounts(profile_score_helix_stacking.begin(), profile_score_helix_stacking.end(), RealT(0));
+#endif
+#if PARAMS_HELIX_CLOSING
+ FillCounts(profile_score_helix_closing.begin(), profile_score_helix_closing.end(), RealT(0));
+#endif
+#if PARAMS_DANGLE
+ FillCounts(profile_score_dangle_left.begin(), profile_score_dangle_left.end(), RealT(0));
+ FillCounts(profile_score_dangle_right.begin(), profile_score_dangle_right.end(), RealT(0));
+#endif
+
+#endif
+
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::FinalizeCounts()
+//
+// Apply any needed transformations to counts.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::FinalizeCounts()
+{
+#if FAST_HELIX_LENGTHS
+
+ // reverse helix partial sums; work on a copy so that cache_score_helix_sums itself is not modified
+ std::vector<std::pair<RealT,RealT> > reverse_sums(cache_score_helix_sums);
+ // each entry is first credited to base-pair/stacking counts, then added to the entry two slots below it
+ for (int i = 1; i <= L; i++)
+ {
+ for (int j = L; j >= i+3; j--)
+ {
+ // the "if" conditions here can be omitted
+
+ if (allow_paired[offset[i+1]+j-1])
+ {
+ CountBasePair(i+1,j-1,reverse_sums[(i+j)*L+j-i].second);
+ if (allow_paired[offset[i]+j])
+ {
+ CountHelixStacking(i,j,reverse_sums[(i+j)*L+j-i].second);
+ }
+ else
+ {
+ Assert(Abs(double(reverse_sums[(i+j)*L+j-i].second)) < 1e-8, "Should be zero.");
+ }
+ }
+ else
+ {
+ Assert(Abs(double(reverse_sums[(i+j)*L+j-i-2].second)) < 1e-8, "Should be zero.");
+ }
+
+ reverse_sums[(i+j)*L+j-i-2].second += reverse_sums[(i+j)*L+j-i].second;
+ }
+ }
+#endif
+
+ // perform transformations: each "at least" parameter receives the cached counts of every index at or above its threshold
+#if PARAMS_BASE_PAIR_DIST
+ for (int i = 0; i < D_MAX_BP_DIST_THRESHOLDS; i++)
+ for (int j = BP_DIST_THRESHOLDS[i]; j <= BP_DIST_LAST_THRESHOLD; j++)
+ score_base_pair_dist_at_least[i].second += cache_score_base_pair_dist[j].second;
+#endif
+
+#if PARAMS_HAIRPIN_LENGTH
+ for (int i = 0; i <= D_MAX_HAIRPIN_LENGTH; i++)
+ for (int j = i; j <= D_MAX_HAIRPIN_LENGTH; j++)
+ score_hairpin_length_at_least[i].second += cache_score_hairpin_length[j].second;
+#endif
+
+#if PARAMS_HELIX_LENGTH
+ for (int i = 0; i <= D_MAX_HELIX_LENGTH; i++)
+ for (int j = i; j <= D_MAX_HELIX_LENGTH; j++)
+ score_helix_length_at_least[i].second += cache_score_helix_length[j].second;
+#endif
+
+ // allocate temporary storage (zero-filled accumulators, one slot per capped length)
+#if PARAMS_BULGE_LENGTH
+ RealT temp_cache_counts_bulge_length[D_MAX_BULGE_LENGTH+1];
+ std::fill(temp_cache_counts_bulge_length, temp_cache_counts_bulge_length + D_MAX_BULGE_LENGTH+1, RealT(0));
+#endif
+
+#if PARAMS_INTERNAL_LENGTH
+ RealT temp_cache_counts_internal_length[D_MAX_INTERNAL_LENGTH+1];
+ std::fill(temp_cache_counts_internal_length, temp_cache_counts_internal_length + D_MAX_INTERNAL_LENGTH+1, RealT(0));
+#endif
+
+#if PARAMS_INTERNAL_SYMMETRY
+ RealT temp_cache_counts_internal_symmetric_length[D_MAX_INTERNAL_SYMMETRIC_LENGTH+1];
+ std::fill(temp_cache_counts_internal_symmetric_length, temp_cache_counts_internal_symmetric_length + D_MAX_INTERNAL_SYMMETRIC_LENGTH+1, RealT(0));
+#endif
+
+#if PARAMS_INTERNAL_ASYMMETRY
+ RealT temp_cache_counts_internal_asymmetry[D_MAX_INTERNAL_ASYMMETRY+1];
+ std::fill(temp_cache_counts_internal_asymmetry, temp_cache_counts_internal_asymmetry + D_MAX_INTERNAL_ASYMMETRY+1, RealT(0));
+#endif
+
+ // compute contributions: bucket each cache_score_single[l1][l2] count by loop type and capped length
+ for (int l1 = 0; l1 <= C_MAX_SINGLE_LENGTH; l1++)
+ {
+ for (int l2 = 0; l1+l2 <= C_MAX_SINGLE_LENGTH; l2++)
+ {
+ // skip over stacking pairs
+ if (l1 == 0 && l2 == 0) continue;
+
+ // consider bulge loops
+ if (l1 == 0 || l2 == 0)
+ {
+#if PARAMS_BULGE_LENGTH
+ temp_cache_counts_bulge_length[std::min(D_MAX_BULGE_LENGTH, l1+l2)] += cache_score_single[l1][l2].second;
+#endif
+ }
+
+ // consider internal loops
+ else
+ {
+#if PARAMS_INTERNAL_EXPLICIT
+ if (l1 <= D_MAX_INTERNAL_EXPLICIT_LENGTH && l2 <= D_MAX_INTERNAL_EXPLICIT_LENGTH)
+ score_internal_explicit[l1][l2].second += cache_score_single[l1][l2].second;
+#endif
+#if PARAMS_INTERNAL_LENGTH
+ temp_cache_counts_internal_length[std::min(D_MAX_INTERNAL_LENGTH, l1+l2)] += cache_score_single[l1][l2].second;
+#endif
+#if PARAMS_INTERNAL_SYMMETRY
+ if (l1 == l2)
+ temp_cache_counts_internal_symmetric_length[std::min(D_MAX_INTERNAL_SYMMETRIC_LENGTH, l1)] += cache_score_single[l1][l2].second;
+#endif
+#if PARAMS_INTERNAL_ASYMMETRY
+ temp_cache_counts_internal_asymmetry[std::min(D_MAX_INTERNAL_ASYMMETRY, Abs(l1-l2))] += cache_score_single[l1][l2].second;
+#endif
+ }
+ }
+ }
+
+#if PARAMS_BULGE_LENGTH
+ for (int i = 0; i <= D_MAX_BULGE_LENGTH; i++)
+ for (int j = i; j <= D_MAX_BULGE_LENGTH; j++)
+ score_bulge_length_at_least[i].second += temp_cache_counts_bulge_length[j];
+#endif
+
+#if PARAMS_INTERNAL_LENGTH
+ for (int i = 0; i <= D_MAX_INTERNAL_LENGTH; i++)
+ for (int j = i; j <= D_MAX_INTERNAL_LENGTH; j++)
+ score_internal_length_at_least[i].second += temp_cache_counts_internal_length[j];
+#endif
+
+#if PARAMS_INTERNAL_SYMMETRY
+ for (int i = 0; i <= D_MAX_INTERNAL_SYMMETRIC_LENGTH; i++)
+ for (int j = i; j <= D_MAX_INTERNAL_SYMMETRIC_LENGTH; j++)
+ score_internal_symmetric_length_at_least[i].second += temp_cache_counts_internal_symmetric_length[j];
+#endif
+
+#if PARAMS_INTERNAL_ASYMMETRY
+ for (int i = 0; i <= D_MAX_INTERNAL_ASYMMETRY; i++)
+ for (int j = i; j <= D_MAX_INTERNAL_ASYMMETRY; j++)
+ score_internal_asymmetry_at_least[i].second += temp_cache_counts_internal_asymmetry[j];
+#endif
+
+ // finalize profile counts: push each per-position profile count back into the underlying parameter table
+#if PROFILE
+ for (int i = 0; i <= L; i++)
+ {
+ for (int j = 0; j <= L; j++)
+ {
+#if PARAMS_BASE_PAIR
+ {
+ const int pos[2] = {i, j};
+ ConvertProfileCount(profile_score_base_pair[i*(L+1)+j].second, pos, 2, reinterpret_cast<std::pair<RealT, RealT> *>(score_base_pair));
+ }
+#endif
+#if PARAMS_TERMINAL_MISMATCH
+ {
+ const int pos[4] = {i, j+1, i+1, j};
+ ConvertProfileCount(profile_score_terminal_mismatch[i*(L+1)+j].second, pos, 4, reinterpret_cast<std::pair<RealT, RealT> *>(score_terminal_mismatch));
+ }
+#endif
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+ if (j == 0)
+ {
+ const int pos[3] = {i+1, i+2, i+3};
+ ConvertProfileCount(profile_score_hairpin_3_nucleotides[i].second, pos, 3, reinterpret_cast<std::pair<RealT, RealT> *>(score_hairpin_3_nucleotides));
+ }
+#endif
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+ if (j == 0)
+ {
+ const int pos[4] = {i+1, i+2, i+3, i+4};
+ ConvertProfileCount(profile_score_hairpin_4_nucleotides[i].second, pos, 4, reinterpret_cast<std::pair<RealT, RealT> *>(score_hairpin_4_nucleotides));
+ }
+#endif
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+ if (i == 0)
+ {
+ const int pos[1] = {j};
+ ConvertProfileCount(profile_score_bulge_0x1_nucleotides[j].second, pos, 1, reinterpret_cast<std::pair<RealT, RealT> *>(score_bulge_0x1_nucleotides));
+ }
+ if (j == 0)
+ {
+ const int pos[1] = {i+1};
+ ConvertProfileCount(profile_score_bulge_1x0_nucleotides[i].second, pos, 1, reinterpret_cast<std::pair<RealT, RealT> *>(score_bulge_1x0_nucleotides));
+ }
+#endif
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+ if (i == 0)
+ {
+ const int pos[2] = {j-1, j};
+ ConvertProfileCount(profile_score_bulge_0x2_nucleotides[j].second, pos, 2, reinterpret_cast<std::pair<RealT, RealT> *>(score_bulge_0x2_nucleotides));
+ }
+ if (j == 0)
+ {
+ const int pos[2] = {i+1, i+2};
+ ConvertProfileCount(profile_score_bulge_2x0_nucleotides[i].second, pos, 2, reinterpret_cast<std::pair<RealT, RealT> *>(score_bulge_2x0_nucleotides));
+ }
+#endif
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+ if (i == 0)
+ {
+ const int pos[3] = {j-2, j-1, j};
+ ConvertProfileCount(profile_score_bulge_0x3_nucleotides[j].second, pos, 3, reinterpret_cast<std::pair<RealT, RealT> *>(score_bulge_0x3_nucleotides));
+ }
+ if (j == 0)
+ {
+ const int pos[3] = {i+1, i+2, i+3};
+ ConvertProfileCount(profile_score_bulge_3x0_nucleotides[i].second, pos, 3, reinterpret_cast<std::pair<RealT, RealT> *>(score_bulge_3x0_nucleotides));
+ }
+#endif
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+ {
+ const int pos[2] = {i+1, j};
+ ConvertProfileCount(profile_score_internal_1x1_nucleotides[i*(L+1)+j].second, pos, 2, reinterpret_cast<std::pair<RealT, RealT> *>(score_internal_1x1_nucleotides));
+ }
+#endif
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+ {
+ const int pos[3] = {i+1, j-1, j};
+ ConvertProfileCount(profile_score_internal_1x2_nucleotides[i*(L+1)+j].second, pos, 3, reinterpret_cast<std::pair<RealT, RealT> *>(score_internal_1x2_nucleotides));
+ }
+ {
+ const int pos[3] = {i+1, i+2, j};
+ ConvertProfileCount(profile_score_internal_2x1_nucleotides[i*(L+1)+j].second, pos, 3, reinterpret_cast<std::pair<RealT, RealT> *>(score_internal_2x1_nucleotides));
+ }
+#endif
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+ {
+ const int pos[4] = {i+1, i+2, j-1, j};
+ ConvertProfileCount(profile_score_internal_2x2_nucleotides[i*(L+1)+j].second, pos, 4, reinterpret_cast<std::pair<RealT, RealT> *>(score_internal_2x2_nucleotides));
+ }
+#endif
+#if PARAMS_HELIX_STACKING
+ {
+ const int pos[4] = {i, j, i+1, j-1};
+ ConvertProfileCount(profile_score_helix_stacking[i*(L+1)+j].second, pos, 4, reinterpret_cast<std::pair<RealT, RealT> *>(score_helix_stacking));
+ }
+#endif
+#if PARAMS_HELIX_CLOSING
+ {
+ const int pos[2] = {i, j+1};
+ ConvertProfileCount(profile_score_helix_closing[i*(L+1)+j].second, pos, 2, reinterpret_cast<std::pair<RealT, RealT> *>(score_helix_closing));
+ }
+#endif
+#if PARAMS_DANGLE
+ {
+ const int pos[3] = {i, j+1, i+1};
+ ConvertProfileCount(profile_score_dangle_left[i*(L+1)+j].second, pos, 3, reinterpret_cast<std::pair<RealT, RealT> *>(score_dangle_left));
+ }
+ {
+ const int pos[3] = {i, j+1, j};
+ ConvertProfileCount(profile_score_dangle_right[i*(L+1)+j].second, pos, 3, reinterpret_cast<std::pair<RealT, RealT> *>(score_dangle_right));
+ }
+#endif
+ }
+ }
+#endif
+
+}
+
+#if PROFILE
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ConvertProfileCount()
+//
+// Convert profile count for a single location.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::ConvertProfileCount(const RealT &profile_score, const int *pos, int dimensions, std::pair<RealT,RealT> *table)
+{
+ // consider all N sequences; the letters of sequence k are read starting at A[k*(L+1)]
+ for (int k = 0; k < N; k++)
+ {
+ bool valid = true;
+ int index = 0; // flattened offset into table, extended by one letter per dimension
+ int *seq = &A[k*(L+1)];
+
+ // extract letters of the pattern for the current sequence, stopping at the first invalid one
+ for (int d = 0; valid && d < dimensions; d++)
+ {
+ if (pos[d] < 1 || pos[d] > L) // position outside 1..L: sequence k receives nothing
+ valid = false;
+ else
+ {
+ BYTE c = seq[pos[d]];
+ if (c == BYTE(alphabet.size())) // code alphabet.size() is rejected (presumably a gap/unknown letter -- TODO confirm)
+ valid = false;
+ else
+ index = index * (M+1) + c; // mixed-radix step with base M+1
+ }
+ }
+
+ // add the profile count (passed in as profile_score), scaled by weights[k], to the pattern's count (.second)
+ if (valid) table[index].second += weights[k] * profile_score;
+ }
+}
+
+#endif
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::UseLoss()
+//
+// Use per-position loss. A loss is incurred if true_mapping[i] is
+// neither UNKNOWN nor UNPAIRED and solution[i] != true_mapping[i].
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::UseLoss(const std::vector<int> &true_mapping, RealT example_loss)
+{
+ Assert(int(true_mapping.size()) == L+1, "Mapping of incorrect length!");
+ cache_initialized = false; // the loss tables change below, so the cache is marked as not initialized
+
+ // compute number of pairings (positions 1..L whose true mapping is neither UNKNOWN nor UNPAIRED)
+ int num_pairings = 0;
+ for (int i = 1; i <= L; i++)
+ if (true_mapping[i] != SStruct::UNKNOWN && true_mapping[i] != SStruct::UNPAIRED)
+ ++num_pairings;
+ // spread example_loss evenly over the paired positions; with no pairings nothing below is ever charged, so use zero rather than divide by zero
+ RealT per_position_loss = (num_pairings > 0 ? example_loss / RealT(num_pairings) : RealT(0));
+
+ // compute the penalty for each position that we declare to be unpaired
+ for (int i = 0; i <= L; i++)
+ {
+ loss_unpaired_position[i] =
+ ((i == 0 || true_mapping[i] == SStruct::UNKNOWN || true_mapping[i] == SStruct::UNPAIRED) ? RealT(0) : per_position_loss);
+ }
+
+ // now, compute the penalty for declaring ranges of positions to be unpaired;
+ // also, compute the penalty for matching positions s[i] and s[j].
+ for (int i = 0; i <= L; i++)
+ {
+ loss_unpaired[offset[i]+i] = RealT(0); // empty range: no penalty
+ loss_paired[offset[i]+i] = RealT(NEG_INF); // a position can never pair with itself
+ for (int j = i+1; j <= L; j++)
+ {
+ loss_unpaired[offset[i]+j] =
+ loss_unpaired[offset[i]+j-1] +
+ loss_unpaired_position[j]; // running sum of the per-position penalties for i+1..j
+ loss_paired[offset[i]+j] =
+ ((i == 0 || true_mapping[i] == SStruct::UNKNOWN || true_mapping[i] == SStruct::UNPAIRED || true_mapping[i] == j) ? RealT(0) : per_position_loss) +
+ ((i == 0 || true_mapping[j] == SStruct::UNKNOWN || true_mapping[j] == SStruct::UNPAIRED || true_mapping[j] == i) ? RealT(0) : per_position_loss);
+ }
+ }
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::UseConstraints()
+//
+// Use known secondary structure mapping.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::UseConstraints(const std::vector<int> &true_mapping)
+{
+ Assert(int(true_mapping.size()) == L+1, "Supplied mapping of incorrect length!");
+ cache_initialized = false; // the constraint tables change below, so the cache is marked as not initialized
+
+ // determine whether we allow each position to be unpaired (positions 1..L; index 0 is not written here)
+ for (int i = 1; i <= L; i++)
+ {
+ allow_unpaired_position[i] =
+ (true_mapping[i] == SStruct::UNKNOWN ||
+ true_mapping[i] == SStruct::UNPAIRED);
+ }
+
+ // determine whether we allow ranges of positions to be unpaired;
+ // also determine which base-pairings we allow
+ for (int i = 0; i <= L; i++)
+ {
+ allow_unpaired[offset[i]+i] = 1; // the empty range is always allowed
+ allow_paired[offset[i]+i] = 0; // a position can never pair with itself
+ for (int j = i+1; j <= L; j++)
+ {
+ allow_unpaired[offset[i]+j] =
+ allow_unpaired[offset[i]+j-1] &&
+ allow_unpaired_position[j]; // range i+1..j is allowed only if every position in it is
+ allow_paired[offset[i]+j] =
+ (i > 0 && // index 0 never takes part in a base pair
+ (true_mapping[i] == SStruct::UNKNOWN || true_mapping[i] == j) && // i is unconstrained or mapped to j
+ (true_mapping[j] == SStruct::UNKNOWN || true_mapping[j] == i) && // j is unconstrained or mapped to i
+ (allow_noncomplementary || IsComplementary(i,j)));
+ }
+ }
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ScoreJunctionA()
+// InferenceEngine::CountJunctionA()
+//
+// Returns the score for an asymmetric junction at positions i
+// and j such that (i,j+1) are base-paired and (i+1,j) are free.
+//
+// In an RNA structure, this would look like
+//
+// | |
+// x[i+1] x[j]
+// position i --------> o o <----- position j
+// x[i] -- x[j+1]
+// | |
+// x[i-1] -- x[j+2]
+//
+// Note that the difference between ScoreJunctionA() and
+// ScoreJunctionB() is that the former applies to multi-branch
+// loops whereas the latter is used for hairpin loops and
+// single-branch loops.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline RealT InferenceEngine<RealT>::ScoreJunctionA(int i, int j) const
+{
+ // i and j must be bounded away from the edges so that s[i] and s[j+1]
+ // refer to actual nucleotides. To allow us to use this function when
+ // scoring the asymmetric junction for an exterior loop whose closing
+ // base pair includes the first and last nucleotides of the sequence,
+ // we allow i to be as large as L and j to be as small as 0.
+
+ Assert(0 < i && i <= L && 0 <= j && j < L, "Invalid indices.");
+ // helix-closing term plus dangle terms; the left dangle is skipped when i == L and the right dangle when j == 0
+ return
+ RealT(0)
+#if PARAMS_HELIX_CLOSING
+#if PROFILE
+ + profile_score_helix_closing[i*(L+1)+j].first
+#else
+ + score_helix_closing[s[i]][s[j+1]].first
+#endif
+#endif
+#if PARAMS_DANGLE
+#if PROFILE
+ + (i < L ? profile_score_dangle_left[i*(L+1)+j].first : RealT(0))
+ + (j > 0 ? profile_score_dangle_right[i*(L+1)+j].first : RealT(0))
+#else
+ + (i < L ? score_dangle_left[s[i]][s[j+1]][s[i+1]].first : RealT(0))
+ + (j > 0 ? score_dangle_right[s[i]][s[j+1]][s[j]].first : RealT(0))
+#endif
+#endif
+ ;
+}
+
+template<class RealT>
+inline void InferenceEngine<RealT>::CountJunctionA(int i, int j, RealT value)
+{
+ Assert(0 < i && i <= L && 0 <= j && j < L, "Invalid indices.");
+ // add value to the count (.second) of every parameter that ScoreJunctionA() reads for the same (i,j)
+#if PARAMS_HELIX_CLOSING
+#if PROFILE
+ profile_score_helix_closing[i*(L+1)+j].second += value;
+#else
+ score_helix_closing[s[i]][s[j+1]].second += value;
+#endif
+#endif
+#if PARAMS_DANGLE
+#if PROFILE
+ if (i < L) profile_score_dangle_left[i*(L+1)+j].second += value;
+ if (j > 0) profile_score_dangle_right[i*(L+1)+j].second += value;
+#else
+ if (i < L) score_dangle_left[s[i]][s[j+1]][s[i+1]].second += value;
+ if (j > 0) score_dangle_right[s[i]][s[j+1]][s[j]].second += value;
+#endif
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ScoreJunctionB()
+// InferenceEngine::CountJunctionB()
+//
+// Returns the score for a symmetric junction at positions i
+// and j such that (i,j+1) are base-paired and (i+1,j) are free.
+//
+// In an RNA structure, this would look like
+//
+// | |
+// x[i+1] x[j]
+// position i --------> o o <----- position j
+// x[i] -- x[j+1]
+// | |
+// x[i-1] -- x[j+2]
+//
+// Note that the difference between ScoreJunctionA() and
+// ScoreJunctionB() is that the former applies to multi-branch
+// loops whereas the latter is used for hairpin loops and
+// single-branch loops.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline RealT InferenceEngine<RealT>::ScoreJunctionB(int i, int j) const
+{
+ // The bounds here are similar to the asymmetric junction case, with
+ // the main difference being that symmetric junctions are not allowed
+ // for the exterior loop. For this reason, i and j are bounded away
+ // from the edges of the sequence (i.e., i < L && j > 0).
+
+ Assert(0 < i && i < L && 0 < j && j < L, "Invalid indices.");
+ // sum of the helix-closing and terminal-mismatch terms that are compiled in
+ return RealT(0)
+#if PARAMS_HELIX_CLOSING
+#if PROFILE
+ + profile_score_helix_closing[i*(L+1)+j].first
+#else
+ + score_helix_closing[s[i]][s[j+1]].first
+#endif
+#endif
+#if PARAMS_TERMINAL_MISMATCH
+#if PROFILE
+ + profile_score_terminal_mismatch[i*(L+1)+j].first
+#else
+ + score_terminal_mismatch[s[i]][s[j+1]][s[i+1]][s[j]].first
+#endif
+#endif
+ ;
+}
+
+template<class RealT>
+inline void InferenceEngine<RealT>::CountJunctionB(int i, int j, RealT value)
+{
+ Assert(0 < i && i < L && 0 < j && j < L, "Invalid indices.");
+ // add value to the count (.second) of every parameter that ScoreJunctionB() reads for the same (i,j)
+#if PARAMS_HELIX_CLOSING
+#if PROFILE
+ profile_score_helix_closing[i*(L+1)+j].second += value;
+#else
+ score_helix_closing[s[i]][s[j+1]].second += value;
+#endif
+#endif
+#if PARAMS_TERMINAL_MISMATCH
+#if PROFILE
+ profile_score_terminal_mismatch[i*(L+1)+j].second += value;
+#else
+ score_terminal_mismatch[s[i]][s[j+1]][s[i+1]][s[j]].second += value;
+#endif
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ScoreBasePair()
+// InferenceEngine::CountBasePair()
+//
+// Returns the score for a base-pairing between letters i and j.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline RealT InferenceEngine<RealT>::ScoreBasePair(int i, int j) const
+{
+
+ // Clearly, i and j must refer to actual letters of the sequence,
+ // and no letter may base-pair to itself.
+
+ Assert(0 < i && i <= L && 0 < j && j <= L && i != j, "Invalid base-pair");
+ // base-pair term plus a distance term indexed by |j-i| capped at BP_DIST_LAST_THRESHOLD; loss_paired is added when HAMMING_LOSS is defined
+ return RealT(0)
+#if defined(HAMMING_LOSS)
+ + loss_paired[offset[i]+j]
+#endif
+#if PARAMS_BASE_PAIR
+#if PROFILE
+ + profile_score_base_pair[i*(L+1)+j].first
+#else
+ + score_base_pair[s[i]][s[j]].first
+#endif
+#endif
+#if PARAMS_BASE_PAIR_DIST
+ + cache_score_base_pair_dist[std::min(Abs(j - i), BP_DIST_LAST_THRESHOLD)].first
+#endif
+ ;
+}
+
+template<class RealT>
+inline void InferenceEngine<RealT>::CountBasePair(int i, int j, RealT value)
+{
+ Assert(0 < i && i <= L && 0 < j && j <= L && i != j, "Invalid base-pair");
+ // add value to the base-pair count and to the distance-cache count for |j-i| (capped at BP_DIST_LAST_THRESHOLD)
+#if PARAMS_BASE_PAIR
+#if PROFILE
+ profile_score_base_pair[i*(L+1)+j].second += value;
+#else
+ score_base_pair[s[i]][s[j]].second += value;
+#endif
+#endif
+#if PARAMS_BASE_PAIR_DIST
+ cache_score_base_pair_dist[std::min(Abs(j - i), BP_DIST_LAST_THRESHOLD)].second += value;
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ScoreHairpin()
+// InferenceEngine::CountHairpin()
+//
+// Returns the score for a hairpin spanning positions i to j.
+//
+// In an RNA structure, this would look like
+//
+// ...
+// / \.
+// x[i+2] x[j-1]
+// | |
+// x[i+1] x[j]
+// position i --------> o o <----- position j
+// x[i] -- x[j+1]
+// | |
+// x[i-1] -- x[j+2]
+//
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline RealT InferenceEngine<RealT>::ScoreHairpin(int i, int j) const
+{
+ // The constraints i > 0 && j < L ensure that s[i] and s[j+1] refer to
+ // nucleotides which could base-pair. The remaining constraint ensures
+ // that only valid hairpins are considered.
+
+ Assert(0 < i && i + C_MIN_HAIRPIN_LENGTH <= j && j < L, "Hairpin boundaries invalid.");
+ // unpaired run + junction B, plus a length term (j-i capped at D_MAX_HAIRPIN_LENGTH) and explicit terms for loops of exactly 3 or 4 nucleotides
+ return
+ ScoreUnpaired(i,j)
+ + ScoreJunctionB(i,j)
+#if PARAMS_HAIRPIN_LENGTH
+ + cache_score_hairpin_length[std::min(j - i, D_MAX_HAIRPIN_LENGTH)].first
+#endif
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+#if PROFILE
+ + (j - i == 3 ? profile_score_hairpin_3_nucleotides[i].first : RealT(0))
+#else
+ + (j - i == 3 ? score_hairpin_3_nucleotides[s[i+1]][s[i+2]][s[i+3]].first : RealT(0))
+#endif
+#endif
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+#if PROFILE
+ + (j - i == 4 ? profile_score_hairpin_4_nucleotides[i].first : RealT(0))
+#else
+ + (j - i == 4 ? score_hairpin_4_nucleotides[s[i+1]][s[i+2]][s[i+3]][s[i+4]].first : RealT(0))
+#endif
+#endif
+ ;
+}
+
+template<class RealT>
+inline void InferenceEngine<RealT>::CountHairpin(int i, int j, RealT value)
+{
+ Assert(0 < i && i + C_MIN_HAIRPIN_LENGTH <= j && j < L, "Hairpin boundaries invalid.");
+ // add value to the count of every term that ScoreHairpin() sums for the same (i,j)
+ CountUnpaired(i,j,value);
+ CountJunctionB(i,j,value);
+#if PARAMS_HAIRPIN_LENGTH
+ cache_score_hairpin_length[std::min(j - i, D_MAX_HAIRPIN_LENGTH)].second += value;
+#endif
+#if PARAMS_HAIRPIN_3_NUCLEOTIDES
+#if PROFILE
+ if (j - i == 3) profile_score_hairpin_3_nucleotides[i].second += value;
+#else
+ if (j - i == 3) score_hairpin_3_nucleotides[s[i+1]][s[i+2]][s[i+3]].second += value;
+#endif
+#endif
+#if PARAMS_HAIRPIN_4_NUCLEOTIDES
+#if PROFILE
+ if (j - i == 4) profile_score_hairpin_4_nucleotides[i].second += value;
+#else
+ if (j - i == 4) score_hairpin_4_nucleotides[s[i+1]][s[i+2]][s[i+3]][s[i+4]].second += value;
+#endif
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ScoreHelix()
+// InferenceEngine::CountHelix()
+//
+// Returns the score for a helix of length m starting at positions
+// i and j. All base-pairs except for x[i+1]-x[j] are scored.
+//
+// In an RNA structure, this would look like
+//
+// ...
+// \ /
+// position i+m -------> o o <----- position j-m
+// x[i+3] -- x[j-2]
+// | |
+// x[i+2] -- x[j-1]
+// | |
+// x[i+1] -- x[j]
+// position i ---------> o o <----- position j
+// / \.
+//
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline RealT InferenceEngine<RealT>::ScoreHelix(int i, int j, int m) const
+{
+    // 0 <= i and j <= L keep the helix inside the sequence, while
+    // i + 2*m <= j leaves room for m nucleotides on each strand.
+
+    Assert(0 <= i && i + 2 * m <= j && j <= L, "Helix boundaries invalid.");
+    Assert(2 <= m && m <= D_MAX_HELIX_LENGTH, "Helix length invalid.");
+
+#if FAST_HELIX_LENGTHS
+
+    // Difference of two entries of the helix-sum cache.
+    RealT total = cache_score_helix_sums[(i+j+1)*L+j-i-1].first - cache_score_helix_sums[(i+j+1)*L+j-i-m-m+1].first;
+
+#else
+
+    // Explicitly add the stacking and base-pair terms for the m-1 inner pairs.
+    RealT total = RealT(0);
+    for (int step = 1; step < m; ++step)
+        total += ScoreHelixStacking(i+step,j-step+1) + ScoreBasePair(i+step+1,j-step);
+
+#endif
+
+#if PARAMS_HELIX_LENGTH
+    total += cache_score_helix_length[m].first;
+#endif
+
+    return total;
+}
+
+template<class RealT>
+inline void InferenceEngine<RealT>::CountHelix(int i, int j, int m, RealT value)
+{
+    // Counting counterpart of ScoreHelix(): adds `value` to the count slot
+    // of each parameter that ScoreHelix(i,j,m) reads.
+
+    Assert(0 <= i && i + 2 * m <= j && j <= L, "Helix boundaries invalid.");
+    Assert(2 <= m && m <= D_MAX_HELIX_LENGTH, "Helix length invalid.");
+
+#if FAST_HELIX_LENGTHS
+
+    // ScoreHelix() subtracts the second cache entry from the first, so the
+    // counts are updated with opposite signs.
+    cache_score_helix_sums[(i+j+1)*L+j-i-1].second += value;
+    cache_score_helix_sums[(i+j+1)*L+j-i-m-m+1].second -= value;
+
+#else
+
+    for (int step = 1; step < m; ++step)
+    {
+        CountHelixStacking(i+step,j-step+1,value);
+        CountBasePair(i+step+1,j-step,value);
+    }
+
+#endif
+
+#if PARAMS_HELIX_LENGTH
+    cache_score_helix_length[m].second += value;
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ScoreSingleNucleotides()
+// InferenceEngine::CountSingleNucleotides()
+//
+// Returns the score for nucleotides in a single-branch loop
+// spanning i to j and p to q.
+//
+// In an RNA structure, this would look like
+//
+// ... ...
+// | |
+// x[p+1] -- x[q]
+// position p --------> o o <----- position q
+// x[p] x[q+1]
+// | |
+// ... ...
+// | |
+// x[i+1] x[j]
+// position i --------> o o <----- position j
+// x[i] -- x[j+1]
+// | |
+// x[i-1] -- x[j+2]
+//
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline RealT InferenceEngine<RealT>::ScoreSingleNucleotides(int i, int j, int p, int q) const
+{
+    // i > 0 and j < L guarantee that nucleotides s[i] and s[j+1] exist.
+    // p + 2 <= q leaves room for at least one nucleotide on the far side of
+    // the loop. This routine is meant for genuine single-branch loops only,
+    // never for stacking pairs.
+
+    Assert(0 < i && i <= p && p + 2 <= q && q <= j && j < L, "Single-branch loop boundaries invalid.");
+
+    // The side lengths are only needed by the debug check and by the
+    // sequence-specific terms, so they are compiled out otherwise.
+#if (!defined(NDEBUG) || PARAMS_BULGE_0x1_NUCLEOTIDES || PARAMS_BULGE_0x2_NUCLEOTIDES || PARAMS_BULGE_0x3_NUCLEOTIDES || PARAMS_INTERNAL_1x1_NUCLEOTIDES || PARAMS_INTERNAL_1x2_NUCLEOTIDES || PARAMS_INTERNAL_2x2_NUCLEOTIDES)
+    const int left_len = p - i;
+    const int right_len = j - q;
+
+    Assert(left_len + right_len > 0 && left_len >= 0 && right_len >= 0 && left_len + right_len <= C_MAX_SINGLE_LENGTH, "Invalid single-branch loop size.");
+#endif
+
+    // Unpaired-base terms for both sides of the loop.
+    RealT total = ScoreUnpaired(i,p);
+    total += ScoreUnpaired(q,j);
+
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+#if PROFILE
+    total += (left_len == 0 && right_len == 1 ? profile_score_bulge_0x1_nucleotides[j].first : RealT(0));
+    total += (left_len == 1 && right_len == 0 ? profile_score_bulge_1x0_nucleotides[i].first : RealT(0));
+#else
+    total += (left_len == 0 && right_len == 1 ? score_bulge_0x1_nucleotides[s[j]].first : RealT(0));
+    total += (left_len == 1 && right_len == 0 ? score_bulge_1x0_nucleotides[s[i+1]].first : RealT(0));
+#endif
+#endif
+
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+#if PROFILE
+    total += (left_len == 0 && right_len == 2 ? profile_score_bulge_0x2_nucleotides[j].first : RealT(0));
+    total += (left_len == 2 && right_len == 0 ? profile_score_bulge_2x0_nucleotides[i].first : RealT(0));
+#else
+    total += (left_len == 0 && right_len == 2 ? score_bulge_0x2_nucleotides[s[j-1]][s[j]].first : RealT(0));
+    total += (left_len == 2 && right_len == 0 ? score_bulge_2x0_nucleotides[s[i+1]][s[i+2]].first : RealT(0));
+#endif
+#endif
+
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+#if PROFILE
+    total += (left_len == 0 && right_len == 3 ? profile_score_bulge_0x3_nucleotides[j].first : RealT(0));
+    total += (left_len == 3 && right_len == 0 ? profile_score_bulge_3x0_nucleotides[i].first : RealT(0));
+#else
+    total += (left_len == 0 && right_len == 3 ? score_bulge_0x3_nucleotides[s[j-2]][s[j-1]][s[j]].first : RealT(0));
+    total += (left_len == 3 && right_len == 0 ? score_bulge_3x0_nucleotides[s[i+1]][s[i+2]][s[i+3]].first : RealT(0));
+#endif
+#endif
+
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+#if PROFILE
+    total += (left_len == 1 && right_len == 1 ? profile_score_internal_1x1_nucleotides[i*(L+1)+j].first : RealT(0));
+#else
+    total += (left_len == 1 && right_len == 1 ? score_internal_1x1_nucleotides[s[i+1]][s[j]].first : RealT(0));
+#endif
+#endif
+
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+#if PROFILE
+    total += (left_len == 1 && right_len == 2 ? profile_score_internal_1x2_nucleotides[i*(L+1)+j].first : RealT(0));
+    total += (left_len == 2 && right_len == 1 ? profile_score_internal_2x1_nucleotides[i*(L+1)+j].first : RealT(0));
+#else
+    total += (left_len == 1 && right_len == 2 ? score_internal_1x2_nucleotides[s[i+1]][s[j-1]][s[j]].first : RealT(0));
+    total += (left_len == 2 && right_len == 1 ? score_internal_2x1_nucleotides[s[i+1]][s[i+2]][s[j]].first : RealT(0));
+#endif
+#endif
+
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+#if PROFILE
+    total += (left_len == 2 && right_len == 2 ? profile_score_internal_2x2_nucleotides[i*(L+1)+j].first : RealT(0));
+#else
+    total += (left_len == 2 && right_len == 2 ? score_internal_2x2_nucleotides[s[i+1]][s[i+2]][s[j-1]][s[j]].first : RealT(0));
+#endif
+#endif
+
+    return total;
+}
+
+template<class RealT>
+inline void InferenceEngine<RealT>::CountSingleNucleotides(int i, int j, int p, int q, RealT value)
+{
+    // Counting counterpart of ScoreSingleNucleotides(): adds `value` to the
+    // count slot of each parameter that ScoreSingleNucleotides(i,j,p,q) reads.
+
+    Assert(0 < i && i <= p && p + 2 <= q && q <= j && j < L, "Single-branch loop boundaries invalid.");
+
+    // The side lengths are only needed by the debug check and by the
+    // sequence-specific terms, so they are compiled out otherwise.
+#if (!defined(NDEBUG) || PARAMS_BULGE_0x1_NUCLEOTIDES || PARAMS_BULGE_0x2_NUCLEOTIDES || PARAMS_BULGE_0x3_NUCLEOTIDES || PARAMS_INTERNAL_1x1_NUCLEOTIDES || PARAMS_INTERNAL_1x2_NUCLEOTIDES || PARAMS_INTERNAL_2x2_NUCLEOTIDES)
+    const int left_len = p - i;
+    const int right_len = j - q;
+
+    Assert(left_len + right_len > 0 && left_len >= 0 && right_len >= 0 && left_len + right_len <= C_MAX_SINGLE_LENGTH, "Invalid single-branch loop size.");
+#endif
+
+    // Unpaired-base counts for both sides of the loop.
+    CountUnpaired(i,p,value);
+    CountUnpaired(q,j,value);
+
+#if PARAMS_BULGE_0x1_NUCLEOTIDES
+#if PROFILE
+    if (left_len == 0 && right_len == 1)
+        profile_score_bulge_0x1_nucleotides[j].second += value;
+    if (left_len == 1 && right_len == 0)
+        profile_score_bulge_1x0_nucleotides[i].second += value;
+#else
+    if (left_len == 0 && right_len == 1)
+        score_bulge_0x1_nucleotides[s[j]].second += value;
+    if (left_len == 1 && right_len == 0)
+        score_bulge_1x0_nucleotides[s[i+1]].second += value;
+#endif
+#endif
+
+#if PARAMS_BULGE_0x2_NUCLEOTIDES
+#if PROFILE
+    if (left_len == 0 && right_len == 2)
+        profile_score_bulge_0x2_nucleotides[j].second += value;
+    if (left_len == 2 && right_len == 0)
+        profile_score_bulge_2x0_nucleotides[i].second += value;
+#else
+    if (left_len == 0 && right_len == 2)
+        score_bulge_0x2_nucleotides[s[j-1]][s[j]].second += value;
+    if (left_len == 2 && right_len == 0)
+        score_bulge_2x0_nucleotides[s[i+1]][s[i+2]].second += value;
+#endif
+#endif
+
+#if PARAMS_BULGE_0x3_NUCLEOTIDES
+#if PROFILE
+    if (left_len == 0 && right_len == 3)
+        profile_score_bulge_0x3_nucleotides[j].second += value;
+    if (left_len == 3 && right_len == 0)
+        profile_score_bulge_3x0_nucleotides[i].second += value;
+#else
+    if (left_len == 0 && right_len == 3)
+        score_bulge_0x3_nucleotides[s[j-2]][s[j-1]][s[j]].second += value;
+    if (left_len == 3 && right_len == 0)
+        score_bulge_3x0_nucleotides[s[i+1]][s[i+2]][s[i+3]].second += value;
+#endif
+#endif
+
+#if PARAMS_INTERNAL_1x1_NUCLEOTIDES
+#if PROFILE
+    if (left_len == 1 && right_len == 1)
+        profile_score_internal_1x1_nucleotides[i*(L+1)+j].second += value;
+#else
+    if (left_len == 1 && right_len == 1)
+        score_internal_1x1_nucleotides[s[i+1]][s[j]].second += value;
+#endif
+#endif
+
+#if PARAMS_INTERNAL_1x2_NUCLEOTIDES
+#if PROFILE
+    if (left_len == 1 && right_len == 2)
+        profile_score_internal_1x2_nucleotides[i*(L+1)+j].second += value;
+    if (left_len == 2 && right_len == 1)
+        profile_score_internal_2x1_nucleotides[i*(L+1)+j].second += value;
+#else
+    if (left_len == 1 && right_len == 2)
+        score_internal_1x2_nucleotides[s[i+1]][s[j-1]][s[j]].second += value;
+    if (left_len == 2 && right_len == 1)
+        score_internal_2x1_nucleotides[s[i+1]][s[i+2]][s[j]].second += value;
+#endif
+#endif
+
+#if PARAMS_INTERNAL_2x2_NUCLEOTIDES
+#if PROFILE
+    if (left_len == 2 && right_len == 2)
+        profile_score_internal_2x2_nucleotides[i*(L+1)+j].second += value;
+#else
+    if (left_len == 2 && right_len == 2)
+        score_internal_2x2_nucleotides[s[i+1]][s[i+2]][s[j-1]][s[j]].second += value;
+#endif
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ScoreSingle()
+// InferenceEngine::CountSingle()
+//
+// Returns the score for a single-branch loop spanning i to j and
+// p to q.
+//
+// In an RNA structure, this would look like
+//
+// ... ...
+// | |
+// x[p+1] -- x[q]
+// position p --------> o o <----- position q
+// x[p] x[q+1]
+// | |
+// ... ...
+// | |
+// x[i+1] x[j]
+// position i --------> o o <----- position j
+// x[i] -- x[j+1]
+// | |
+// x[i-1] -- x[j+2]
+//
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline RealT InferenceEngine<RealT>::ScoreSingle(int i, int j, int p, int q) const
+{
+    // Number of loop positions on each side: i..p on one strand, q..j on the other.
+    const int left_len = p - i;
+    const int right_len = j - q;
+
+    // i > 0 and j < L guarantee that nucleotides s[i] and s[j+1] exist.
+    // p + 2 <= q leaves room for at least one nucleotide on the far side of
+    // the loop. This routine is meant for genuine single-branch loops only,
+    // never for stacking pairs.
+
+    Assert(0 < i && i <= p && p + 2 <= q && q <= j && j < L, "Single-branch loop boundaries invalid.");
+    Assert(left_len + right_len > 0 && left_len >= 0 && right_len >= 0 && left_len + right_len <= C_MAX_SINGLE_LENGTH, "Invalid single-branch loop size.");
+
+    // Accumulate: loop-size term, inner base pair, both junctions, then the
+    // sequence-dependent loop terms.
+    RealT total = cache_score_single[left_len][right_len].first;
+    total += ScoreBasePair(p+1,q);
+    total += ScoreJunctionB(i,j);
+    total += ScoreJunctionB(q,p);
+    total += ScoreSingleNucleotides(i,j,p,q);
+    return total;
+}
+
+template<class RealT>
+inline void InferenceEngine<RealT>::CountSingle(int i, int j, int p, int q, RealT value)
+{
+    // Counting counterpart of ScoreSingle(): adds `value` to the count of
+    // every term that ScoreSingle(i,j,p,q) sums.
+
+    // Number of loop positions on each side: i..p on one strand, q..j on the other.
+    const int left_len = p - i;
+    const int right_len = j - q;
+
+    Assert(0 < i && i <= p && p + 2 <= q && q <= j && j < L, "Single-branch loop boundaries invalid.");
+    Assert(left_len + right_len > 0 && left_len >= 0 && right_len >= 0 && left_len + right_len <= C_MAX_SINGLE_LENGTH, "Invalid single-branch loop size.");
+
+    // Loop-size term first, then the inner base pair, both junctions and
+    // the sequence-dependent loop terms.
+    cache_score_single[left_len][right_len].second += value;
+
+    CountBasePair(p+1,q,value);
+    CountJunctionB(i,j,value);
+    CountJunctionB(q,p,value);
+    CountSingleNucleotides(i,j,p,q,value);
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::EncodeTraceback()
+// InferenceEngine::DecodeTraceback()
+//
+// Encode and decode a traceback entry as a single integer. Here, i is
+// the traceback type, and j is a non-negative auxiliary value (e.g., a
+// helix length, a split position, or packed loop lengths).
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline int InferenceEngine<RealT>::EncodeTraceback(int i, int j) const
+{
+    // Packs traceback type i (the low "digit", base NUM_TRACEBACK_TYPES)
+    // together with the auxiliary value j into one integer.
+    Assert(0 <= i && i < NUM_TRACEBACK_TYPES && j >= 0, "Invalid values to encode as traceback.");
+
+    const int packed = (j * NUM_TRACEBACK_TYPES) + i;
+    return packed;
+}
+
+template<class RealT>
+inline std::pair<int,int> InferenceEngine<RealT>::DecodeTraceback(int s) const
+{
+    // Inverse of EncodeTraceback(): .first is the traceback type and
+    // .second is the auxiliary value.
+    const std::pair<int,int> unpacked(s % NUM_TRACEBACK_TYPES, s / NUM_TRACEBACK_TYPES);
+    return unpacked;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ComputeViterbi()
+//
+// Run Viterbi algorithm.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::ComputeViterbi()
+{
+ // Fills the Viterbi value tables (F5v, FCv, FMv, FM1v and, when
+ // PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR, also FEv and FNv)
+ // together with the matching traceback tables (F5t, FCt, ...).
+ //
+ // Every value cell starts at NEG_INF and every traceback cell at -1.
+ // The two-index tables are filled with i running from L down to 0 and
+ // j from i up to L; the F5 table is filled afterwards for j = 1..L.
+ // Each traceback cell stores EncodeTraceback(type, payload) for the
+ // alternative that achieved the maximum.
+
+ InitializeCache();
+
+#if SHOW_TIMINGS
+ double starting_time = GetSystemTime();
+#endif
+
+#if CANDIDATE_LIST
+ std::vector<int> candidates;
+ candidates.reserve(L+1);
+ long long int candidates_seen = 0;
+ long long int candidates_possible = 0;
+#endif
+
+ // initialization
+
+ F5t.clear(); F5t.resize(L+1, -1);
+ FCt.clear(); FCt.resize(SIZE, -1);
+ FMt.clear(); FMt.resize(SIZE, -1);
+ FM1t.clear(); FM1t.resize(SIZE, -1);
+
+ F5v.clear(); F5v.resize(L+1, RealT(NEG_INF));
+ FCv.clear(); FCv.resize(SIZE, RealT(NEG_INF));
+ FMv.clear(); FMv.resize(SIZE, RealT(NEG_INF));
+ FM1v.clear(); FM1v.resize(SIZE, RealT(NEG_INF));
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+ FEt.clear(); FEt.resize(SIZE, -1);
+ FNt.clear(); FNt.resize(SIZE, -1);
+ FEv.clear(); FEv.resize(SIZE, RealT(NEG_INF));
+ FNv.clear(); FNv.resize(SIZE, RealT(NEG_INF));
+#endif
+
+ for (int i = L; i >= 0; i--)
+ {
+
+#if CANDIDATE_LIST
+ candidates.clear();
+#endif
+
+ for (int j = i; j <= L; j++)
+ {
+ // FM2[i,j] = MAX (i<k<j : FM1[i,k] + FM[k,j])
+
+ RealT FM2v = RealT(NEG_INF);
+ int FM2t = -1;
+
+#if SIMPLE_FM2
+
+ for (int k = i+1; k < j; k++)
+ UPDATE_MAX(FM2v, FM2t, FM1v[offset[i]+k] + FMv[offset[k]+j], k);
+
+#else
+
+#if !CANDIDATE_LIST
+
+ if (i+2 <= j)
+ {
+ // Walk FM1[i,k] and FM[k,j] with raw pointers instead of
+ // recomputing offset[] on every iteration.
+ // (The obsolete `register` specifier was dropped here: it is
+ // ill-formed since C++17 and never affected behavior.)
+ RealT *p1 = &(FM1v[offset[i]+i+1]);
+ RealT *p2 = &(FMv[offset[i+1]+j]);
+ for (int k = i+1; k < j; k++)
+ {
+ UPDATE_MAX(FM2v, FM2t, (*p1) + (*p2), k);
+ ++p1;
+ p2 += L-k;
+ }
+ }
+
+#else
+
+ // Only split points recorded as candidates for this row i are tried.
+ // (The obsolete `register` specifier was dropped here as well.)
+ for (size_t kp = 0; kp < candidates.size(); kp++)
+ {
+ const int k = candidates[kp];
+ UPDATE_MAX(FM2v, FM2t, FM1v[offset[i]+k] + FMv[offset[k]+j], k);
+ }
+
+ candidates_seen += (long long int) candidates.size();
+ candidates_possible += (long long int) std::max(j-i-1,0);
+
+#endif
+
+#endif
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+
+ // FN[i,j] = optimal energy for substructure between positions
+ // i and j such that letters (i,j+1) are base-paired
+ // and the next interaction is not a stacking pair
+ //
+ // = MAX [ScoreHairpin(i,j),
+ // MAX (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+ // ScoreJunctionA(i,j) + a + c + MAX (i<k<j : FM1[i,k] + FM[k,j])]
+ //
+ // (assuming 0 < i <= j < L)
+ //
+ // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+ if (0 < i && j < L && allow_paired[offset[i]+j+1])
+ {
+ RealT best_v = RealT(NEG_INF);
+ int best_t = -1;
+
+ // compute ScoreHairpin(i,j)
+
+ if (allow_unpaired[offset[i]+j] && j-i >= C_MIN_HAIRPIN_LENGTH)
+ UPDATE_MAX(best_v, best_t, ScoreHairpin(i,j), EncodeTraceback(TB_FN_HAIRPIN,0));
+
+ // compute MAX (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+ for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+ {
+ if (p > i && !allow_unpaired_position[p]) break;
+ int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+ for (int q = j; q >= q_min; q--)
+ {
+ if (q < j && !allow_unpaired_position[q+1]) break;
+ if (!allow_paired[offset[p+1]+q]) continue;
+ if (i == p && j == q) continue;
+
+ UPDATE_MAX(best_v, best_t,
+ ScoreSingle(i,j,p,q) + FCv[offset[p+1]+q-1],
+ EncodeTraceback(TB_FN_SINGLE,(p-i)*(C_MAX_SINGLE_LENGTH+1)+j-q));
+ }
+ }
+
+#else
+
+ {
+ // Same search as above with ScoreSingle() expanded inline so that
+ // ScoreJunctionB(i,j) is evaluated once per (i,j).
+ RealT score_other = ScoreJunctionB(i,j);
+
+ int bp = -1, bq = -1;
+ for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+ {
+ if (p > i && !allow_unpaired_position[p]) break;
+ int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+ const RealT *FCptr = &(FCv[offset[p+1]-1]);
+ for (int q = j; q >= q_min; q--)
+ {
+ if (q < j && !allow_unpaired_position[q+1]) break;
+ if (!allow_paired[offset[p+1]+q]) continue;
+ if (i == p && j == q) continue;
+
+ RealT score = (score_other + cache_score_single[p-i][j-q].first + FCptr[q] + ScoreBasePair(p+1,q) +
+ ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+
+ if (score > best_v)
+ {
+ best_v = score;
+ bp = p;
+ bq = q;
+ }
+ }
+ }
+
+ if (bp != -1 && bq != -1)
+ best_t = EncodeTraceback(TB_FN_SINGLE,(bp-i)*(C_MAX_SINGLE_LENGTH+1)+j-bq);
+ }
+#endif
+
+ // compute MAX (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c)
+
+ UPDATE_MAX(best_v, best_t,
+ FM2v + ScoreJunctionA(i,j) + ScoreMultiPaired() + ScoreMultiBase(),
+ EncodeTraceback(TB_FN_BIFURCATION,FM2t));
+
+ FNv[offset[i]+j] = best_v;
+ FNt[offset[i]+j] = best_t;
+ }
+
+ // FE[i,j] = optimal energy for substructure between positions
+ // i and j such that letters (i,j+1) ... (i-D+1,j+D) are
+ // already base-paired
+ //
+ // = MAX [ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1] if i+2<=j,
+ // FN(i,j)]
+ //
+ // (assuming 0 < i <= j < L)
+
+ if (0 < i && j < L && allow_paired[offset[i]+j+1])
+ {
+ RealT best_v = RealT(NEG_INF);
+ int best_t = -1;
+
+ // compute ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1]
+
+ if (i+2 <= j && allow_paired[offset[i+1]+j])
+ {
+ UPDATE_MAX(best_v, best_t,
+ ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) + FEv[offset[i+1]+j-1],
+ EncodeTraceback(TB_FE_STACKING,0));
+ }
+
+ // compute FN(i,j)
+
+ UPDATE_MAX(best_v, best_t, FNv[offset[i]+j], EncodeTraceback(TB_FE_FN,0));
+
+ FEv[offset[i]+j] = best_v;
+ FEt[offset[i]+j] = best_t;
+ }
+
+ // FC[i,j] = optimal energy for substructure between positions
+ // i and j such that letters (i,j+1) are base-paired
+ // but (i-1,j+2) are not
+ //
+ // = MAX [ScoreIsolated() + FN(i,j),
+ // MAX (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k)),
+ // FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+ //
+ // (assuming 0 < i <= j < L)
+
+ if (0 < i && j < L && allow_paired[offset[i]+j+1])
+ {
+
+ RealT best_v = RealT(NEG_INF);
+ int best_t = -1;
+
+ // compute ScoreIsolated() + FN(i,j)
+
+ UPDATE_MAX(best_v, best_t, ScoreIsolated() + FNv[offset[i]+j], EncodeTraceback(TB_FC_FN,0));
+
+ // compute MAX (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k))
+
+ // `allowed` becomes false as soon as one pair of the helix is
+ // disallowed; it also gates the FE case below.
+ bool allowed = true;
+ for (int k = 2; k < D_MAX_HELIX_LENGTH; k++)
+ {
+ if (i + 2*k - 2 > j) break;
+ if (!allow_paired[offset[i+k-1]+j-k+2]) { allowed = false; break; }
+ UPDATE_MAX(best_v, best_t, ScoreHelix(i-1,j+1,k) + FNv[offset[i+k-1]+j-k+1], EncodeTraceback(TB_FC_HELIX,k));
+ }
+
+ // compute FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+
+ if (i + 2*D_MAX_HELIX_LENGTH-2 <= j)
+ {
+ if (allowed && allow_paired[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+2])
+ UPDATE_MAX(best_v, best_t, ScoreHelix(i-1,j+1,D_MAX_HELIX_LENGTH) +
+ FEv[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+1],
+ EncodeTraceback(TB_FC_FE,0));
+ }
+ FCv[offset[i]+j] = best_v;
+ FCt[offset[i]+j] = best_t;
+ }
+
+#else
+
+ // FC[i,j] = optimal energy for substructure between positions
+ // i and j such that letters (i,j+1) are base-paired
+ //
+ // = MAX [ScoreHairpin(i,j),
+ // MAX (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+ // ScoreJunctionA(i,j) + a + c + MAX (i<k<j : FM1[i,k] + FM[k,j])]
+ //
+ // (assuming 0 < i <= j < L)
+ //
+ // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+ if (0 < i && j < L && allow_paired[offset[i]+j+1])
+ {
+
+ RealT best_v = RealT(NEG_INF);
+ int best_t = -1;
+
+ // compute ScoreHairpin(i,j)
+
+ if (allow_unpaired[offset[i]+j] && j-i >= C_MIN_HAIRPIN_LENGTH)
+ UPDATE_MAX(best_v, best_t, ScoreHairpin(i,j), EncodeTraceback(TB_FC_HAIRPIN,0));
+
+ // compute MAX (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+ for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+ {
+ if (p > i && !allow_unpaired_position[p]) break;
+ int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+ for (int q = j; q >= q_min; q--)
+ {
+ if (q < j && !allow_unpaired_position[q+1]) break;
+ if (!allow_paired[offset[p+1]+q]) continue;
+
+ // p == i && q == j is the stacking-pair case, scored without ScoreSingle().
+ UPDATE_MAX(best_v, best_t,
+ FCv[offset[p+1]+q-1] +
+ (p == i && q == j ? ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) : ScoreSingle(i,j,p,q)),
+ EncodeTraceback(TB_FC_SINGLE,(p-i)*(C_MAX_SINGLE_LENGTH+1)+j-q));
+ }
+ }
+
+#else
+
+ {
+ // Same search as above with ScoreSingle() expanded inline; the
+ // stacking-pair score and ScoreJunctionB(i,j) are evaluated once per (i,j).
+ RealT score_helix = (i+2 <= j ? ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) : 0);
+ RealT score_other = ScoreJunctionB(i,j);
+
+ int bp = -1, bq = -1;
+ for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+ {
+ if (p > i && !allow_unpaired_position[p]) break;
+ int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+ const RealT *FCptr = &(FCv[offset[p+1]-1]);
+ for (int q = j; q >= q_min; q--)
+ {
+ if (q < j && !allow_unpaired_position[q+1]) break;
+ if (!allow_paired[offset[p+1]+q]) continue;
+
+ RealT score = (p == i && q == j) ?
+ (score_helix + FCptr[q]) :
+ (score_other + cache_score_single[p-i][j-q].first + FCptr[q] + ScoreBasePair(p+1,q) + ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+
+ if (score > best_v)
+ {
+ best_v = score;
+ bp = p;
+ bq = q;
+ }
+ }
+ }
+
+ if (bp != -1 && bq != -1)
+ best_t = EncodeTraceback(TB_FC_SINGLE,(bp-i)*(C_MAX_SINGLE_LENGTH+1)+j-bq);
+ }
+#endif
+
+ // compute MAX (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c)
+
+ UPDATE_MAX(best_v, best_t,
+ FM2v + ScoreJunctionA(i,j) + ScoreMultiPaired() + ScoreMultiBase(),
+ EncodeTraceback(TB_FC_BIFURCATION,FM2t));
+
+ FCv[offset[i]+j] = best_v;
+ FCt[offset[i]+j] = best_t;
+ }
+
+#endif
+
+ // FM1[i,j] = optimal energy for substructure belonging to a
+ // multibranch loop containing a (k+1,j) base pair
+ // preceded by 5' unpaired nucleotides from i to k
+ // for some i <= k <= j-2
+ //
+ // = MAX [FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j) if i+2<=j,
+ // FM1[i+1,j] + b if i+2<=j]
+ //
+ // (assuming 0 < i < i+2 <= j < L)
+
+ if (0 < i && i+2 <= j && j < L)
+ {
+ RealT best_v = RealT(NEG_INF);
+ int best_t = -1;
+
+ // compute FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j)
+
+ if (allow_paired[offset[i+1]+j])
+ {
+ UPDATE_MAX(best_v, best_t,
+ FCv[offset[i+1]+j-1] + ScoreJunctionA(j,i) +
+ ScoreMultiPaired() + ScoreBasePair(i+1,j),
+ EncodeTraceback(TB_FM1_PAIRED,0));
+ }
+
+ // compute FM1[i+1,j] + b
+
+ if (allow_unpaired_position[i+1])
+ {
+ UPDATE_MAX(best_v, best_t,
+ FM1v[offset[i+1]+j] + ScoreMultiUnpaired(i+1),
+ EncodeTraceback(TB_FM1_UNPAIRED,0));
+ }
+
+ FM1v[offset[i]+j] = best_v;
+ FM1t[offset[i]+j] = best_t;
+ }
+
+#if CANDIDATE_LIST
+
+ // If there exists some i <= k < j for which
+ // FM1[i,k] + FM[k,j] >= FM1[i,j]
+ // then for all j' > j, we know that
+ // FM1[i,k] + FM[k,j'] >= FM1[i,j] + FM[j,j'].
+ // since
+ // FM[k,j'] >= FM[k,j] + FM[j,j'].
+ //
+ // From this, it follows that we only need to consider
+ // j as a candidate partition point for future j' values
+ // only if FM1[i,j] > FM1[i,k] + FM[k,j] for all k.
+
+ if (FM1v[offset[i]+j] > FM2v)
+ candidates.push_back(j);
+#endif
+
+ // FM[i,j] = optimal energy for substructure belonging to a
+ // multibranch loop which contains at least one
+ // helix
+ //
+ // = MAX [MAX (i<k<j : FM1[i,k] + FM[k,j]),
+ // FM[i,j-1] + b,
+ // FM1[i,j]]
+ //
+ // (assuming 0 < i < i+2 <= j < L)
+
+ if (0 < i && i+2 <= j && j < L)
+ {
+ RealT best_v = RealT(NEG_INF);
+ int best_t = -1;
+
+ // compute MAX (i<k<j : FM1[i,k] + FM[k,j])
+
+ UPDATE_MAX(best_v, best_t, FM2v, EncodeTraceback(TB_FM_BIFURCATION,FM2t));
+
+ // compute FM[i,j-1] + b
+
+ if (allow_unpaired_position[j])
+ {
+ UPDATE_MAX(best_v, best_t,
+ FMv[offset[i]+j-1] + ScoreMultiUnpaired(j),
+ EncodeTraceback(TB_FM_UNPAIRED,0));
+ }
+
+ // compute FM1[i,j]
+
+ UPDATE_MAX(best_v, best_t, FM1v[offset[i]+j], EncodeTraceback(TB_FM_FM1,0));
+
+ FMv[offset[i]+j] = best_v;
+ FMt[offset[i]+j] = best_t;
+ }
+ }
+ }
+
+ F5v[0] = RealT(0);
+ F5t[0] = EncodeTraceback(TB_F5_ZERO,0);
+ for (int j = 1; j <= L; j++)
+ {
+ // F5[j] = optimal energy for substructure between positions 0 and j
+ // (or 0 if j = 0)
+ //
+ // = MAX [F5[j-1] + ScoreExternalUnpaired(),
+ // MAX (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))]
+
+ RealT best_v = RealT(NEG_INF);
+ int best_t = -1;
+
+ // compute F5[j-1] + ScoreExternalUnpaired()
+
+ if (allow_unpaired_position[j])
+ {
+ UPDATE_MAX(best_v, best_t,
+ F5v[j-1] + ScoreExternalUnpaired(j),
+ EncodeTraceback(TB_F5_UNPAIRED,0));
+ }
+
+ // compute MAX (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))
+
+ for (int k = 0; k < j; k++)
+ {
+ if (allow_paired[offset[k+1]+j])
+ {
+ UPDATE_MAX(best_v, best_t,
+ F5v[k] + FCv[offset[k+1]+j-1] + ScoreExternalPaired() +
+ ScoreBasePair(k+1,j) + ScoreJunctionA(j,k),
+ EncodeTraceback(TB_F5_BIFURCATION,k));
+ }
+ }
+
+ F5v[j] = best_v;
+ F5t[j] = best_t;
+ }
+
+#if SHOW_TIMINGS
+ std::cerr << "Viterbi score: " << F5v[L] << " (" << GetSystemTime() - starting_time << " seconds)" << std::endl;
+#endif
+
+#if CANDIDATE_LIST
+ //std::cerr << "Candidates: " << candidates_seen << "/" << candidates_possible << " = " << double(candidates_seen)/candidates_possible << std::endl;
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::GetViterbiScore()
+//
+// Return Viterbi score for a sequence.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+inline RealT InferenceEngine<RealT>::GetViterbiScore() const
+{
+    // The score of the whole sequence is the F5 entry for position L.
+    const RealT full_sequence_score = F5v[L];
+    return full_sequence_score;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::PredictPairingsViterbi()
+//
+// Use Viterbi decoding to predict pairings.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<int> InferenceEngine<RealT>::PredictPairingsViterbi() const
+{
+ // Rebuilds the pairing vector by following the traceback tables, starting
+ // from the F5 entry for position L. On return, solution[k] is the partner
+ // of position k or SStruct::UNPAIRED; index 0 is set to SStruct::UNKNOWN.
+ // NOTE(review): presumably requires ComputeViterbi() to have filled the
+ // traceback tables beforehand -- confirm against callers.
+
+ std::vector<int> solution(L+1,SStruct::UNPAIRED);
+ solution[0] = SStruct::UNKNOWN;
+ //return solution;
+
+ // Work queue of (traceback table, i, j) cells still to be expanded.
+ std::queue<triple<const int *,int,int> > traceback_queue;
+ traceback_queue.push(make_triple(&F5t[0], 0, L));
+
+ while (!traceback_queue.empty())
+ {
+ triple<const int *,int,int> t = traceback_queue.front();
+ traceback_queue.pop();
+ const int *V = t.first;
+ const int i = t.second;
+ const int j = t.third;
+
+ // F5t is indexed by j alone; all other tables are indexed by offset[i]+j.
+ std::pair<int,int> traceback = DecodeTraceback(V == &F5t[0] ? V[j] : V[offset[i]+j]);
+
+ //std::cerr << (V == FCt ? "FC " : V == FMt ? "FM " : V == FM1t ? "FM1 " : "F5 ");
+ //std::cerr << i << " " << j << ": " << traceback.first << " " << traceback.second << std::endl;
+
+ // traceback.first is the traceback type; traceback.second is its payload.
+ switch (traceback.first)
+ {
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+ case TB_FN_HAIRPIN:
+ // Hairpin loop: no pairs to record and nothing further to expand.
+ break;
+ case TB_FN_SINGLE:
+ {
+ // ComputeViterbi() packs the payload as
+ // (p-i)*(C_MAX_SINGLE_LENGTH+1) + (j-q); unpack p and q.
+ const int p = i + traceback.second / (C_MAX_SINGLE_LENGTH+1);
+ const int q = j - traceback.second % (C_MAX_SINGLE_LENGTH+1);
+ solution[p+1] = q;
+ solution[q] = p+1;
+ traceback_queue.push(make_triple(&FCt[0], p+1, q-1));
+ }
+ break;
+ case TB_FN_BIFURCATION:
+ {
+ // Payload is the split position k: expand FM1[i,k] and FM[k,j].
+ const int k = traceback.second;
+ traceback_queue.push(make_triple(&FM1t[0], i, k));
+ traceback_queue.push(make_triple(&FMt[0], k, j));
+ }
+ break;
+ case TB_FE_STACKING:
+ {
+ // Record the stacked pair (i+1,j) and continue with FE[i+1,j-1].
+ solution[i+1] = j;
+ solution[j] = i+1;
+ traceback_queue.push(make_triple(&FEt[0], i+1, j-1));
+ }
+ break;
+ case TB_FE_FN:
+ {
+ traceback_queue.push(make_triple(&FNt[0], i, j));
+ }
+ break;
+ case TB_FC_FN:
+ {
+ traceback_queue.push(make_triple(&FNt[0], i, j));
+ }
+ break;
+ case TB_FC_HELIX:
+ {
+ // Payload is the helix length m: record its m-1 inner pairs,
+ // then continue with FN[i+m-1,j-m+1].
+ const int m = traceback.second;
+ for (int k = 2; k <= m; k++)
+ {
+ solution[i+k-1] = j-k+2;
+ solution[j-k+2] = i+k-1;
+ }
+ traceback_queue.push(make_triple(&FNt[0], i+m-1, j-m+1));
+ }
+ break;
+ case TB_FC_FE:
+ {
+ // Helix of length D_MAX_HELIX_LENGTH; continue with FE[i+m-1,j-m+1].
+ const int m = D_MAX_HELIX_LENGTH;
+ for (int k = 2; k <= m; k++)
+ {
+ solution[i+k-1] = j-k+2;
+ solution[j-k+2] = i+k-1;
+ }
+ traceback_queue.push(make_triple(&FEt[0], i+m-1, j-m+1));
+ }
+ break;
+#else
+ case TB_FC_HAIRPIN:
+ // Hairpin loop: no pairs to record and nothing further to expand.
+ break;
+ case TB_FC_SINGLE:
+ {
+ // ComputeViterbi() packs the payload as
+ // (p-i)*(C_MAX_SINGLE_LENGTH+1) + (j-q); unpack p and q.
+ const int p = i + traceback.second / (C_MAX_SINGLE_LENGTH+1);
+ const int q = j - traceback.second % (C_MAX_SINGLE_LENGTH+1);
+ solution[p+1] = q;
+ solution[q] = p+1;
+ traceback_queue.push(make_triple(&FCt[0], p+1, q-1));
+ }
+ break;
+ case TB_FC_BIFURCATION:
+ {
+ // Payload is the split position k: expand FM1[i,k] and FM[k,j].
+ const int k = traceback.second;
+ traceback_queue.push(make_triple(&FM1t[0], i, k));
+ traceback_queue.push(make_triple(&FMt[0], k, j));
+ }
+ break;
+#endif
+ case TB_FM1_PAIRED:
+ {
+ // Record the pair (i+1,j) and continue with FC[i+1,j-1].
+ solution[i+1] = j;
+ solution[j] = i+1;
+ traceback_queue.push(make_triple(&FCt[0], i+1, j-1));
+ }
+ break;
+ case TB_FM1_UNPAIRED:
+ {
+ traceback_queue.push(make_triple(&FM1t[0], i+1, j));
+ }
+ break;
+ case TB_FM_BIFURCATION:
+ {
+ // Payload is the split position k: expand FM1[i,k] and FM[k,j].
+ const int k = traceback.second;
+ traceback_queue.push(make_triple(&FM1t[0], i, k));
+ traceback_queue.push(make_triple(&FMt[0], k, j));
+ }
+ break;
+ case TB_FM_UNPAIRED:
+ {
+ traceback_queue.push(make_triple(&FMt[0], i, j-1));
+ }
+ break;
+ case TB_FM_FM1:
+ {
+ traceback_queue.push(make_triple(&FM1t[0], i, j));
+ }
+ break;
+ case TB_F5_ZERO:
+ // Base case F5[0]: nothing left to expand.
+ break;
+ case TB_F5_UNPAIRED:
+ {
+ traceback_queue.push(make_triple(&F5t[0], 0, j-1));
+ }
+ break;
+ case TB_F5_BIFURCATION:
+ {
+ // Payload is k: record the pair (k+1,j), then expand F5[k] and FC[k+1,j-1].
+ const int k = traceback.second;
+ solution[k+1] = j;
+ solution[j] = k+1;
+ traceback_queue.push(make_triple(&F5t[0], 0, k));
+ traceback_queue.push(make_triple(&FCt[0], k+1, j-1));
+ }
+ break;
+ default:
+ Assert(false, "Bad traceback.");
+ }
+ }
+
+ return solution;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ComputeViterbiFeatureCounts()
+//
+// Compute feature counts from the Viterbi decoding.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<RealT> InferenceEngine<RealT>::ComputeViterbiFeatureCounts()
+{
+ // Follows the traceback tables, starting from the F5 entry for position L,
+ // and adds RealT(1) to the count of every scoring term used along the
+ // traced path. Counts are cleared first, finalized at the end, and
+ // returned via GetCounts().
+ // NOTE(review): presumably requires ComputeViterbi() to have filled the
+ // traceback tables beforehand -- confirm against callers.
+
+ // Work queue of (traceback table, i, j) cells still to be expanded.
+ std::queue<triple<int *,int,int> > traceback_queue;
+ traceback_queue.push(make_triple(&F5t[0], 0, L));
+
+ ClearCounts();
+
+ while (!traceback_queue.empty())
+ {
+ triple<int *,int,int> t = traceback_queue.front();
+ traceback_queue.pop();
+ const int *V = t.first;
+ const int i = t.second;
+ const int j = t.third;
+
+ // F5t is indexed by j alone; all other tables are indexed by offset[i]+j.
+ std::pair<int,int> traceback = DecodeTraceback (V == &F5t[0] ? V[j] : V[offset[i]+j]);
+
+ //std::cout << (V == FCt ? "FC " : V == FMt ? "FM " : V == FM1t ? "FM1 " : "F5 ");
+ //std::cout << i << " " << j << ": " << traceback.first << " " << traceback.second << std::endl;
+
+ // traceback.first is the traceback type; traceback.second is its payload.
+ // Each case counts the same terms that ComputeViterbi() summed for it.
+ switch (traceback.first)
+ {
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+ case TB_FN_HAIRPIN:
+ CountHairpin(i,j,RealT(1));
+ break;
+ case TB_FN_SINGLE:
+ {
+ // ComputeViterbi() packs the payload as
+ // (p-i)*(C_MAX_SINGLE_LENGTH+1) + (j-q); unpack p and q.
+ const int p = i + traceback.second / (C_MAX_SINGLE_LENGTH+1);
+ const int q = j - traceback.second % (C_MAX_SINGLE_LENGTH+1);
+ CountSingle(i,j,p,q,RealT(1));
+ traceback_queue.push(make_triple(&FCt[0], p+1, q-1));
+ }
+ break;
+ case TB_FN_BIFURCATION:
+ {
+ // Payload is the split position k: expand FM1[i,k] and FM[k,j].
+ const int k = traceback.second;
+ CountJunctionA(i,j,RealT(1));
+ CountMultiPaired(RealT(1));
+ CountMultiBase(RealT(1));
+ traceback_queue.push(make_triple(&FM1t[0], i, k));
+ traceback_queue.push(make_triple(&FMt[0], k, j));
+ }
+ break;
+ case TB_FE_STACKING:
+ {
+ CountBasePair(i+1,j,RealT(1));
+ CountHelixStacking(i,j+1,RealT(1));
+ traceback_queue.push(make_triple(&FEt[0], i+1, j-1));
+ }
+ break;
+ case TB_FE_FN:
+ {
+ traceback_queue.push(make_triple(&FNt[0], i, j));
+ }
+ break;
+ case TB_FC_FN:
+ {
+ CountIsolated(RealT(1));
+ traceback_queue.push(make_triple(&FNt[0], i, j));
+ }
+ break;
+ case TB_FC_HELIX:
+ {
+ // Payload is the helix length m.
+ const int m = traceback.second;
+ CountHelix(i-1,j+1,m,RealT(1));
+ traceback_queue.push(make_triple(&FNt[0], i+m-1, j-m+1));
+ }
+ break;
+ case TB_FC_FE:
+ {
+ // Helix of length D_MAX_HELIX_LENGTH; continue with FE[i+m-1,j-m+1].
+ const int m = D_MAX_HELIX_LENGTH;
+ CountHelix(i-1,j+1,m,RealT(1));
+ traceback_queue.push(make_triple(&FEt[0], i+m-1, j-m+1));
+ }
+ break;
+#else
+ case TB_FC_HAIRPIN:
+ CountHairpin(i,j,RealT(1));
+ break;
+ case TB_FC_SINGLE:
+ {
+ // ComputeViterbi() packs the payload as
+ // (p-i)*(C_MAX_SINGLE_LENGTH+1) + (j-q); unpack p and q.
+ const int p = i + traceback.second / (C_MAX_SINGLE_LENGTH+1);
+ const int q = j - traceback.second % (C_MAX_SINGLE_LENGTH+1);
+
+ // p == i && q == j is the stacking-pair case, which ComputeViterbi()
+ // scores with ScoreBasePair + ScoreHelixStacking instead of ScoreSingle.
+ if (p == i && q == j)
+ {
+ CountBasePair(i+1,j,RealT(1));
+ CountHelixStacking(i,j+1,RealT(1));
+ }
+ else
+ {
+ CountSingle(i,j,p,q,RealT(1));
+ }
+
+ traceback_queue.push(make_triple(&FCt[0], p+1, q-1));
+ }
+ break;
+ case TB_FC_BIFURCATION:
+ {
+ // Payload is the split position k: expand FM1[i,k] and FM[k,j].
+ const int k = traceback.second;
+ CountJunctionA(i,j,RealT(1));
+ CountMultiPaired(RealT(1));
+ CountMultiBase(RealT(1));
+ traceback_queue.push(make_triple(&FM1t[0], i, k));
+ traceback_queue.push(make_triple(&FMt[0], k, j));
+ }
+ break;
+#endif
+ case TB_FM1_PAIRED:
+ {
+ CountJunctionA(j,i,RealT(1));
+ CountMultiPaired(RealT(1));
+ CountBasePair(i+1,j,RealT(1));
+ traceback_queue.push(make_triple(&FCt[0], i+1, j-1));
+ }
+ break;
+ case TB_FM1_UNPAIRED:
+ {
+ CountMultiUnpaired(i+1,RealT(1));
+ traceback_queue.push(make_triple(&FM1t[0], i+1, j));
+ }
+ break;
+ case TB_FM_BIFURCATION:
+ {
+ // Payload is the split position k; no term is counted for the split itself.
+ const int k = traceback.second;
+ traceback_queue.push(make_triple(&FM1t[0], i, k));
+ traceback_queue.push(make_triple(&FMt[0], k, j));
+ }
+ break;
+ case TB_FM_UNPAIRED:
+ {
+ CountMultiUnpaired(j,RealT(1));
+ traceback_queue.push(make_triple(&FMt[0], i, j-1));
+ }
+ break;
+ case TB_FM_FM1:
+ traceback_queue.push(make_triple(&FM1t[0], i, j));
+ break;
+ case TB_F5_ZERO:
+ // Base case F5[0]: nothing to count or expand.
+ break;
+ case TB_F5_UNPAIRED:
+ CountExternalUnpaired(j,RealT(1));
+ traceback_queue.push(make_triple(&F5t[0], 0, j-1));
+ break;
+ case TB_F5_BIFURCATION:
+ {
+ // Payload is k: count the external pair (k+1,j), then expand F5[k] and FC[k+1,j-1].
+ const int k = traceback.second;
+ CountExternalPaired(RealT(1));
+ CountBasePair(k+1,j,RealT(1));
+ CountJunctionA(j,k,RealT(1));
+ traceback_queue.push(make_triple(&F5t[0], 0, k));
+ traceback_queue.push(make_triple(&FCt[0], k+1, j-1));
+ }
+ break;
+ default:
+ Assert(false, "Bad traceback.");
+ }
+ }
+
+ FinalizeCounts();
+ return GetCounts();
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ComputeInside()
+//
+// Run inside algorithm.
+//
+// Resets and fills the inside tables F5i, FCi, FMi and FM1i (plus
+// FEi and FNi when PARAMS_HELIX_LENGTH or PARAMS_ISOLATED_BASE_PAIR
+// is enabled).  Every cell starts at NEG_INF and each alternative of
+// a recurrence is accumulated with Fast_LogPlusEquals() (presumably
+// a log-space addition; the helper is defined elsewhere).  The
+// per-table comments below keep the "optimal energy" wording, but
+// every recurrence is accumulated as a SUM over its alternatives.
+//
+// The two-dimensional tables are filled for i = L..0 and, for each
+// i, j = i..L, so that cells with larger i or smaller j are
+// available when cell (i,j) is computed.  F5i is filled afterwards
+// for j = 1..L; F5i[L] is the value returned by
+// ComputeLogPartitionCoefficient().
+//////////////////////////////////////////////////////////////////////
+
+ template<class RealT>
+ void InferenceEngine<RealT>::ComputeInside()
+ {
+     InitializeCache();
+
+#if SHOW_TIMINGS
+     double starting_time = GetSystemTime();
+#endif
+
+     // initialization
+
+     F5i.clear(); F5i.resize(L+1, RealT(NEG_INF));
+     FCi.clear(); FCi.resize(SIZE, RealT(NEG_INF));
+     FMi.clear(); FMi.resize(SIZE, RealT(NEG_INF));
+     FM1i.clear(); FM1i.resize(SIZE, RealT(NEG_INF));
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+     FEi.clear(); FEi.resize(SIZE, RealT(NEG_INF));
+     FNi.clear(); FNi.resize(SIZE, RealT(NEG_INF));
+#endif
+
+     for (int i = L; i >= 0; i--)
+     {
+         for (int j = i; j <= L; j++)
+         {
+
+             // FM2[i,j] = SUM (i<k<j : FM1[i,k] + FM[k,j])
+             //
+             // Computed once per cell and shared by the FN/FC and FM
+             // recurrences below.
+
+             RealT FM2i = RealT(NEG_INF);
+
+#if SIMPLE_FM2
+
+             for (int k = i+1; k < j; k++)
+                 Fast_LogPlusEquals(FM2i, FM1i[offset[i]+k] + FMi[offset[k]+j]);
+
+#else
+
+             if (i+2 <= j)
+             {
+                 // Pointer walk equivalent to the SIMPLE_FM2 loop: p1 steps
+                 // along row i of FM1i, p2 steps down column j of FMi
+                 // (advancing by L-k moves from row k to row k+1).
+                 const RealT *p1 = &(FM1i[offset[i]+i+1]);
+                 const RealT *p2 = &(FMi[offset[i+1]+j]);
+
+                 // NOTE: the 'register' storage-class specifier was dropped
+                 // here; it is deprecated in C++11 and removed in C++17.
+                 for (int k = i+1; k < j; k++)
+                 {
+                     Fast_LogPlusEquals(FM2i, (*p1) + (*p2));
+                     ++p1;
+                     p2 += L-k;
+                 }
+             }
+
+#endif
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+
+             // FN[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //           and the next interaction is not a stacking pair
+             //
+             //         = SUM [ScoreHairpin(i,j),
+             //                SUM (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+             //                ScoreJunctionA(i,j) + a + c + SUM (i<k<j : FM1[i,k] + FM[k,j])]
+             //
+             //           (assuming 0 < i <= j < L)
+             //
+             // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+
+                 RealT sum_i = RealT(NEG_INF);
+
+                 // compute ScoreHairpin(i,j)
+
+                 if (allow_unpaired[offset[i]+j] && j-i >= C_MIN_HAIRPIN_LENGTH)
+                     Fast_LogPlusEquals(sum_i, ScoreHairpin(i,j));
+
+                 // compute SUM (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+
+                 for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                 {
+                     if (p > i && !allow_unpaired_position[p]) break;
+                     int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+                     for (int q = j; q >= q_min; q--)
+                     {
+                         if (q < j && !allow_unpaired_position[q+1]) break;
+                         if (!allow_paired[offset[p+1]+q]) continue;
+
+                         // p == i && q == j is a stacking pair, which FN excludes
+                         if (i == p && j == q) continue;
+
+                         Fast_LogPlusEquals(sum_i, ScoreSingle(i,j,p,q) + FCi[offset[p+1]+q-1]);
+                     }
+                 }
+
+#else
+
+                 if (i+2 <= j)
+                 {
+                     RealT score_other = ScoreJunctionB(i,j);
+
+                     for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                     {
+                         if (p > i && !allow_unpaired_position[p]) break;
+                         int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+
+                         // FCptr[q] aliases FCi[offset[p+1]+q-1], i.e. FC[p+1,q-1]
+                         const RealT *FCptr = &(FCi[offset[p+1]-1]);
+                         for (int q = j; q >= q_min; q--)
+                         {
+                             if (q < j && !allow_unpaired_position[q+1]) break;
+                             if (!allow_paired[offset[p+1]+q]) continue;
+                             if (i == p && j == q) continue;
+
+                             RealT score = (score_other + cache_score_single[p-i][j-q].first + FCptr[q] + ScoreBasePair(p+1,q) +
+                                            ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+
+                             Fast_LogPlusEquals(sum_i, score);
+                         }
+                     }
+                 }
+#endif
+
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c)
+
+                 Fast_LogPlusEquals(sum_i, FM2i + ScoreJunctionA(i,j) + ScoreMultiPaired() + ScoreMultiBase());
+
+                 FNi[offset[i]+j] = sum_i;
+             }
+
+             // FE[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) ... (i-D+1,j+D) are
+             //           already base-paired
+             //
+             //         = SUM [ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1]   if i+2<=j,
+             //                FN(i,j)]
+             //
+             //           (assuming 0 < i <= j < L)
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+                 RealT sum_i = RealT(NEG_INF);
+
+                 // compute ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1]
+
+                 if (i+2 <= j && allow_paired[offset[i+1]+j])
+                 {
+                     Fast_LogPlusEquals(sum_i, ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) + FEi[offset[i+1]+j-1]);
+                 }
+
+                 // compute FN(i,j)
+
+                 Fast_LogPlusEquals(sum_i, FNi[offset[i]+j]);
+
+                 FEi[offset[i]+j] = sum_i;
+             }
+
+             // FC[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //           but (i-1,j+2) are not
+             //
+             //         = SUM [ScoreIsolated() + FN(i,j),
+             //                SUM (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k)),
+             //                FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+             //
+             //           (assuming 0 < i <= j < L)
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+                 RealT sum_i = RealT(NEG_INF);
+
+                 // compute ScoreIsolated() + FN(i,j)
+
+                 Fast_LogPlusEquals(sum_i, ScoreIsolated() + FNi[offset[i]+j]);
+
+                 // compute SUM (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k))
+
+                 // 'allowed' stays true only while every pair of the growing
+                 // helix is permitted; it also gates the length-D case below.
+                 bool allowed = true;
+                 for (int k = 2; k < D_MAX_HELIX_LENGTH; k++)
+                 {
+                     if (i + 2*k - 2 > j) break;
+                     if (!allow_paired[offset[i+k-1]+j-k+2]) { allowed = false; break; }
+                     Fast_LogPlusEquals(sum_i, ScoreHelix(i-1,j+1,k) + FNi[offset[i+k-1]+j-k+1]);
+                 }
+
+                 // compute FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+
+                 if (i + 2*D_MAX_HELIX_LENGTH-2 <= j)
+                 {
+                     if (allowed && allow_paired[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+2])
+                         Fast_LogPlusEquals(sum_i, ScoreHelix(i-1,j+1,D_MAX_HELIX_LENGTH) + FEi[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+1]);
+                 }
+
+                 FCi[offset[i]+j] = sum_i;
+             }
+
+#else
+
+             // FC[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //
+             //         = SUM [ScoreHairpin(i,j),
+             //                SUM (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+             //                ScoreJunctionA(i,j) + a + c + SUM (i<k<j : FM1[i,k] + FM[k,j])]
+             //
+             //           (assuming 0 < i <= j < L)
+             //
+             // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+                 RealT sum_i = RealT(NEG_INF);
+
+                 // compute ScoreHairpin(i,j)
+
+                 if (allow_unpaired[offset[i]+j] && j-i >= C_MIN_HAIRPIN_LENGTH)
+                     Fast_LogPlusEquals(sum_i, ScoreHairpin(i,j));
+
+                 // compute SUM (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+
+                 for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                 {
+                     if (p > i && !allow_unpaired_position[p]) break;
+                     int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+                     for (int q = j; q >= q_min; q--)
+                     {
+                         if (q < j && !allow_unpaired_position[q+1]) break;
+                         if (!allow_paired[offset[p+1]+q]) continue;
+
+                         // p == i && q == j is scored as a stacking pair,
+                         // everything else as a single-branch loop
+                         Fast_LogPlusEquals(sum_i,
+                                            FCi[offset[p+1]+q-1] +
+                                            (p == i && q == j ? ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) : ScoreSingle(i,j,p,q)));
+                     }
+                 }
+
+#else
+
+                 {
+                     RealT score_helix = (i+2 <= j ? ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) : 0);
+                     RealT score_other = ScoreJunctionB(i,j);
+
+                     for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                     {
+                         if (p > i && !allow_unpaired_position[p]) break;
+                         int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+
+                         // FCptr[q] aliases FCi[offset[p+1]+q-1], i.e. FC[p+1,q-1]
+                         const RealT *FCptr = &(FCi[offset[p+1]-1]);
+                         for (int q = j; q >= q_min; q--)
+                         {
+                             if (q < j && !allow_unpaired_position[q+1]) break;
+                             if (!allow_paired[offset[p+1]+q]) continue;
+
+                             RealT score = (p == i && q == j) ?
+                                 (score_helix + FCptr[q]) :
+                                 (score_other + cache_score_single[p-i][j-q].first + FCptr[q] + ScoreBasePair(p+1,q) +
+                                  ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+
+                             Fast_LogPlusEquals(sum_i, score);
+                         }
+                     }
+                 }
+#endif
+
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c)
+
+                 Fast_LogPlusEquals(sum_i, FM2i + ScoreJunctionA(i,j) + ScoreMultiPaired() + ScoreMultiBase());
+
+                 FCi[offset[i]+j] = sum_i;
+             }
+
+#endif
+
+             // FM1[i,j] = optimal energy for substructure belonging to a
+             //            multibranch loop containing a (k+1,j) base pair
+             //            preceded by 5' unpaired nucleotides from i to k
+             //            for some i <= k <= j-2
+             //
+             //          = SUM [FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j)   if i+2<=j,
+             //                 FM1[i+1,j] + b                                          if i+2<=j]
+             //
+             //            (assuming 0 < i < i+2 <= j < L)
+
+             if (0 < i && i+2 <= j && j < L)
+             {
+
+                 RealT sum_i = RealT(NEG_INF);
+
+                 // compute FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j)
+
+                 if (allow_paired[offset[i+1]+j])
+                     Fast_LogPlusEquals(sum_i, FCi[offset[i+1]+j-1] + ScoreJunctionA(j,i) + ScoreMultiPaired() + ScoreBasePair(i+1,j));
+
+                 // compute FM1[i+1,j] + b
+
+                 if (allow_unpaired_position[i+1])
+                     Fast_LogPlusEquals(sum_i, FM1i[offset[i+1]+j] + ScoreMultiUnpaired(i+1));
+
+                 FM1i[offset[i]+j] = sum_i;
+             }
+
+             // FM[i,j] = optimal energy for substructure belonging to a
+             //           multibranch loop which contains at least one
+             //           helix
+             //
+             //         = SUM [SUM (i<k<j : FM1[i,k] + FM[k,j]),
+             //                FM[i,j-1] + b,
+             //                FM1[i,j]]
+             //
+             //           (assuming 0 < i < i+2 <= j < L)
+
+             if (0 < i && i+2 <= j && j < L)
+             {
+
+                 RealT sum_i = RealT(NEG_INF);
+
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j])
+
+                 Fast_LogPlusEquals(sum_i, FM2i);
+
+                 // compute FM[i,j-1] + b
+
+                 if (allow_unpaired_position[j])
+                     Fast_LogPlusEquals(sum_i, FMi[offset[i]+j-1] + ScoreMultiUnpaired(j));
+
+                 // compute FM1[i,j]
+
+                 Fast_LogPlusEquals(sum_i, FM1i[offset[i]+j]);
+
+                 FMi[offset[i]+j] = sum_i;
+             }
+         }
+     }
+
+     F5i[0] = RealT(0);
+     for (int j = 1; j <= L; j++)
+     {
+
+         // F5[j] = optimal energy for substructure between positions 0 and j
+         //         (or 0 if j = 0)
+         //
+         //       = SUM [F5[j-1] + ScoreExternalUnpaired(),
+         //              SUM (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))]
+
+         RealT sum_i = RealT(NEG_INF);
+
+         // compute F5[j-1] + ScoreExternalUnpaired()
+
+         if (allow_unpaired_position[j])
+             Fast_LogPlusEquals(sum_i, F5i[j-1] + ScoreExternalUnpaired(j));
+
+         // compute SUM (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))
+
+         for (int k = 0; k < j; k++)
+             if (allow_paired[offset[k+1]+j])
+                 Fast_LogPlusEquals(sum_i, F5i[k] + FCi[offset[k+1]+j-1] + ScoreExternalPaired() + ScoreBasePair(k+1,j) + ScoreJunctionA(j,k));
+
+         F5i[j] = sum_i;
+     }
+
+#if SHOW_TIMINGS
+     std::cerr << "Inside score: " << F5i[L] << " (" << GetSystemTime() - starting_time << " seconds)" << std::endl;
+#endif
+ }
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ComputeOutside()
+//
+// Run outside algorithm.
+//
+// Resets and fills the outside tables F5o, FCo, FMo and FM1o (plus
+// FEo and FNo when PARAMS_HELIX_LENGTH or PARAMS_ISOLATED_BASE_PAIR
+// is enabled).  The inside tables F5i, FCi, FMi and FM1i are read
+// here, so ComputeInside() must have been run first.
+//
+// Each inside recurrence is visited in the reverse order of the
+// inside pass (F5 for j = L..1, then the two-dimensional tables for
+// i = 0..L and j = L..i).  For every term of a recurrence, the
+// outside value of the cell being defined is pushed, together with
+// the term's score, into the outside value of the cell the term
+// reads, using Fast_LogPlusEquals() (presumably a log-space
+// addition; the helper is defined elsewhere).
+//////////////////////////////////////////////////////////////////////
+
+ template<class RealT>
+ void InferenceEngine<RealT>::ComputeOutside()
+ {
+     InitializeCache();
+
+#if SHOW_TIMINGS
+     double starting_time = GetSystemTime();
+#endif
+
+     // initialization
+
+     F5o.clear(); F5o.resize(L+1, RealT(NEG_INF));
+     FCo.clear(); FCo.resize(SIZE, RealT(NEG_INF));
+     FMo.clear(); FMo.resize(SIZE, RealT(NEG_INF));
+     FM1o.clear(); FM1o.resize(SIZE, RealT(NEG_INF));
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+     FEo.clear(); FEo.resize(SIZE, RealT(NEG_INF));
+     FNo.clear(); FNo.resize(SIZE, RealT(NEG_INF));
+#endif
+
+     F5o[L] = RealT(0);
+     for (int j = L; j >= 1; j--)
+     {
+
+         // F5[j] = optimal energy for substructure between positions 0 and j
+         //         (or 0 if j = 0)
+         //
+         //       = SUM [F5[j-1] + ScoreExternalUnpaired(),
+         //              SUM (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))]
+
+         // compute F5[j-1] + ScoreExternalUnpaired()
+
+         if (allow_unpaired_position[j])
+             Fast_LogPlusEquals(F5o[j-1], F5o[j] + ScoreExternalUnpaired(j));
+
+         // compute SUM (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))
+
+         {
+             for (int k = 0; k < j; k++)
+             {
+                 if (allow_paired[offset[k+1]+j])
+                 {
+                     // The term is a product of two inside cells, so each
+                     // one receives the outside value times the other.
+                     RealT temp = F5o[j] + ScoreExternalPaired() + ScoreBasePair(k+1,j) + ScoreJunctionA(j,k);
+                     Fast_LogPlusEquals(F5o[k], temp + FCi[offset[k+1]+j-1]);
+                     Fast_LogPlusEquals(FCo[offset[k+1]+j-1], temp + F5i[k]);
+                 }
+             }
+         }
+     }
+
+     for (int i = 0; i <= L; i++)
+     {
+         for (int j = L; j >= i; j--)
+         {
+             // FM2o collects the outside value of the shared
+             // SUM (i<k<j : FM1[i,k] + FM[k,j]) term; it is distributed to
+             // FM1o/FMo at the end of this iteration.
+             RealT FM2o = RealT(NEG_INF);
+
+             // FM[i,j] = optimal energy for substructure belonging to a
+             //           multibranch loop which contains at least one
+             //           helix
+             //
+             //         = SUM [SUM (i<k<j : FM1[i,k] + FM[k,j]),
+             //                FM[i,j-1] + b,
+             //                FM1[i,j]]
+             //
+             //           (assuming 0 < i < i+2 <= j < L)
+
+             if (0 < i && i+2 <= j && j < L)
+             {
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j])
+
+                 Fast_LogPlusEquals(FM2o, FMo[offset[i]+j]);
+
+                 // compute FM[i,j-1] + b
+
+                 if (allow_unpaired_position[j])
+                     Fast_LogPlusEquals(FMo[offset[i]+j-1], FMo[offset[i]+j] + ScoreMultiUnpaired(j));
+
+                 // compute FM1[i,j]
+
+                 Fast_LogPlusEquals(FM1o[offset[i]+j], FMo[offset[i]+j]);
+             }
+
+             // FM1[i,j] = optimal energy for substructure belonging to a
+             //            multibranch loop containing a (k+1,j) base pair
+             //            preceded by 5' unpaired nucleotides from i to k
+             //            for some i <= k <= j-2
+             //
+             //          = SUM [FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j)   if i+2<=j,
+             //                 FM1[i+1,j] + b                                          if i+2<=j]
+             //
+             //            (assuming 0 < i < i+2 <= j < L)
+
+             if (0 < i && i+2 <= j && j < L)
+             {
+                 // compute FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j)
+
+                 if (allow_paired[offset[i+1]+j])
+                     Fast_LogPlusEquals(FCo[offset[i+1]+j-1], FM1o[offset[i]+j] + ScoreJunctionA(j,i) + ScoreMultiPaired() + ScoreBasePair(i+1,j));
+
+                 // compute FM1[i+1,j] + b
+
+                 if (allow_unpaired_position[i+1])
+                     Fast_LogPlusEquals(FM1o[offset[i+1]+j], FM1o[offset[i]+j] + ScoreMultiUnpaired(i+1));
+
+             }
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+
+             // FC[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //           but (i-1,j+2) are not
+             //
+             //         = SUM [ScoreIsolated() + FN(i,j),
+             //                SUM (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k)),
+             //                FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+             //
+             //           (assuming 0 < i <= j < L)
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+
+                 // compute ScoreIsolated() + FN(i,j)
+
+                 Fast_LogPlusEquals(FNo[offset[i]+j], ScoreIsolated() + FCo[offset[i]+j]);
+
+                 // compute SUM (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k))
+
+                 // 'allowed' stays true only while every pair of the growing
+                 // helix is permitted; it also gates the length-D case below.
+                 bool allowed = true;
+                 for (int k = 2; k < D_MAX_HELIX_LENGTH; k++)
+                 {
+                     if (i + 2*k - 2 > j) break;
+                     if (!allow_paired[offset[i+k-1]+j-k+2]) { allowed = false; break; }
+                     Fast_LogPlusEquals(FNo[offset[i+k-1]+j-k+1], ScoreHelix(i-1,j+1,k) + FCo[offset[i]+j]);
+                 }
+
+                 // compute FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+
+                 if (i + 2*D_MAX_HELIX_LENGTH-2 <= j)
+                 {
+                     if (allowed && allow_paired[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+2])
+                         Fast_LogPlusEquals(FEo[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+1],
+                                            ScoreHelix(i-1,j+1,D_MAX_HELIX_LENGTH) + FCo[offset[i]+j]);
+                 }
+             }
+
+             // FE[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) ... (i-D+1,j+D) are
+             //           already base-paired
+             //
+             //         = SUM [ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1]   if i+2<=j,
+             //                FN(i,j)]
+             //
+             //           (assuming 0 < i <= j < L)
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+
+                 // compute ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1]
+
+                 if (i+2 <= j && allow_paired[offset[i+1]+j])
+                 {
+                     Fast_LogPlusEquals(FEo[offset[i+1]+j-1], FEo[offset[i]+j] + ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1));
+                 }
+
+                 // compute FN(i,j)
+
+                 Fast_LogPlusEquals(FNo[offset[i]+j], FEo[offset[i]+j]);
+             }
+
+             // FN[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //           and the next interaction is not a stacking pair
+             //
+             //         = SUM [ScoreHairpin(i,j),
+             //                SUM (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+             //                ScoreJunctionA(i,j) + a + c + SUM (i<k<j : FM1[i,k] + FM[k,j])]
+             //
+             //           (assuming 0 < i <= j < L)
+             //
+             // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+
+                 // compute ScoreHairpin(i,j) -- do nothing
+
+                 // compute SUM (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+                 {
+                     RealT temp = FNo[offset[i]+j];
+                     for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                     {
+                         if (p > i && !allow_unpaired_position[p]) break;
+                         int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+                         for (int q = j; q >= q_min; q--)
+                         {
+                             if (q < j && !allow_unpaired_position[q+1]) break;
+                             if (!allow_paired[offset[p+1]+q]) continue;
+
+                             // p == i && q == j is a stacking pair, which FN excludes
+                             if (i == p && j == q) continue;
+
+                             Fast_LogPlusEquals(FCo[offset[p+1]+q-1], temp + ScoreSingle(i,j,p,q));
+                         }
+                     }
+                 }
+#else
+
+                 {
+                     RealT score_other = FNo[offset[i]+j] + ScoreJunctionB(i,j);
+
+                     for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                     {
+                         if (p > i && !allow_unpaired_position[p]) break;
+                         int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+
+                         // FCptr[q] aliases FCo[offset[p+1]+q-1], i.e. outside FC[p+1,q-1]
+                         RealT *FCptr = &(FCo[offset[p+1]-1]);
+                         for (int q = j; q >= q_min; q--)
+                         {
+                             if (q < j && !allow_unpaired_position[q+1]) break;
+                             if (!allow_paired[offset[p+1]+q]) continue;
+                             if (i == p && j == q) continue;
+
+                             Fast_LogPlusEquals(FCptr[q], score_other + cache_score_single[p-i][j-q].first + ScoreBasePair(p+1,q) + ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+                         }
+                     }
+                 }
+#endif
+
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c)
+
+                 Fast_LogPlusEquals(FM2o, FNo[offset[i]+j] + ScoreJunctionA(i,j) + ScoreMultiPaired() + ScoreMultiBase());
+
+             }
+
+#else
+
+             // FC[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //
+             //         = SUM [ScoreHairpin(i,j),
+             //                SUM (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+             //                ScoreJunctionA(i,j) + a + c + SUM (i<k<j : FM1[i,k] + FM[k,j])]
+             //
+             //           (assuming 0 < i <= j < L)
+             //
+             // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+                 // compute ScoreHairpin(i,j) -- do nothing
+
+                 // compute SUM (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+                 {
+                     RealT temp = FCo[offset[i]+j];
+                     for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                     {
+                         if (p > i && !allow_unpaired_position[p]) break;
+                         int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+                         for (int q = j; q >= q_min; q--)
+                         {
+                             if (q < j && !allow_unpaired_position[q+1]) break;
+                             if (!allow_paired[offset[p+1]+q]) continue;
+
+                             // p == i && q == j is scored as a stacking pair,
+                             // everything else as a single-branch loop
+                             Fast_LogPlusEquals(FCo[offset[p+1]+q-1],
+                                                temp + (p == i && q == j ? ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) : ScoreSingle(i,j,p,q)));
+                         }
+                     }
+                 }
+#else
+
+                 {
+                     RealT score_helix = (i+2 <= j ? FCo[offset[i]+j] + ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) : 0);
+                     RealT score_other = FCo[offset[i]+j] + ScoreJunctionB(i,j);
+
+                     for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                     {
+                         if (p > i && !allow_unpaired_position[p]) break;
+                         int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+
+                         // FCptr[q] aliases FCo[offset[p+1]+q-1], i.e. outside FC[p+1,q-1]
+                         RealT *FCptr = &(FCo[offset[p+1]-1]);
+                         for (int q = j; q >= q_min; q--)
+                         {
+                             if (q < j && !allow_unpaired_position[q+1]) break;
+                             if (!allow_paired[offset[p+1]+q]) continue;
+
+                             Fast_LogPlusEquals(FCptr[q],
+                                                (p == i && q == j) ?
+                                                score_helix :
+                                                score_other + cache_score_single[p-i][j-q].first + ScoreBasePair(p+1,q) + ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+                         }
+                     }
+                 }
+#endif
+
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c)
+
+                 Fast_LogPlusEquals(FM2o, FCo[offset[i]+j] + ScoreJunctionA(i,j) + ScoreMultiPaired() + ScoreMultiBase());
+
+             }
+
+#endif
+
+             // FM2[i,j] = SUM (i<k<j : FM1[i,k] + FM[k,j])
+             //
+             // Distribute FM2o to both factors of every term of the sum.
+
+#if SIMPLE_FM2
+
+             for (int k = i+1; k < j; k++)
+             {
+                 Fast_LogPlusEquals(FM1o[offset[i]+k], FM2o + FMi[offset[k]+j]);
+                 Fast_LogPlusEquals(FMo[offset[k]+j], FM2o + FM1i[offset[i]+k]);
+             }
+
+#else
+             if (i+2 <= j)
+             {
+                 // Pointer walk equivalent to the SIMPLE_FM2 loop: p1* step
+                 // along row i of FM1, p2* step down column j of FM
+                 // (advancing by L-k moves from row k to row k+1).  The
+                 // inside tables are only read, hence the const pointers.
+                 const RealT *p1i = &(FM1i[offset[i]+i+1]);
+                 const RealT *p2i = &(FMi[offset[i+1]+j]);
+                 RealT *p1o = &(FM1o[offset[i]+i+1]);
+                 RealT *p2o = &(FMo[offset[i+1]+j]);
+
+                 // NOTE: the 'register' storage-class specifier was dropped
+                 // here; it is deprecated in C++11 and removed in C++17.
+                 for (int k = i+1; k < j; k++)
+                 {
+                     Fast_LogPlusEquals(*p1o, FM2o + *p2i);
+                     Fast_LogPlusEquals(*p2o, FM2o + *p1i);
+                     ++p1i;
+                     ++p1o;
+                     p2i += L-k;
+                     p2o += L-k;
+                 }
+             }
+
+#endif
+         }
+     }
+
+#if SHOW_TIMINGS
+     std::cerr << "Outside score: " << F5o[0] << " (" << GetSystemTime() - starting_time << " seconds)" << std::endl;
+#endif
+ }
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ComputeLogPartitionCoefficient()
+//
+// Return the partition coefficient, read from the last cell (index
+// L) of the inside table F5i.
+//////////////////////////////////////////////////////////////////////
+
+ template<class RealT>
+ inline RealT InferenceEngine<RealT>::ComputeLogPartitionCoefficient() const
+ {
+     // NOTE: This should be equal to F5o[0].
+
+     const RealT log_partition = F5i[L];
+     return log_partition;
+ }
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ComputeFeatureCountExpectations()
+//
+// Combine the results of the inside and outside algorithms
+// in order to compute feature count expectations.
+//
+// Reads the inside (F*i) and outside (F*o) tables, so both must
+// already have been filled.  For every term of every recurrence the
+// weight
+//
+//     Fast_Exp(outside(cell) + score(term) + inside(sub-cells) - Z)
+//
+// with Z = ComputeLogPartitionCoefficient() is passed to the Count*()
+// routines of the features that make up the term's score.  Counts
+// are reset with ClearCounts() first and post-processed with
+// FinalizeCounts() before GetCounts() is returned.
+//
+// With FAST_SINGLE_BRANCH_LOOPS the length-dependent part of the
+// single-branch loop score is accumulated in
+// cache_score_single[..][..].second instead of through
+// CountSingle() (presumably folded into the counts by
+// FinalizeCounts() -- confirm there).
+//////////////////////////////////////////////////////////////////////
+
+ template<class RealT>
+ std::vector<RealT> InferenceEngine<RealT>::ComputeFeatureCountExpectations()
+ {
+#if SHOW_TIMINGS
+     double starting_time = GetSystemTime();
+#endif
+
+     //std::cerr << "Inside score: " << F5i[L].GetLogRepresentation() << std::endl;
+     //std::cerr << "Outside score: " << F5o[0].GetLogRepresentation() << std::endl;
+
+     const RealT Z = ComputeLogPartitionCoefficient();
+
+     ClearCounts();
+
+     for (int i = L; i >= 0; i--)
+     {
+         for (int j = i; j <= L; j++)
+         {
+
+             // FM2[i,j] = SUM (i<k<j : FM1[i,k] + FM[k,j])
+             //
+             // Recomputed from the inside tables because it is not stored.
+
+             RealT FM2i = RealT(NEG_INF);
+
+#if SIMPLE_FM2
+
+             for (int k = i+1; k < j; k++)
+                 Fast_LogPlusEquals(FM2i, FM1i[offset[i]+k] + FMi[offset[k]+j]);
+
+#else
+
+             if (i+2 <= j)
+             {
+                 // Pointer walk equivalent to the SIMPLE_FM2 loop: p1 steps
+                 // along row i of FM1i, p2 steps down column j of FMi
+                 // (advancing by L-k moves from row k to row k+1).
+                 const RealT *p1 = &(FM1i[offset[i]+i+1]);
+                 const RealT *p2 = &(FMi[offset[i+1]+j]);
+
+                 // NOTE: the 'register' storage-class specifier was dropped
+                 // here; it is deprecated in C++11 and removed in C++17.
+                 for (int k = i+1; k < j; k++)
+                 {
+                     Fast_LogPlusEquals(FM2i, (*p1) + (*p2));
+                     ++p1;
+                     p2 += L-k;
+                 }
+             }
+
+#endif
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+
+             // FN[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //           and the next interaction is not a stacking pair
+             //
+             //         = SUM [ScoreHairpin(i,j),
+             //                SUM (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+             //                ScoreJunctionA(i,j) + a + c + SUM (i<k<j : FM1[i,k] + FM[k,j])]
+             //
+             //           (assuming 0 < i <= j < L)
+             //
+             // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+
+                 RealT outside = FNo[offset[i]+j] - Z;
+
+                 // compute ScoreHairpin(i,j)
+
+                 if (allow_unpaired[offset[i]+j] && j-i >= C_MIN_HAIRPIN_LENGTH)
+                     CountHairpin(i,j,Fast_Exp(outside + ScoreHairpin(i,j)));
+
+                 // compute SUM (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+                 for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                 {
+                     if (p > i && !allow_unpaired_position[p]) break;
+                     int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+                     for (int q = j; q >= q_min; q--)
+                     {
+                         if (q < j && !allow_unpaired_position[q+1]) break;
+                         if (!allow_paired[offset[p+1]+q]) continue;
+
+                         // p == i && q == j is a stacking pair, which FN excludes
+                         if (i == p && j == q) continue;
+
+                         CountSingle(i,j,p,q,Fast_Exp(outside + ScoreSingle(i,j,p,q) + FCi[offset[p+1]+q-1]));
+                     }
+                 }
+
+#else
+
+                 {
+                     RealT score_other = outside + ScoreJunctionB(i,j);
+
+                     for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                     {
+                         if (p > i && !allow_unpaired_position[p]) break;
+                         int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+
+                         // FCptr[q] aliases FCi[offset[p+1]+q-1], i.e. FC[p+1,q-1]
+                         const RealT *FCptr = &(FCi[offset[p+1]-1]);
+                         for (int q = j; q >= q_min; q--)
+                         {
+                             if (q < j && !allow_unpaired_position[q+1]) break;
+                             if (!allow_paired[offset[p+1]+q]) continue;
+                             if (i == p && j == q) continue;
+
+                             RealT value = Fast_Exp(score_other + cache_score_single[p-i][j-q].first + FCptr[q] + ScoreBasePair(p+1,q) + ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+                             cache_score_single[p-i][j-q].second += value;
+                             CountBasePair(p+1,q,value);
+                             CountJunctionB(i,j,value);
+                             CountJunctionB(q,p,value);
+                             CountSingleNucleotides(i,j,p,q,value);
+                         }
+                     }
+                 }
+#endif
+
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c)
+
+                 {
+                     RealT value = Fast_Exp(outside + FM2i + ScoreJunctionA(i,j) + ScoreMultiPaired() + ScoreMultiBase());
+                     CountJunctionA(i,j,value);
+                     CountMultiPaired(value);
+                     CountMultiBase(value);
+                 }
+             }
+
+             // FE[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) ... (i-D+1,j+D) are
+             //           already base-paired
+             //
+             //         = SUM [ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1]   if i+2<=j,
+             //                FN(i,j)]
+             //
+             //           (assuming 0 < i <= j < L)
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+
+                 RealT outside = FEo[offset[i]+j] - Z;
+
+                 // compute ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1]
+
+                 if (i+2 <= j && allow_paired[offset[i+1]+j])
+                 {
+                     RealT value = Fast_Exp(outside + ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) + FEi[offset[i+1]+j-1]);
+                     CountBasePair(i+1,j,value);
+                     CountHelixStacking(i,j+1,value);
+                 }
+
+                 // compute FN(i,j) -- do nothing
+
+             }
+
+             // FC[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //           but (i-1,j+2) are not
+             //
+             //         = SUM [ScoreIsolated() + FN(i,j),
+             //                SUM (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k)),
+             //                FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+             //
+             //           (assuming 0 < i <= j < L)
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+                 RealT outside = FCo[offset[i]+j] - Z;
+
+                 // compute ScoreIsolated() + FN(i,j)
+
+                 CountIsolated(Fast_Exp(outside + ScoreIsolated() + FNi[offset[i]+j]));
+
+                 // compute SUM (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k))
+
+                 // 'allowed' stays true only while every pair of the growing
+                 // helix is permitted; it also gates the length-D case below.
+                 bool allowed = true;
+                 for (int k = 2; k < D_MAX_HELIX_LENGTH; k++)
+                 {
+                     if (i + 2*k - 2 > j) break;
+                     if (!allow_paired[offset[i+k-1]+j-k+2]) { allowed = false; break; }
+                     CountHelix(i-1,j+1,k,Fast_Exp(outside + ScoreHelix(i-1,j+1,k) + FNi[offset[i+k-1]+j-k+1]));
+                 }
+
+                 // compute FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+
+                 if (i + 2*D_MAX_HELIX_LENGTH-2 <= j)
+                 {
+                     if (allowed && allow_paired[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+2])
+                         CountHelix(i-1,j+1,D_MAX_HELIX_LENGTH,
+                                    Fast_Exp(outside + ScoreHelix(i-1,j+1,D_MAX_HELIX_LENGTH) + FEi[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+1]));
+                 }
+             }
+
+#else
+
+             // FC[i,j] = optimal energy for substructure between positions
+             //           i and j such that letters (i,j+1) are base-paired
+             //
+             //         = SUM [ScoreHairpin(i,j),
+             //                SUM (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+             //                ScoreJunctionA(i,j) + a + c + SUM (i<k<j : FM1[i,k] + FM[k,j])]
+             //
+             //           (assuming 0 < i <= j < L)
+             //
+             // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+             if (0 < i && j < L && allow_paired[offset[i]+j+1])
+             {
+                 RealT outside = FCo[offset[i]+j] - Z;
+
+                 // compute ScoreHairpin(i,j)
+
+                 if (allow_unpaired[offset[i]+j] && j-i >= C_MIN_HAIRPIN_LENGTH)
+                     CountHairpin(i,j,Fast_Exp(outside + ScoreHairpin(i,j)));
+
+                 // compute SUM (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+                 for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                 {
+                     if (p > i && !allow_unpaired_position[p]) break;
+                     int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+                     for (int q = j; q >= q_min; q--)
+                     {
+                         if (q < j && !allow_unpaired_position[q+1]) break;
+                         if (!allow_paired[offset[p+1]+q]) continue;
+
+                         if (p == i && q == j)
+                         {
+                             // stacking pair
+                             RealT value = Fast_Exp(outside + ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) + FCi[offset[p+1]+q-1]);
+                             CountBasePair(i+1,j,value);
+                             CountHelixStacking(i,j+1,value);
+                         }
+                         else
+                         {
+                             CountSingle(i,j,p,q,Fast_Exp(outside + ScoreSingle(i,j,p,q) + FCi[offset[p+1]+q-1]));
+                         }
+                     }
+                 }
+
+#else
+
+                 {
+                     RealT score_helix = (i+2 <= j ? outside + ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) : 0);
+                     RealT score_other = outside + ScoreJunctionB(i,j);
+
+                     for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+                     {
+                         if (p > i && !allow_unpaired_position[p]) break;
+                         int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+
+                         // FCptr[q] aliases FCi[offset[p+1]+q-1], i.e. FC[p+1,q-1]
+                         const RealT *FCptr = &(FCi[offset[p+1]-1]);
+                         for (int q = j; q >= q_min; q--)
+                         {
+                             if (q < j && !allow_unpaired_position[q+1]) break;
+                             if (!allow_paired[offset[p+1]+q]) continue;
+
+                             if (p == i && q == j)
+                             {
+                                 // stacking pair
+                                 RealT value = Fast_Exp(score_helix + FCptr[q]);
+                                 cache_score_single[0][0].second += value;
+                                 CountBasePair(i+1,j,value);
+                                 CountHelixStacking(i,j+1,value);
+                             }
+                             else
+                             {
+                                 RealT value = Fast_Exp(score_other + cache_score_single[p-i][j-q].first + FCptr[q] + ScoreBasePair(p+1,q) +
+                                                        ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+                                 cache_score_single[p-i][j-q].second += value;
+                                 CountBasePair(p+1,q,value);
+                                 CountJunctionB(i,j,value);
+                                 CountJunctionB(q,p,value);
+                                 CountSingleNucleotides(i,j,p,q,value);
+                             }
+                         }
+                     }
+                 }
+#endif
+
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c)
+
+                 {
+                     RealT value = Fast_Exp(outside + FM2i + ScoreJunctionA(i,j) + ScoreMultiPaired() + ScoreMultiBase());
+                     CountJunctionA(i,j,value);
+                     CountMultiPaired(value);
+                     CountMultiBase(value);
+                 }
+             }
+
+#endif
+
+             // FM1[i,j] = optimal energy for substructure belonging to a
+             //            multibranch loop containing a (k+1,j) base pair
+             //            preceded by 5' unpaired nucleotides from i to k
+             //            for some i <= k <= j-2
+             //
+             //          = SUM [FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j)   if i+2<=j,
+             //                 FM1[i+1,j] + b                                          if i+2<=j]
+             //
+             //            (assuming 0 < i < i+2 <= j < L)
+
+             if (0 < i && i+2 <= j && j < L)
+             {
+
+                 // compute FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j)
+
+                 if (allow_paired[offset[i+1]+j])
+                 {
+                     RealT value = Fast_Exp(FM1o[offset[i]+j] + FCi[offset[i+1]+j-1] + ScoreJunctionA(j,i) + ScoreMultiPaired() + ScoreBasePair(i+1,j) - Z);
+                     CountJunctionA(j,i,value);
+                     CountMultiPaired(value);
+                     CountBasePair(i+1,j,value);
+                 }
+
+                 // compute FM1[i+1,j] + b
+
+                 if (allow_unpaired_position[i+1])
+                 {
+                     CountMultiUnpaired(i+1,Fast_Exp(FM1o[offset[i]+j] + FM1i[offset[i+1]+j] + ScoreMultiUnpaired(i+1) - Z));
+                 }
+             }
+
+             // FM[i,j] = optimal energy for substructure belonging to a
+             //           multibranch loop which contains at least one
+             //           helix
+             //
+             //         = SUM [SUM (i<k<j : FM1[i,k] + FM[k,j]),
+             //                FM[i,j-1] + b,
+             //                FM1[i,j]]
+             //
+             //           (assuming 0 < i < i+2 <= j < L)
+
+             if (0 < i && i+2 <= j && j < L)
+             {
+
+                 // compute SUM (i<k<j : FM1[i,k] + FM[k,j]) -- do nothing
+
+                 // compute FM[i,j-1] + b
+
+                 if (allow_unpaired_position[j])
+                     CountMultiUnpaired(j,Fast_Exp(FMo[offset[i]+j] + FMi[offset[i]+j-1] + ScoreMultiUnpaired(j) - Z));
+
+                 // compute FM1[i,j] -- do nothing
+             }
+         }
+     }
+
+     for (int j = 1; j <= L; j++)
+     {
+
+         // F5[j] = optimal energy for substructure between positions 0 and j
+         //         (or 0 if j = 0)
+         //
+         //       = SUM [F5[j-1] + ScoreExternalUnpaired(),
+         //              SUM (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))]
+
+         RealT outside = F5o[j] - Z;
+
+         // compute F5[j-1] + ScoreExternalUnpaired()
+
+         if (allow_unpaired_position[j])
+             CountExternalUnpaired(j,Fast_Exp(outside + F5i[j-1] + ScoreExternalUnpaired(j)));
+
+         // compute SUM (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))
+
+         for (int k = 0; k < j; k++)
+         {
+             if (allow_paired[offset[k+1]+j])
+             {
+                 RealT value = Fast_Exp(outside + F5i[k] + FCi[offset[k+1]+j-1] + ScoreExternalPaired() + ScoreBasePair(k+1,j) + ScoreJunctionA(j,k));
+                 CountExternalPaired(value);
+                 CountBasePair(k+1,j,value);
+                 CountJunctionA(j,k,value);
+             }
+         }
+     }
+
+     FinalizeCounts();
+
+#if SHOW_TIMINGS
+     std::cerr << "Feature expectations (" << GetSystemTime() - starting_time << " seconds)" << std::endl;
+#endif
+
+     return GetCounts();
+ }
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::ComputePosterior()
+//
+// Combine the inside (F*i) and outside (F*o) tables to rebuild posterior[]: the entry
+// offset[a]+b accumulates the probability that a pairs with b, and is finally clipped to [0,1].
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InferenceEngine<RealT>::ComputePosterior()
+{
+ posterior.clear();
+ posterior.resize(SIZE, RealT(0));
+
+ //double starting_time = GetSystemTime();
+
+ const RealT Z = ComputeLogPartitionCoefficient();
+
+ for (int i = L; i >= 0; i--)
+ {
+ for (int j = i; j <= L; j++)
+ {
+
+ // FM2[i,j] = SUM (i<k<j : FM1[i,k] + FM[k,j]) -- NOTE: FM2i is not read anywhere below
+
+ RealT FM2i = RealT(NEG_INF);
+
+#if SIMPLE_FM2
+
+ for (int k = i+1; k < j; k++)
+ Fast_LogPlusEquals(FM2i, FM1i[offset[i]+k] + FMi[offset[k]+j]);
+
+#else
+
+ if (i+2 <= j)
+ {
+ const RealT *p1 = &(FM1i[offset[i]+i+1]);
+ const RealT *p2 = &(FMi[offset[i+1]+j]);
+ for (int k = i+1; k < j; k++)
+ {
+ Fast_LogPlusEquals(FM2i, (*p1) + (*p2));
+ ++p1;
+ p2 += L-k;
+ }
+ }
+
+#endif
+
+#if PARAMS_HELIX_LENGTH || PARAMS_ISOLATED_BASE_PAIR
+
+ // FN[i,j] = optimal energy for substructure between positions
+ // i and j such that letters (i,j+1) are base-paired
+ // and the next interaction is not a stacking pair
+ //
+ // = SUM [ScoreHairpin(i,j),
+ // SUM (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+ // ScoreJunctionA(i,j) + a + c + SUM (i<k<j : FM1[i,k] + FM[k,j])]
+ //
+ // (assuming 0 < i <= j < L)
+ //
+ // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+ if (0 < i && j < L && allow_paired[offset[i]+j+1])
+ {
+ RealT outside = FNo[offset[i]+j] - Z;
+
+ // compute ScoreHairpin(i,j)
+
+ if (allow_unpaired[offset[i]+j] && j-i >= C_MIN_HAIRPIN_LENGTH)
+ CountHairpin(i,j,Fast_Exp(outside + ScoreHairpin(i,j)));
+
+ // compute SUM (i<=p<p+2<=q<=j, p-i+j-q>0 : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+ for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+ {
+ if (p > i && !allow_unpaired_position[p]) break;
+ int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+ for (int q = j; q >= q_min; q--)
+ {
+ if (q < j && !allow_unpaired_position[q+1]) break;
+ if (!allow_paired[offset[p+1]+q]) continue;
+ if (i == p && j == q) continue;
+
+ posterior[offset[p+1]+q] += Fast_Exp(outside + ScoreSingle(i,j,p,q) + FCi[offset[p+1]+q-1]);
+ }
+ }
+
+#else
+
+ {
+ RealT score_other = outside + ScoreJunctionB(i,j);
+
+ for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+ {
+ if (p > i && !allow_unpaired_position[p]) break;
+ int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+ const RealT *FCptr = &(FCi[offset[p+1]-1]);
+ for (int q = j; q >= q_min; q--)
+ {
+ if (q < j && !allow_unpaired_position[q+1]) break;
+ if (!allow_paired[offset[p+1]+q]) continue;
+ if (i == p && j == q) continue;
+
+ posterior[offset[p+1]+q] +=
+ Fast_Exp(score_other + cache_score_single[p-i][j-q].first + FCptr[q] + ScoreBasePair(p+1,q) + ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+ }
+ }
+ }
+#endif
+
+ // compute SUM (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c) -- do nothing
+
+ }
+
+ // FE[i,j] = optimal energy for substructure between positions
+ // i and j such that letters (i,j+1) ... (i-D+1,j+D) are
+ // already base-paired
+ //
+ // = SUM [ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1] if i+2<=j,
+ // FN(i,j)]
+ //
+ // (assuming 0 < i <= j < L)
+
+ if (0 < i && j < L && allow_paired[offset[i]+j+1])
+ {
+ RealT outside = FEo[offset[i]+j] - Z;
+
+ // compute ScoreBP(i+1,j) + ScoreHelixStacking(i,j+1) + FE[i+1,j-1]; this forms pair (i+1,j), stored at offset[i+1]+j
+
+ if (i+2 <= j && allow_paired[offset[i+1]+j])
+ posterior[offset[i+1]+j] += Fast_Exp(outside + ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) + FEi[offset[i+1]+j-1]);
+
+ // compute FN(i,j) -- do nothing
+
+ }
+
+ // FC[i,j] = optimal energy for substructure between positions
+ // i and j such that letters (i,j+1) are base-paired
+ // but (i-1,j+2) are not
+ //
+ // = SUM [ScoreIsolated() + FN(i,j),
+ // SUM (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k)),
+ // FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+ //
+ // (assuming 0 < i <= j < L)
+
+ if (0 < i && j < L && allow_paired[offset[i]+j+1])
+ {
+
+ RealT outside = FCo[offset[i]+j] - Z;
+
+ // compute ScoreIsolated() + FN(i,j) -- no posterior update (NOTE(review): CountIsolated is still invoked here; confirm intended)
+
+ CountIsolated(Fast_Exp(outside + ScoreIsolated() + FNi[offset[i]+j]));
+
+ // compute SUM (2<=k<D : FN(i+k-1,j-k+1) + ScoreHelix(i-1,j+1,k))
+
+ bool allowed = true;
+ for (int k = 2; k < D_MAX_HELIX_LENGTH; k++)
+ {
+ if (i + 2*k - 2 > j) break;
+ if (!allow_paired[offset[i+k-1]+j-k+2]) { allowed = false; break; }
+ RealT value = Fast_Exp(outside + ScoreHelix(i-1,j+1,k) + FNi[offset[i+k-1]+j-k+1]);
+ for (int p = 1; p < k; p++)
+ posterior[offset[i+p]+j-p+1] += value;
+ }
+
+ // compute FE(i+D-1,j-D+1) + ScoreHelix(i-1,j+1,D)]
+
+ if (i + 2*D_MAX_HELIX_LENGTH-2 <= j)
+ {
+ if (allowed && allow_paired[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+2]) {
+ RealT value = Fast_Exp(outside + ScoreHelix(i-1,j+1,D_MAX_HELIX_LENGTH) + FEi[offset[i+D_MAX_HELIX_LENGTH-1]+j-D_MAX_HELIX_LENGTH+1]);
+
+ for (int k = 1; k < D_MAX_HELIX_LENGTH; k++)
+ posterior[offset[i+k]+j-k+1] += value;
+ }
+ }
+ }
+
+#else
+
+ // FC[i,j] = optimal energy for substructure between positions
+ // i and j such that letters (i,j+1) are base-paired
+ //
+ // = SUM [ScoreHairpin(i,j),
+ // SUM (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1]),
+ // ScoreJunctionA(i,j) + a + c + SUM (i<k<j : FM1[i,k] + FM[k,j])]
+ //
+ // (assuming 0 < i <= j < L)
+ //
+ // Multi-branch loops are scored as [a + b * (# unpaired) + c * (# branches)]
+
+ if (0 < i && j < L && allow_paired[offset[i]+j+1])
+ {
+
+ RealT outside = FCo[offset[i]+j] - Z;
+
+ // compute ScoreHairpin(i,j)
+
+ if (allow_unpaired[offset[i]+j] && j-i >= C_MIN_HAIRPIN_LENGTH)
+ CountHairpin(i,j,Fast_Exp(outside + ScoreHairpin(i,j)));
+
+ // compute SUM (i<=p<p+2<=q<=j : ScoreSingle(i,j,p,q) + FC[p+1,q-1])
+
+#if !FAST_SINGLE_BRANCH_LOOPS
+ for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+ {
+ if (p > i && !allow_unpaired_position[p]) break;
+ int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+ for (int q = j; q >= q_min; q--)
+ {
+ if (q < j && !allow_unpaired_position[q+1]) break;
+ if (!allow_paired[offset[p+1]+q]) continue;
+
+ if (p == i && q == j)
+ {
+ posterior[offset[p+1]+q] += Fast_Exp(outside + ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) + FCi[offset[p+1]+q-1]);
+ }
+ else
+ {
+ posterior[offset[p+1]+q] += Fast_Exp(outside + ScoreSingle(i,j,p,q) + FCi[offset[p+1]+q-1]);
+ }
+ }
+ }
+
+#else
+
+ {
+ RealT score_helix = (i+2 <= j ? outside + ScoreBasePair(i+1,j) + ScoreHelixStacking(i,j+1) : 0);
+ RealT score_other = outside + ScoreJunctionB(i,j);
+
+ for (int p = i; p <= std::min(i+C_MAX_SINGLE_LENGTH,j); p++)
+ {
+ if (p > i && !allow_unpaired_position[p]) break;
+ int q_min = std::max(p+2,p-i+j-C_MAX_SINGLE_LENGTH);
+ const RealT *FCptr = &(FCi[offset[p+1]-1]);
+ for (int q = j; q >= q_min; q--)
+ {
+ if (q < j && !allow_unpaired_position[q+1]) break;
+ if (!allow_paired[offset[p+1]+q]) continue;
+
+ posterior[offset[p+1]+q] +=
+ Fast_Exp(p == i && q == j ?
+ score_helix + FCptr[q] :
+ score_other + cache_score_single[p-i][j-q].first + FCptr[q] + ScoreBasePair(p+1,q) +
+ ScoreJunctionB(q,p) + ScoreSingleNucleotides(i,j,p,q));
+ }
+ }
+ }
+
+#endif
+
+ // compute SUM (i<k<j : FM1[i,k] + FM[k,j] + ScoreJunctionA(i,j) + a + c) -- do nothing
+
+ }
+
+#endif
+
+ // FM1[i,j] = optimal energy for substructure belonging to a
+ // multibranch loop containing a (k+1,j) base pair
+ // preceded by 5' unpaired nucleotides from i to k
+ // for some i <= k <= j-2
+ //
+ // = SUM [FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j) if i+2<=j,
+ // FM1[i+1,j] + b if i+2<=j]
+ //
+ // (assuming 0 < i < i+2 <= j < L)
+
+ if (0 < i && i+2 <= j && j < L)
+ {
+
+ // Compute FC[i+1,j-1] + ScoreJunctionA(j,i) + c + ScoreBP(i+1,j)
+
+ if (allow_paired[offset[i+1]+j])
+ posterior[offset[i+1]+j] += Fast_Exp(FM1o[offset[i]+j] + FCi[offset[i+1]+j-1] + ScoreJunctionA(j,i) + ScoreMultiPaired() + ScoreBasePair(i+1,j) - Z);
+
+ // Compute FM1[i+1,j] + b -- do nothing
+
+ }
+
+ // FM[i,j] = optimal energy for substructure belonging to a
+ // multibranch loop which contains at least one
+ // helix
+ //
+ // = SUM [SUM (i<k<j : FM1[i,k] + FM[k,j]),
+ // FM[i,j-1] + b,
+ // FM1[i,j]]
+ //
+ // (assuming 0 < i < i+2 <= j < L)
+
+ // Compute SUM (i<k<j : FM1[i,k] + FM[k,j]) -- do nothing
+
+ // Compute FM[i,j-1] + b -- do nothing
+
+ // Compute FM1[i,j] -- do nothing
+ }
+ }
+
+ for (int j = 1; j <= L; j++)
+ {
+
+ // F5[j] = optimal energy for substructure between positions 0 and j
+ // (or 0 if j = 0)
+ //
+ // = SUM [F5[j-1] + ScoreExternalUnpaired(),
+ // SUM (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))]
+
+ RealT outside = F5o[j] - Z;
+
+ // compute F5[j-1] + ScoreExternalUnpaired() -- do nothing
+
+ // compute SUM (0<=k<j : F5[k] + FC[k+1,j-1] + ScoreExternalPaired() + ScoreBP(k+1,j) + ScoreJunctionA(j,k))
+
+ for (int k = 0; k < j; k++)
+ {
+ if (allow_paired[offset[k+1]+j])
+ posterior[offset[k+1]+j] += Fast_Exp(outside + F5i[k] + FCi[offset[k+1]+j-1] + ScoreExternalPaired() + ScoreBasePair(k+1,j) + ScoreJunctionA(j,k));
+ }
+ }
+
+ for (int i = 1; i <= L; i++)
+ {
+ for (int j = i+1; j <= L; j++)
+ {
+ posterior[offset[i]+j] = Clip(posterior[offset[i]+j], RealT(0), RealT(1));
+ }
+ }
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::PredictPairingsPosterior()
+//
+// Use posterior decoding to predict pairings; unpaired posteriors are scaled by 1/(2*gamma), gamma > 0.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<int> InferenceEngine<RealT>::PredictPairingsPosterior(const RealT gamma) const
+{
+ Assert(gamma > 0, "Positive gamma expected.");
+
+#if SHOW_TIMINGS
+ double starting_time = GetSystemTime();
+#endif
+ std::vector<RealT> unpaired_posterior(L+1); // scratch tables are vectors so they are released on every exit path
+ std::vector<RealT> score(SIZE);
+ std::vector<int> traceback(SIZE);
+
+ // compute the scores for unpaired nucleotides
+
+ for (int i = 1; i <= L; i++)
+ {
+ unpaired_posterior[i] = RealT(1);
+ for (int j = 1; j < i; j++) unpaired_posterior[i] -= posterior[offset[j]+i];
+ for (int j = i+1; j <= L; j++) unpaired_posterior[i] -= posterior[offset[i]+j];
+ }
+
+ for (int i = 1; i <= L; i++) unpaired_posterior[i] /= 2 * gamma;
+
+ // initialize matrices
+
+ std::fill(score.begin(), score.end(), RealT(-1.0));
+ std::fill(traceback.begin(), traceback.end(), -1);
+
+ // dynamic programming
+
+ for (int i = L; i >= 0; i--)
+ {
+ for (int j = i; j <= L; j++)
+ {
+ RealT &this_score = score[offset[i]+j];
+ int &this_traceback = traceback[offset[i]+j];
+
+ if (i == j)
+ {
+ UPDATE_MAX(this_score, this_traceback, RealT(0), 0);
+ }
+ else
+ {
+ if (allow_unpaired_position[i+1])
+ UPDATE_MAX(this_score, this_traceback, unpaired_posterior[i+1] + score[offset[i+1]+j], 1);
+ if (allow_unpaired_position[j])
+ UPDATE_MAX(this_score, this_traceback, unpaired_posterior[j] + score[offset[i]+j-1], 2);
+ if (i+2 <= j)
+ {
+ if (allow_paired[offset[i+1]+j])
+ UPDATE_MAX(this_score, this_traceback, posterior[offset[i+1]+j] + score[offset[i+1]+j-1], 3);
+
+#if SIMPLE_FM2
+
+ for (int k = i+1; k < j; k++)
+ UPDATE_MAX(this_score, this_traceback, score[offset[i]+k] + score[offset[k]+j], k+4);
+
+#else
+
+ RealT *p1 = &(score[offset[i]+i+1]);
+ RealT *p2 = &(score[offset[i+1]+j]);
+ for (int k = i+1; k < j; k++)
+ {
+ UPDATE_MAX(this_score, this_traceback, (*p1) + (*p2), k+4);
+ ++p1;
+ p2 += L-k;
+ }
+
+#endif
+ }
+ }
+ }
+ }
+
+#if SHOW_TIMINGS
+ std::cerr << "Time: " << GetSystemTime() - starting_time << std::endl;
+#endif
+
+ // perform traceback: codes 0..3 are the fixed moves above, codes >= 4 encode a split point k as k+4
+
+ std::vector<int> solution(L+1,SStruct::UNPAIRED);
+ solution[0] = SStruct::UNKNOWN;
+
+ std::queue<std::pair<int,int> > traceback_queue;
+ traceback_queue.push(std::make_pair(0, L));
+
+ while (!traceback_queue.empty())
+ {
+ std::pair<int,int> t = traceback_queue.front();
+ traceback_queue.pop();
+ const int i = t.first;
+ const int j = t.second;
+
+ switch (traceback[offset[i]+j])
+ {
+ case -1:
+ Assert(false, "Should not get here.");
+ break;
+ case 0:
+ break;
+ case 1:
+ traceback_queue.push(std::make_pair(i+1,j));
+ break;
+ case 2:
+ traceback_queue.push(std::make_pair(i,j-1));
+ break;
+ case 3:
+ solution[i+1] = j;
+ solution[j] = i+1;
+ traceback_queue.push(std::make_pair(i+1,j-1));
+ break;
+ default:
+ {
+ const int k = traceback[offset[i]+j] - 4;
+ traceback_queue.push(std::make_pair(i,k));
+ traceback_queue.push(std::make_pair(k,j));
+ }
+ break;
+ }
+ }
+
+ return solution;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InferenceEngine::GetPosterior()
+//
+// Return posterior probability matrix, thresholded: entries below posterior_cutoff become 0.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT *InferenceEngine<RealT>::GetPosterior(const RealT posterior_cutoff) const
+{
+ RealT *ret = new RealT[SIZE]; // allocated with new[] and returned; nothing in this function frees it
+ for (int i = 0; i < SIZE; i++)
+ ret[i] = (posterior[i] >= posterior_cutoff ? posterior[i] : RealT(0));
+ return ret;
+}
diff --git a/src/InnerOptimizationWrapper.hpp b/src/InnerOptimizationWrapper.hpp
new file mode 100644
index 0000000..d94ac2d
--- /dev/null
+++ b/src/InnerOptimizationWrapper.hpp
@@ -0,0 +1,47 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapper.hpp
+//
+// Generic class for performing optimization of model with respect to
+// a particular set of parameters.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef INNEROPTIMIZATIONWRAPPER_HPP
+#define INNEROPTIMIZATIONWRAPPER_HPP
+
+#include "OptimizationWrapper.hpp"
+#include "LBFGS.hpp"
+
+template<class RealT>
+class OptimizationWrapper;
+
+//////////////////////////////////////////////////////////////////////
+// class InnerOptimizationWrapper -- abstract base (Minimize() is pure virtual) shared by the inner optimizer wrappers
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class InnerOptimizationWrapper
+{
+
+protected:
+
+ OptimizationWrapper<RealT> *optimization_wrapper; // stored as passed in; the destructor does not delete it
+ const std::vector<int> units; // copy of the constructor argument
+ const std::vector<RealT> C; // copy of the constructor argument
+ std::vector<RealT> bias; // sized in the constructor, replaced by LoadBias()
+
+public:
+
+ InnerOptimizationWrapper(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &C);
+
+ virtual ~InnerOptimizationWrapper();
+
+ void LoadBias(const std::vector<RealT> &bias); // overwrite the stored bias vector
+
+ virtual RealT Minimize(std::vector<RealT> &x0) = 0; // implemented by each concrete optimizer wrapper
+};
+
+#include "InnerOptimizationWrapper.ipp"
+
+#endif
diff --git a/src/InnerOptimizationWrapper.ipp b/src/InnerOptimizationWrapper.ipp
new file mode 100644
index 0000000..9cc4519
--- /dev/null
+++ b/src/InnerOptimizationWrapper.ipp
@@ -0,0 +1,43 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapper.ipp
+//
+// Implementation of functors needed for optimization.
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapper<RealT>::InnerOptimizationWrapper()
+//
+// Constructor: stores the pointer, copies units and C, and sizes bias to GetNumLogicalParameters() elements.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+InnerOptimizationWrapper<RealT>::InnerOptimizationWrapper(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &C) :
+ optimization_wrapper(optimization_wrapper),
+ units(units),
+ C(C),
+ bias(optimization_wrapper->GetParameterManager().GetNumLogicalParameters()) // dereferences the pointer: it must be non-null
+{}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapper<RealT>::~InnerOptimizationWrapper()
+//
+// Destructor. The body is empty; optimization_wrapper is not deleted here.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+InnerOptimizationWrapper<RealT>::~InnerOptimizationWrapper()
+{}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapper<RealT>::LoadBias()
+//
+// Load linear bias: replaces the stored bias vector with a copy of the argument (no size check).
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InnerOptimizationWrapper<RealT>::LoadBias(const std::vector<RealT> &bias)
+{
+ this->bias = bias; // the parameter shadows the member, hence the explicit this->
+}
diff --git a/src/InnerOptimizationWrapperBundleMethod.hpp b/src/InnerOptimizationWrapperBundleMethod.hpp
new file mode 100644
index 0000000..8fcc6ed
--- /dev/null
+++ b/src/InnerOptimizationWrapperBundleMethod.hpp
@@ -0,0 +1,44 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperBundleMethod.hpp
+//
+// Inner optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef INNEROPTIMIZATIONWRAPPERBUNDLEMETHOD_HPP
+#define INNEROPTIMIZATIONWRAPPERBUNDLEMETHOD_HPP
+
+#include "OptimizationWrapper.hpp"
+#include "BundleMethod.hpp"
+
+template<class RealT>
+class BundleMethod;
+
+template<class RealT>
+class OptimizationWrapper;
+
+template<class RealT>
+class InnerOptimizationWrapper;
+
+//////////////////////////////////////////////////////////////////////
+// class InnerOptimizationWrapperBundleMethod -- joins BundleMethod<RealT> with the InnerOptimizationWrapper<RealT> state
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class InnerOptimizationWrapperBundleMethod : public BundleMethod<RealT>, public InnerOptimizationWrapper<RealT>
+{
+public:
+ InnerOptimizationWrapperBundleMethod(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &C);
+
+ RealT ComputeFunction(const std::vector<RealT> &x); // objective value at x
+ void ComputeSubgradient(std::vector<RealT> &g, const std::vector<RealT> &x); // writes the (sub)gradient at x into g
+ void Report(int iteration, const std::vector<RealT> &x, RealT f, const std::vector<RealT> &g,
+ RealT norm_bound, RealT step_size); // per-iteration progress report
+ void Report(const std::string &s); // free-form message from the optimizer
+ RealT Minimize(std::vector<RealT> &x0); // forwards to BundleMethod<RealT>::Minimize
+};
+
+#include "InnerOptimizationWrapperBundleMethod.ipp"
+
+#endif
diff --git a/src/InnerOptimizationWrapperBundleMethod.ipp b/src/InnerOptimizationWrapperBundleMethod.ipp
new file mode 100644
index 0000000..f4af9fd
--- /dev/null
+++ b/src/InnerOptimizationWrapperBundleMethod.ipp
@@ -0,0 +1,83 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperBundleMethod.ipp
+//
+// Implementation of functors needed for optimization.
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperBundleMethod::InnerOptimizationWrapperBundleMethod()
+//
+// Constructor: initializes the BundleMethod<RealT> base with (1000, C[1]); C must hold at least two entries.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+InnerOptimizationWrapperBundleMethod<RealT>::InnerOptimizationWrapperBundleMethod(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &C) :
+ BundleMethod<RealT>(1000,C[1]),
+ InnerOptimizationWrapper<RealT>(optimization_wrapper, units, C)
+{
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperBundleMethod::ComputeFunction()
+//
+// Return the computation wrapper's ComputeFunction() for weights w on this->units, using the
+// "log_base" option. No C-weighted regularizer or bias term is added in this function.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT InnerOptimizationWrapperBundleMethod<RealT>::ComputeFunction(const std::vector<RealT> &w)
+{
+ return this->optimization_wrapper->GetComputationWrapper().ComputeFunction(this->units, w, true, true, this->optimization_wrapper->GetOptions().GetRealValue("log_base"));
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperBundleMethod::ComputeSubgradient()
+//
+// Overwrite g with the computation wrapper's ComputeGradient() for weights w on this->units, using
+// the "log_base" option. No C*w or bias term is added in this function.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InnerOptimizationWrapperBundleMethod<RealT>::ComputeSubgradient(std::vector<RealT> &g, const std::vector<RealT> &w)
+{
+ g = this->optimization_wrapper->GetComputationWrapper().ComputeGradient(this->units, w, true, true, this->optimization_wrapper->GetOptions().GetRealValue("log_base"));
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperBundleMethod::Report()
+//
+// Routines for printing results and messages from the optimizer (this overload also writes w to disk).
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InnerOptimizationWrapperBundleMethod<RealT>::Report(int iteration, const std::vector<RealT> &w, RealT f, const std::vector<RealT> &g, RealT norm_bound, RealT step_size)
+{
+ // write results to disk
+ this->optimization_wrapper->GetParameterManager().WriteToFile(SPrintF("optimize.params.iter%d", iteration), w);
+
+ // write results to console; the argument order must follow the labels: |w|, |g|, norm bound, step
+ this->optimization_wrapper->PrintMessage(SPrintF("Inner iteration %d: f = %lf (%lf), |w| = %lf, |g| = %lf, norm bound = %lf, step = %lf, efficiency = %lf%%",
+ iteration, double(f), double(f - RealT(0.5) * DotProduct(this->C, w*w)),
+ double(Norm(w)), double(Norm(g)), double(norm_bound), double(step_size),
+ double(this->optimization_wrapper->GetComputationEngine().GetEfficiency())));
+}
+
+template<class RealT>
+void InnerOptimizationWrapperBundleMethod<RealT>::Report(const std::string &s)
+{
+ this->optimization_wrapper->PrintMessage(SPrintF("Inner message: %s", s.c_str())); // forward s with an "Inner message: " prefix
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperBundleMethod::Minimize()
+//
+// Perform bundle method optimization starting from x0; returns BundleMethod<RealT>::Minimize's result.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT InnerOptimizationWrapperBundleMethod<RealT>::Minimize(std::vector<RealT> &x0)
+{
+ return BundleMethod<RealT>::Minimize(x0);
+}
diff --git a/src/InnerOptimizationWrapperLBFGS.hpp b/src/InnerOptimizationWrapperLBFGS.hpp
new file mode 100644
index 0000000..040606e
--- /dev/null
+++ b/src/InnerOptimizationWrapperLBFGS.hpp
@@ -0,0 +1,45 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperLBFGS.hpp
+//
+// Inner optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef INNEROPTIMIZATIONWRAPPERLBFGS_HPP
+#define INNEROPTIMIZATIONWRAPPERLBFGS_HPP
+
+#include "OptimizationWrapper.hpp"
+#include "LBFGS.hpp"
+
+template<class RealT>
+class LBFGS;
+
+template<class RealT>
+class OptimizationWrapper;
+
+template<class RealT>
+class InnerOptimizationWrapper;
+
+//////////////////////////////////////////////////////////////////////
+// class InnerOptimizationWrapperLBFGS -- joins LBFGS<RealT> with the InnerOptimizationWrapper<RealT> state
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class InnerOptimizationWrapperLBFGS : public LBFGS<RealT>, public InnerOptimizationWrapper<RealT>
+{
+ RealT log_base; // read once from the "log_base" option in the constructor
+
+public:
+ InnerOptimizationWrapperLBFGS(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &C);
+
+ RealT ComputeFunction(const std::vector<RealT> &x); // objective value at x, including the C and bias terms
+ void ComputeGradient(std::vector<RealT> &g, const std::vector<RealT> &x); // writes the gradient at x into g
+ void Report(int iteration, const std::vector<RealT> &x, RealT f, RealT step_size); // per-iteration progress report
+ void Report(const std::string &s); // free-form message from the optimizer
+ RealT Minimize(std::vector<RealT> &x0); // forwards to LBFGS<RealT>::Minimize
+};
+
+#include "InnerOptimizationWrapperLBFGS.ipp"
+
+#endif
diff --git a/src/InnerOptimizationWrapperLBFGS.ipp b/src/InnerOptimizationWrapperLBFGS.ipp
new file mode 100644
index 0000000..6b8d6a7
--- /dev/null
+++ b/src/InnerOptimizationWrapperLBFGS.ipp
@@ -0,0 +1,99 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperLBFGS.ipp
+//
+// Implementation of functors needed for optimization.
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperLBFGS::InnerOptimizationWrapperLBFGS()
+//
+// Constructor: initializes the LBFGS<RealT> base with (20, 1e-5, 100) and caches the "log_base" option.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+InnerOptimizationWrapperLBFGS<RealT>::InnerOptimizationWrapperLBFGS(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &C) :
+ //LBFGS<RealT>(20, 1e-5, 1000, 1e-4),
+ LBFGS<RealT>(20, 1e-5, 100),
+ InnerOptimizationWrapper<RealT>(optimization_wrapper, units, C),
+ log_base(optimization_wrapper->GetOptions().GetRealValue("log_base"))
+{}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperLBFGS::ComputeFunction()
+//
+// Return the computation wrapper's ComputeFunction() for weights w on this->units, plus the
+// regularizer 0.5 * DotProduct(C, w*w) and the linear term DotProduct(w, bias).
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT InnerOptimizationWrapperLBFGS<RealT>::ComputeFunction(const std::vector<RealT> &w)
+{
+ return this->optimization_wrapper->GetComputationWrapper().ComputeFunction(this->units, w, false, true, log_base) + RealT(0.5) * DotProduct(this->C, w*w) + DotProduct(w, this->bias);
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperLBFGS::ComputeGradient()
+//
+// Overwrite g with the computation wrapper's ComputeGradient() for weights w on this->units, plus
+// C * w and bias -- the same two extra terms that ComputeFunction() adds, differentiated in w.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InnerOptimizationWrapperLBFGS<RealT>::ComputeGradient(std::vector<RealT> &g, const std::vector<RealT> &w)
+{
+ g = this->optimization_wrapper->GetComputationWrapper().ComputeGradient(this->units, w, false, true, log_base) + this->C * w + this->bias;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperLBFGS::Report()
+//
+// Routines for printing results and messages from the optimizer (this overload also writes w to disk).
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InnerOptimizationWrapperLBFGS<RealT>::Report(int iteration, const std::vector<RealT> &w, RealT f, RealT step_size)
+{
+ // write results to disk
+ this->optimization_wrapper->GetParameterManager().WriteToFile(SPrintF("optimize.params.iter%d", iteration), w);
+
+ // write results to console; the bracketed value re-runs ComputeFunction with its first flag set to true
+ this->optimization_wrapper->PrintMessage(SPrintF("Inner iteration %d: f = %lf (%lf) [%lf], |w| = %lf, step = %lf, efficiency = %lf%%",
+ iteration, double(f), double(f - RealT(0.5) * DotProduct(this->C, w*w)),
+ double(this->optimization_wrapper->GetComputationWrapper().ComputeFunction(this->units, w, true, true, log_base) + RealT(0.5) * DotProduct(this->C, w*w) + DotProduct(w, this->bias)),
+ double(Norm(w)), double(step_size),
+ double(this->optimization_wrapper->GetComputationEngine().GetEfficiency())));
+}
+
+template<class RealT>
+void InnerOptimizationWrapperLBFGS<RealT>::Report(const std::string &s)
+{
+ this->optimization_wrapper->PrintMessage(SPrintF("Inner message: %s", s.c_str())); // forward s with an "Inner message: " prefix
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperLBFGS::Minimize()
+//
+// Perform LBFGS optimization from x0. The commented-out block is a disabled sweep over log_base values.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT InnerOptimizationWrapperLBFGS<RealT>::Minimize(std::vector<RealT> &x0)
+{
+ return LBFGS<RealT>::Minimize(x0);
+
+ /*
+ RealT f = RealT(1e20);
+
+ for (log_base = 1; log_base < RealT(1e6); log_base *= 2)
+ {
+ this->optimization_wrapper->PrintMessage(SPrintF("Inner optimization using LOG_BASE = %lf", double(log_base)));
+ this->optimization_wrapper->Indent();
+ f = LBFGS<RealT>::Minimize(x0);
+ this->optimization_wrapper->Unindent();
+ }
+
+ return f;
+ */
+}
diff --git a/src/InnerOptimizationWrapperSubgradientMethod.hpp b/src/InnerOptimizationWrapperSubgradientMethod.hpp
new file mode 100644
index 0000000..430247d
--- /dev/null
+++ b/src/InnerOptimizationWrapperSubgradientMethod.hpp
@@ -0,0 +1,44 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperSubgradientMethod.hpp
+//
+// Inner optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef INNEROPTIMIZATIONWRAPPERSUBGRADIENTMETHOD_HPP
+#define INNEROPTIMIZATIONWRAPPERSUBGRADIENTMETHOD_HPP
+
+#include "OptimizationWrapper.hpp"
+#include "SubgradientMethod.hpp"
+
+template<class RealT>
+class SubgradientMethod;
+
+template<class RealT>
+class OptimizationWrapper;
+
+template<class RealT>
+class InnerOptimizationWrapper;
+
+//////////////////////////////////////////////////////////////////////
+// class InnerOptimizationWrapperSubgradientMethod -- joins SubgradientMethod<RealT> with the InnerOptimizationWrapper<RealT> state
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class InnerOptimizationWrapperSubgradientMethod : public SubgradientMethod<RealT>, public InnerOptimizationWrapper<RealT>
+{
+public:
+ InnerOptimizationWrapperSubgradientMethod(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &C);
+
+ RealT ComputeFunction(const std::vector<RealT> &x); // objective value at x
+ void ComputeSubgradient(std::vector<RealT> &g, const std::vector<RealT> &x); // writes the subgradient at x into g
+ void Report(int iteration, const std::vector<RealT> &x, RealT f, const std::vector<RealT> &g,
+ RealT norm_bound, RealT step_size); // per-iteration progress report
+ void Report(const std::string &s); // free-form message from the optimizer
+ RealT Minimize(std::vector<RealT> &x0); // runs the optimizer starting from x0
+};
+
+#include "InnerOptimizationWrapperSubgradientMethod.ipp"
+
+#endif
diff --git a/src/InnerOptimizationWrapperSubgradientMethod.ipp b/src/InnerOptimizationWrapperSubgradientMethod.ipp
new file mode 100644
index 0000000..f2607c7
--- /dev/null
+++ b/src/InnerOptimizationWrapperSubgradientMethod.ipp
@@ -0,0 +1,85 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperSubgradientMethod.ipp
+//
+// Implementation of functors needed for optimization.
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperSubgradientMethod::InnerOptimizationWrapperSubgradientMethod()
+// Constructor.  Hands the SubgradientMethod<RealT> base the value 1000
+// (presumably an iteration cap -- confirm against SubgradientMethod.hpp),
+// the solution/gradient norm bounds for (units, C, log_base), and Min(C).
+//////////////////////////////////////////////////////////////////////
+template<class RealT>
+InnerOptimizationWrapperSubgradientMethod<RealT>::InnerOptimizationWrapperSubgradientMethod(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<int> &units,
+ const std::vector<RealT> &C) :
+ SubgradientMethod<RealT>(1000, // the base is SubgradientMethod<RealT>; naming <double> only compiles when RealT is double
+ optimization_wrapper->GetComputationWrapper().ComputeSolutionNormBound(units, C, optimization_wrapper->GetOptions().GetRealValue("log_base")),
+ optimization_wrapper->GetComputationWrapper().ComputeGradientNormBound(units, C, optimization_wrapper->GetOptions().GetRealValue("log_base")),
+ Min(C)),
+ InnerOptimizationWrapper<RealT>(optimization_wrapper, units, C)
+{}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperSubgradientMethod::ComputeFunction()
+//
+// Objective at w: the ComputationWrapper function value for this->units
+// plus 0.5 * DotProduct(C, w*w) and the linear term DotProduct(w, bias).
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT InnerOptimizationWrapperSubgradientMethod<RealT>::ComputeFunction(const std::vector<RealT> &w)
+{
+ return this->optimization_wrapper->GetComputationWrapper().ComputeFunction(this->units, w, true, true, this->optimization_wrapper->GetOptions().GetRealValue("log_base")) + RealT(0.5) * DotProduct(this->C, w*w) + DotProduct(w, this->bias);
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperSubgradientMethod::ComputeSubgradient()
+//
+// Writes into g the ComputationWrapper gradient for this->units at w
+// plus the regularizer term C * w and the constant term bias.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InnerOptimizationWrapperSubgradientMethod<RealT>::ComputeSubgradient(std::vector<RealT> &g, const std::vector<RealT> &w)
+{
+ g = this->optimization_wrapper->GetComputationWrapper().ComputeGradient(this->units, w, true, true, this->optimization_wrapper->GetOptions().GetRealValue("log_base")) + this->C * w + this->bias;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperSubgradientMethod::Report()
+//
+// Routines for printing results and messages from the optimizer.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void InnerOptimizationWrapperSubgradientMethod<RealT>::Report(int iteration, const std::vector<RealT> &w, RealT f, const std::vector<RealT> &g, RealT norm_bound, RealT step_size)
+{
+ // write results to disk (one parameter file per iteration)
+ this->optimization_wrapper->GetParameterManager().WriteToFile(SPrintF("optimize.params.iter%d", iteration), w);
+
+ // write results to console; arguments follow the label order |w|, |g|, norm bound, step
+ this->optimization_wrapper->PrintMessage(SPrintF("Inner iteration %d: f = %lf (%lf), |w| = %lf, |g| = %lf, norm bound = %lf, step = %lf, efficiency = %lf%%",
+ iteration, double(f), double(f - RealT(0.5) * DotProduct(this->C, w*w)),
+ double(Norm(w)), double(Norm(g)), double(norm_bound), double(step_size),
+ double(this->optimization_wrapper->GetComputationEngine().GetEfficiency())));
+}
+
+template<class RealT> // Report(s): relay a free-form optimizer message to the owning wrapper
+void InnerOptimizationWrapperSubgradientMethod<RealT>::Report(const std::string &s)
+{
+ this->optimization_wrapper->PrintMessage(SPrintF("Inner message: %s", s.c_str())); // prefixed so inner-loop messages are distinguishable
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperSubgradientMethod::Minimize()
+//
+// Runs SubgradientMethod<RealT>::Minimize on x0 and returns its result.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT InnerOptimizationWrapperSubgradientMethod<RealT>::Minimize(std::vector<RealT> &x0)
+{
+ return SubgradientMethod<RealT>::Minimize(x0); // qualified call selects the optimizer base explicitly
+}
diff --git a/src/InnerOptimizationWrapperViterbi.hpp b/src/InnerOptimizationWrapperViterbi.hpp
new file mode 100644
index 0000000..c7faed0
--- /dev/null
+++ b/src/InnerOptimizationWrapperViterbi.hpp
@@ -0,0 +1,44 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperViterbi.hpp
+//
+// Inner optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef INNEROPTIMIZATIONWRAPPERVITERBI_HPP
+#define INNEROPTIMIZATIONWRAPPERVITERBI_HPP
+
+#include "Computation.hpp"
+#include "OptimizationWrapper.hpp"
+#include "InnerOptimizationWrapperViterbi.hpp"
+#include "SubgradientDescent.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class InnerOptimizationWrapperViterbi
+// NOTE(review): RealT is never used below (everything is double) -- confirm this header is still built.
+//////////////////////////////////////////////////////////////////////
+template<class RealT>
+class InnerOptimizationWrapperViterbi : public SubgradientDescent<double>
+{
+ OptimizationWrapper *optimizer; // NOTE(review): no template arguments here, unlike OptimizationWrapper<RealT> in the sibling wrappers
+ const std::vector<int> units; // copied from the constructor argument
+ const std::vector<double> C; // regularization hyperparameters as passed in (expanded on use)
+ std::vector<double> best_x; // x from the most recent Report() call
+ std::vector<double> bias; // linear bias term, replaceable via LoadBias()
+ double best_f; // f from the most recent Report() call; starts at +infinity
+
+public:
+ InnerOptimizationWrapperViterbi(OptimizationWrapper *optimizer,
+ const std::vector<int> &units,
+ const std::vector<double> &C);
+
+ void LoadBias(const std::vector<double> &bias); // replaces the stored bias vector
+ double ComputeFunction(const std::vector<double> &x); // regularized objective at x
+ void ComputeSubgradient(std::vector<double> &g, const std::vector<double> &x); // writes the (sub)gradient at x into g
+ void Report(int iteration, double f, const std::vector<double> &x, const std::vector<double> &g,
+ double norm_bound, double step_size); // note: f precedes x here, unlike the SubgradientMethod wrapper
+ void Report(const std::string &s); // prints a free-form optimizer message
+};
+
+#include "InnerOptimizationWrapperViterbi.ipp"
+
+#endif
diff --git a/src/InnerOptimizationWrapperViterbi.ipp b/src/InnerOptimizationWrapperViterbi.ipp
new file mode 100644
index 0000000..6b0cbb4
--- /dev/null
+++ b/src/InnerOptimizationWrapperViterbi.ipp
@@ -0,0 +1,89 @@
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperViterbi.cpp
+//
+// Implementation of functors needed for optimization.
+//////////////////////////////////////////////////////////////////////
+
+#include "InnerOptimizationWrapperViterbi.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperViterbi::InnerOptimizationWrapperViterbi()
+//
+// Constructor.  Bounds and Min() are taken over the expanded hyperparameters.
+//////////////////////////////////////////////////////////////////////
+
+InnerOptimizationWrapperViterbi::InnerOptimizationWrapperViterbi(OptimizationWrapper *optimizer, // NOTE(review): defined as a non-template member although the header declares a class template
+ const std::vector<int> &units,
+ const std::vector<double> &C) :
+ SubgradientDescent<double>(1000, // presumably an iteration cap -- confirm against SubgradientDescent.hpp
+ optimizer->computation.ComputeParameterNormBound(units, optimizer->params.ExpandHyperparameters(C)),
+ optimizer->computation.ComputeGradientNormBound(units, optimizer->params.ExpandHyperparameters(C)),
+ Min(optimizer->params.ExpandHyperparameters(C))),
+ optimizer(optimizer), units(units), C(C), bias(optimizer->params.GetNumParameters()), best_f(std::numeric_limits<double>::infinity())
+{}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperViterbi::LoadBias()
+//
+// Replace the stored linear bias vector with a copy of the argument.
+//////////////////////////////////////////////////////////////////////
+
+void InnerOptimizationWrapperViterbi::LoadBias(const std::vector<double> &bias)
+{
+ this->bias = bias; // this-> is required: the parameter shadows the member
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperViterbi::ComputeFunction()
+//
+// Objective at x: the computation's function value at x + base_values
+// plus 0.5 * DotProduct(Ce, x*x) and the linear term DotProduct(x, bias).
+//////////////////////////////////////////////////////////////////////
+
+double InnerOptimizationWrapperViterbi::ComputeFunction(const std::vector<double> &x)
+{
+ std::vector<double> Ce = optimizer->params.ExpandHyperparameters(C); // expanded hyperparameters, recomputed on every call
+ return optimizer->computation.ComputeFunction(units, x + optimizer->base_values, false) + 0.5 * DotProduct(Ce, x*x) + DotProduct(x, bias);
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperViterbi::ComputeSubgradient()
+//
+// Writes into g the computation's gradient at x + base_values plus
+// the regularizer term Ce * x and the constant term bias.
+//////////////////////////////////////////////////////////////////////
+
+void InnerOptimizationWrapperViterbi::ComputeSubgradient(std::vector<double> &g, const std::vector<double> &x)
+{
+ std::vector<double> Ce = optimizer->params.ExpandHyperparameters(C); // expanded hyperparameters, recomputed on every call
+ g = optimizer->computation.ComputeGradient(units, x + optimizer->base_values, false) + Ce * x + bias;
+}
+
+//////////////////////////////////////////////////////////////////////
+// InnerOptimizationWrapperViterbi::Report()
+//
+// Routines for printing results and messages from the optimizer.
+//////////////////////////////////////////////////////////////////////
+
+void InnerOptimizationWrapperViterbi::Report(int iteration, double f, const std::vector<double> &x, const std::vector<double> &g,
+ double norm_bound, double step_size) // g and norm_bound are accepted but not used in this body
+{
+ // write results to disk
+ // (best_f / best_x are overwritten on every call, whether or not f improved)
+ best_f = f;
+ best_x = x;
+ optimizer->params.WriteToFile(SPrintF("optimize.params.iter%d", iteration), best_x + optimizer->base_values);
+
+ // write results to console
+ // (the value in parentheses is f with the quadratic regularizer removed)
+ std::vector<double> Ce = optimizer->params.ExpandHyperparameters(C);
+ const double unregularized = f - 0.5 * DotProduct(Ce, x*x);
+ optimizer->PrintMessage(SPrintF("Inner iteration %d: f = %lf (%lf), |x| = %lf, step = %lf, efficiency = %lf%%",
+ iteration, f, unregularized, Norm(x), step_size, optimizer->computation.GetEfficiency()));
+}
+
+void InnerOptimizationWrapperViterbi::Report(const std::string &s) // relay a free-form optimizer message
+{
+ optimizer->PrintMessage(SPrintF("Inner message: %s", s.c_str())); // prefixed so inner-loop messages are distinguishable
+}
+
diff --git a/src/LBFGS.hpp b/src/LBFGS.hpp
new file mode 100644
index 0000000..88d4779
--- /dev/null
+++ b/src/LBFGS.hpp
@@ -0,0 +1,54 @@
+//////////////////////////////////////////////////////////////////////
+// LBFGS.hpp
+//
+// This file contains an implementation of the standard
+// limited-memory BFGS optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef LBFGS_HPP
+#define LBFGS_HPP
+
+#include <vector>
+#include "Utilities.hpp"
+#include "LineSearch.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class LBFGS
+// Limited-memory BFGS minimizer built on LineSearch<Real>; derived
+// classes supply the objective, its gradient and the Report() callbacks.
+//////////////////////////////////////////////////////////////////////
+
+template<class Real>
+class LBFGS : public LineSearch<Real>
+{
+ const int M;
+ const Real TERMINATION_RATIO;
+ const int MAX_ITERATIONS;
+ const Real SMALL_STEP_RATIO;
+ const int MAX_SMALL_STEPS;
+ const Real MAX_STEP_NORM;
+
+public:
+ LBFGS
+ (
+ const int M = 20, // number of previous gradients to remember
+ const Real TERMINATION_RATIO = Real(1e-5), // required ratio of gradient norm to parameter norm for termination
+ const int MAX_ITERATIONS = 1000, // maximum number of iterations to run L-BFGS
+ const Real SMALL_STEP_RATIO = Real(1e-5), // ratio beneath which steps are considered "small"
+ const int MAX_SMALL_STEPS = 3, // maximum number of small steps before we quit
+ const Real MAX_STEP_NORM = Real(1e10) // maximum norm for a single step
+ );
+
+ virtual ~LBFGS() {}
+
+ Real Minimize(std::vector<Real> &x0); // minimizes from x0; x0 receives the final iterate
+ // callbacks are typed on Real (not double) so every instantiation can override and call them
+ virtual Real ComputeFunction(const std::vector<Real> &x) = 0;
+ virtual void ComputeGradient(std::vector<Real> &g, const std::vector<Real> &x) = 0;
+ virtual void Report(int iteration, const std::vector<Real> &x, Real f, Real step_size) = 0;
+ virtual void Report(const std::string &s) = 0;
+};
+
+#include "LBFGS.ipp"
+
+#endif
diff --git a/src/LBFGS.ipp b/src/LBFGS.ipp
new file mode 100644
index 0000000..35aa875
--- /dev/null
+++ b/src/LBFGS.ipp
@@ -0,0 +1,207 @@
+//////////////////////////////////////////////////////////////////////
+// LBFGS.ipp
+//
+// This file contains an implementation of the standard
+// limited-memory BFGS optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// LBFGS::LBFGS()
+//
+// Constructor.  Stores the tuning constants; LineSearch<Real> keeps its defaults.
+//////////////////////////////////////////////////////////////////////
+
+template<class Real>
+LBFGS<Real>::LBFGS
+(
+ const int M, // number of previous gradients to remember
+ const Real TERMINATION_RATIO, // required ratio of gradient norm to parameter norm for termination
+ const int MAX_ITERATIONS, // maximum number of iterations to run L-BFGS
+ const Real SMALL_STEP_RATIO, // ratio beneath which steps are considered "small"
+ const int MAX_SMALL_STEPS, // maximum number of small steps before we quit
+ const Real MAX_STEP_NORM // maximum norm for a single step
+) :
+ LineSearch<Real>(), // base line search is built with its default arguments
+ M(M),
+ TERMINATION_RATIO(TERMINATION_RATIO),
+ MAX_ITERATIONS(MAX_ITERATIONS),
+ SMALL_STEP_RATIO(SMALL_STEP_RATIO),
+ MAX_SMALL_STEPS(MAX_SMALL_STEPS),
+ MAX_STEP_NORM(MAX_STEP_NORM)
+{}
+
+//////////////////////////////////////////////////////////////////////
+// LBFGS::Minimize()
+// L-BFGS main loop.  Starts from x0, returns the final function value
+// and, unless it terminates before the first step, stores the final iterate in x0.
+//////////////////////////////////////////////////////////////////////
+
+template<class Real>
+Real LBFGS<Real>::Minimize(std::vector<Real> &x0)
+{
+ // initialize
+
+ const int n = int(x0.size());
+ std::vector<Real> f(2);
+ std::vector<Real> gamma(2);
+ std::vector<std::vector<Real> > x(2, std::vector<Real>(n));
+ std::vector<std::vector<Real> > g(2, std::vector<Real>(n));
+ std::vector<std::vector<Real> > s(M, std::vector<Real>(n));
+ std::vector<std::vector<Real> > y(M, std::vector<Real>(n));
+ std::vector<Real> rho(M);
+ Real gradient_ratio;
+ Real f0;
+
+ // check for termination criteria at beginning
+
+ x[0] = x0;
+ f[0] = f0 = ComputeFunction(x[0]);
+
+ if (f[0] > Real(1e20))
+ {
+ Report(SPrintF("Termination before optimization: function value too big (%lf > %lf)", f[0], 1e20));
+ return f[0];
+ }
+
+ ComputeGradient(g[0], x[0]);
+ gradient_ratio = Norm(g[0]) / std::max(Real(1), Norm(x[0]));
+ if (gradient_ratio < TERMINATION_RATIO)
+ {
+ Report(SPrintF("Termination before optimization: gradient vector small (%lf < %lf)", gradient_ratio, TERMINATION_RATIO));
+ return f[0];
+ }
+
+ // initial scaling
+
+ gamma[0] = Real(1) / Norm(g[0]);
+
+ // report initial iteration
+
+ Report(0, x[0], f[0], 0);
+
+ // main loop
+
+ bool progress_made = false;
+ int num_consecutive_small_steps = 0;
+ int k = 0;
+
+ while (true)
+ {
+ // compute search direction, d = -H[k] g[k]
+
+ std::vector<Real> d(-g[k%2]);
+ std::vector<Real> a(M);
+ for (int i = k-1; i >= k-M; i--)
+ {
+ a[(i+M)%M] = rho[(i+M)%M] * DotProduct(s[(i+M)%M], d);
+ d -= a[(i+M)%M] * y[(i+M)%M];
+ }
+
+ d *= gamma[k%2];
+
+ for (int i = k-M; i <= k-1; i++)
+ {
+ Real b = rho[(i+M)%M] * DotProduct(y[(i+M)%M], d);
+ d += (a[(i+M)%M] - b) * s[(i+M)%M];
+ }
+
+ // perform line search, update f, and take step
+ // (DoLineSearch lives in the dependent base LineSearch<Real>, so it must be reached through this->)
+ Real step = this->DoLineSearch(x[k%2], f[k%2], g[k%2], d,
+ x[(k+1)%2], f[(k+1)%2], g[(k+1)%2],
+ Real(0), std::min(Real(10), MAX_STEP_NORM / std::max(Real(1), Norm(d))));
+
+ Report(k+1, x[(k+1)%2], f[(k+1)%2], step);
+
+ // check termination conditions
+
+ if (k+1 >= MAX_ITERATIONS)
+ {
+ Report("Termination condition: maximum number of iterations reached");
+ break;
+ }
+
+ // check gradient termination condition
+
+ gradient_ratio = Norm(g[(k+1)%2]) / std::max(Real(1), Norm(x[(k+1)%2]));
+ if (gradient_ratio < TERMINATION_RATIO)
+ {
+ Report(SPrintF("Termination condition: gradient vector small (%lf < %lf)", gradient_ratio, TERMINATION_RATIO));
+ break;
+ }
+
+ // heuristics for detecting slow progress (needed for large-scale
+ // problems due to floating-point precision problems in gradient
+ // computation)
+
+ // check for slow progress
+
+ if (step == Real(0))
+ num_consecutive_small_steps = MAX_SMALL_STEPS;
+ else if ((f[k%2] - f[(k+1)%2]) / std::max(Real(1), f0 - f[(k+1)%2]) < SMALL_STEP_RATIO)
+ num_consecutive_small_steps++;
+ else
+ {
+ num_consecutive_small_steps = 0;
+ progress_made = true;
+ }
+
+ // if we're making slow progress
+
+ if (num_consecutive_small_steps == MAX_SMALL_STEPS)
+ {
+ // give us a second chance if we made some
+ // progress since the last restart
+
+ if (M > 0 && progress_made)
+ {
+ progress_made = false;
+ num_consecutive_small_steps = 0;
+ Report("Restart: Too many consecutive small steps");
+
+ for (int i = 0; i < M; i++)
+ {
+ std::fill(s[i].begin(), s[i].end(), Real(0));
+ std::fill(y[i].begin(), y[i].end(), Real(0));
+ rho[i] = Real(0);
+ }
+ }
+ else
+ {
+ Report("Termination: Too many consecutive small steps");
+ break;
+ }
+ }
+
+ // update iterates; with M == 0 no history is kept and every k%M below
+ // would divide by zero, so only the previous scaling is carried forward
+ gamma[(k+1)%2] = gamma[k%2];
+ if (M > 0)
+ {
+ s[k%M] = x[(k+1)%2] - x[k%2];
+ y[k%M] = g[(k+1)%2] - g[k%2];
+ rho[k%M] = Real(1) / DotProduct(y[k%M], s[k%M]);
+
+ // skip update if non-positive-definite Hessian update
+ // (setting all of these quantities to zero is equivalent
+ // to skipping the update, based on the BFGS recursions)
+ if (!std::isfinite(rho[k%M]) || rho[k%M] <= Real(0))
+ {
+ std::fill(s[k%M].begin(), s[k%M].end(), Real(0));
+ std::fill(y[k%M].begin(), y[k%M].end(), Real(0));
+ rho[k%M] = Real(0);
+ }
+
+ // update scaling factor, keeping the previous one when the ratio is not finite
+ // NOTE(review): this reads pair k-1, not the pair just stored at k -- confirm the lag is intended
+ const Real new_gamma = DotProduct(s[(k-1+M)%M], y[(k-1+M)%M]) / DotProduct(y[(k-1+M)%M], y[(k-1+M)%M]);
+ if (std::isfinite(new_gamma)) gamma[(k+1)%2] = new_gamma;
+ }
+
+ ++k;
+ }
+
+ x0 = x[(k+1)%2]; // every exit is a break taken before ++k, so slot (k+1)%2 holds the newest iterate
+ return f[(k+1)%2];
+}
+
diff --git a/src/LineSearch.hpp b/src/LineSearch.hpp
new file mode 100644
index 0000000..ecea558
--- /dev/null
+++ b/src/LineSearch.hpp
@@ -0,0 +1,72 @@
+//////////////////////////////////////////////////////////////////////
+// LineSearch.hpp
+//
+// This file contains an implementation of a safeguarded
+// backtracking line search algorithm for cubic polynomial
+// interpolation.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef LINESEARCH_HPP
+#define LINESEARCH_HPP
+
+#include <vector>
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class LineSearch
+//
+// Backtracking line search.  DoLineSearch() returns a step size t
+// for the update
+//
+// x <-- x + t*d
+//
+// and writes the new point, its function value f(x + t*d) and its
+// gradient to the output arguments.  Derived classes supply the
+// objective and gradient callbacks in the precision Real.
+//////////////////////////////////////////////////////////////////////
+
+template<class Real>
+class LineSearch
+{
+ const Real T_INIT;
+ const Real MU;
+ const Real MIN_IMPROVEMENT_RATIO;
+ const int MAX_EVALUATIONS;
+ const Real GAMMA1;
+ const Real GAMMA2;
+
+public:
+ LineSearch
+ (
+ const Real T_INIT = Real(1), // initial step size
+ const Real MU = Real(0.001), // phi(t) <= phi(0) + MU * phi'(0) t (sufficient decrease)
+ const Real MIN_IMPROVEMENT_RATIO = Real(0.1), // minimum proportion of overall improvement required to keep going
+ const int MAX_EVALUATIONS = 10, // maximum number of function evaluations
+ const Real GAMMA1 = Real(0.01), // maximum step length shrinkage
+ const Real GAMMA2 = Real(0.8) // minimum step length shrinkage
+ );
+
+ virtual ~LineSearch() {}
+
+ Real DoLineSearch
+ (
+ const std::vector<Real> &x, // initial parameters
+ const Real f, // initial function value
+ const std::vector<Real> &g, // initial gradient vector
+ const std::vector<Real> &d, // initial direction vector
+
+ std::vector<Real> &new_x, // new parameters
+ Real &new_f, // new function value
+ std::vector<Real> &new_g, // new gradient vector
+
+ const Real T_MIN, // minimum step size
+ const Real T_MAX // maximum step size
+ );
+ // callbacks are typed on Real (not double) so every instantiation can override and call them
+ virtual Real ComputeFunction(const std::vector<Real> &x) = 0;
+ virtual void ComputeGradient(std::vector<Real> &g, const std::vector<Real> &x) = 0;
+};
+
+#include "LineSearch.ipp"
+
+#endif
diff --git a/src/LineSearch.ipp b/src/LineSearch.ipp
new file mode 100644
index 0000000..1eb7e75
--- /dev/null
+++ b/src/LineSearch.ipp
@@ -0,0 +1,168 @@
+//////////////////////////////////////////////////////////////////////
+// LineSearch.ipp
+//
+// This file contains an implementation of a safeguarded
+// backtracking line search algorithm for cubic polynomial
+// interpolation.
+//////////////////////////////////////////////////////////////////////
+
+template<class Real> // LineSearch constructor: stores the line-search tuning constants unchanged
+LineSearch<Real>::LineSearch
+(
+ const Real T_INIT, // initial step size
+ const Real MU, // phi(t) <= phi(0) + MU * phi'(0) t (sufficient decrease)
+ const Real MIN_IMPROVEMENT_RATIO, // minimum proportion of overall improvement required to keep going
+ const int MAX_EVALUATIONS, // maximum number of function evaluations
+ const Real GAMMA1, // maximum step length shrinkage (lower limit on t_new / t_last)
+ const Real GAMMA2 // minimum step length shrinkage (upper limit on t_new / t_last)
+) :
+ T_INIT(T_INIT),
+ MU(MU),
+ MIN_IMPROVEMENT_RATIO(MIN_IMPROVEMENT_RATIO),
+ MAX_EVALUATIONS(MAX_EVALUATIONS),
+ GAMMA1(GAMMA1),
+ GAMMA2(GAMMA2)
+{}
+
+//////////////////////////////////////////////////////////////////////
+// UpdateQuoc()
+//
+// Record the best line search result seen so far (f_best, t_best) and
+// set sufficient_decrease once f_new <= f + MU * t_new * dot_prod.
+//////////////////////////////////////////////////////////////////////
+#define UpdateQuoc(t_new, f_new) \
+{ \
+ if (f_new < f_best) \
+ { \
+ f_best = f_new; \
+ t_best = t_new; \
+ } \
+ if (f_new <= f + MU * t_new * dot_prod) \
+ sufficient_decrease = true; \
+}
+
+//////////////////////////////////////////////////////////////////////
+// DoLineSearch()
+//
+// Implementation of backtracking line-search. Returns the
+// best step size t found (0 if no trial point improved on f),
+// for the update
+//
+// x <-- x + t*d
+//
+// On return new_x = x + t*d, new_f is the function value there
+// and new_g is the gradient evaluated at new_x.
+//////////////////////////////////////////////////////////////////////
+
+template<class Real>
+Real LineSearch<Real>::DoLineSearch
+(
+ const std::vector<Real> &x, // initial parameters
+ const Real f, // initial function value
+ const std::vector<Real> &g, // initial gradient vector
+ const std::vector<Real> &d, // initial direction vector
+
+ std::vector<Real> &new_x, // new parameters
+ Real &new_f, // new function value
+ std::vector<Real> &new_g, // new gradient vector
+
+ const Real T_MIN, // minimum step size
+ const Real T_MAX // maximum step size
+)
+{
+ Assert(T_MIN <= T_MAX, "Line search called with T_MIN > T_MAX.");
+ const Real dot_prod = DotProduct(d, g); // directional derivative of f along d at t = 0
+ bool sufficient_decrease = false;
+
+ // try initial point (T_INIT clipped into [T_MIN, T_MAX]); this is evaluation 1
+
+ Real t_best = Real(0), t_last = T_INIT, t_prev = Real(0);
+ t_last = std::max(T_MIN, t_last);
+ t_last = std::min(T_MAX, t_last);
+ Real f_best = f, f_last = ComputeFunction(x + t_last * d), f_prev = Real(0);
+ UpdateQuoc(t_last, f_last);
+
+ for (int iteration = 2; iteration <= MAX_EVALUATIONS; ++iteration)
+ {
+ // termination criteria: sufficient decrease reached and the last trial gained too little
+
+ if (sufficient_decrease && iteration > 2 && (f_prev - f_last) / std::max(Real(1), f - f_best) < MIN_IMPROVEMENT_RATIO) break;
+
+ Real t_new;
+
+ if (iteration == 2)
+ {
+
+ // fit using a quadratic: at^2 + bt + c
+ //
+ // This function must be equal to f at t=0, f_last at t=t_last,
+ // and must match the directional derivative of f at t=0.
+
+ Real a = ((f_last - f) / t_last - dot_prod) / t_last;
+ Real b = dot_prod;
+
+ t_new = -b / (Real(2)*a); // stationary point of the quadratic
+
+ }
+ else
+ {
+
+ // fit using a cubic: at^3 + bt^2 + ct + d
+ //
+ // This function must be equal to f at t=0, f_last at t=t_last,
+ // f_prev at t=t_prev, and must match the directional derivative
+ // of f at t=0.
+ //
+ // c and d are easily obtained. The remaining two terms amount
+ // to solving a linear system of the form
+ //
+ // [ t_prev^3 t_prev^2 ] [ a ] = [ f_prev - d - c t_prev ]
+ // [ t_last^3 t_last^2 ] [ b ] [ f_last - d - c t_last ]
+
+ Real c = dot_prod;
+ Real d = f; // scalar d shadows the direction-vector parameter d inside this branch only
+
+ Real f_l = f_last - d - c * t_last;
+ Real f_p = f_prev - d - c * t_prev;
+
+ Real a = (f_l / (t_last * t_last) - f_p / (t_prev * t_prev)) / (t_last - t_prev);
+ Real b = (f_l / (t_last * t_last * t_last) - f_p / (t_prev * t_prev * t_prev)) / (Real(1) / t_last - Real(1) / t_prev);
+
+ Real A = Real(3)*a; // coefficients of the cubic's derivative A t^2 + B t + C
+ Real B = Real(2)*b;
+ Real C = c;
+
+ t_new = (-B + sqrt(B*B - Real(4)*A*C)) / (Real(2)*A); // pick the left root
+ }
+
+ // use safe-guarding: clip t to a safe range and evaluate function
+
+ if (!std::isfinite(t_new)) t_new = t_last; // degenerate fit (division by zero or negative discriminant): reuse last step
+
+ Real lower_bound = T_MIN;
+ Real upper_bound = T_MAX;
+
+ if (iteration > 3) // from the fourth evaluation on, force t_new into [GAMMA1, GAMMA2] * t_last
+ {
+ lower_bound = std::max(lower_bound, GAMMA1 * t_last);
+ upper_bound = std::min(upper_bound, GAMMA2 * t_last);
+ }
+
+ if (lower_bound > upper_bound) break; // no admissible step remains
+
+ t_new = std::max(lower_bound, t_new);
+ t_new = std::min(upper_bound, t_new);
+
+ // now, move to this point and update iterates
+
+ Real f_new = ComputeFunction(x + t_new * d);
+ UpdateQuoc(t_new, f_new);
+ t_prev = t_last; f_prev = f_last;
+ t_last = t_new; f_last = f_new;
+ }
+
+ new_f = f_best; // f_best starts at f, so t_best == 0 and new_x == x when no trial improved
+ new_x = x + t_best * d;
+ ComputeGradient(new_g, new_x);
+ return t_best;
+}
diff --git a/src/LogSpace.hpp b/src/LogSpace.hpp
new file mode 100644
index 0000000..a5c7f34
--- /dev/null
+++ b/src/LogSpace.hpp
@@ -0,0 +1,280 @@
+//////////////////////////////////////////////////////////////////////
+// LogSpace.hpp
+//
+// Routines for dealing with numbers in log space.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef LOGSPACE_HPP
+#define LOGSPACE_HPP
+
+#include "Utilities.hpp"
+
+#define NEG_INF -2e20
+
+//////////////////////////////////////////////////////////////////////
+// Fast_Exp
+//
+// Fast exponentiation using Chebyshev approximating polynomials,
+// optimized for negative inputs.
+//////////////////////////////////////////////////////////////////////
+
+ inline double Fast_Exp(double x)
+ {
+     // Double precision calls the library exp() directly; inputs at or
+     // below NEG_INF/2 are mapped to an exact zero instead.
+     const bool below_cutoff = (x <= double(NEG_INF/2));
+     return below_cutoff ? double(0) : exp(x);
+ }
+
+ inline float Fast_Exp(float x)
+ {
+     // Piecewise cubic approximation of exp(x) for negative x.
+     // Bounds for tolerance of 4.96e-05: (-9.91152, 0); 6 polynomials needed.
+     // The intervals are tested from the most negative one upward, so the
+     // first threshold that exceeds x selects the polynomial.
+
+     // below the fitted range the result is taken to be zero
+     if (x < float(-9.91152))
+         return float(0);
+
+     // interval (-9.91152, -5.86228)
+     if (x < float(-5.8622823336))
+         return ((float(0.0000803850)*x+float(0.0021627428))*x+float(0.0194708555))*x+float(0.0588080014);
+
+     // interval (-5.86228, -3.83966)
+     if (x < float(-3.8396630909))
+         return ((float(0.0013889414)*x+float(0.0244676474))*x+float(0.1471290604))*x+float(0.3042757740);
+
+     // interval (-3.83966, -2.4915)
+     if (x < float(-2.4915033807))
+         return ((float(0.0072335607)*x+float(0.0906002677))*x+float(0.3983111356))*x+float(0.6245959221);
+
+     // interval (-2.4915, -1.48054)
+     if (x < float(-1.4805375919))
+         return ((float(0.0232410351)*x+float(0.2085645908))*x+float(0.6906367911))*x+float(0.8682322329);
+
+     // interval (-1.48054, -0.672505)
+     if (x < float(-0.6725053211))
+         return ((float(0.0573782771)*x+float(0.3580258429))*x+float(0.9121133217))*x+float(0.9793091728);
+
+     // interval (-0.672505, 0)
+     if (x < float(0))
+         return ((float(0.1199175927)*x+float(0.4815668234))*x+float(0.9975991939))*x+float(0.9999505077);
+
+     // non-negative inputs: clamp to 1e20 above 46.052, otherwise use expf()
+     if (x > float(46.052))
+         return float(1e20);
+     return expf(x);
+ }
+
+//////////////////////////////////////////////////////////////////////
+// Fast_LogExpPlusOne()
+//
+// Fast log(exp(x)+1) using Chebyshev approximating polynomials.
+//////////////////////////////////////////////////////////////////////
+
+ inline double Fast_LogExpPlusOne(double x)
+ {
+     // Double precision evaluates log(exp(x)+1) directly; the argument
+     // is asserted to lie in [0, 30].
+     Assert(double(0) <= x && x <= double(30), "Argument out-of-range.");
+     const double exp_x = exp(x);
+     return log (exp_x + double(1));
+ }
+
+ inline float Fast_LogExpPlusOne(float x){
+
+ // Single-precision log(exp(x)+1), evaluated with one of eight cubic
+ // polynomials selected by the interval that contains x. The argument
+ // is asserted to lie in [0, 11.8624794162]; the interval table below
+ // lists the polynomial used on each sub-range.
+
+ // Bounds for tolerance of 7.05e-06: (0, 11.8625)
+ // Approximating interval: (0, 0.661537) --> ((T(-0.0065591595)*x+T(0.1276442762))*x+T(0.4996554598))*x+T(0.6931542306);
+ // Approximating interval: (0.661537, 1.63202) --> ((T(-0.0155157557)*x+T(0.1446775699))*x+T(0.4882939746))*x+T(0.6958092989);
+ // Approximating interval: (1.63202, 2.49126) --> ((T(-0.0128909247)*x+T(0.1301028251))*x+T(0.5150398748))*x+T(0.6795585882);
+ // Approximating interval: (2.49126, 3.37925) --> ((T(-0.0072142647)*x+T(0.0877540853))*x+T(0.6208708362))*x+T(0.5909675829);
+ // Approximating interval: (3.37925, 4.42617) --> ((T(-0.0031455354)*x+T(0.0467229449))*x+T(0.7592532310))*x+T(0.4348794399);
+ // Approximating interval: (4.42617, 5.78907) --> ((T(-0.0010110698)*x+T(0.0185943421))*x+T(0.8831730747))*x+T(0.2523695427);
+ // Approximating interval: (5.78907, 7.81627) --> ((T(-0.0001962780)*x+T(0.0046084408))*x+T(0.9634431978))*x+T(0.0983148903);
+ // Approximating interval: (7.81627, 11.8625) --> ((T(-0.0000113994)*x+T(0.0003734731))*x+T(0.9959107193))*x+T(0.0149855051);
+ // 8 polynomials needed.
+
+ Assert(float(0.0000000000) <= x && x <= float(11.8624794162), "Argument out-of-range.");
+ // binary search over the interval boundaries: lower half (x < 3.379...) first
+ if (x < float(3.3792499610))
+ {
+ if (x < float(1.6320158198))
+ {
+ if (x < float(0.6615367791))
+ return ((float(-0.0065591595)*x+float(0.1276442762))*x+float(0.4996554598))*x+float(0.6931542306);
+ return ((float(-0.0155157557)*x+float(0.1446775699))*x+float(0.4882939746))*x+float(0.6958092989);
+ }
+ if (x < float(2.4912588184))
+ return ((float(-0.0128909247)*x+float(0.1301028251))*x+float(0.5150398748))*x+float(0.6795585882);
+ return ((float(-0.0072142647)*x+float(0.0877540853))*x+float(0.6208708362))*x+float(0.5909675829);
+ }
+ // upper half: 3.379... <= x
+ if (x < float(5.7890710412))
+ {
+ if (x < float(4.4261691294))
+ return ((float(-0.0031455354)*x+float(0.0467229449))*x+float(0.7592532310))*x+float(0.4348794399);
+ return ((float(-0.0010110698)*x+float(0.0185943421))*x+float(0.8831730747))*x+float(0.2523695427);
+ }
+ if (x < float(7.8162726752))
+ return ((float(-0.0001962780)*x+float(0.0046084408))*x+float(0.9634431978))*x+float(0.0983148903);
+ return ((float(-0.0000113994)*x+float(0.0003734731))*x+float(0.9959107193))*x+float(0.0149855051);
+
+ // Disabled alternative: a coarser four-polynomial fit (tolerance 9.99e-05)
+ // valid only up to 9.2112909219. It is unreachable and kept for reference.
+ /*
+ // Bounds for tolerance of 9.99e-05: (0, 9.21129)
+ // Approximating interval: (0, 1.40131) --> ((T(-0.0118287252)*x+T(0.1342168806))*x+T(0.4976005362))*x+T(0.6932470806);
+ // Approximating interval: (1.40131, 3.06792) --> ((T(-0.0117040733)*x+T(0.1232945547))*x+T(0.5276092444))*x+T(0.6721240615);
+ // Approximating interval: (3.06792, 5.15409) --> ((T(-0.0027005983)*x+T(0.0419040665))*x+T(0.7762991688))*x+T(0.4152395732);
+ // Approximating interval: (5.15409, 9.21129) --> ((T(-0.0001617326)*x+T(0.0040111354))*x+T(0.9666890441))*x+T(0.0929363811);
+ // 4 polynomials needed.
+
+ Assert(float(0.0000000000) <= x && x <= float(9.2112909219), "Argument out-of-range.");
+ if (x < float(3.0679202382))
+ {
+ if (x < float(1.4013117629))
+ return ((float(-0.0118287252)*x+float(0.1342168806))*x+float(0.4976005362))*x+float(0.6932470806);
+ return ((float(-0.0117040733)*x+float(0.1232945547))*x+float(0.5276092444))*x+float(0.6721240615);
+ }
+ if (x < float(5.1540922927))
+ return ((float(-0.0027005983)*x+float(0.0419040665))*x+float(0.7762991688))*x+float(0.4152395732);
+ return ((float(-0.0001617326)*x+float(0.0040111354))*x+float(0.9666890441))*x+float(0.0929363811);
+ */
+ }
+
+//////////////////////////////////////////////////////////////////////
+// Fast_LogExpMinusOne()
+//
+// Fast log(exp(x)-1) using Chebyshev approximating polynomials.
+//////////////////////////////////////////////////////////////////////
+
+ inline double Fast_LogExpMinusOne(double x)
+ {
+     // Double precision evaluates log(exp(x)-1) directly; the argument
+     // is asserted to lie in [0, 30].
+     Assert(double(0) <= x && x <= double(30), "Argument out-of-range.");
+     const double exp_x = exp(x);
+     return log(exp_x - double(1));
+ }
+
+ inline float Fast_LogExpMinusOne(float x)
+ {
+ // Single-precision log(exp(x)-1), evaluated with one of fifteen quartic
+ // polynomials selected by the interval that contains x. The argument is
+ // asserted to lie in [0, 11.6105428289]. Below 0.01 no polynomial is
+ // fitted and the value is computed through Log() and Exp() instead.
+ // NOTE(review): Log and Exp are not defined in this file; presumably
+ // they come from Utilities.hpp -- confirm.
+
+ // Bounds for tolerance of 9.07e-06: (0.01, 11.6105)
+ // Approximating interval: (0.01, 0.0159005) --> (((T(-9371727.3239750639)*x+T(645158.5209300558))*x+T(-18614.2673037550))*x+T(316.6449790062))*x+T(-6.4566212567);
+ // Approximating interval: (0.0159005, 0.0252825) --> (((T(-1466149.1313003162)*x+T(160485.3209227881))*x+T(-7362.4729488413))*x+T(199.3272540294))*x+T(-5.9928567315);
+ // Approximating interval: (0.0252825, 0.0402005) --> (((T(-229370.0200164427)*x+T(39921.2557091097))*x+T(-2912.0525520632))*x+T(125.5447724820))*x+T(-5.5290922070);
+ // Approximating interval: (0.0402005, 0.0639207) --> (((T(-35883.5301576035)*x+T(9930.5447448950))*x+T(-1151.7784915849))*x+T(79.1421117756))*x+T(-5.0653276832);
+ // Approximating interval: (0.0639207, 0.101637) --> (((T(-5613.7580302035)*x+T(2470.2559349975))*x+T(-455.5375751385))*x+T(49.9589387475))*x+T(-4.6015631591);
+ // Approximating interval: (0.101637, 0.161608) --> (((T(-878.2382307251)*x+T(614.4843546811))*x+T(-180.1535174345))*x+T(31.6053018071))*x+T(-4.1377986348);
+ // Approximating interval: (0.161608, 0.256964) --> (((T(-137.3952630777)*x+T(152.8550193085))*x+T(-71.2309051609))*x+T(20.0624860440))*x+T(-3.6740341093);
+ // Approximating interval: (0.256964, 0.408586) --> (((T(-21.4949296304)*x+T(38.0231865170))*x+T(-28.1487599617))*x+T(12.8030746284))*x+T(-3.2102695618);
+ // Approximating interval: (0.408586, 0.649671) --> (((T(-3.3630233327)*x+T(9.4583683684))*x+T(-11.1084741802))*x+T(8.2375430535))*x+T(-2.7465046655);
+ // Approximating interval: (0.649671, 1.033) --> (((T(-0.5263704166)*x+T(2.3527028558))*x+T(-4.3684611166))*x+T(5.3661953909))*x+T(-2.2827344486);
+ // Approximating interval: (1.033, 1.64244) --> (((T(-0.0825320025)*x+T(0.5848902657))*x+T(-1.7021855216))*x+T(3.5600909460))*x+T(-1.8188898105);
+ // Approximating interval: (1.64244, 2.61014) --> (((T(-0.0129828609)*x+T(0.1444031861))*x+T(-0.6457761629))*x+T(2.4222036000))*x+T(-1.3542078422);
+ // Approximating interval: (2.61014, 4.13932) --> (((T(-0.0019714155)*x+T(0.0335196695))*x+T(-0.2229718131))*x+T(1.6981586065))*x+T(-0.8841398010);
+ // Approximating interval: (4.13932, 6.61007) --> (((T(-0.0002180722)*x+T(0.0055578600))*x+T(-0.0541456822))*x+T(1.2404314137))*x+T(-0.4137049114);
+ // Approximating interval: (6.61007, 11.6105) --> (((T(-0.0000062593)*x+T(0.0002549731))*x+T(-0.0039028514))*x+T(1.0266538999))*x+T(-0.0686856567);
+ // 15 polynomials needed.
+
+ Assert(float(0) <= x && x <= float(11.6105428289), "Argument out-of-range.");
+ // binary search over the interval boundaries, smallest arguments first
+ if (x < float(0.2569641966))
+ {
+ if (x < float(0.0402004692))
+ {
+ if (x < float(0.0159004851))
+ {
+ // below the fitted range: fall back to the direct formula
+ if (x < float(0.01))
+ return Log (Exp(x) - float(1));
+ return (((float(-9371727.3239750639)*x+float(645158.5209300558))*x+float(-18614.2673037550))*x+float(316.6449790062))*x+float(-6.4566212567);
+ }
+ if (x < float(0.0252825426))
+ return (((float(-1466149.1313003162)*x+float(160485.3209227881))*x+float(-7362.4729488413))*x+float(199.3272540294))*x+float(-5.9928567315);
+ return (((float(-229370.0200164427)*x+float(39921.2557091097))*x+float(-2912.0525520632))*x+float(125.5447724820))*x+float(-5.5290922070);
+ }
+ if (x < float(0.1016370074))
+ {
+ if (x < float(0.0639206961))
+ return (((float(-35883.5301576035)*x+float(9930.5447448950))*x+float(-1151.7784915849))*x+float(79.1421117756))*x+float(-5.0653276832);
+ return (((float(-5613.7580302035)*x+float(2470.2559349975))*x+float(-455.5375751385))*x+float(49.9589387475))*x+float(-4.6015631591);
+ }
+ if (x < float(0.1616077721))
+ return (((float(-878.2382307251)*x+float(614.4843546811))*x+float(-180.1535174345))*x+float(31.6053018071))*x+float(-4.1377986348);
+ return (((float(-137.3952630777)*x+float(152.8550193085))*x+float(-71.2309051609))*x+float(20.0624860440))*x+float(-3.6740341093);
+ }
+ // 0.2569... <= x < 1.6424...
+ if (x < float(1.6424387600))
+ {
+ if (x < float(0.6496706424))
+ {
+ if (x < float(0.4085855305))
+ return (((float(-21.4949296304)*x+float(38.0231865170))*x+float(-28.1487599617))*x+float(12.8030746284))*x+float(-3.2102695618);
+ return (((float(-3.3630233327)*x+float(9.4583683684))*x+float(-11.1084741802))*x+float(8.2375430535))*x+float(-2.7465046655);
+ }
+ if (x < float(1.0330037540))
+ return (((float(-0.5263704166)*x+float(2.3527028558))*x+float(-4.3684611166))*x+float(5.3661953909))*x+float(-2.2827344486);
+ return (((float(-0.0825320025)*x+float(0.5848902657))*x+float(-1.7021855216))*x+float(3.5600909460))*x+float(-1.8188898105);
+ }
+ // 1.6424... <= x
+ if (x < float(4.1393216929))
+ {
+ if (x < float(2.6101444897))
+ return (((float(-0.0129828609)*x+float(0.1444031861))*x+float(-0.6457761629))*x+float(2.4222036000))*x+float(-1.3542078422);
+ return (((float(-0.0019714155)*x+float(0.0335196695))*x+float(-0.2229718131))*x+float(1.6981586065))*x+float(-0.8841398010);
+ }
+ if (x < float(6.6100708779))
+ return (((float(-0.0002180722)*x+float(0.0055578600))*x+float(-0.0541456822))*x+float(1.2404314137))*x+float(-0.4137049114);
+ return (((float(-0.0000062593)*x+float(0.0002549731))*x+float(-0.0039028514))*x+float(1.0266538999))*x+float(-0.0686856567);
+ }
+
+//////////////////////////////////////////////////////////////////////
+// Fast_LogAdd()
+// Fast_LogPlusEquals()
+//
+// Compute log(exp(x)+exp(y)).
+//////////////////////////////////////////////////////////////////////
+
+ inline double Fast_LogAdd(double x, double y)
+ {
+     // order the operands so that hi is the larger log-value
+     const bool x_is_smaller = (x < y);
+     const double hi = x_is_smaller ? y : x;
+     const double lo = x_is_smaller ? x : y;
+
+     // the smaller term contributes nothing when it represents log(0)
+     if (lo <= double(NEG_INF/2)) return hi;
+
+     // likewise when it is at least 30 log-units below the larger one
+     if (hi-lo >= double(30)) return hi;
+
+     // log(e^hi + e^lo) = log(e^(hi-lo) + 1) + lo
+     return Fast_LogExpPlusOne(hi-lo) + lo;
+ }
+
+ inline float Fast_LogAdd(float x, float y)
+ {
+     // order the operands so that hi is the larger log-value
+     const bool x_is_smaller = (x < y);
+     const float hi = x_is_smaller ? y : x;
+     const float lo = x_is_smaller ? x : y;
+
+     // the smaller term contributes nothing when it represents log(0)
+     if (lo <= float(NEG_INF/2)) return hi;
+
+     // beyond this gap the float polynomial fit is not defined; the sum is hi
+     if (hi-lo >= float(11.8624794162)) return hi;
+
+     // log(e^hi + e^lo) = log(e^(hi-lo) + 1) + lo
+     return Fast_LogExpPlusOne(hi-lo) + lo;
+ }
+
+ inline void Fast_LogPlusEquals (double &x, double y)
+ {
+     // after this step x holds the larger operand and y the smaller one
+     if (x < y)
+     {
+         const double larger = y;
+         y = x;
+         x = larger;
+     }
+
+     // x already is the answer unless the smaller term is significant
+     const bool smaller_is_significant = (y > double(NEG_INF/2) && x-y < double(30));
+     if (smaller_is_significant)
+         x = Fast_LogExpPlusOne(x-y) + y;
+ }
+
+ inline void Fast_LogPlusEquals (float &x, float y)
+ {
+     // after this step x holds the larger operand and y the smaller one
+     if (x < y)
+     {
+         const float larger = y;
+         y = x;
+         x = larger;
+     }
+
+     // x already is the answer unless the smaller term is significant
+     const bool smaller_is_significant = (y > float(NEG_INF/2) && x-y < float(11.8624794162));
+     if (smaller_is_significant)
+         x = Fast_LogExpPlusOne(x-y) + y;
+ }
+
+//////////////////////////////////////////////////////////////////////
+// Fast_LogSubtract()
+// Fast_LogMinusEquals()
+//
+// Compute log(exp(x)-exp(y)).
+//////////////////////////////////////////////////////////////////////
+
+ inline double Fast_LogSubtract (double x, double y)
+ {
+     // exp(x) - exp(y) must be positive to have a logarithm
+     Assert(x > y, "Cannot represent negative numbers in log space.");
+
+     // subtracting log(0) leaves x unchanged
+     if (y <= double(NEG_INF/2)) return x;
+
+     // so does subtracting a term at least 30 log-units smaller
+     if (x-y >= double(30)) return x;
+
+     // log(e^x - e^y) = log(e^(x-y) - 1) + y
+     return Fast_LogExpMinusOne(x-y) + y;
+ }
+
+ inline float Fast_LogSubtract (float x, float y)
+ {
+     // exp(x) - exp(y) must be positive to have a logarithm
+     Assert(x > y, "Cannot represent negative numbers in log space.");
+
+     // subtracting log(0) leaves x unchanged
+     if (y <= float(NEG_INF/2)) return x;
+
+     // beyond this gap the float polynomial fit is not defined; the result is x
+     if (x-y >= float(11.6105428289)) return x;
+
+     // log(e^x - e^y) = log(e^(x-y) - 1) + y
+     return Fast_LogExpMinusOne(x-y) + y;
+ }
+
+ inline void Fast_LogMinusEquals (double &x, double y)
+ {
+     // exp(x) - exp(y) must be positive to have a logarithm
+     Assert(x > y, "Cannot represent negative numbers in log space.");
+
+     // x is left untouched unless the subtracted term is significant
+     const bool subtrahend_is_significant = (y > double(NEG_INF/2) && x-y < double(30));
+     if (subtrahend_is_significant)
+         x = Fast_LogExpMinusOne(x-y) + y;
+ }
+
+ inline void Fast_LogMinusEquals (float &x, float y)
+ {
+     // exp(x) - exp(y) must be positive to have a logarithm
+     Assert(x > y, "Cannot represent negative numbers in log space.");
+
+     // x is left untouched unless the subtracted term is significant
+     const bool subtrahend_is_significant = (y > float(NEG_INF/2) && x-y < float(11.6105428289));
+     if (subtrahend_is_significant)
+         x = Fast_LogExpMinusOne(x-y) + y;
+ }
+
+#endif
diff --git a/src/MakeCoords.cpp b/src/MakeCoords.cpp
new file mode 100644
index 0000000..01b59e8
--- /dev/null
+++ b/src/MakeCoords.cpp
@@ -0,0 +1,413 @@
+/////////////////////////////////////////////////////////////////
+// MakeCoords.cpp
+/////////////////////////////////////////////////////////////////
+
+#include "MakeCoords.hpp"
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::GetParams()
+//
+// Retrieve unrolled parameter vector.
+/////////////////////////////////////////////////////////////////
+
+ std::vector<double> MakeCoords::GetParams(const std::vector<Point> &points) const
+ {
+     // Flatten points[1..] into x0, y0, x1, y1, ...; element 0 is skipped.
+     std::vector<double> flat;
+     for (size_t idx = 1; idx < points.size(); ++idx)
+     {
+         const Point &pt = points[idx];
+         flat.push_back(pt.x);
+         flat.push_back(pt.y);
+     }
+     return flat;
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::SetParams()
+//
+// Load unrolled parameter vector.
+/////////////////////////////////////////////////////////////////
+
+ void MakeCoords::SetParams(const std::vector<double> &values)
+ {
+     // Start from freshly default-constructed (zeroed) coordinate and
+     // gradient arrays; slot 0 of each array is never filled.
+     coords = std::vector<Point>(sstruct.GetLength()+1);
+     gradients = std::vector<Point>(sstruct.GetLength()+1);
+
+     // values stores one (x, y) pair per coordinate slot, starting at slot 1
+     for (size_t i = 1; i < coords.size(); i++)
+     {
+         const size_t base = 2 * (i - 1);
+         coords[i].x = values[base];
+         coords[i].y = values[base + 1];
+     }
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::PrintParams()
+//
+// Print parameters.
+/////////////////////////////////////////////////////////////////
+
+ void MakeCoords::PrintParams(const std::string &filename) const
+ {
+     std::ofstream outfile(filename.c_str());
+     if (!outfile)
+         Error("Could not open coordinates file for writing.");
+
+     // one "x y" line per position, for positions 1..GetLength()
+     for (int pos = 1; pos <= sstruct.GetLength(); pos++)
+     {
+         const Point &pt = coords[pos];
+         outfile << pt.x << " " << pt.y << std::endl;
+     }
+
+     outfile.close();
+ }
+
+/////////////////////////////////////////////////////////////////
+// ComputeLoop()
+//
+// Compute the residues involved in a loop.
+/////////////////////////////////////////////////////////////////
+
+ std::vector<int> MakeCoords::ComputeLoop(const std::vector<int> &mapping, int left) const
+ {
+ // Collect the positions lying directly on the loop closed by the pair
+ // (left, mapping[left]). The result starts with `left`, ends with
+ // mapping[left], and contains 0 entries as placeholders: STEM_WIDTH-1
+ // of them after every paired position that opens a nested pair at
+ // level 0, and STEM_WIDTH-1 more after the final mapping[left].
+ // mapping[k] == 0 means position k is unpaired.
+ std::vector<int> ret(1, left);
+ // nesting depth of pairs opened strictly inside (left, mapping[left])
+ int level = 0;
+
+ // look for other letters in the loop
+
+ for (int k = left+1; k < mapping[left]; k++)
+ {
+ if (mapping[k] == 0)
+ {
+ // unpaired position: on this loop only if not inside a nested pair
+ if (level == 0) ret.push_back(k);
+ }
+ else
+ {
+ // closing side of a pair: leave the nested region before testing level
+ if (mapping[k] < k) level--;
+ if (level == 0) ret.push_back(k);
+ // opening side of a pair: reserve placeholder slots, then descend
+ if (mapping[k] > k)
+ {
+ if (level == 0)
+ for (int i = 0; i < STEM_WIDTH-1; i++)
+ ret.push_back(0);
+ level++;
+ }
+ }
+ }
+
+ // close the loop with the partner of `left` plus its placeholder slots
+ ret.push_back(mapping[left]);
+ for (int i = 0; i < STEM_WIDTH-1; i++) ret.push_back(0);
+
+ return ret;
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::ComputeLoopCenter()
+//
+// Given two points (x1,y1) and (x2,y2) that span a central
+// angle of 2*PI*k/n (going counterclockwise), compute the
+// center (xc,yc).
+/////////////////////////////////////////////////////////////////
+
+ Point MakeCoords::ComputeLoopCenter(Point p1, Point p2, int k, int n) const
+ {
+     // tangent of the angle PI*(1/2 - k/n), used to offset the chord midpoint
+     const double theta = PI*(0.5 - double(k)/n);
+     const double slope = std::tan(theta);
+
+     // chord from p1 to p2
+     const double dx = p2.x-p1.x;
+     const double dy = p2.y-p1.y;
+
+     const double cx = p1.x + 0.5 * (dx - slope * dy);
+     const double cy = p1.y + 0.5 * (slope * dx + dy);
+     return Point(cx, cy);
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::ComputeLoopPositions()
+//
+// Given the initial and center point of a loop, and the number
+// of residues in the loop, compute the positions of each residue
+// in the loop (clockwise).
+/////////////////////////////////////////////////////////////////
+
+ std::vector<Point> MakeCoords::ComputeLoopPositions(Point p1, Point center, int n) const
+ {
+     // angular spacing between consecutive positions on the circle
+     const double alpha = 2*PI/n;
+
+     // offset of the starting point from the center
+     const double dx = p1.x - center.x;
+     const double dy = p1.y - center.y;
+
+     std::vector<Point> ret;
+     for (int i = 0; i < n; i++)
+     {
+         // rotate the starting offset by -alpha*i about the center
+         const double c = std::cos(-alpha*i);
+         const double s = std::sin(-alpha*i);
+         ret.push_back(Point(center.x + c*dx - s*dy,
+                             center.y + s*dx + c*dy));
+     }
+     return ret;
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::InitialPlacement()
+//
+// Initial placement of all coordinates using deterministic
+// procedure.
+/////////////////////////////////////////////////////////////////
+
+ void MakeCoords::InitialPlacement()
+ {
+ // Assign starting coordinates to every position: each loop closed by a
+ // pair (i, mapping[i]) is laid out on a circle through the already
+ // placed coordinates of i and mapping[i]. Loops are processed in
+ // increasing order of i, beginning with the artificial pair (1, L).
+ coords = std::vector<Point>(sstruct.GetLength()+1);
+ gradients = std::vector<Point>(sstruct.GetLength()+1);
+
+ // work on a copy: the mapping is edited below
+ std::vector<int> mapping = sstruct.GetMapping();
+ const int L = int(mapping.size()) - 1;
+
+ // set any unknown pairings to be unpaired
+
+ for (int i = 1; i <= L; i++) mapping[i] = std::max(0, mapping[i]);
+
+ // temporarily force a pairing of the first and last residues
+
+ // any existing partners of 1 and L become unpaired first
+ if (mapping[1] != 0) mapping[mapping[1]] = 0;
+ if (mapping[L] != 0) mapping[mapping[L]] = 0;
+ mapping[1] = L;
+ mapping[L] = 1;
+
+ // set coordinates of first and last residues
+
+ coords[1].x = 0;
+ coords[1].y = 0;
+ coords[L].x = STEM_WIDTH;
+ coords[L].y = 0;
+
+ // iteratively find internal loops and grow a structure from them
+
+ for (int i = 1; i <= L; i++)
+ {
+ // only the opening side of a pair starts a loop
+ if (mapping[i] > i)
+ {
+ std::vector<int> loop = ComputeLoop(mapping, i);
+ int j = mapping[i];
+ // loop.size() counts placeholder entries too, so they take up room on the circle
+ Point center = ComputeLoopCenter(coords[i], coords[j], STEM_WIDTH, loop.size());
+ std::vector<Point> positions = ComputeLoopPositions(coords[i], center, loop.size());
+ for (int k = 0; k < int(loop.size()); k++)
+ {
+ // entries <= 0 are placeholders and receive no coordinate
+ if (loop[k] > 0) coords[loop[k]] = positions[k];
+ }
+ }
+ }
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::Distance()
+//
+ // Compute Euclidean distance between two points.
+/////////////////////////////////////////////////////////////////
+
+ double MakeCoords::Distance(Point p, Point q) const
+ {
+     // Euclidean length of the vector from q to p
+     const double dx = p.x - q.x;
+     const double dy = p.y - q.y;
+     return std::sqrt(dx*dx + dy*dy);
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::AddConstraints()
+//
+// Add all constraints associated with loops in RNA structure.
+/////////////////////////////////////////////////////////////////
+
+ void MakeCoords::AddConstraints()
+ {
+     std::vector<int> mapping = sstruct.GetMapping();
+     const int L = int(mapping.size()) - 1;
+
+     // negative (unknown) pairings are treated as unpaired
+     for (int i = 1; i <= L; i++)
+         if (mapping[i] < 0) mapping[i] = 0;
+
+     // loop constraints: every pair of real positions on a loop keeps the
+     // distance it has in the current coordinates
+     for (int i = 1; i <= L; i++)
+     {
+         if (mapping[i] <= i) continue;
+
+         const std::vector<int> loop = ComputeLoop (mapping, i);
+
+         // the strength is divided by (loop size - 7) once that exceeds 1
+         const double strength = LOOP_STRENGTH / std::max(1, int(loop.size()) - 7);
+
+         for (size_t k1 = 0; k1 < loop.size(); k1++)
+         {
+             const int first = loop[k1];
+             if (first <= 0) continue;          // placeholder entry
+
+             for (size_t k2 = k1+1; k2 < loop.size(); k2++)
+             {
+                 const int second = loop[k2];
+                 if (second <= 0) continue;     // placeholder entry
+
+                 const double dist = Distance(coords[first], coords[second]);
+                 constraints.push_back(Constraint(ConstraintType_LENGTH, first, second, dist, strength));
+             }
+         }
+     }
+
+     // backbone constraints: consecutive positions keep their current distance
+     for (int i = 1; i+1 <= L; i++)
+     {
+         const double dist = Distance(coords[i], coords[i+1]);
+         constraints.push_back(Constraint(ConstraintType_LENGTH, i, i+1, dist, BACKBONE_STRENGTH));
+     }
+
+     // repulsive constraints between every unordered pair of positions
+     for (int i = 1; i <= L; i++)
+     {
+         for (int j = i+1; j <= L; j++)
+         {
+             constraints.push_back(Constraint(ConstraintType_REPULSIVE, i, j, 0, REPULSIVE_STRENGTH));
+         }
+     }
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::MakeCoords()
+//
+// Constructor.
+/////////////////////////////////////////////////////////////////
+
+ MakeCoords::MakeCoords(const SStruct &sstruct)
+     : LBFGS<double>(),
+       sstruct(sstruct)
+ {
+     // the constraints are measured on the initial layout, so place first
+     InitialPlacement();
+     AddConstraints();
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::ComputeGradient()
+//
+// Compute objective gradient.
+/////////////////////////////////////////////////////////////////
+
+ void MakeCoords::ComputeGradient(std::vector<double> &gradient, const std::vector<double> &values)
+ {
+     // load the coordinates; this also replaces the per-point gradient
+     // accumulators with fresh zeroed ones
+     SetParams(values);
+
+     for (size_t c = 0; c < constraints.size(); c++)
+     {
+         const Constraint &con = constraints[c];
+
+         // end points of the constraint and their current separation
+         const Point &p1 = coords[con.i];
+         const Point &p2 = coords[con.j];
+         const double r = Distance(p1, p2);
+
+         // derivative of this constraint's term with respect to r
+         double dVdr;
+         if (con.type == ConstraintType_LENGTH)
+         {
+             // dV/dr = -a/r^2 + 2*b*r
+             double a = con.strength * con.dist;
+             double b = con.strength / (2 * con.dist * con.dist);
+             dVdr = -a/(r*r) + 2*b*r;
+         }
+         else if (con.type == ConstraintType_REPULSIVE)
+         {
+             // dV/dr = -a/r^2
+             double a = con.strength;
+             dVdr = -a/(r*r);
+         }
+         else
+         {
+             Error("Invalid constraint type.");
+             continue;
+         }
+
+         // chain rule: dr/dp1 = (p1 - p2)/r and dr/dp2 = (p2 - p1)/r
+         gradients[con.i].x += dVdr * (p1.x - p2.x) / r;
+         gradients[con.i].y += dVdr * (p1.y - p2.y) / r;
+         gradients[con.j].x += dVdr * (p2.x - p1.x) / r;
+         gradients[con.j].y += dVdr * (p2.y - p1.y) / r;
+     }
+
+     // flatten the per-point gradients into the optimizer's layout
+     gradient = GetParams(gradients);
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::ComputeFunction()
+//
+// Compute objective function.
+/////////////////////////////////////////////////////////////////
+
+ double MakeCoords::ComputeFunction(const std::vector<double> &values)
+ {
+     SetParams(values);
+
+     // sum of the terms contributed by the individual constraints
+     double total = 0;
+     for (size_t c = 0; c < constraints.size(); c++)
+     {
+         const Constraint &con = constraints[c];
+
+         // end points, current separation r and target separation d
+         const Point &p1 = coords[con.i];
+         const Point &p2 = coords[con.j];
+         const double r = Distance(p1, p2);
+         const double d = con.dist;
+
+         if (con.type == ConstraintType_LENGTH)
+         {
+             // a/r + b*r^2, shifted so that the term is zero at r == d
+             double a = con.strength * con.dist;
+             double b = con.strength / (2 * con.dist * con.dist);
+             total += a/r + b*r*r - (a/d + b*d*d);
+         }
+         else if (con.type == ConstraintType_REPULSIVE)
+         {
+             // a/r
+             double a = con.strength;
+             total += a/r;
+         }
+         else
+         {
+             Error("Invalid constraint type.");
+         }
+     }
+
+     return total;
+ }
+
+/////////////////////////////////////////////////////////////////
+// MakeCoords::Report()
+//
+// Print feedback from minimizer.
+/////////////////////////////////////////////////////////////////
+
+ void MakeCoords::Report(int iteration, const std::vector<double> &theta, double objective, double step_length)
+ {
+     // theta is accepted but not printed
+     std::cerr << "Minimizer: iteration " << iteration;
+     std::cerr << ", f = " << objective;
+     std::cerr << ", step length = " << step_length;
+     std::cerr << std::endl;
+ }
+
+ void MakeCoords::Report(const std::string &s)
+ {
+     // free-form message, written with the same prefix as the iteration report
+     std::cerr << "Minimizer: ";
+     std::cerr << s << std::endl;
+ }
+
+/////////////////////////////////////////////////////////////////
+// main()
+//
+// Main program.
+/////////////////////////////////////////////////////////////////
+
+ int main(int argc, char **argv)
+ {
+     // exactly two arguments are required: the input and the output file
+     const bool have_both_files = (argc == 3);
+     if (!have_both_files)
+     {
+         std::cerr << std::endl;
+         std::cerr << "Usage: " << argv[0] << " BPSEQFILE OUTFILE" << std::endl;
+         std::cerr << std::endl;
+         std::cerr << " where BPSEQFILE is the name of the input BPSEQ file" << std::endl;
+         std::cerr << " OUTFILE is the name of the output coordinates file" << std::endl;
+         std::cerr << std::endl;
+         exit (1);
+     }
+
+     const char *input_name = argv[1];
+     const char *output_name = argv[2];
+
+     // load the structure and build the initial layout plus constraints
+     SStruct sstruct(input_name);
+     MakeCoords folder(sstruct);
+
+     // minimize starting from the initial layout, then store and write the result
+     std::vector<double> theta = folder.GetParams(folder.coords);
+     folder.Minimize(theta);
+     folder.SetParams(theta);
+     folder.PrintParams(output_name);
+ }
diff --git a/src/MakeCoords.hpp b/src/MakeCoords.hpp
new file mode 100644
index 0000000..bab662c
--- /dev/null
+++ b/src/MakeCoords.hpp
@@ -0,0 +1,87 @@
+/////////////////////////////////////////////////////////////////
+// MakeCoords.hpp
+/////////////////////////////////////////////////////////////////
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <cstring>
+#include <cstdio>
+#include "LBFGS.hpp"
+#include "Utilities.hpp"
+#include "SStruct.hpp"
+
+/////////////////////////////////////////////////////////////////
+// Constants
+/////////////////////////////////////////////////////////////////
+
+const double LOOP_STRENGTH = 30;
+const double REPULSIVE_STRENGTH = 0.2;
+const double BACKBONE_STRENGTH = 50;
+const double PI = 3.141592653589793238462643383279502884197169399;
+const int STEM_WIDTH = 3;
+
+/////////////////////////////////////////////////////////////////
+// Types
+/////////////////////////////////////////////////////////////////
+
+ struct Point
+ {
+     // planar coordinates
+     double x, y;
+
+     // default: the origin
+     Point() : x(0.0), y(0.0) {}
+
+     // explicit coordinates
+     Point(double px, double py) : x(px), y(py) {}
+ };
+
+ // Kinds of pairwise constraint handled by the layout objective.
+ enum ConstraintType
+ {
+     ConstraintType_LENGTH = 0,      // pair with a target distance
+     ConstraintType_REPULSIVE = 1    // pair that repels
+ };
+
+ struct Constraint
+ {
+     ConstraintType type;   // which kind of term this constraint contributes
+     int i;                 // index of the first point
+     int j;                 // index of the second point
+     double dist;           // target distance (passed as 0 for repulsive pairs)
+     double strength;       // weight of the term
+
+     // store all five fields as given
+     Constraint(ConstraintType type_, int i_, int j_, double dist_, double strength_)
+         : type(type_),
+           i(i_),
+           j(j_),
+           dist(dist_),
+           strength(strength_)
+     {}
+ };
+
+/////////////////////////////////////////////////////////////////
+// class MakeCoords
+/////////////////////////////////////////////////////////////////
+
+ // Computes 2-D coordinates for the positions of an SStruct by minimizing
+ // a sum of pairwise constraint terms with the inherited LBFGS<double>
+ // optimizer.
+ class MakeCoords : public LBFGS<double>
+ {
+ // structure being laid out; held by reference, so it must outlive this object
+ const SStruct &sstruct;
+ // pairwise terms of the objective, filled by AddConstraints()
+ std::vector<Constraint> constraints;
+
+ public:
+ // coordinates per position; index 0 is unused
+ std::vector<Point> coords;
+ // per-position gradient accumulators, indexed like coords
+ std::vector<Point> gradients;
+
+ // conversion between Point arrays and the flat x,y,x,y,... parameter vector
+ std::vector<double> GetParams(const std::vector<Point> &points) const;
+ void SetParams(const std::vector<double> &values);
+ // write one "x y" line per position to the named file
+ void PrintParams(const std::string &filename) const;
+
+ // helpers for the deterministic initial layout
+ std::vector<int> ComputeLoop(const std::vector<int> &mapping, int left) const;
+ Point ComputeLoopCenter(Point p1, Point p2, int k, int n) const;
+ std::vector<Point> ComputeLoopPositions(Point p1, Point center, int n) const;
+ void InitialPlacement();
+
+ // Euclidean distance between two points
+ double Distance(Point p, Point q) const;
+ // build the loop, backbone and repulsive constraints from the current coords
+ void AddConstraints();
+
+
+ // runs InitialPlacement() followed by AddConstraints()
+ MakeCoords(const SStruct &sstruct);
+ // objective gradient, objective value and progress reporting
+ // NOTE(review): presumably these are the hooks LBFGS<double> calls -- confirm against LBFGS.hpp
+ void ComputeGradient(std::vector<double> &gradient, const std::vector<double> &values);
+ double ComputeFunction(const std::vector<double> &values);
+ void Report(int iteration, const std::vector<double> &theta, double objective, double step_length);
+ void Report(const std::string &s);
+ };
+
diff --git a/src/MakeDefaults.pl b/src/MakeDefaults.pl
new file mode 100755
index 0000000..6ab7698
--- /dev/null
+++ b/src/MakeDefaults.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl
+
+use strict;
+
+ # WriteDefaultValues($filename, $function_name)
+ #
+ # Read "NAME VALUE" lines from $filename and print, to the already-open
+ # OUTFILE handle, a C++ function template named $function_name that
+ # returns the values as a std::vector<RealT>. Lines that do not contain
+ # two whitespace-separated fields are skipped. Dies if $filename cannot
+ # be opened.
+ sub WriteDefaultValues
+ {
+     my ($filename, $function_name) = @_;
+
+     # three-argument open with a lexical handle; fail loudly instead of
+     # silently generating an empty table from an unreadable file
+     open(my $infile, '<', $filename)
+         or die "Could not open parameter file '$filename' for reading: $!\n";
+
+     my @names = ();
+     my @values = ();
+
+     while (my $line = <$infile>)
+     {
+         # On a failed match $1 and $2 keep the captures of the previous
+         # successful match, so a blank or malformed line would push the
+         # preceding entry a second time; skip such lines instead.
+         next unless $line =~ /(\S+)\s+(\S+)/;
+         push (@names, $1);
+         push (@values, $2);
+     }
+
+     close($infile);
+
+     print OUTFILE "/////////////////////////////////////////////////////////////////\n";
+     print OUTFILE "// $function_name()\n";
+     print OUTFILE "//\n";
+     print OUTFILE "// Retrieve default parameter values.\n";
+     print OUTFILE "/////////////////////////////////////////////////////////////////\n";
+     print OUTFILE "\n";
+     print OUTFILE "template<class RealT>\n";
+     print OUTFILE "std::vector<RealT> $function_name()\n";
+     print OUTFILE "{\n";
+     print OUTFILE " RealT values[] =\n";
+     print OUTFILE " {\n";
+     for (my $i = 0; $i < @values; $i++)
+     {
+         # every initializer but the last is followed by a comma; the field
+         # is padded to 20 columns so the trailing name comments line up
+         my $value = sprintf("%-20s", sprintf("%.10lf%s", $values[$i], ($i == $#values ? " " : ",")));
+         print OUTFILE " $value // $names[$i]\n";
+     }
+     print OUTFILE " };\n\n";
+     print OUTFILE " return std::vector<RealT>(values, values + ".scalar(@values).");\n";
+     print OUTFILE "}\n\n";
+ }
+
+ # main()
+ #
+ # Expects three parameter-file names in @ARGV and writes Defaults.ipp in
+ # the current directory with one generated function per file. Dies if
+ # Defaults.ipp cannot be opened or completely written.
+ sub main
+ {
+     if (@ARGV != 3)
+     {
+         print STDERR "Usage: perl MakeDefaults.pl PARAMFILE_COMPLEMENTARY PARAMFILE_NONCOMPLEMENTARY PARAMFILE_PROFILE\n";
+         exit(1);
+     }
+
+     # OUTFILE stays a package-level handle because WriteDefaultValues
+     # prints to it by name; the open itself is three-argument and checked.
+     open(OUTFILE, '>', 'Defaults.ipp')
+         or die "Could not open Defaults.ipp for writing: $!\n";
+
+     WriteDefaultValues($ARGV[0], "GetDefaultComplementaryValues");
+     print OUTFILE "\n";
+     WriteDefaultValues($ARGV[1], "GetDefaultNoncomplementaryValues");
+     print OUTFILE "\n";
+     WriteDefaultValues($ARGV[2], "GetDefaultProfileValues");
+
+     # buffered write errors only surface when the handle is closed
+     close(OUTFILE)
+         or die "Could not finish writing Defaults.ipp: $!\n";
+ }
+
+main();
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..091270e
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,106 @@
+ # Compiler and default flags. OTHERFLAGS is not set in this file; the
+ # configuration targets further down pass it on the command line.
+ CXX = g++
+
+ CXXFLAGS = -O3 -DNDEBUG -W -pipe -Wundef -Winline --param large-function-growth=100000 -Wall
+ LINKFLAGS = -lm
+ # extra libraries, used only when linking plot_rna
+ GDLINKFLAGS = -lgd -lpng
+
+ # Source lists, one per executable.
+ CONTRAFOLD_SRCS = \
+ Contrafold.cpp \
+ FileDescription.cpp \
+ Options.cpp \
+ SStruct.cpp \
+ Utilities.cpp
+
+ MAKECOORDS_SRCS = \
+ MakeCoords.cpp \
+ SStruct.cpp \
+ Utilities.cpp
+
+ PLOTRNA_SRCS = \
+ EncapsulatedPostScript.cpp \
+ PlotRNA.cpp \
+ SStruct.cpp \
+ Utilities.cpp
+
+ SCOREPREDICTION_SRCS = \
+ ScorePrediction.cpp \
+ SStruct.cpp \
+ Utilities.cpp
+
+ # Object lists derived from the source lists (.cpp -> .o).
+ CONTRAFOLD_OBJS = $(CONTRAFOLD_SRCS:%.cpp=%.o)
+ MAKECOORDS_OBJS = $(MAKECOORDS_SRCS:%.cpp=%.o)
+ PLOTRNA_OBJS = $(PLOTRNA_SRCS:%.cpp=%.o)
+ SCOREPREDICTION_OBJS = $(SCOREPREDICTION_SRCS:%.cpp=%.o)
+
+ .PHONY: all viz clean
+
+ # "all" builds contrafold and score_prediction; "viz" builds make_coords and plot_rna.
+ all: contrafold score_prediction
+ viz: make_coords plot_rna
+
+ contrafold: $(CONTRAFOLD_OBJS)
+ $(CXX) $(CXXFLAGS) $(OTHERFLAGS) $(CONTRAFOLD_OBJS) $(LINKFLAGS) -o contrafold
+
+ # Defaults.ipp is generated by MakeDefaults.pl from the three parameter
+ # files and is rebuilt when the script or any *.params.* file changes.
+ Defaults.ipp: MakeDefaults.pl *.params.*
+ perl MakeDefaults.pl contrafold.params.complementary contrafold.params.noncomplementary contrafold.params.profile
+
+ # Contrafold.o has its own rule so that it also depends on the generated Defaults.ipp.
+ Contrafold.o: Contrafold.cpp Defaults.ipp
+ $(CXX) $(CXXFLAGS) $(OTHERFLAGS) -c Contrafold.cpp
+
+ make_coords: $(MAKECOORDS_OBJS)
+ $(CXX) $(CXXFLAGS) $(OTHERFLAGS) $(MAKECOORDS_OBJS) $(LINKFLAGS) -o make_coords
+
+ plot_rna: $(PLOTRNA_OBJS)
+ $(CXX) $(CXXFLAGS) $(OTHERFLAGS) $(PLOTRNA_OBJS) $(LINKFLAGS) $(GDLINKFLAGS) -o plot_rna
+
+ score_prediction: $(SCOREPREDICTION_OBJS)
+ $(CXX) $(CXXFLAGS) $(OTHERFLAGS) $(SCOREPREDICTION_OBJS) $(LINKFLAGS) -o score_prediction
+
+##########################################
+
+ # Convenience targets: each one re-runs "all" with a particular compiler
+ # and flag set. $(MAKE) is used instead of a literal "make" so that the
+ # sub-make is the same program that is running this Makefile and so that
+ # command-line options such as -j and -n are handed down to it.
+ # The targets are declared phony because none of them produces a file
+ # of its own name.
+ .PHONY: gccathlon64 gccathlon64multi gccathlon64multiprofile intel intelmulti multi gccp4 gccp4multi gcc64 profile gccp4profile debug debugmulti assembly
+
+ gccathlon64:
+ $(MAKE) all OTHERFLAGS="-march=athlon64 -fomit-frame-pointer -ffast-math -funroll-all-loops -funsafe-math-optimizations -fpeel-loops --param max-inline-insns-single=100000 --param inline-unit-growth=100000 --param large-function-growth=100000"
+
+ gccathlon64multi:
+ $(MAKE) all CXX="mpiCC" OTHERFLAGS="-DMULTI -march=athlon64 -fomit-frame-pointer -ffast-math -funroll-all-loops -funsafe-math-optimizations -fpeel-loops --param max-inline-insns-single=100000 --param inline-unit-growth=100000 --param large-function-growth=100000"
+
+ gccathlon64multiprofile:
+ $(MAKE) all CXX="mpiCC" OTHERFLAGS="-DMULTI -march=athlon64 -fomit-frame-pointer -ffast-math -funroll-all-loops -funsafe-math-optimizations -fpeel-loops --param max-inline-insns-single=100000 --param inline-unit-growth=100000 --param large-function-growth=100000 -pg -g"
+
+ intel:
+ $(MAKE) all CXX="icpc" OTHERFLAGS="-xN -no-ipo -static"
+
+ intelmulti:
+ $(MAKE) all LAMHCP="icpc" CXX="mpiCC" OTHERFLAGS="-DMULTI -xN -no-ipo"
+
+ multi:
+ $(MAKE) all CXX="mpiCC" OTHERFLAGS="-DMULTI"
+
+ gccp4:
+ $(MAKE) all OTHERFLAGS="-march=pentium4 -fomit-frame-pointer -ffast-math -funroll-all-loops -funsafe-math-optimizations -fpeel-loops --param max-inline-insns-single=100000 --param large-function-growth=100000 --param inline-unit-growth=100000"
+
+ gccp4multi:
+ $(MAKE) all CXX="mpiCC" OTHERFLAGS="-DMULTI -march=pentium4 -fomit-frame-pointer -ffast-math -funroll-all-loops -funsafe-math-optimizations -fpeel-loops --param max-inline-insns-single=100000 --param large-function-growth=100000 --param inline-unit-growth=100000"
+
+ gcc64:
+ $(MAKE) all OTHERFLAGS="-m64 -funroll-all-loops -fomit-frame-pointer -ffast-math"
+
+ profile:
+ $(MAKE) all OTHERFLAGS="-pg -g"
+
+ gccp4profile:
+ $(MAKE) all OTHERFLAGS="-march=pentium4 -ffast-math -funroll-all-loops -funsafe-math-optimizations -fpeel-loops --param max-inline-insns-single=100000 --param large-function-growth=100000 --param inline-unit-growth=100000 -pg -g"
+
+ debug:
+ $(MAKE) all CXXFLAGS="-g -fno-inline -W -Wall"
+
+ debugmulti:
+ $(MAKE) all CXX="mpiCC" OTHERFLAGS="-DMULTI" CXXFLAGS="-g -fno-inline -W -Wall"
+
+ assembly:
+ $(MAKE) all OTHERFLAGS="-Wa,-a,-ad"
+
+ # Generic compile rule. Every object depends on all headers and .ipp
+ # files in the directory, so touching any of them recompiles everything.
+ %.o: %.cpp *.hpp *.ipp
+ $(CXX) $(CXXFLAGS) $(OTHERFLAGS) -c $<
+
+ # Remove the executables, the object files and the generated Defaults.ipp.
+ clean:
+ rm -f contrafold make_coords plot_rna score_prediction *.o Defaults.ipp
diff --git a/src/OptimizationWrapper.hpp b/src/OptimizationWrapper.hpp
new file mode 100644
index 0000000..af33097
--- /dev/null
+++ b/src/OptimizationWrapper.hpp
@@ -0,0 +1,56 @@
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper.hpp
+//////////////////////////////////////////////////////////////////////
+
+#ifndef OPTIMIZATIONWRAPPER_HPP
+#define OPTIMIZATIONWRAPPER_HPP
+
+#include "Config.hpp"
+#include "Utilities.hpp"
+#include "ComputationWrapper.hpp"
+#include "CGOptimizationWrapper.hpp"
+#include "InnerOptimizationWrapper.hpp"
+#include "InnerOptimizationWrapperLBFGS.hpp"
+#include "InnerOptimizationWrapperSubgradientMethod.hpp"
+#if BMRM_AVAILABLE
+#include "InnerOptimizationWrapperBundleMethod.hpp"
+#endif
+#include "OuterOptimizationWrapper.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class OptimizationWrapper
+//
+// Wrapper class for performing optimization.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class OptimizationWrapper // training front end: Train() fits weights for fixed C, LearnHyperparameters() also selects C
+{
+ ComputationWrapper<RealT> &computation_wrapper; // not owned; every getter below forwards to it
+ std::ofstream logfile; // the constructor opens "optimize.log" on this stream
+ int indent; // nesting depth used by PrintMessage(); changed by Indent()/Unindent()
+
+public:
+
+ OptimizationWrapper(ComputationWrapper<RealT> &computation_wrapper);
+ ~OptimizationWrapper();
+ // Train(): w is the starting point on entry and the learned weights on return; C holds the regularization constants.
+ RealT Train(const std::vector<int> &units, std::vector<RealT> &w, const std::vector<RealT> &C);
+ void LearnHyperparameters(std::vector<int> units, std::vector<RealT> &values);
+ // Message helpers: PrintMessage() writes s to std::cerr and to the log file, prefixed once per indent level.
+ void Indent();
+ void Unindent();
+ void PrintMessage(const std::string &s);
+
+ // getters
+ const Options &GetOptions() const { return computation_wrapper.GetOptions(); }
+ const std::vector<FileDescription> &GetDescriptions() const { return computation_wrapper.GetDescriptions(); }
+ InferenceEngine<RealT> &GetInferenceEngine() { return computation_wrapper.GetInferenceEngine(); }
+ ParameterManager<RealT> &GetParameterManager() { return computation_wrapper.GetParameterManager(); }
+ ComputationEngine<RealT> &GetComputationEngine() { return computation_wrapper.GetComputationEngine(); }
+ ComputationWrapper<RealT> &GetComputationWrapper() { return computation_wrapper; }
+};
+
+#include "OptimizationWrapper.ipp"
+
+#endif
diff --git a/src/OptimizationWrapper.ipp b/src/OptimizationWrapper.ipp
new file mode 100644
index 0000000..60b23a6
--- /dev/null
+++ b/src/OptimizationWrapper.ipp
@@ -0,0 +1,362 @@
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper.ipp
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper<RealT>::OptimizationWrapper()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+OptimizationWrapper<RealT>::OptimizationWrapper(ComputationWrapper<RealT> &computation_wrapper) :
+    computation_wrapper(computation_wrapper),
+    logfile("optimize.log"),   // opened for writing in the current working directory
+    indent(0)                  // messages start at nesting depth zero
+{
+    if (!logfile) Error("Could not open log file for writing.");
+}
+
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper<RealT>::~OptimizationWrapper()
+//
+// Destructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+OptimizationWrapper<RealT>::~OptimizationWrapper()
+{
+    if (logfile.is_open()) logfile.close();   // flush and release the log file
+}
+
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper<RealT>::Indent()
+// OptimizationWrapper<RealT>::Unindent()
+// OptimizationWrapper<RealT>::PrintMessage()
+//
+// Print indented message.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void OptimizationWrapper<RealT>::Indent() { ++indent; }   // subsequent PrintMessage() calls are nested one level deeper
+
+template<class RealT>
+void OptimizationWrapper<RealT>::Unindent() { --indent; Assert(indent >= 0, "Cannot unindent!"); }   // depth must never go negative
+
+template<class RealT>
+void OptimizationWrapper<RealT>::PrintMessage(const std::string &s)
+{
+    std::string prefix;   // one indentation unit per nesting level, shared by both sinks
+    for (int level = 0; level < indent; ++level) prefix += " ";
+    std::cerr << prefix << s << std::endl;
+    logfile << prefix << s << std::endl;
+}
+
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper<RealT>::Train()
+//
+// Run optimization algorithm with fixed regularization
+// constants.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT OptimizationWrapper<RealT>::Train(const std::vector<int> &units, // work units handed to the inner optimizer / computation wrapper
+ std::vector<RealT> &w, // in: starting weights; out: learned weights
+ const std::vector<RealT> &C) // regularization constants, expanded below with ExpandParameterGroupValues()
+{ // returns the value produced by the inner optimizer's Minimize(); a repeat call with identical inputs reuses the cache
+ static std::vector<int> cached_units;
+ static std::vector<RealT> cached_initial_w;
+ static std::vector<RealT> cached_C;
+ static std::vector<RealT> cached_learned_w;
+ static RealT cached_f;
+ // NOTE(review): these function-local statics are shared by every OptimizationWrapper<RealT> object; a first call with all-empty inputs matches the empty cache and skips training — confirm that is acceptable
+ if (cached_units != units ||
+ cached_initial_w != w ||
+ cached_C != C)
+ { // cache miss: remember the inputs and optimize starting from w
+ cached_units = units;
+ cached_initial_w = w;
+ cached_C = C;
+ cached_learned_w = w;
+
+ WriteProgressMessage("Starting training...");
+
+#if STOCHASTIC_GRADIENT
+ Error("Not yet implemented.");
+#else
+ // Ce is C passed through ExpandParameterGroupValues(); it is what the inner optimizers receive
+ const std::vector<RealT> Ce = GetParameterManager().ExpandParameterGroupValues(C);
+ const RealT log_base = RealT(GetOptions().GetRealValue("log_base"));
+
+ if (GetOptions().GetBoolValue("viterbi_parsing"))
+ {
+ std::vector<RealT> bias(w.size());
+
+ // the inner solver is selected at compile time: L-BFGS, bundle method, or subgradient method
+
+#if SMOOTH_MAX_MARGIN
+ InnerOptimizationWrapperLBFGS<RealT> inner_optimization_wrapper(this, units, Ce);
+#else
+#if BMRM_AVAILABLE
+ InnerOptimizationWrapperBundleMethod<RealT> inner_optimization_wrapper(this, units, Ce);
+#else
+ InnerOptimizationWrapperSubgradientMethod<RealT> inner_optimization_wrapper(this, units, Ce);
+ PrintMessage("BMRM not available, so defaulting to subgradient algorithm.");
+#endif
+#endif
+ // up to NUM_CCCP_STEPS inner solves; from the second pass on, the bias is recomputed from the gradient at the current weights
+ for (int i = 0; i < NUM_CCCP_STEPS; i++)
+ {
+ PrintMessage(SPrintF("Starting inner loop (pass %d)...", i));
+ if (i > 0) bias = -RealT(NONCONVEX_MULTIPLIER) * computation_wrapper.ComputeGradient(units, cached_learned_w, true, false, log_base);
+ std::cerr << bias << std::endl; // NOTE(review): prints the whole bias vector to stderr on every pass — looks like leftover debug output; confirm
+ inner_optimization_wrapper.LoadBias(bias);
+ cached_f = inner_optimization_wrapper.Minimize(cached_learned_w);
+ GetParameterManager().WriteToFile(SPrintF("optimize.params.stage%d", i+1), cached_learned_w); // one parameter file per pass, numbered from 1
+
+ RealT loss = computation_wrapper.ComputeLoss(units, cached_learned_w, log_base);
+ PrintMessage(SPrintF("Current loss: %lf", loss));
+ if (RealT(NONCONVEX_MULTIPLIER) == RealT(0)) break; // with a zero multiplier the bias update above would be scaled by zero, so stop after one pass
+ }
+
+ }
+ else
+ {
+ InnerOptimizationWrapperLBFGS<RealT> inner_optimization_wrapper(this, units, Ce);
+ // non-Viterbi training: a single L-BFGS solve, no bias term
+ cached_f = inner_optimization_wrapper.Minimize(cached_learned_w);
+ }
+
+#endif
+ }
+ else
+ {
+ PrintMessage ("Using cached result from Train()...");
+ }
+ // hand back the (possibly cached) solution
+ w = cached_learned_w;
+ return cached_f;
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper<RealT>::LearnHyperparameters()
+//
+// Use holdout cross validation in order to estimate
+// regularization constants.
+//////////////////////////////////////////////////////////////////////
+
+#if HYPERPARAMETER_GRID_SEARCH
+template<class RealT>
+void OptimizationWrapper<RealT>::LearnHyperparameters(std::vector<int> units,   // work units; the leading holdout_ratio fraction becomes the holdout set
+                                                      std::vector<RealT> &w)    // in: starting weights; out: weights retrained on all units with the chosen C
+{
+    // split data into training and holdout sets
+    //std::random_shuffle(units.begin(), units.end());
+    // (the shuffle is disabled, so the split follows the order in which units were passed in)
+    const RealT holdout_ratio = GetOptions().GetRealValue("holdout_ratio");
+    const std::vector<int> holdout(units.begin(), units.begin() + int(units.size() * holdout_ratio));
+    const std::vector<int> training(units.begin() + int(units.size() * holdout_ratio), units.end());
+
+    if (training.size() == 0 || holdout.size() == 0)
+        Error("Not enough training samples for cross-validation.");
+
+    // do hyperparameter optimization
+    PrintMessage("Starting hyperparameter optimization...");
+    Indent();
+
+    PrintMessage("List of hyperparameters:");
+    Indent();
+    const std::vector<ParameterGroup> &groups = GetParameterManager().GetParameterGroups();
+    for (size_t i = 0; i < groups.size(); i++)   // "%d" expects an int, so the size_t index is converted explicitly
+        PrintMessage(SPrintF("Parameter group %d: %s", int(i + 1), groups[i].name.c_str()));
+    Unindent();
+
+    RealT best_C = 0, best_holdout_loss = 1e20;
+    std::vector<RealT> C = std::vector<RealT>(GetParameterManager().GetNumParameterGroups());
+
+    // perform cross-validation: one shared value C = 2^k for all groups, k = -5..10
+    for (int k = -5; k <= 10; k++)
+    {
+        // perform training (from a fresh copy of w, so every candidate starts from the same point)
+        std::fill(C.begin(), C.end(), Pow(2.0, RealT(k)));
+        PrintMessage(SPrintF("Performing optimization using C = %lf", double(C[0])));
+        Indent();
+        std::vector<RealT> x(w);
+        const RealT f = Train(training, x, C);
+        Unindent();
+
+        // compute holdout loss
+#if CROSS_VALIDATE_USING_LOGLOSS
+        if (GetOptions().GetBoolValue("viterbi_parsing")) Error("Cannot use logloss for cross validation if Viterbi parsing.");
+        RealT loss = computation_wrapper.ComputeFunction(holdout, x, false, false);   // NOTE(review): OuterOptimizationWrapper passes five arguments (incl. log_base) to ComputeFunction — confirm this overload exists
+#else
+        RealT loss = computation_wrapper.ComputeLoss(holdout, x, true);   // NOTE(review): Train() passes log_base as the third argument; here 'true' is passed — confirm intent
+#endif
+
+        PrintMessage(SPrintF("Using C = %lf, regularized training loss = %lf, holdout loss = %lf", double(C[0]), double(f), double(loss)));
+
+        if (loss < best_holdout_loss)
+        {
+            best_holdout_loss = loss;
+            best_C = C[0];
+        }
+    }
+
+    Unindent();
+    PrintMessage(SPrintF("Chose C = %lf, holdout loss = %lf", double(best_C), double(best_holdout_loss)));
+    std::fill(C.begin(), C.end(), best_C / (1.0 - holdout_ratio));   // presumably rescales C for the larger full data set — TODO confirm
+
+    // now, retrain on all data
+    PrintMessage("Retraining on entire training set...");
+    Indent();
+    Train(units, w, C);
+    Unindent();
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper<RealT>::LearnHyperparameters()
+//
+// Use gradient-based holdout cross-validation in order to estimate
+// regularization constants.
+//////////////////////////////////////////////////////////////////////
+
+#if HYPERPARAMETER_GRADIENT_OPTIMIZATION
+template<class RealT>
+void OptimizationWrapper<RealT>::LearnHyperparameters(std::vector<int> units,   // work units; the leading holdout_ratio fraction becomes the holdout set
+                                                      std::vector<RealT> &w)    // in: starting weights; out: weights retrained on all units with the chosen C
+{
+    // split data into training and holdout sets
+    //std::random_shuffle(units.begin(), units.end());
+    // (the shuffle is disabled, so the split follows the order in which units were passed in)
+    const RealT holdout_ratio = GetOptions().GetRealValue("holdout_ratio");
+    const std::vector<int> holdout(units.begin(), units.begin() + int(units.size() * holdout_ratio));
+    const std::vector<int> training(units.begin() + int(units.size() * holdout_ratio), units.end());
+
+    if (training.size() == 0 || holdout.size() == 0)
+        Error("Not enough training samples for cross-validation.");
+
+    // do hyperparameter optimization
+    PrintMessage("Starting hyperparameter optimization...");
+    Indent();
+
+    PrintMessage("List of hyperparameters:");
+    Indent();
+    const std::vector<ParameterGroup> &groups = GetParameterManager().GetParameterGroups();
+    for (size_t i = 0; i < groups.size(); i++)   // "%d" expects an int, so the size_t index is converted explicitly
+        PrintMessage(SPrintF("Parameter group %d: %s", int(i + 1), groups[i].name.c_str()));
+    Unindent();
+
+    std::vector<RealT> log_C = std::vector<RealT>(GetParameterManager().GetNumParameterGroups(), RealT(INITIAL_LOG_C));
+    // the search runs over log(C), one entry per parameter group, all starting at INITIAL_LOG_C
+    if (GetOptions().GetBoolValue("viterbi_parsing"))
+    {
+        Error("Not yet implemented.");
+    }
+    else
+    {
+        OuterOptimizationWrapper<RealT> outer_optimization_wrapper(this, w, training, holdout);
+        outer_optimization_wrapper.Minimize(log_C);
+    }
+
+    Unindent();
+    std::ostringstream oss;
+    const std::vector<RealT> C = Exp(log_C);
+    oss << "Chose hyperparameters, C = " << C;
+    PrintMessage(oss.str());
+
+    // Now, retrain on all data
+    PrintMessage("Retraining on entire training set...");
+    Indent();
+    Train(units, w, C);
+    Unindent();
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////
+// OptimizationWrapper<RealT>::LearnHyperparameters()
+//
+// Use Bayesian hyperparameter selection algorithm in order to
+// estimate regularization constants.
+//////////////////////////////////////////////////////////////////////
+
+#if HYPERPARAMETER_MAJORIZATION_MINIMIZATION
+template<class RealT>
+void OptimizationWrapper<RealT>::LearnHyperparameters(std::vector<int> units,    // work units used for every training round
+                                                      std::vector<RealT> &w,     // in: starting weights; out: weights from the final retraining
+                                                      RealT holdout_ratio,       // not read anywhere in this variant
+                                                      bool toggle_viterbi)       // enables the extra rescaling of C at the end of each round
+{
+    // do hyperparameter optimization
+    // NOTE(review): this variant takes four parameters, calls a four-argument Train() and reads 'params'; none of these match the class declaration — confirm it still builds when enabled
+    PrintMessage("Starting hyperparameter optimization...");
+    Indent();
+
+    PrintMessage("List of hyperparameters:");
+    Indent();
+    const std::vector<HyperparameterGroup> &groups = params.GetHyperparameterGroups();
+    for (size_t i = 0; i < groups.size(); i++)   // "%d" expects an int, so the size_t index is converted explicitly
+        PrintMessage(SPrintF("Hyperparameter group %d: %s", int(i + 1), groups[i].name.c_str()));
+    Unindent();
+
+    std::vector<RealT> C = std::vector<RealT>(params.GetNumHyperparameterGroups(), 1);
+
+    // iterative relinearization
+
+    for (int iters = 0; iters < NUM_ITERATIVE_RELINEARIZATION_STEPS; iters++)
+    {
+        // show current set of hyperparameters
+
+        PrintMessage("Current hyperparameters:");
+        Indent();
+        const std::vector<HyperparameterGroup> &groups = params.GetHyperparameterGroups();
+        for (size_t i = 0; i < groups.size(); i++)
+            PrintMessage(SPrintF("Hyperparameter group %d (%s): %lf", int(i + 1), groups[i].name.c_str(), double(C[i])));
+        Unindent();
+
+        // perform training
+        // (Ce is taken from C before C is updated below, so the Viterbi adjustment uses this round's constants)
+        std::ostringstream oss;
+        const std::vector<RealT> Ce = params.ExpandHyperparameters(C);
+        oss << "Performing optimization using C = " << C;
+        PrintMessage(oss.str());
+        Indent();
+        std::vector<RealT> x(w);
+        const RealT f = Train(units, x, C, toggle_viterbi);
+        Unindent();
+
+        // compute new hyperparameters
+        // C[g] = ((end - begin + 1) / 2) / (MM_SMOOTHING + 0.5 * sum of x[i]^2 over the group)
+        for (size_t g = 0; g < groups.size(); g++)
+        {
+            RealT numerator = (groups[g].end - groups[g].begin + 1.0) / 2.0;
+            RealT denominator = RealT(MM_SMOOTHING);
+            for (int i = groups[g].begin; i < groups[g].end; i++)
+                denominator += 0.5 * x[i] * x[i];
+            C[g] = numerator / denominator;
+        }
+
+        // adjust for Viterbi mode
+        // (loss = objective minus the regularization term; all C are then divided by units.size() / (MM_SMOOTHING + loss))
+        if (toggle_viterbi)
+        {
+            const RealT loss = f - 0.5 * DotProduct(Ce, x*x);
+            const RealT loss_multiplier = RealT(units.size()) / (RealT(MM_SMOOTHING) + loss);
+            C /= loss_multiplier;
+        }
+    }
+
+    Unindent();
+    std::ostringstream oss;
+    oss << "Chose hyperparameters, C = " << C;
+    PrintMessage(oss.str());
+
+    // now, retrain on all data
+
+    PrintMessage("Retraining on entire training set...");
+    Indent();
+    Train(units, w, C, toggle_viterbi);
+    Unindent();
+}
+#endif
diff --git a/src/Options.cpp b/src/Options.cpp
new file mode 100644
index 0000000..66a0295
--- /dev/null
+++ b/src/Options.cpp
@@ -0,0 +1,131 @@
+//////////////////////////////////////////////////////////////////////
+// Options.cpp
+//////////////////////////////////////////////////////////////////////
+
+#include "Options.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// Options::Options()
+// Options::Options()
+// Options::operator=()
+//
+// Constructor, copy constructor, and assignment operator
+//////////////////////////////////////////////////////////////////////
+
+Options::Options() : mapping()   // a new object holds no configuration entries
+{}
+
+Options::Options(const Options &rhs) :
+    mapping(rhs.mapping.begin(), rhs.mapping.end())   // duplicate every key/value pair held by rhs
+{}
+
+Options &Options::operator=(const Options &rhs)
+{
+    // Assigning an object to itself leaves it untouched.
+    if (this == &rhs) return *this;
+
+    mapping = rhs.mapping;   // replace all entries with a copy of rhs's entries
+    return *this;
+}
+
+
+//////////////////////////////////////////////////////////////////////
+// Options::GetBoolValue()
+//
+// Get boolean value.
+//////////////////////////////////////////////////////////////////////
+
+bool Options::GetBoolValue(const std::string &key) const
+{
+    const std::map<std::string, std::string>::const_iterator entry = mapping.find(key);   // only the literals "true" and "false" are accepted
+    if (mapping.end() == entry) Error("Requested key '%s' not found in configuration.", key.c_str());
+    if (!(entry->second == "true" || entry->second == "false")) Error("Failed to parse boolean value in configuration.");
+    return entry->second.compare("true") == 0;
+}
+
+//////////////////////////////////////////////////////////////////////
+// Options::SetBoolValue()
+//
+// Set boolean value.
+//////////////////////////////////////////////////////////////////////
+
+void Options::SetBoolValue(const std::string &key, bool value)
+{
+    mapping[key].assign(value ? "true" : "false");   // stored in the textual form GetBoolValue() parses
+}
+
+//////////////////////////////////////////////////////////////////////
+// Options::GetIntValue()
+//
+// Get integer value.
+//////////////////////////////////////////////////////////////////////
+
+int Options::GetIntValue(const std::string &key) const
+{
+    const std::map<std::string, std::string>::const_iterator entry = mapping.find(key);
+    if (mapping.end() == entry) Error("Requested key '%s' not found in configuration.", key.c_str());
+    int parsed = 0;   // filled in by ConvertToNumber() from the stored text
+    if (!ConvertToNumber(entry->second, parsed)) Error("Failed to parse integer value in configuration.");
+    return parsed;
+}
+
+//////////////////////////////////////////////////////////////////////
+// Options::SetIntValue()
+//
+// Set integer value.
+//////////////////////////////////////////////////////////////////////
+
+void Options::SetIntValue(const std::string &key, int value)
+{
+    mapping[key].assign(SPrintF("%d", value));   // stored as decimal text; any previous entry for key is replaced
+}
+
+//////////////////////////////////////////////////////////////////////
+// Options::GetRealValue()
+//
+// Get real value.
+//////////////////////////////////////////////////////////////////////
+
+double Options::GetRealValue(const std::string &key) const
+{
+    const std::map<std::string, std::string>::const_iterator entry = mapping.find(key);
+    if (mapping.end() == entry) Error("Requested key '%s' not found in configuration.", key.c_str());
+    double parsed = 0;   // filled in by ConvertToNumber() from the stored text
+    if (!ConvertToNumber(entry->second, parsed)) Error("Failed to parse real value in configuration.");
+    return parsed;
+}
+
+//////////////////////////////////////////////////////////////////////
+// Options::SetRealValue()
+//
+// Set real value.
+//////////////////////////////////////////////////////////////////////
+
+void Options::SetRealValue(const std::string &key, double value)
+{
+    mapping[key] = SPrintF("%.17g", value);   // 17 significant digits round-trip a double; "%lf" kept only six decimals (1e-7 became "0.000000")
+}
+
+//////////////////////////////////////////////////////////////////////
+// Options::GetStringValue()
+//
+// Get string value.
+//////////////////////////////////////////////////////////////////////
+
+const std::string &Options::GetStringValue(const std::string &key) const
+{
+    const std::map<std::string, std::string>::const_iterator entry = mapping.find(key);   // the result refers to the stored value, not a copy
+    if (mapping.end() == entry) Error("Requested key '%s' not found in configuration.", key.c_str());
+    return entry->second;
+}
+
+//////////////////////////////////////////////////////////////////////
+// Options::SetStringValue()
+//
+// Set string value.
+//////////////////////////////////////////////////////////////////////
+
+void Options::SetStringValue(const std::string &key, const std::string &value)
+{
+    mapping[key].assign(value);   // creates the entry if key is new, otherwise overwrites it
+}
diff --git a/src/Options.hpp b/src/Options.hpp
new file mode 100644
index 0000000..390d93e
--- /dev/null
+++ b/src/Options.hpp
@@ -0,0 +1,39 @@
+//////////////////////////////////////////////////////////////////////
+// Options.hpp
+//////////////////////////////////////////////////////////////////////
+
+#ifndef OPTIONS_HPP
+#define OPTIONS_HPP
+
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class Options
+//////////////////////////////////////////////////////////////////////
+
+class Options // string-keyed configuration store; every value is kept as text and converted on access
+{
+ std::map<std::string, std::string> mapping; // key -> textual value
+
+public:
+
+ // constructor, copy constructor, and assignment operator
+ Options();
+ Options(const Options &rhs);
+ Options &operator=(const Options &rhs);
+
+ // getters and setters
+ bool GetBoolValue(const std::string &key) const; // calls Error() if key is missing or the text is neither "true" nor "false"
+ void SetBoolValue(const std::string &key, bool value);
+
+ int GetIntValue(const std::string &key) const; // calls Error() if key is missing or ConvertToNumber() rejects the text
+ void SetIntValue(const std::string &key, int value);
+
+ double GetRealValue(const std::string &key) const; // calls Error() if key is missing or ConvertToNumber() rejects the text
+ void SetRealValue(const std::string &key, double value);
+
+ const std::string &GetStringValue(const std::string &key) const; // calls Error() if key is missing; returns a reference to the stored text
+ void SetStringValue(const std::string &key, const std::string &value);
+};
+
+#endif
diff --git a/src/OuterOptimizationWrapper.hpp b/src/OuterOptimizationWrapper.hpp
new file mode 100644
index 0000000..612971c
--- /dev/null
+++ b/src/OuterOptimizationWrapper.hpp
@@ -0,0 +1,45 @@
+//////////////////////////////////////////////////////////////////////
+// OuterOptimizationWrapper.hpp
+//
+// Outer optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef OUTEROPTIMIZATIONWRAPPER_HPP
+#define OUTEROPTIMIZATIONWRAPPER_HPP
+
+#include "OptimizationWrapper.hpp"
+#include "CGOptimizationWrapper.hpp"
+#include "InnerOptimizationWrapper.hpp"
+#include "OuterOptimizationWrapper.hpp"
+#include "LBFGS.hpp"
+
+template<class RealT>
+class OptimizationWrapper;
+
+//////////////////////////////////////////////////////////////////////
+// class OuterOptimizationWrapper
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class OuterOptimizationWrapper : public LBFGS<RealT> // L-BFGS over log_C; each evaluation retrains on 'training' and scores on 'holdout'
+{
+ OptimizationWrapper<RealT> *optimization_wrapper; // not owned; used for Train(), message output and access to the computation wrapper
+ const std::vector<RealT> initial_w; // copy of the starting weights; every Train() call restarts from it
+ const std::vector<int> training; // units passed to Train()
+ const std::vector<int> holdout; // units on which the outer function and gradient are evaluated
+
+public:
+ OuterOptimizationWrapper(OptimizationWrapper<RealT> *optimization_wrapper,
+ const std::vector<RealT> &initial_w,
+ const std::vector<int> &training,
+ const std::vector<int> &holdout);
+ // Both evaluators take log_C and convert it with Exp() before training.
+ RealT ComputeFunction(const std::vector<RealT> &log_C);
+ void ComputeGradient(std::vector<RealT> &g, const std::vector<RealT> &log_C);
+ void Report(int iteration, const std::vector<RealT> &x, RealT f, RealT step_size);
+ void Report(const std::string &s);
+};
+
+#include "OuterOptimizationWrapper.ipp"
+
+#endif
diff --git a/src/OuterOptimizationWrapper.ipp b/src/OuterOptimizationWrapper.ipp
new file mode 100644
index 0000000..d8b7bd3
--- /dev/null
+++ b/src/OuterOptimizationWrapper.ipp
@@ -0,0 +1,137 @@
+//////////////////////////////////////////////////////////////////////
+// OuterOptimizationWrapper.ipp
+//
+// Implementation of functors needed for optimization.
+//////////////////////////////////////////////////////////////////////
+
+#include "OuterOptimizationWrapper.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// OuterOptimizationWrapper::OuterOptimizationWrapper()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+OuterOptimizationWrapper<RealT>::OuterOptimizationWrapper(OptimizationWrapper<RealT> *optimization_wrapper,
+                                                          const std::vector<RealT> &initial_w,
+                                                          const std::vector<int> &training,
+                                                          const std::vector<int> &holdout) :
+    LBFGS<RealT>(20, 1e-5, 100, 1e-5, 3, 1),      // fixed settings for the outer L-BFGS run
+    optimization_wrapper(optimization_wrapper),   // pointer is stored as-is
+    initial_w(initial_w),                         // the three vectors below are copied
+    training(training),
+    holdout(holdout)
+{}
+
+//////////////////////////////////////////////////////////////////////
+// OuterOptimizationWrapper::ComputeFunction()
+//
+// Compute function for outer iteration.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT OuterOptimizationWrapper<RealT>::ComputeFunction(const std::vector<RealT> &log_C)
+{
+    OptimizationWrapper<RealT> *const opt = optimization_wrapper;
+    const std::vector<RealT> C = Exp(log_C);   // the optimizer works in log space; training needs C itself
+    std::ostringstream banner;
+    banner << "Computing outer function using C = " << C;
+    opt->PrintMessage(banner.str());
+    opt->Indent();
+
+    // w = solution of OPT1 for current C, always restarted from initial_w
+    std::vector<RealT> w(initial_w);
+    opt->PrintMessage("Solving OPT1...");
+    opt->Indent();
+    opt->Train(training, w, C);
+    opt->Unindent();
+    // evaluate the holdout objective at the trained weights
+    const RealT holdout_value = opt->GetComputationWrapper().ComputeFunction(holdout, w, false, true, opt->GetOptions().GetRealValue("log_base"));
+    opt->Unindent();
+    opt->PrintMessage(SPrintF("Finished outer function: %lf", holdout_value));
+    return holdout_value;
+}
+
+//////////////////////////////////////////////////////////////////////
+// OuterOptimizationWrapper::ComputeGradient()
+//
+// Compute the regularized logloss gradient using a particular
+// parameter set and fixed regularization hyperparameters.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void OuterOptimizationWrapper<RealT>::ComputeGradient(std::vector<RealT> &g, const std::vector<RealT> &log_C)
+{
+    // g (out) is cleared and resized to log_C.size(); log_C holds the log of the regularization constants.
+    const std::vector<RealT> C = Exp(log_C);
+    std::ostringstream oss;
+    oss << "Computing outer gradient using C = " << C;
+    optimization_wrapper->PrintMessage(oss.str());
+    optimization_wrapper->Indent();
+
+    // show current set of hyperparameters
+    optimization_wrapper->PrintMessage("Current hyperparameters:");
+    optimization_wrapper->Indent();
+    const std::vector<ParameterGroup> &groups = optimization_wrapper->GetParameterManager().GetParameterGroups();
+    for (size_t i = 0; i < groups.size(); i++)   // "%d"/"%lf" expect int/double, so both arguments are converted explicitly
+        optimization_wrapper->PrintMessage(SPrintF("Hyperparameter group %d (%s): %lf", int(i + 1), groups[i].name.c_str(), double(C[i])));
+    optimization_wrapper->Unindent();
+
+    // w = solution of OPT1 for current C
+    std::vector<RealT> w = initial_w;
+    optimization_wrapper->PrintMessage("Solving OPT1...");
+    optimization_wrapper->Indent();
+    optimization_wrapper->Train(training, w, C);
+    optimization_wrapper->Unindent();
+
+    // compute holdout logloss
+    std::vector<RealT> holdout_gradient = optimization_wrapper->GetComputationWrapper().ComputeGradient(holdout, w, false, true, optimization_wrapper->GetOptions().GetRealValue("log_base"));
+
+    // solve linear system
+    CGOptimizationWrapper<RealT> cg_linear(optimization_wrapper, training, w, C);
+    std::vector<RealT> x(holdout_gradient.size());
+
+    optimization_wrapper->PrintMessage("Solving linear system...");
+    optimization_wrapper->Indent();
+    cg_linear.Minimize(holdout_gradient,x);
+    optimization_wrapper->Unindent();
+
+    // form "B" matrix
+    // B[j][i] = w[j] * C[i] for every parameter j in group i, zero elsewhere (C is reused; it already equals Exp(log_C))
+    std::vector<std::vector<RealT> > B(x.size(), std::vector<RealT>(optimization_wrapper->GetParameterManager().GetNumParameterGroups()));
+    for (size_t i = 0; i < groups.size(); i++)
+        for (int j = groups[i].begin; j < groups[i].end; j++)
+            B[j][i] = w[j] * C[i];
+
+    // compute gradient
+    g.clear();
+    g.resize(log_C.size());
+    for (size_t i = 0; i < B.size(); i++)
+        g -= x[i] * B[i];
+
+    optimization_wrapper->Unindent();
+    optimization_wrapper->PrintMessage(SPrintF("Finished outer gradient: norm = %lf", Norm(g)));
+}
+
+//////////////////////////////////////////////////////////////////////
+// OuterOptimizationWrapper::Report()
+//
+// Routines for printing results and messages from the optimization_wrapper.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void OuterOptimizationWrapper<RealT>::Report(int iteration, const std::vector<RealT> &log_C, RealT f, RealT step_size)
+{
+    std::ostringstream line;   // one progress line per outer iteration
+    line << "Outer iteration " << iteration << ": holdout f = " << f << ", C = " << Exp(log_C);
+    line << ", step length = " << step_size << ", efficiency = " << optimization_wrapper->GetComputationEngine().GetEfficiency() << "%";
+    optimization_wrapper->PrintMessage(line.str());
+}
+
+template<class RealT>
+void OuterOptimizationWrapper<RealT>::Report(const std::string &s)
+{
+    (*optimization_wrapper).PrintMessage(SPrintF("Outer message: %s", s.c_str()));   // forward free-form optimizer text to the shared log
+}
+
diff --git a/src/ParameterManager.hpp b/src/ParameterManager.hpp
new file mode 100644
index 0000000..c293569
--- /dev/null
+++ b/src/ParameterManager.hpp
@@ -0,0 +1,120 @@
+//////////////////////////////////////////////////////////////////////
+// ParameterManager.hpp
+//
+// This class is used for parameter management for models in which
+//
+// (1) the number of logical (free) parameters in the model is not
+// the same as the number of physical parameters stored in
+// memory
+//
+// (2) certain sets of logical parameters are bound together in
+// parameter groups.
+//
+// To accomplish this, the ParameterManager class provides a set of
+// routines for constructing a mapping between logical parameters and
+// physical parameters:
+//
+// AddParameterGroup(group_name)
+// AddParameterMapping(logical_name, physical_ptr)
+//
+// -------------------------------------------------------------------
+//
+// The ParameterManager class provides the following functionality:
+//
+// GetPhysicalParameters(logical_index)
+//
+// return a vector containing pointers to all physical
+// parameters which map to a particular logical_index (or all
+// physical parameters if logical_index == -1)
+//
+// GetLogicalIndex(physical_ptr)
+//
+// return the index of the logical parameter corresponding to
+// physical_ptr
+//
+// GetNumPhysicalParameters()
+//
+// return the number of physical parameters
+//
+// GetNumLogicalParameters()
+//
+// return the number of logical parameters
+//
+//////////////////////////////////////////////////////////////////////
+
+#ifndef PARAMETERMANAGER_HPP
+#define PARAMETERMANAGER_HPP
+
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// struct ParameterGroup
+//
+// Contains the indices of all parameters corresponding to a
+// particular parameter group. In particular, "begin" is the
+// index of the first parameter belonging to the parameter group,
+// and "end" is the index of the element *after* the last parameter
+// belonging to the group.
+//////////////////////////////////////////////////////////////////////
+
+struct ParameterGroup // a named half-open index range [begin, end) of parameters
+{
+ std::string name; // group label
+ int begin, end; // first index of the group, and one past its last index
+
+ ParameterGroup();
+ ParameterGroup(const std::string &name, int begin, int end); // asserts begin <= end
+ ParameterGroup(const ParameterGroup &rhs); // asserts begin <= end on the copied values
+ ParameterGroup &operator=(const ParameterGroup &rhs); // asserts begin <= end unless self-assigned
+};
+
+//////////////////////////////////////////////////////////////////////
+// class ParameterManager
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class ParameterManager // bookkeeping for the mapping between logical parameters, physical parameters and parameter groups
+{
+ std::vector<std::string> names; // presumably one name per logical parameter — TODO confirm against ParameterManager.ipp
+ std::vector<ParameterGroup> groups; // index ranges returned by GetParameterGroups()
+ std::map<std::pair<RealT, RealT> *, int> physical_to_logical; // physical pointer -> logical index; its size is the physical parameter count
+ std::vector<std::vector<std::pair<RealT, RealT> *> > logical_to_physical; // logical index -> physical pointers; its size is the logical parameter count
+ std::map<std::string, int> logical_name_to_index; // logical name -> logical index
+
+public:
+
+ // constructors, assignment operator, and destructor
+ ParameterManager();
+ ParameterManager(const ParameterManager &rhs);
+ ParameterManager &operator= (const ParameterManager &rhs);
+ virtual ~ParameterManager();
+
+ // routines for adding new parameters and parameter groups
+ void ClearParameters();
+ void AddParameterGroup(const std::string &name);
+ void AddParameterMapping(const std::string &logical_name, std::pair<RealT, RealT> *physical_ptr);
+
+ // file input/output
+ void ReadFromFile(const std::string &filename, std::vector<RealT> &values);
+ void WriteToFile(const std::string &filename, const std::vector<RealT> &values);
+
+ // expand a vector of values for each parameter group
+ const std::vector<RealT> ExpandParameterGroupValues(const std::vector<RealT> &values) const;
+
+ // retrieve physical parameters corresponding to a particular logical index
+ std::vector<std::pair<RealT, RealT> *> GetPhysicalParameters(int logical_index) const;
+
+ // return logical index corresponding to a physical parameter
+ int GetLogicalIndex(const std::pair<RealT, RealT> *physical_ptr) const;
+
+ // simple getters
+ const std::vector<std::string> GetNames() const { return names; } // returns a copy, unlike GetParameterGroups() below
+ const std::vector<ParameterGroup> &GetParameterGroups() const { return groups; }
+ size_t GetNumParameterGroups() const { return groups.size(); }
+ size_t GetNumPhysicalParameters() const { return physical_to_logical.size(); }
+ size_t GetNumLogicalParameters() const { return logical_to_physical.size(); }
+};
+
+#include "ParameterManager.ipp"
+
+#endif
diff --git a/src/ParameterManager.ipp b/src/ParameterManager.ipp
new file mode 100644
index 0000000..9f49cab
--- /dev/null
+++ b/src/ParameterManager.ipp
@@ -0,0 +1,268 @@
+//////////////////////////////////////////////////////////////////////
+// ParameterManager.ipp
+//////////////////////////////////////////////////////////////////////
+
+#include "ParameterManager.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// ParameterGroup::ParameterGroup()
+// ParameterGroup::operator=()
+//
+// Constructors and assignment operator.
+//////////////////////////////////////////////////////////////////////
+
+ParameterGroup::ParameterGroup() : name(), begin(0), end(0) {}  // zero the range so a default-constructed group has defined indices and satisfies begin <= end
+
+// Build a named group covering the logical index range [begin, end);
+// the range must not be inverted.
+ParameterGroup::ParameterGroup(const std::string &name, int begin, int end) :
+    name(name), begin(begin), end(end)
+{
+    Assert(begin <= end, "Inconsistent begin and end indices.");
+}
+
+// Copy constructor: duplicates the name and index range of rhs, then
+// re-checks that the copied range is not inverted.
+ParameterGroup::ParameterGroup(const ParameterGroup &rhs) :
+    name(rhs.name), begin(rhs.begin), end(rhs.end)
+{
+    Assert(begin <= end, "Inconsistent begin and end indices.");
+}
+
+// Assignment operator: copies name and index range from rhs (nothing is
+// done on self-assignment) and re-checks that the range is not inverted.
+ParameterGroup &ParameterGroup::operator=(const ParameterGroup &rhs)
+{
+    if (this == &rhs) return *this;
+    name = rhs.name;
+    begin = rhs.begin;
+    end = rhs.end;
+    Assert(begin <= end, "Inconsistent begin and end indices.");
+    return *this;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::ParameterManager()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+// Default constructor: every container starts out empty; parameters are
+// registered afterwards with AddParameterGroup() / AddParameterMapping().
+template<class RealT>
+ParameterManager<RealT>::ParameterManager() :
+    names(), groups(),
+    physical_to_logical(), logical_to_physical(),
+    logical_name_to_index()
+{}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::ParameterManager()
+//
+// Copy constructor.
+//////////////////////////////////////////////////////////////////////
+
+// Copy constructor: member-wise copy. The stored pointers are copied as-is,
+// so the copy refers to the same physical parameter objects as rhs.
+template<class RealT>
+ParameterManager<RealT>::ParameterManager(const ParameterManager &rhs) :
+    names(rhs.names), groups(rhs.groups),
+    physical_to_logical(rhs.physical_to_logical), logical_to_physical(rhs.logical_to_physical),
+    logical_name_to_index(rhs.logical_name_to_index)
+{}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::operator=()
+//
+// Assignment operator.
+//////////////////////////////////////////////////////////////////////
+
+// Assignment operator: member-wise copy from rhs; self-assignment changes
+// nothing. Stored pointers are copied as-is (the pointees are shared).
+template<class RealT>
+ParameterManager<RealT> &ParameterManager<RealT>::operator=(const ParameterManager &rhs)
+{
+    if (this == &rhs) return *this;
+    names = rhs.names;
+    groups = rhs.groups;
+    physical_to_logical = rhs.physical_to_logical;
+    logical_to_physical = rhs.logical_to_physical;
+    logical_name_to_index = rhs.logical_name_to_index;
+    return *this;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::~ParameterManager()
+//
+// Destructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+ParameterManager<RealT>::~ParameterManager()
+{} // nothing is released explicitly here; the member containers clean up after themselves
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::ClearParameters()
+//
+// Clear parameter manager.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ParameterManager<RealT>::ClearParameters()
+{ // empty all five containers, returning the manager to its freshly constructed state
+ names.clear(); // logical parameter names
+ groups.clear(); // parameter groups
+ physical_to_logical.clear(); // physical pointer -> logical index
+ logical_to_physical.clear(); // logical index -> physical pointers
+ logical_name_to_index.clear(); // logical name -> logical index
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::AddParameterGroup()
+//
+// Mark the beginning of a new parameter group.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ParameterManager<RealT>::AddParameterGroup(const std::string &name)
+{ // open a new, initially empty group that starts at the next free logical index
+ groups.push_back(ParameterGroup(name, int(names.size()), int(names.size()))); // begin == end here; AddParameterMapping() advances end for each new logical name
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::AddParameterMapping()
+//
+// Create a mapping from a physical parameter to a logical parameter.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ParameterManager<RealT>::AddParameterMapping(const std::string &logical_name, std::pair<RealT, RealT> *physical_ptr)
+{
+    // a new logical parameter always extends the most recent group, so one must exist (back() on an empty vector is undefined)
+    if (groups.empty()) Error("AddParameterGroup() must be called before AddParameterMapping().");
+    // check if the logical parameter name has been seen before; if not, it receives the next free logical index
+    std::map<std::string,int>::iterator iter = logical_name_to_index.find(logical_name);
+    if (iter == logical_name_to_index.end())
+    {
+        iter = logical_name_to_index.insert(std::make_pair(logical_name, int(names.size()))).first;
+        names.push_back(logical_name);
+        logical_to_physical.push_back(std::vector<std::pair<RealT, RealT> *>());
+        ++(groups.back().end);
+    }
+    // save mapping from physical parameter pointer to logical index, and vice versa
+    physical_to_logical[physical_ptr] = iter->second;
+    logical_to_physical[iter->second].push_back(physical_ptr);
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::ReadFromFile()
+//
+// Read "name value" lines from filename (blank lines and lines starting
+// with '#' are skipped) and store the values in values, ordered like the
+// registered logical names. Duplicate or missing names go to Error().
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ParameterManager<RealT>::ReadFromFile(const std::string &filename, std::vector<RealT> &values)
+{
+    std::map<std::string, RealT> params;
+    std::string s;
+
+    // open parameter file; the name is passed as an argument so that a '%' in it is not read as a format directive
+    std::ifstream infile(filename.c_str());
+    if (infile.fail()) Error("Could not open file \"%s\" for reading.", filename.c_str());
+
+    while (getline(infile, s))
+    {
+        // skip blank lines and comments
+        if (s.length() == 0 || s[0] == '#') continue;
+        // read parameter name and value; lines that do not parse as such are ignored
+        RealT value;
+        std::string name;
+        std::istringstream iss(s);
+        if (iss >> name >> value)
+        {
+            if (params.find(name) != params.end())
+                Error("Parameter file contains a duplicate parameter: %s", name.c_str());
+            params[name] = value;
+        }
+    }
+    infile.close();
+
+    // convert read parameters to vector format; consumed entries are erased so only unknown ones remain
+    values.clear();
+    values.resize(names.size());
+    for (size_t i = 0; i < names.size(); i++)
+    {
+        typename std::map<std::string, RealT>::iterator iter = params.find(names[i]);
+        if (iter == params.end()) Error("Parameter file missing parameter: %s", names[i].c_str());
+        values[i] = iter->second;
+        params.erase(iter);
+    }
+    // print a warning for each extra parameter left over in the file
+    for (typename std::map<std::string, RealT>::iterator iter = params.begin(); iter != params.end(); ++iter)
+        Warning("Parameter file contains extra parameter: %s", iter->first.c_str());
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::WriteToFile()
+//
+// Write one "name value" line per logical parameter to filename (stream precision 10).
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+void ParameterManager<RealT>::WriteToFile(const std::string &filename, const std::vector<RealT> &values)
+{
+    if (values.size() != names.size()) Error("Incorrect number of parameters.");
+    std::ofstream outfile(filename.c_str());
+    if (outfile.fail()) Error("Could not open file \"%s\" for writing.", filename.c_str());  // name passed as data, never as format
+    for (size_t i = 0; i < values.size(); i++)
+        outfile << names[i] << " " << std::setprecision(10) << values[i] << std::endl;
+    outfile.close();
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::ExpandParameterGroupValues()
+//
+// Expand a vector of group values.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+const std::vector<RealT> ParameterManager<RealT>::ExpandParameterGroupValues(const std::vector<RealT> &values) const
+{
+    std::vector<RealT> expanded;
+    if (values.size() != groups.size()) Error("Incorrect number of hyperparameters.");
+    for (size_t i = 0; i < groups.size(); i++)  // values[i] is repeated once per logical index in group i's range [begin, end)
+        for (int j = groups[i].begin; j < groups[i].end; j++)
+            expanded.push_back(values[i]);
+    return expanded;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::GetPhysicalParameters()
+//
+// Retrieve the vector of physical parameters associated with a
+// particular logical parameter.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+std::vector<std::pair<RealT, RealT> *> ParameterManager<RealT>::GetPhysicalParameters(int logical_index) const
+{
+    const bool in_range = (0 <= logical_index && logical_index < int(names.size()));
+    if (!in_range) Error("Requested for invalid logical parameter index: %d", logical_index);
+    return logical_to_physical[logical_index];  // returned by value: the caller gets its own copy of the pointer list
+}
+
+//////////////////////////////////////////////////////////////////////
+// ParameterManager::GetLogicalIndex()
+//
+// Retrieve the logical index for a particular physical parameter.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+int ParameterManager<RealT>::GetLogicalIndex(const std::pair<RealT, RealT> *physical_ptr) const
+{
+    typename std::map<std::pair<RealT, RealT> *, int>::const_iterator iter = physical_to_logical.find(const_cast<std::pair<RealT, RealT> *>(physical_ptr));  // const_cast only to match the map's non-const pointer key type; the pointee is not modified
+    if (iter == physical_to_logical.end()) Error("Request for unknown physical parameter.");
+    return iter->second;
+}
diff --git a/src/PlotRNA.cpp b/src/PlotRNA.cpp
new file mode 100644
index 0000000..2228c6e
--- /dev/null
+++ b/src/PlotRNA.cpp
@@ -0,0 +1,490 @@
+/////////////////////////////////////////////////////////////////
+// PlotRNA.cpp
+//
+// Plot an RNA secondary structure.
+/////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////
+// Header files
+/////////////////////////////////////////////////////////////////
+
+#include "gd.h"
+#include "gdfontmb.h"
+#include "gdfonts.h"
+#include <iostream>
+#include <vector>
+#include <map>
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <cstring>
+#include <cstdio>
+#include <cmath>
+#include "Utilities.hpp"
+#include "SStruct.hpp"
+#include "EncapsulatedPostScript.hpp"
+
+/////////////////////////////////////////////////////////////////
+// Constants
+/////////////////////////////////////////////////////////////////
+
+const int HIGH_CONFIDENCE_COLOR[] = { 0, 0, 0 }; // RGB used when the confidence index is 100
+const int LOW_CONFIDENCE_COLOR[] = { 216, 216, 255 }; // RGB used when the confidence index is 0
+
+const double SCALE = 12; // factor applied to the input coordinates in DrawRNA()
+const int BORDER = 50; // margin added on each side of the drawing
+const int FONT_SIZE = 10; // font size passed to EncapsulatedPostScript
+const double PI = 3.141592653589793238462643383279502884197169399; // used for the angle wrap-around when placing position numbers
+
+/////////////////////////////////////////////////////////////////
+// Types
+/////////////////////////////////////////////////////////////////
+
+struct Point  // a 2-D position (one entry of the coordinates file)
+{
+    Point() : x(0), y(0) {}                    // defaults to the origin
+    Point(double x, double y) : x(x), y(y) {}  // explicit coordinates
+    double x, y;
+};
+
+/////////////////////////////////////////////////////////////////
+// Globals
+/////////////////////////////////////////////////////////////////
+
+int HEADER = 0; // extra vertical space above the drawing; main() sets it to 50 when --title is given
+std::string png_output_filename = ""; // PNG output path (--png); empty means no PNG is written
+std::string eps_output_filename = ""; // EPS output path (--eps); empty means no EPS is written
+
+/////////////////////////////////////////////////////////////////
+// ReadCoords()
+//
+// Read coordinates file.
+/////////////////////////////////////////////////////////////////
+
+std::vector<Point> ReadCoords(const std::string &filename)
+{
+    std::ifstream infile(filename.c_str());
+    if (infile.fail()) Error("Could not read coordinates file.");
+
+    // element 0 is a placeholder so that the first pair read lands at index 1
+    std::vector<Point> points(1, Point());
+    for (double px, py; infile >> px >> py; )
+        points.push_back(Point(px, py));
+
+    infile.close();
+    return points;
+}
+
+/////////////////////////////////////////////////////////////////
+// ReadPosteriors()
+//
+// Read posteriors.
+/////////////////////////////////////////////////////////////////
+
+std::vector<double> ReadPosteriors(const std::string &filename, const SStruct &sstruct)
+{
+ const std::vector<int> &mapping = sstruct.GetMapping();
+ std::vector<double> posteriors(sstruct.GetLength()+1, 1.0); // one slot per position plus slot 0; everything starts at 1.0
+ if (filename == "") return posteriors; // no posteriors file given: keep the all-1.0 default
+
+ std::ifstream infile(filename.c_str());
+ if (infile.fail()) Error("Could not read posteriors file.");
+
+ std::string s;
+ int length = 0; // number of rows accepted so far; row numbers must count 1, 2, 3, ...
+ while (std::getline(infile, s))
+ {
+ std::istringstream iss(s);
+ // each row is: FROM LETTER followed by zero or more TO:VALUE tokens
+ char let;
+ int from;
+ if (iss >> from >> let)
+ {
+ length++;
+ if (from != length) Error("Bad nucleotide numbering in posteriors file.");
+ if (!isalpha(let)) Error("Bad letter in posteriors file.");
+ while (iss >> s) // s is reused to hold each TO:VALUE token of the current row
+ {
+ std::string::size_type idx = s.find(':');
+ if (idx == std::string::npos) Error("Badly formatted line in posteriors file.");
+ int to = atoi(s.substr(0, idx).c_str());
+ if (to < 0) Error("Negative mapping indices not allowed in posteriors file.");
+ double value = atof(s.substr(idx + 1).c_str());
+ if (value < -0.10 || value > 1.10) Error("Invalid value in posteriors file."); // a small overshoot outside [0,1] is tolerated
+ value = Clip(value, 0.0, 1.0); // presumably clamps value into [0,1] -- Clip() is defined elsewhere
+ // both indices are used to subscript mapping and posteriors below
+ if (from < 1 || from > sstruct.GetLength()) Error("Index in posteriors file does not match BPSEQ length.");
+ if (to < 1 || to > sstruct.GetLength()) Error("Index in posteriors file does not match BPSEQ length.");
+ if (mapping[from] == to) posteriors[from] = value; // the listed pair is the one in the structure: record its value
+ if (mapping[to] == from) posteriors[to] = value;
+ if (mapping[from] == 0) posteriors[from] -= value; // mapping 0 (unpaired): subtract each listed pairing value from the initial 1.0
+ if (mapping[to] == 0) posteriors[to] -= value;
+ }
+ }
+ else
+ {
+ Error("Badly formatted line in posteriors file.");
+ }
+ }
+
+ infile.close();
+
+ return posteriors;
+}
+
+/////////////////////////////////////////////////////////////////
+// DrawRNA()
+//
+// Draw RNA secondary structure to the files named by the globals
+// png_output_filename and/or eps_output_filename. coords, posteriors
+// and the structure's sequence/mapping are all indexed from 1;
+// letters are shaded by posterior and every 10th one is numbered.
+/////////////////////////////////////////////////////////////////
+
+void DrawRNA(const SStruct &sstruct,
+             std::vector<Point> coords,
+             const std::vector<double> &posteriors,
+             std::string title)
+{
+    bool toggle_png = (png_output_filename != std::string(""));
+    bool toggle_eps = (eps_output_filename != std::string(""));
+
+    const std::string &sequence = sstruct.GetSequences()[0];
+    const std::vector<int> &mapping = sstruct.GetMapping();
+
+    // every coordinate index below is also used to subscript sequence, mapping and posteriors
+    if (coords.size() > sequence.length() || coords.size() > mapping.size() || coords.size() > posteriors.size())
+        Error("Coordinates file has more entries than the input structure.");
+
+    // transform and recenter coordinates
+    for (size_t i = 1; i < coords.size(); i++)
+        coords[i].y *= -1;
+
+    // bounding box of the flipped coordinates
+    double min_x = std::numeric_limits<double>::infinity();
+    double min_y = std::numeric_limits<double>::infinity();
+    double max_x = -std::numeric_limits<double>::infinity();
+    double max_y = -std::numeric_limits<double>::infinity();
+
+    for (size_t i = 1; i < coords.size(); i++)
+    {
+        min_x = std::min(min_x, coords[i].x);
+        min_y = std::min(min_y, coords[i].y);
+        max_x = std::max(max_x, coords[i].x);
+        max_y = std::max(max_y, coords[i].y);
+    }
+
+    // shift so the minimum sits at 0, apply SCALE, and measure the resulting extent
+    int width = 0;
+    int height = 0;
+    for (size_t i = 1; i < coords.size(); i++)
+    {
+        coords[i].x = std::floor((coords[i].x - min_x) * SCALE);
+        coords[i].y = std::floor((coords[i].y - min_y) * SCALE);
+        width = std::max(width, int(coords[i].x)+1);
+        height = std::max(height, int(coords[i].y)+1);
+    }
+
+    // create image
+
+    int image_width = width + 2*BORDER;
+    int image_height = height + 2*BORDER + HEADER;
+
+    gdImagePtr image = NULL;
+    EncapsulatedPostScript *eps = NULL;
+    std::ofstream out;
+
+    if (toggle_eps)
+    {
+        out.open(eps_output_filename.c_str());
+        if (out.fail()) Error("Could not open file \"%s\" for writing.", eps_output_filename.c_str());
+        eps = new EncapsulatedPostScript(out, image_width, image_height, FONT_SIZE);
+    }
+    if (toggle_png)
+    {
+        image = gdImageCreateTrueColor(image_width, image_height);
+        if (image == NULL) Error("Could not create a %d x %d image.", image_width, image_height);
+    }
+
+    // allocate palette
+
+    int white = 0;
+    int black = 0;
+    std::vector<int> confidence;
+
+    if (toggle_png)
+    {
+        white = gdImageColorAllocate(image, 255, 255, 255);
+        black = gdImageColorAllocate(image, 0, 0, 0);
+
+        confidence.resize(101);  // one shade per confidence index 0..100
+        for (int i = 0; i <= 100; i++)
+        {
+            int r = (LOW_CONFIDENCE_COLOR[0] * (100 - i) + HIGH_CONFIDENCE_COLOR[0] * i) / 100;
+            int g = (LOW_CONFIDENCE_COLOR[1] * (100 - i) + HIGH_CONFIDENCE_COLOR[1] * i) / 100;
+            int b = (LOW_CONFIDENCE_COLOR[2] * (100 - i) + HIGH_CONFIDENCE_COLOR[2] * i) / 100;
+            confidence[i] = gdImageColorAllocate(image, r, g, b);
+        }
+    }
+
+    // set white background
+    if (toggle_png)
+    {
+        gdImageFilledRectangle(image, 0, 0, image_width-1, image_height-1, white);
+    }
+
+    // obtain font
+    gdFontPtr text_font = NULL;
+    gdFontPtr number_font = NULL;
+
+    if (toggle_png)
+    {
+        text_font = gdFontGetMediumBold();
+        number_font = gdFontGetSmall();
+    }
+
+    // draw RNA
+
+    for (size_t i = 1; i < coords.size(); i++)
+    {
+        // choose color: blend between the low and high confidence colours by the clipped posterior
+        double p = posteriors[i];
+        int index = Clip(int(p * 100), 0, 100);
+
+        if (toggle_eps)
+        {
+            eps->SetRGBColor(double((LOW_CONFIDENCE_COLOR[0] * (100 - index) + HIGH_CONFIDENCE_COLOR[0] * index) / 100) / 256.0,
+                             double((LOW_CONFIDENCE_COLOR[1] * (100 - index) + HIGH_CONFIDENCE_COLOR[1] * index) / 100) / 256.0,
+                             double((LOW_CONFIDENCE_COLOR[2] * (100 - index) + HIGH_CONFIDENCE_COLOR[2] * index) / 100) / 256.0);
+        }
+
+        // draw letter
+        int x = BORDER + int(coords[i].x);
+        int y = HEADER + BORDER + int(coords[i].y);
+
+        if (toggle_eps)
+        {
+            eps->DrawString(x, y, SPrintF("%c", sequence[i]), 1, 1);
+        }
+
+        if (toggle_png)
+        {
+            gdImageSetAntiAliased(image, confidence[index]);
+            gdImageChar(image, text_font, x - text_font->w/2, y - text_font->h/2, sequence[i], confidence[index]);
+        }
+
+        // draw bonds (each pair is handled once, at its lower-indexed partner)
+        if (mapping[i] > int(i))
+        {
+            if (mapping[i] >= int(coords.size())) Error("Base-pairing partner of position %d has no coordinates.", int(i));
+            int ox = BORDER + int(coords[mapping[i]].x);
+            int oy = HEADER + BORDER + int(coords[mapping[i]].y);
+
+            char left = toupper(sequence[i]);
+            char right = toupper(sequence[mapping[i]]);
+
+            // draw line for a Watson-Crick pair
+            if (left == 'A' && (right == 'T' || right == 'U') ||
+                left == 'C' && right == 'G' ||
+                left == 'G' && right == 'C' ||
+                (left == 'T' || left == 'U') && right == 'A')
+            {
+                if (toggle_eps)
+                {
+                    eps->DrawLine(0.75*x + 0.25*ox, 0.75*y + 0.25*oy,
+                                  0.25*x + 0.75*ox, 0.25*y + 0.75*oy, 2.0);
+                }
+
+                if (toggle_png)
+                {
+                    gdImageLine(image,
+                                int(0.75*x + 0.25*ox), int(0.75*y + 0.25*oy),
+                                int(0.25*x + 0.75*ox), int(0.25*y + 0.75*oy),
+                                gdAntiAliased);
+                }
+            }
+
+            // draw dot for GU pair
+            else if (left == 'G' && (right == 'T' || right == 'U') ||
+                     (left == 'T' || left == 'U') && right == 'G')
+            {
+                if (toggle_eps)
+                {
+                    eps->DrawCircle(0.5*x + 0.5*ox, 0.5*y + 0.5*oy, 3.0);
+                }
+
+                if (toggle_png)
+                {
+                    gdImageFilledArc(image,
+                                     (int)(0.5*x + 0.5*ox), (int)(0.5*y + 0.5*oy),
+                                     5, 5, 0, 360, gdAntiAliased, gdArc);
+                }
+            }
+
+            // draw non-canonical base-pairings (EPS only; the PNG output leaves them unmarked)
+            else
+            {
+                if (toggle_eps)
+                {
+                    eps->DrawLine(0.75*x + 0.25*ox, 0.75*y + 0.25*oy,
+                                  0.25*x + 0.75*ox, 0.25*y + 0.75*oy, 2.0);
+                }
+            }
+        }
+
+        // reset the EPS colour so that the position number below is black
+        if (toggle_eps)
+        {
+            eps->SetColorBlack();
+        }
+
+        // draw number if needed
+        if (i % 10 == 0)
+        {
+            std::string number = SPrintF ("%d", int(i));  // cast: %d expects int, i is size_t
+            // the label goes 1.5*SCALE away, along the mean of the angles towards the previous and next positions
+            int lx = BORDER + int(coords[i-1].x) - x;
+            int ly = HEADER + BORDER + int(coords[i-1].y) - y;
+            int rx = (i + 1 < coords.size()) ? BORDER + int(coords[i+1].x) - x : -lx;
+            int ry = (i + 1 < coords.size()) ? HEADER + BORDER + int(coords[i+1].y) - y : -ly;
+            double al = std::atan2(double(ly), double(lx));
+            double ar = std::atan2(double(ry), double(rx));
+            if (al > ar) al += 2 * PI;
+            double an = (al + ar) / 2.0;
+            int nx = int(1.5 * SCALE * std::cos(an));
+            int ny = int(1.5 * SCALE * std::sin(an));
+
+            if (toggle_eps)
+            {
+                eps->DrawString(x + nx, y + ny, number, 1, 1);
+            }
+
+            if (toggle_png)
+            {
+                gdImageString(image, number_font,
+                              x + nx - int(number.length()) * number_font->w/2,
+                              y + ny - number_font->h/2,
+                              reinterpret_cast<unsigned char *>(const_cast<char *>(number.c_str())), black);
+            }
+        }
+    }
+
+    if (title != "")
+    {
+        // write title
+        if (toggle_eps)
+        {
+            eps->DrawString(image_width / 2, HEADER/2, title, 1, 1);
+        }
+
+        if (toggle_png)
+        {
+            // titles wider than about 75% of the image are cut and end in "..."
+            int max_chars = int(0.75 * image_width / number_font->w);
+            if (int(title.length()) > max_chars)
+            {
+                title = title.substr(0, std::max(0, max_chars - 3)) + "...";
+            }
+
+            gdImageString(image, number_font, image_width / 2 - int(title.length()) * number_font->w/2,
+                          HEADER/2, reinterpret_cast<unsigned char *>(const_cast<char *>(title.c_str())), black);
+        }
+    }
+
+    if (toggle_eps)
+    {
+        eps->Close();
+        delete eps;  // allocated with new above; released while the stream it was given is still open
+        out.close();
+    }
+
+    if (toggle_png)
+    {
+        // write to output file
+        FILE *pngout = fopen(png_output_filename.c_str(), "wb");
+        if (pngout == NULL) Error("Could not open file \"%s\" for writing.", png_output_filename.c_str());
+        gdImagePng(image, pngout);
+        fclose(pngout);
+        gdImageDestroy(image);
+    }
+}
+
+/////////////////////////////////////////////////////////////////
+// main()
+//
+// Main program.
+/////////////////////////////////////////////////////////////////
+
+int main(int argc, char **argv)
+{
+    if (argc < 4)
+    {
+        std::cerr << std::endl
+                  << "Usage: " << argv[0] << " [OPTION]... BPSEQFILE COORDSFILE" << std::endl
+                  << std::endl
+                  << " where BPSEQFILE is the name of the input BPSEQ file" << std::endl
+                  << " COORDSFILE is the name of the input coordinates file" << std::endl
+                  << std::endl
+                  << "Miscellaneous arguments:" << std::endl
+                  << " --posteriors POSTERIORSFILE is an optional posteriors file" << std::endl
+                  << " --title TITLE is an optional title" << std::endl
+                  << " --eps FILENAME specifies EPS format output" << std::endl
+                  << " --png FILENAME specifies PNG format output" << std::endl
+                  << std::endl;
+        exit(1);
+    }
+
+    // parse arguments: each option consumes the following argument as its value;
+    // anything not starting with '-' is collected as a positional argument
+    std::string posteriors_filename;
+    std::string title;
+
+    std::vector<std::string> default_args;
+    for (int i = 1; i < argc; i++)
+    {
+        if (argv[i][0] == '-')
+        {
+            if (std::string(argv[i]) == "--posteriors")
+            {
+                if (i == argc-1) Error("Argument required after --posteriors.");
+                posteriors_filename = argv[++i];
+            }
+            else if (std::string(argv[i]) == "--title")
+            {
+                if (i == argc-1) Error("Argument required after --title.");
+                title = argv[++i];
+                HEADER = 50;  // reserve room above the drawing for the title
+            }
+            else if (std::string(argv[i]) == "--eps")
+            {
+                if (i == argc-1) Error("Argument required after --eps.");
+                eps_output_filename = argv[++i];
+            }
+            else if (std::string(argv[i]) == "--png")
+            {
+                if (i == argc-1) Error("Argument required after --png.");
+                png_output_filename = argv[++i];
+            }
+            else
+            {
+                std::cerr << "Unknown argument: " << argv[i] << std::endl;
+                exit (1);
+            }
+        }
+        else
+        {
+            default_args.push_back(argv[i]);
+        }
+    }
+
+    if (default_args.size() != 2) Error("Incorrect number of arguments.");
+    if (eps_output_filename == std::string("") &&
+        png_output_filename == std::string(""))
+        Error("At least one output format (eps, png) must be specified.");
+
+    SStruct sstruct(default_args[0]);
+    std::vector<Point> coords = ReadCoords(default_args[1]);
+    std::vector<double> posteriors = ReadPosteriors(posteriors_filename, sstruct);
+    DrawRNA(sstruct, coords, posteriors, title);
+    return 0;
+}
diff --git a/src/SStruct.cpp b/src/SStruct.cpp
new file mode 100644
index 0000000..914bb73
--- /dev/null
+++ b/src/SStruct.cpp
@@ -0,0 +1,641 @@
+//////////////////////////////////////////////////////////////////////
+// SStruct.cpp
+//////////////////////////////////////////////////////////////////////
+
+#include "SStruct.hpp"
+
+enum FileFormat // result of SStruct::AnalyzeFormat(), decided from the first non-blank line of the file
+{
+ FileFormat_FASTA, // that line starts with '>'
+ FileFormat_BPSEQ, // that line reads as: number, single character, number
+ FileFormat_RAW // anything else
+};
+
+const int SStruct::UNPAIRED = 0; // mapping value stored for '.' in a parenthesized structure
+const int SStruct::UNKNOWN = -1; // mapping value for positions with no pairing information ('?', or no structure given)
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::SStruct()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+SStruct::SStruct()
+{} // no explicit initialisation: all members are default-constructed
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::SStruct()
+//
+// Create object from file.
+//////////////////////////////////////////////////////////////////////
+
+SStruct::SStruct(const std::string &filename)
+{
+ Load(filename); // Load() detects the file format, reads the file, filters the sequences and validates the mapping
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::Load()
+//
+// Load from file. Attempt to detect the format of the file
+// automatically.
+//////////////////////////////////////////////////////////////////////
+
+void SStruct::Load(const std::string &filename)
+{
+    // auto-detect file format and dispatch to the matching loader
+    const int format = AnalyzeFormat(filename);
+    if (format == FileFormat_FASTA) LoadFASTA(filename);
+    else if (format == FileFormat_RAW) LoadRAW(filename);
+    else if (format == FileFormat_BPSEQ) LoadBPSEQ(filename);
+    else Error("Unable to determine file type.");
+
+    // perform character conversions on every loaded sequence
+    for (std::vector<std::string>::iterator it = sequences.begin(); it != sequences.end(); ++it)
+    {
+        *it = FilterSequence(*it);
+    }
+
+    // error-checking
+    ValidateMapping(mapping);
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::AnalyzeFormat()
+//
+// Determine file format.
+//////////////////////////////////////////////////////////////////////
+
+int SStruct::AnalyzeFormat(const std::string &filename) const
+{
+    std::ifstream data(filename.c_str());
+    if (data.fail()) Error("Unable to open input file: %s", filename.c_str());  // name passed as data, never as format
+
+    // look for first non-blank line (s stays empty if the file has none)
+    std::string s;
+    while (std::getline(data, s))
+        if (s.length() > 0) break;
+
+    // analyze to determine file format; the result is returned as a FileFormat value
+    FileFormat format;
+    if (!s.empty() && s[0] == '>')
+        format = FileFormat_FASTA;
+    else
+    {
+        // a BPSEQ row reads as "number, single character, number"; everything else is treated as raw
+        std::istringstream iss(s);
+        int number;
+        std::string i, c, j;
+        if ((iss >> i >> c >> j) &&
+            ConvertToNumber(i, number) &&
+            c.length() == 1 &&
+            ConvertToNumber(j, number))
+        {
+            format = FileFormat_BPSEQ;
+        }
+        else
+            format = FileFormat_RAW;
+    }
+
+    data.close();
+    return format;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::LoadFASTA()
+//
+// Create object from a FASTA file. Assumes that the data file has a
+// FASTA format. Optionally, a parenthesized base-pairing structure
+// may be provided as one of the sequences in the file.
+//////////////////////////////////////////////////////////////////////
+
+void SStruct::LoadFASTA(const std::string &filename)
+{
+    // clear any previous data
+    std::vector<std::string>().swap(names);
+    std::vector<std::string>().swap(sequences);
+    std::vector<int>().swap(mapping);
+
+    // open file for reading
+    std::ifstream data(filename.c_str());
+    if (data.fail()) Error("Unable to open input file: %s", filename.c_str());
+
+    // process sequences
+    std::string s;
+    while (std::getline(data, s))
+    {
+        s = Trim(s);
+        if (s.length() == 0) continue;
+
+        // check for MFA header; every record starts with the '@' placeholder at index 0
+        if (s[0] == '>')
+        {
+            names.push_back(s.substr(1));
+            sequences.push_back("@");
+        }
+
+        // otherwise process sequence data
+        else
+        {
+            if (sequences.size() == 0) Error("Expected header for FASTA file: %s", filename.c_str());
+            for (size_t i = 0; i < s.length(); i++)
+            {
+                if (isspace(static_cast<unsigned char>(s[i]))) continue;  // <cctype> needs an unsigned char value
+                sequences.back() += s[i];
+            }
+        }
+    }
+
+    // sanity-checks
+    if (sequences.size() == 0) Error("No sequences read.");
+    if (sequences[0].length() == 1) Error("Zero-length sequence read.");
+    for (size_t i = 1; i < sequences.size(); i++)
+        if (sequences[i].length() != sequences[0].length())
+            Error("Not all sequences have the same length.");
+
+    // determine if any of the sequences could be a consensus sequence
+    bool consensus_found = false;
+    size_t i = 0;
+    while (i < sequences.size())
+    {
+        // check for alphabetic characters
+        bool is_consensus = true;
+        for (size_t j = 1; is_consensus && j < sequences[i].length(); j++)
+            if (isalpha(static_cast<unsigned char>(sequences[i][j]))) is_consensus = false;
+
+        // extract consensus mapping
+        if (is_consensus)
+        {
+            if (consensus_found)
+                Error("More than one consensus base-pairing structure found.");
+            else
+            {
+                mapping = ConvertParensToMapping(FilterParens(sequences[i]));
+                sequences.erase(sequences.begin() + i);
+                names.erase(names.begin() + i);
+                consensus_found = true;
+                continue;
+            }
+        }
+        i++;
+    }
+
+    // the structure record was removed above; at least one real sequence must remain
+    if (sequences.size() == 0) Error("Only a base-pairing structure was found; no sequences read.");
+
+    // supply empty mapping if none found
+    if (!consensus_found) mapping = std::vector<int>(sequences[0].length(), UNKNOWN);
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::LoadRAW()
+//
+// Create object from raw (unformatted) file. Assumes that exactly
+// one sequence is provided, with no secondary structure.
+//////////////////////////////////////////////////////////////////////
+
+void SStruct::LoadRAW(const std::string &filename)
+{
+    // start from a clean slate by swapping each member with an empty temporary
+    std::vector<std::string>().swap(names);
+    std::vector<std::string>().swap(sequences);
+    std::vector<int>().swap(mapping);
+
+    // the single sequence is named after its file and begins with the '@' placeholder
+    names.push_back(filename);
+    sequences.push_back("@");
+    std::string &residues = sequences.back();
+
+    // open file for reading
+    std::ifstream data(filename.c_str());
+    if (data.fail()) Error("Unable to open input file: %s", filename.c_str());
+
+    // append every non-whitespace character of every line
+    std::string line;
+    while (std::getline(data, line))
+    {
+        for (std::string::const_iterator ch = line.begin(); ch != line.end(); ++ch)
+        {
+            if (!isspace(*ch)) residues += *ch;
+        }
+    }
+
+    // only the placeholder present means the file held no sequence data
+    if (residues.length() == 1) Error("Zero-length sequence read.");
+
+    // no structure is given: every slot of the mapping becomes UNKNOWN
+    mapping.resize(residues.length(), UNKNOWN);
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::LoadBPSEQ()
+//
+// Create object from BPSEQ file. Assumes that exactly one sequence
+// is provided. Base-pairings in the file may contain pseudoknots.
+// Unpaired nucleotides should be annotated with base-pairing '0', and
+// nucleotides with no known pairing should be annotated with
+// base-pairing '-1'.
+//////////////////////////////////////////////////////////////////////
+
+void SStruct::LoadBPSEQ(const std::string &filename)
+{
+ // clear any previous data
+ std::vector<std::string>().swap(names);
+ std::vector<std::string>().swap(sequences);
+ std::vector<int>().swap(mapping);
+
+ // initialize: index 0 of both the sequence and the mapping is a placeholder
+ names.push_back(filename);
+ sequences.push_back("@");
+ mapping.push_back(UNKNOWN);
+
+ // open file
+ std::ifstream data(filename.c_str());
+ if (data.fail()) Error("Unable to open input file: %s", filename.c_str());
+
+ // process file as a stream of whitespace-separated tokens, three per row
+ std::string token;
+ int row = 0; // number of the last row accepted
+ while (data >> token)
+ {
+ // read row
+ int index = 0;
+ if (!ConvertToNumber(token, index)) Error("Could not read row number: %s", filename.c_str());
+ if (index <= 0) Error("Row numbers must be positive: %s", filename.c_str());
+ if (index != row+1) Error("Rows of BPSEQ file must occur in increasing order: %s", filename.c_str()); // rows must count 1, 2, 3, ... without gaps
+ row = index;
+
+ // read sequence letter
+ if (!(data >> token)) Error("Expected sequence letter after row number: %s", filename.c_str());
+ if (token.length() != 1) Error("Expected sequence letter after row number: %s", filename.c_str());
+ char ch = token[0];
+
+ // read mapping
+ int maps_to = 0;
+ if (!(data >> token)) Error("Expected mapping after sequence letter: %s", filename.c_str());
+ if (!ConvertToNumber(token, maps_to)) Error("Could not read matching row number: %s", filename.c_str());
+ if (maps_to < -1) Error("Matching row numbers must be greater than or equal to -1: %s", filename.c_str()); // -1 (UNKNOWN) and 0 (UNPAIRED) are accepted; no upper bound is checked here
+
+ sequences.back().push_back(ch);
+ mapping.push_back(maps_to);
+ }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::SStruct()
+//
+// Create object from existing SStruct object.
+//////////////////////////////////////////////////////////////////////
+
+// Copy constructor: member-wise copy of names, sequences and mapping.
+SStruct::SStruct(const SStruct &rhs) :
+    names(rhs.names), sequences(rhs.sequences),
+    mapping(rhs.mapping)
+{}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::~SStruct()
+//
+// Destructor.
+//////////////////////////////////////////////////////////////////////
+
+SStruct::~SStruct()
+{} // nothing is released explicitly here; the members clean up after themselves
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::operator=
+//
+// Assignment operator.
+//////////////////////////////////////////////////////////////////////
+
+const SStruct &SStruct::operator=(const SStruct &rhs)
+{
+    // self-assignment leaves the object untouched
+    if (this == &rhs) return *this;
+
+    names = rhs.names;
+    sequences = rhs.sequences;
+    mapping = rhs.mapping;
+
+    return *this;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::FilterSequence()
+//
+// Perform character conversions to put the RNA sequence in a standard
+// format.
+//////////////////////////////////////////////////////////////////////
+
+std::string SStruct::FilterSequence(std::string sequence) const
+{
+    if (sequence.empty() || sequence[0] != '@') Error("Improperly formatted sequence.");
+
+    for (size_t i = 1; i < sequence.length(); i++)
+    {
+        // the <cctype> functions are only defined for values representable as unsigned char
+        const unsigned char raw = static_cast<unsigned char>(sequence[i]);
+        bool uppercase = isupper(raw) != 0;
+        char c = char(tolower(raw));
+        switch (c)
+        {
+            case '.': c = '-'; break;
+            case 't': c = 'u'; break;
+            case '-': case 'a': case 'c': case 'g': case 'u': break;
+            default:
+                if (isalpha(static_cast<unsigned char>(c)))
+                    c = 'n';
+                else
+                    Error("Unexpected character '%c' in sequence.", c);
+                break;
+        }
+        if (uppercase) c = char(toupper(static_cast<unsigned char>(c)));  // restore the case of the input letter
+        sequence[i] = c;
+    }
+
+    return sequence;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::FilterParens()
+//
+// Perform character conversions as needed.
+//////////////////////////////////////////////////////////////////////
+
+// Normalize a parenthesized structure string (index 0 must hold '@'):
+// '-' becomes '.', while '?', '.', '(' and ')' pass through unchanged.
+// Any other character is reported via Error().
+std::string SStruct::FilterParens(std::string sequence) const
+{
+    if (sequence[0] != '@') Error("Improperly formatted sequence.");
+
+    for (size_t pos = 1; pos < sequence.length(); ++pos)
+    {
+        const char symbol = sequence[pos];
+
+        if (symbol == '-')
+            sequence[pos] = '.';
+        else if (symbol != '?' && symbol != '.' && symbol != '(' && symbol != ')')
+            Error("Unexpected character '%c' in parenthesized structure.", symbol);
+    }
+
+    return sequence;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::ConvertParensToMapping()
+//
+// Convert a parenthesized string to a mapping. No pseudoknots
+// allowed.
+//////////////////////////////////////////////////////////////////////
+
+// Translate a parenthesized string (index 0 holds '@') into a pairing vector
+// of the same length: '.' -> UNPAIRED, '?' -> UNKNOWN, and each ')' is paired
+// with the most recent unmatched '('.  Unbalanced parentheses and unexpected
+// characters are reported via Error().
+std::vector<int> SStruct::ConvertParensToMapping(const std::string &parens) const
+{
+    std::vector<int> result(parens.length(), UNKNOWN);   // slot 0 keeps UNKNOWN
+    std::vector<int> open_positions;                     // indices of unmatched '('
+
+    Assert(parens[0] == '@', "Invalid parenthesized string.");
+
+    const int length = int(parens.length());
+    for (int pos = 1; pos < length; ++pos)
+    {
+        const char symbol = parens[pos];
+
+        if (symbol == '?')
+        {
+            // pairing not known: leave the initial UNKNOWN in place
+        }
+        else if (symbol == '.')
+        {
+            result[pos] = UNPAIRED;
+        }
+        else if (symbol == '(')
+        {
+            open_positions.push_back(pos);
+        }
+        else if (symbol == ')')
+        {
+            if (open_positions.size() == 0) Error("Parentheses mismatch.");
+            result[pos] = open_positions.back();
+            result[open_positions.back()] = pos;
+            open_positions.pop_back();
+        }
+        else
+        {
+            Error("Unexpected character '%c' in parenthesized structure.", symbol);
+        }
+    }
+    if (open_positions.size() != 0) Error("Parentheses mismatch.");
+
+    return result;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::ConvertMappingToParens()
+//
+// Convert a mapping to a parenthesized string. No pseudoknots
+// allowed.
+//////////////////////////////////////////////////////////////////////
+
+// Render a pairing vector as a parenthesized string that starts with '@':
+// UNKNOWN -> '?', UNPAIRED -> '.', partner further right -> '(', partner
+// further left -> ')'.  A position paired with itself is reported via Error().
+std::string SStruct::ConvertMappingToParens(const std::vector<int> &mapping) const
+{
+    // NOTE(review): ContainsPseudoknots() takes no argument and inspects the
+    // object's own mapping, not the vector passed in here -- confirm intended.
+    Assert(!ContainsPseudoknots(), "Should not attempt to convert a mapping with pseudoknots.");
+
+    std::string parens(1, '@');
+    const int length = int(mapping.size());
+
+    for (int pos = 1; pos < length; ++pos)
+    {
+        const int partner = mapping[pos];
+
+        if (partner == UNKNOWN)       parens.push_back('?');
+        else if (partner == UNPAIRED) parens.push_back('.');
+        else if (partner > pos)       parens.push_back('(');
+        else if (partner < pos)       parens.push_back(')');
+        else                          Error("Invalid structure.");
+    }
+
+    return parens;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::ValidateMapping()
+//
+// Check that a std::vector<int> represents a valid secondary
+// structure mapping. Pseudoknots are allowed.
+//////////////////////////////////////////////////////////////////////
+
+// Check that a vector encodes a legal (possibly pseudoknotted) structure:
+// slot 0 holds UNKNOWN, and every paired position names an in-range partner
+// that names it back and is not the position itself.  Each violation is
+// reported via Error().
+void SStruct::ValidateMapping(const std::vector<int> &mapping) const
+{
+    if (mapping.size() == 0 || mapping[0] != UNKNOWN) Error("Invalid mapping.");
+
+    const int length = int(mapping.size());
+    for (int pos = 1; pos < length; ++pos)
+    {
+        const int partner = mapping[pos];
+        const bool paired = (partner != UNPAIRED && partner != UNKNOWN);
+        if (!paired) continue;
+
+        if (partner < 1 || partner >= length)
+            Error("Position %d of sequence maps to invalid position.", pos);
+        if (mapping[partner] != pos)
+            Error("Positions %d and %d of sequence do not map to each other.", pos, partner);
+        if (partner == pos)
+            Error("Position %d of sequence maps to itself.", pos);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::ContainsPseudoknots()
+//
+// Check if secondary structure contains pseudoknots.
+//////////////////////////////////////////////////////////////////////
+
+// Report whether the stored mapping contains crossing (pseudoknotted) pairs.
+//
+// Positions are scanned left to right while a stack records the left ends of
+// pairs that are still open; a right end must close the most recently opened
+// pair.  Returns true at the first right end that does not, and false when
+// every pair nests.  Self-paired positions and pairs left open at the end
+// are reported via Error().
+bool SStruct::ContainsPseudoknots() const
+{
+    std::vector<int> stack;   // left ends whose partner has not been reached yet
+
+    for (int i = 1; i < int(mapping.size()); i++)
+    {
+        if (mapping[i] == UNPAIRED || mapping[i] == UNKNOWN)
+            continue;
+        if (mapping[i] > i)
+            stack.push_back(i);
+        else if (mapping[i] < i)
+        {
+            // A right end with no open pair means the mapping is not
+            // symmetric.  Calling back() on an empty vector is undefined,
+            // so report the broken structure instead.
+            if (stack.empty())
+            {
+                Error("Invalid structure: bad pairings found.");
+                return true;   // only reached if Error() returns to the caller
+            }
+
+            if (stack.back() == mapping[i])
+                stack.pop_back();
+            else
+                return true;
+        }
+        else
+            Error("Invalid structure: positions may not map to themselves.");
+    }
+    if (stack.size() != 0) Error("Invalid structure: bad pairings found.");
+
+    return false;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::RemoveNoncomplementaryPairings()
+//
+// Remove all non-{AU,CG,GU} pairings from mapping.
+//////////////////////////////////////////////////////////////////////
+
+// For sequence number seq, mark both ends of every pair as UNPAIRED when
+// IsComplementary() rejects the two characters at those positions.
+void SStruct::RemoveNoncomplementaryPairings(const int seq)
+{
+    if (seq < 0 || seq >= int(sequences.size())) Error("Reference to invalid sequence.");
+    Assert(sequences[seq].length() == mapping.size(), "Inconsistent lengths.");
+
+    const int length = int(mapping.size());
+    for (int left = 1; left < length; ++left)
+    {
+        const int right = mapping[left];
+
+        // handle each pair once, from its left end
+        if (right <= left) continue;
+        if (IsComplementary(sequences[seq][left], sequences[seq][right])) continue;
+
+        mapping[right] = UNPAIRED;
+        mapping[left] = UNPAIRED;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::WriteBPSEQ()
+//
+// Write sequence in BPSEQ format. The BPSEQ format can only handle
+// single sequences, so it will only print out the sequence "seq".
+//////////////////////////////////////////////////////////////////////
+
+// Emit one line per position (starting at 1) for sequence number seq:
+// the position, the character at that position, and its mapping value.
+void SStruct::WriteBPSEQ(std::ostream &outfile, const int seq) const
+{
+    if (seq < 0 || seq >= int(sequences.size())) Error("Reference to invalid sequence.");
+    Assert(sequences[seq].length() == mapping.size(), "Inconsistent lengths.");
+
+    for (size_t pos = 1; pos < mapping.size(); ++pos)
+    {
+        outfile << pos << ' ' << sequences[seq][pos] << ' ' << mapping[pos];
+        outfile << std::endl;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::WriteParens()
+//
+// Write sequence in parenthesized format. This routine assumes that
+// the structure does not contain pseudoknots. All sequences are
+// printed.
+//////////////////////////////////////////////////////////////////////
+
+// Write every sequence as a ">name" line followed by its characters, then a
+// ">structure" record holding the parenthesized form of the mapping.  The
+// placeholder at index 0 is dropped from each printed string.  Structures
+// with pseudoknots are rejected via Error().
+void SStruct::WriteParens(std::ostream &outfile) const
+{
+    if (ContainsPseudoknots()) Error("Cannot write structure containing pseudoknots using parenthesized format.");
+
+    // sequence records
+    const size_t num_sequences = sequences.size();
+    for (size_t idx = 0; idx < num_sequences; ++idx)
+    {
+        outfile << ">" << names[idx] << std::endl;
+        outfile << sequences[idx].substr(1) << std::endl;
+    }
+
+    // structure record (the header line goes out before the conversion runs)
+    outfile << ">structure" << std::endl;
+    const std::string parens = ConvertMappingToParens(mapping);
+    outfile << parens.substr(1) << std::endl;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::ComputePercentIdentity()
+//
+// Compute average pairwise percent identity for the alignment.
+// The pairwise PID = # of identities / MIN(len1, len2).
+//////////////////////////////////////////////////////////////////////
+
+// Average, over all unordered pairs of sequences, of
+//     (# columns where both hold the same letter, ignoring case)
+//         / min(# letters in first, # letters in second),
+// where a pair with no letters in one of its sequences contributes 0.
+double SStruct::ComputePercentIdentity() const
+{
+    const size_t num_sequences = sequences.size();
+    double total = 0.0;
+
+    for (size_t a = 0; a < num_sequences; ++a)
+    {
+        const std::string &first = sequences[a];
+
+        for (size_t b = a + 1; b < num_sequences; ++b)
+        {
+            const std::string &second = sequences[b];
+
+            int shared = 0;
+            int first_letters = 0;
+            int second_letters = 0;
+
+            // columns are indexed by the first sequence's length
+            for (size_t col = 0; col < first.length(); ++col)
+            {
+                const bool first_is_letter = isalpha(first[col]);
+                if (first_is_letter) ++first_letters;
+                if (isalpha(second[col])) ++second_letters;
+                if (first_is_letter && toupper(first[col]) == toupper(second[col])) ++shared;
+            }
+
+            const int shorter = std::min(first_letters, second_letters);
+            if (shorter != 0) total += double(shared) / shorter;
+        }
+    }
+
+    // NOTE(review): with fewer than two sequences the pair count below is
+    // zero, so the result is 0.0 / 0 -- confirm callers expect that.
+    return total / (num_sequences * (num_sequences - 1) / 2);
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::ComputePositionBasedSequenceWeights()
+//
+// Compute sequence weights according to:
+// Henikoff, S., and Henikoff J. 1994. Position-based
+// sequence weights. J Mol Biol 243(4):574-578.
+//////////////////////////////////////////////////////////////////////
+
+// For every column (starting at 1) each sequence earns
+//     1 / (number of distinct characters in the column
+//          * number of sequences sharing this sequence's character),
+// and the per-sequence totals are finally divided by their sum.
+std::vector<double> SStruct::ComputePositionBasedSequenceWeights() const
+{
+    const size_t num_sequences = sequences.size();
+    std::vector<double> weights(num_sequences, 0.0);
+    std::vector<int> counts(256);
+
+    for (size_t col = 1; col < sequences[0].length(); ++col)
+    {
+        // tally the characters appearing in this column
+        std::fill(counts.begin(), counts.end(), 0);
+        int diversity = 0;
+        for (size_t row = 0; row < num_sequences; ++row)
+        {
+            int &tally = counts[BYTE(sequences[row][col])];
+            if (tally == 0) ++diversity;
+            ++tally;
+        }
+
+        // share the column's unit weight among the sequences
+        for (size_t row = 0; row < num_sequences; ++row)
+            weights[row] += 1.0 / (diversity * counts[BYTE(sequences[row][col])]);
+    }
+
+    weights /= Sum(weights);
+    return weights;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SStruct::SetMapping()
+//
+// Set secondary structure mapping.
+//////////////////////////////////////////////////////////////////////
+
+// Replace the stored secondary-structure mapping.
+//
+// The candidate is checked with ValidateMapping() before it is stored, so a
+// vector that fails validation never overwrites the current mapping.
+void SStruct::SetMapping(const std::vector<int> &mapping)
+{
+    ValidateMapping(mapping);
+    this->mapping = mapping;   // the parameter shadows the member, hence this->
+}
diff --git a/src/SStruct.hpp b/src/SStruct.hpp
new file mode 100644
index 0000000..db61a89
--- /dev/null
+++ b/src/SStruct.hpp
@@ -0,0 +1,99 @@
+//////////////////////////////////////////////////////////////////////
+// SStruct.hpp
+//
+// This is a class for reading and writing of RNA secondary
+// structures. The file formats supported include
+//
+// (1) BPSEQ
+// (2) FASTA
+// (3) plain text (raw)
+//////////////////////////////////////////////////////////////////////
+
+#ifndef SSTRUCT_HPP
+#define SSTRUCT_HPP
+
+#include <fstream>
+#include <vector>
+#include <string>
+#include <sstream>
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// class SStruct
+//////////////////////////////////////////////////////////////////////
+
+// Holds one or more named sequences together with a single secondary
+// structure mapping, and provides loading, validation and output routines.
+class SStruct
+{
+ // names[k] is printed as the header of sequences[k] by WriteParens()
+ std::vector<std::string> names;
+ std::vector<std::string> sequences;
+ // mapping[i] is the partner of position i, or UNPAIRED / UNKNOWN; index 0
+ // is a placeholder (GetLength() reports mapping.size()-1)
+ std::vector<int> mapping;
+
+ // automatic file format detection
+ int AnalyzeFormat(const std::string &filename) const;
+
+ // load file of a particular file format
+ void LoadFASTA(const std::string &filename);
+ void LoadRAW(const std::string &filename);
+ void LoadBPSEQ(const std::string &filename);
+
+ // perform standard character conversions for RNA sequence and structures
+ std::string FilterSequence(std::string sequence) const;
+ std::string FilterParens(std::string sequence) const;
+
+ // convert a pseudoknot-free parenthesized structure to a mapping and back
+ std::vector<int> ConvertParensToMapping(const std::string &parens) const;
+ std::string ConvertMappingToParens(const std::vector<int> &mapping) const;
+
+ // check that a (possibly pseudoknotted) mapping is valid
+ void ValidateMapping(const std::vector<int> &mapping) const;
+
+public:
+
+ // integer constants used to identify nucleotides which are either
+ // unpaired or whose pairing is not known
+ static const int UNPAIRED;
+ static const int UNKNOWN;
+
+ // constructor and destructor
+ SStruct();
+ SStruct(const std::string &filename);
+ SStruct(const SStruct &rhs);
+ ~SStruct();
+
+ // load sequence and structure from file
+ void Load(const std::string &filename);
+
+ // assignment operator (returns a const reference to *this)
+ const SStruct& operator=(const SStruct &rhs);
+
+ // check for pseudoknots
+ bool ContainsPseudoknots() const;
+
+ // remove noncomplementary base-pairs
+ void RemoveNoncomplementaryPairings(const int seq = 0);
+
+ // output in various formats
+ void WriteBPSEQ(std::ostream &outfile, const int seq = 0) const;
+ void WriteParens(std::ostream &outfile) const;
+
+ // compute alignment percent identity
+ double ComputePercentIdentity() const;
+
+ // compute position-based sequence weights
+ std::vector<double> ComputePositionBasedSequenceWeights() const;
+
+ // set mapping
+ void SetMapping(const std::vector<int> &mapping);
+
+ //////////////////////////////////////////////////////////////////////
+ // Getters
+ //////////////////////////////////////////////////////////////////////
+
+ const std::vector<std::string> &GetNames() const { return names; }
+ const std::vector<std::string> &GetSequences() const { return sequences; }
+ const std::vector<int> &GetMapping() const { return mapping; }
+ // number of positions, not counting the placeholder at index 0
+ int GetLength() const { return int(mapping.size())-1; }
+ int GetNumSequences() const { return int(sequences.size()); }
+};
+
+#endif
diff --git a/src/ScorePrediction.cpp b/src/ScorePrediction.cpp
new file mode 100644
index 0000000..5df95e8
--- /dev/null
+++ b/src/ScorePrediction.cpp
@@ -0,0 +1,298 @@
+////////////////////////////////////////////////////////////
+// ScorePrediction.cpp
+//
+// Score a test prediction file against a reference.
+////////////////////////////////////////////////////////////
+
+#include "SStruct.hpp"
+#include "Utilities.hpp"
+
+///////////////////////////////////////////////////////////////////////////
+// ComputeIntersection()
+//
+// Compute intersection of two sets.
+///////////////////////////////////////////////////////////////////////////
+
+// Return the elements present in both set1 and set2.
+std::set<std::vector<int> > ComputeIntersection (const std::set<std::vector<int> > &set1,
+                                                 const std::set<std::vector<int> > &set2)
+{
+    typedef std::set<std::vector<int> > TupleSet;
+
+    TupleSet common;
+    std::set_intersection(set1.begin(), set1.end(),
+                          set2.begin(), set2.end(),
+                          std::inserter(common, common.begin()));
+    return common;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Match()
+//
+// Check if sequence matches.
+///////////////////////////////////////////////////////////////////////////
+
+// Decide whether two (sequence, name) records describe the same sequence.
+//
+// Only the .first members (the sequences) are compared; names are ignored.
+// The sequences must have the same length and agree at every position,
+// ignoring case, where a position holding 'N'/'n' in either one matches
+// anything.
+bool Match(const std::pair<std::string, std::string> &s,
+           const std::pair<std::string, std::string> &t)
+{
+    const std::string &a = s.first;
+    const std::string &b = t.first;
+
+    // Without this check a sequence would match any longer sequence it is a
+    // prefix of, and a shorter second sequence would be indexed past its end.
+    if (a.length() != b.length()) return false;
+
+    for (size_t i = 0; i < a.length(); i++)
+    {
+        // toupper() is only defined for values representable as unsigned char
+        const int x = toupper(static_cast<unsigned char>(a[i]));
+        const int y = toupper(static_cast<unsigned char>(b[i]));
+
+        if (x == 'N' || y == 'N') continue;
+        if (x != y) return false;
+    }
+    return true;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Canonicalize()
+//
+// Convert sequence into standard unique form.
+///////////////////////////////////////////////////////////////////////////
+
+// For every sequence in ref, build ((letters, name), index) where letters is
+// the sequence (from position 1 on) with non-letters removed and
+//   - protein mode: letters outside ACDEFGHIKLMNPQRSTVWY replaced by N;
+//   - otherwise:    T replaced by U, letters other than A/C/G/U replaced by N.
+// The original upper/lower case of each kept letter is preserved.
+std::vector<std::pair<std::pair<std::string, std::string>, int> > Canonicalize(const SStruct &ref, const bool use_protein)
+{
+    typedef std::pair<std::string, std::string> SequenceAndName;
+
+    // letters accepted as they are in protein mode
+    const std::string protein_alphabet("ACDEFGHIKLMNPQRSTVWY");
+
+    std::vector<std::pair<SequenceAndName, int> > result;
+    const int num_sequences = ref.GetNumSequences();
+
+    for (int idx = 0; idx < num_sequences; ++idx)
+    {
+        const std::string &aligned = ref.GetSequences()[idx];
+        const std::string &label = ref.GetNames()[idx];
+
+        std::string letters;
+        for (size_t col = 1; col < aligned.length(); ++col)
+        {
+            const bool was_upper = isupper(aligned[col]);
+            char ch = toupper(aligned[col]);
+            if (!isalpha(ch)) continue;   // non-letters are dropped
+
+            if (use_protein)
+            {
+                if (protein_alphabet.find(ch) == std::string::npos) ch = 'N';
+            }
+            else if (ch == 'T')
+            {
+                ch = 'U';
+            }
+            else if (ch != 'A' && ch != 'C' && ch != 'G' && ch != 'U')
+            {
+                ch = 'N';
+            }
+
+            if (!was_upper) ch = tolower(ch);
+            letters.push_back(ch);
+        }
+
+        result.push_back(std::make_pair(std::make_pair(letters, label), idx));
+    }
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// GetSequenceOrdering()
+//
+// For each reference sequence index, compute the index of the matching
+// test sequence (each test sequence is assigned at most once).
+///////////////////////////////////////////////////////////////////////////
+
+// Returns a vector whose element i is the index of the first not-yet-used
+// test sequence that Match() accepts for reference sequence i.  A reference
+// sequence with no available match is reported via Error().
+std::vector<int> GetSequenceOrdering(const SStruct &ref,
+                                     const SStruct &test,
+                                     const std::string &ref_filename,
+                                     const std::string &test_filename,
+                                     const bool use_protein)
+{
+    typedef std::vector<std::pair<std::pair<std::string, std::string>, int> > CanonicalList;
+
+    const CanonicalList ref_strings = Canonicalize(ref, use_protein);
+    const CanonicalList test_strings = Canonicalize(test, use_protein);
+
+    Assert(ref_strings.size() == test_strings.size(), "Dimension mismatch.");
+
+    std::vector<int> mapping(ref_strings.size());
+    std::vector<int> used(test_strings.size());   // nonzero once a test sequence is taken
+
+    for (size_t r = 0; r < ref_strings.size(); ++r)
+    {
+        // advance to the first free test sequence that matches
+        size_t t = 0;
+        while (t < test_strings.size() &&
+               (used[t] || !Match(ref_strings[r].first, test_strings[t].first)))
+        {
+            ++t;
+        }
+
+        if (t == test_strings.size())
+        {
+            Error("Unable to find matching sequence (%s vs %s)\n>%s\n%s",
+                  ref_filename.c_str(), test_filename.c_str(),
+                  ref_strings[r].first.second.c_str(),
+                  ref_strings[r].first.first.c_str());
+        }
+        else
+        {
+            used[t] = 1;
+            mapping[r] = t;
+        }
+    }
+
+    return mapping;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// AddPairings()
+//
+// Add all base-pairing positions in a sequence.
+///////////////////////////////////////////////////////////////////////////
+
+// Insert one triple (ii, GetSequenceMapping(s)[i], GetSequenceMapping(s)[j])
+// into pairings for every pair i < j = mapping[i] whose two characters in s
+// are both letters.
+// NOTE(review): GetSequenceMapping() is defined elsewhere; presumably it
+// converts column indices of s into positions of the ungapped sequence.
+void AddPairings(std::set<std::vector<int> > &pairings, int ii, const std::string &s, const std::vector<int> &mapping)
+{
+    const std::vector<int> s_mapping = GetSequenceMapping(s);
+
+    if (s.length() != mapping.size()) Error("Dimension mismatch for alignment.");
+
+    std::vector<int> pairing(3);
+    pairing[0] = ii;
+
+    const int length = int(s.length());
+    for (int left = 1; left < length; ++left)
+    {
+        const int right = mapping[left];
+
+        // take each pair once, from its left end
+        if (right <= left) continue;
+        if (!isalpha(s[left]) || !isalpha(s[right])) continue;
+
+        pairing[1] = s_mapping[left];
+        pairing[2] = s_mapping[right];
+        pairings.insert(pairing);
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////
+// AddMatches()
+//
+// Add all matched positions in an alignment.
+///////////////////////////////////////////////////////////////////////////
+
+// Insert one 4-tuple (ii, jj, GetSequenceMapping(s)[i], GetSequenceMapping(t)[i])
+// into matches for every column i >= 1 where both s and t hold a letter.
+// When use_protein and core_blocks_only are both set, only columns where
+// both letters are uppercase are kept.
+void AddMatches(std::set<std::vector<int> > &matches, int ii, int jj, const std::string &s, const std::string &t, const bool core_blocks_only, const bool use_protein)
+{
+    const std::vector<int> s_mapping = GetSequenceMapping(s);
+    const std::vector<int> t_mapping = GetSequenceMapping(t);
+
+    if (s.length() != t.length()) Error("Dimension mismatch for alignment.");
+
+    const bool uppercase_only = use_protein && core_blocks_only;
+
+    std::vector<int> match(4);
+    match[0] = ii;
+    match[1] = jj;
+
+    for (size_t col = 1; col < s.length(); ++col)
+    {
+        if (!isalpha(s[col]) || !isalpha(t[col])) continue;
+        if (uppercase_only && !(isupper(s[col]) && isupper(t[col]))) continue;
+
+        match[2] = s_mapping[col];
+        match[3] = t_mapping[col];
+        matches.insert(match);
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////
+// ComputeScores()
+//
+// Compute sensitivity and specificity.
+///////////////////////////////////////////////////////////////////////////
+
+// Load the reference and test files, collect their base pairings and
+// aligned-position matches, and print one summary line to std::cout:
+//   Q    = correct matches  / reference matches
+//   fM   = correct matches  / test matches
+//   sens = correct pairings / reference pairings
+//   ppv  = correct pairings / test pairings
+// A ratio whose denominator is zero is reported as 1.
+void ComputeScores(const std::string &ref_filename,
+                   const std::string &test_filename,
+                   const bool use_core_blocks,
+                   const bool use_protein)
+{
+    typedef std::set<std::vector<int> > TupleSet;
+
+    SStruct ref(ref_filename);
+    SStruct test(test_filename);
+
+    const int num_sequences = ref.GetNumSequences();
+    if (num_sequences != test.GetNumSequences())
+        Error("%s (%d) and %s (%d) have different numbers of sequences.",
+              ref_filename.c_str(), num_sequences,
+              test_filename.c_str(), test.GetNumSequences());
+
+    const std::vector<int> test_ordering = GetSequenceOrdering(ref, test, ref_filename, test_filename, use_protein);
+
+    const std::vector<std::string> &ref_seqs = ref.GetSequences();
+    const std::vector<std::string> &test_seqs = test.GetSequences();
+
+    TupleSet reference_pairings;
+    TupleSet test_pairings;
+    TupleSet reference_matches;
+    TupleSet test_matches;
+
+    for (int i = 0; i < num_sequences; i++)
+    {
+        const std::string &test_i = test_seqs[test_ordering[i]];
+
+        AddPairings(reference_pairings, i, ref_seqs[i], ref.GetMapping());
+        AddPairings(test_pairings, i, test_i, test.GetMapping());
+
+        for (int j = i+1; j < num_sequences; j++)
+        {
+            // the core-block filter is applied to the reference side only
+            AddMatches(reference_matches, i, j, ref_seqs[i], ref_seqs[j], use_core_blocks, use_protein);
+            AddMatches(test_matches, i, j, test_i, test_seqs[test_ordering[j]], false, use_protein);
+        }
+    }
+
+    const TupleSet correct_pairings = ComputeIntersection(reference_pairings, test_pairings);
+    const TupleSet correct_matches = ComputeIntersection(reference_matches, test_matches);
+
+    double Qscore = 1.0;
+    if (reference_matches.size() != 0) Qscore = double(correct_matches.size()) / reference_matches.size();
+
+    double fM = 1.0;
+    if (test_matches.size() != 0) fM = double(correct_matches.size()) / test_matches.size();
+
+    double sensitivity = 1.0;
+    if (reference_pairings.size() != 0) sensitivity = double(correct_pairings.size()) / reference_pairings.size();
+
+    double ppv = 1.0;
+    if (test_pairings.size() != 0) ppv = double(correct_pairings.size()) / test_pairings.size();
+
+    std::cout << "ref=" << ref_filename << "; test=" << test_filename << "; N=" << ref.GetNumSequences()
+              << "; ref_len=" << ref.GetLength() << "; ref_pid=" << ref.ComputePercentIdentity()
+              << "; test_len=" << test.GetLength() << "; test_pid=" << test.ComputePercentIdentity()
+              << "; Q=" << Qscore << "; fM=" << fM
+              << "; sens=" << sensitivity << "; ppv=" << ppv << std::endl;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// main()
+//
+// Main program.
+///////////////////////////////////////////////////////////////////////////
+
+// Command line:  PROGRAM [protein|rna] REF TEST [--core]
+// Prints usage and exits with status 1 when fewer than three arguments are
+// given; otherwise parses the mode, the two filenames and the optional
+// --core flag, then hands them to ComputeScores().
+int main(int argc, char **argv)
+{
+    if (argc < 4)
+    {
+        std::cerr << std::endl
+                  << "Usage: " << argv[0] << " [protein|rna] REF TEST [--core]" << std::endl
+                  << std::endl
+                  << " where REF is the name of the reference file (in BPSEQ or FASTA format)" << std::endl
+                  << " TEST is the name of the test file (in BPSEQ or FASTA format)" << std::endl
+                  << std::endl;
+        exit(1);
+    }
+
+    // first argument selects the alphabet
+    const std::string mode(argv[1]);
+    if (mode != "protein" && mode != "rna")
+        Error("First parameter must be either \"protein\" or \"rna\".");
+    const bool use_protein = (mode == "protein");
+
+    // remaining arguments: options start with '-', everything else is a filename
+    std::vector<std::string> filenames;
+    bool use_core_blocks = false;
+
+    for (int i = 2; i < argc; i++)
+    {
+        if (argv[i][0] != '-')
+        {
+            filenames.push_back(argv[i]);
+            continue;
+        }
+
+        if (std::string(argv[i]) == "--core")
+            use_core_blocks = true;
+        else
+            Error("Unknown argument: %s", argv[i]);
+    }
+
+    if (filenames.size() != 2) Error("Incorrect number of filenames specified.");
+
+    ComputeScores(filenames[0], filenames[1], use_core_blocks, use_protein);
+    return 0;
+}
diff --git a/src/SparseMatrix.hpp b/src/SparseMatrix.hpp
new file mode 100644
index 0000000..9e914f4
--- /dev/null
+++ b/src/SparseMatrix.hpp
@@ -0,0 +1,135 @@
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix.hpp
+//////////////////////////////////////////////////////////////////////
+
+#ifndef SPARSEMATRIX_HPP
+#define SPARSEMATRIX_HPP
+
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// struct SparseMatrixEntry
+//////////////////////////////////////////////////////////////////////
+
+// One stored element of a SparseMatrix row: a column index and its value.
+template<class T>
+struct SparseMatrixEntry
+{
+ // column index of this element; the non-default constructors and the
+ // assignment operator assert that it is nonnegative
+ int column;
+ // value stored at that column
+ T value;
+
+ // default constructor
+ SparseMatrixEntry();
+
+ // constructor
+ SparseMatrixEntry(int column, T value);
+
+ // copy constructor
+ SparseMatrixEntry(const SparseMatrixEntry &rhs);
+
+ // assignment operator
+ SparseMatrixEntry &operator=(const SparseMatrixEntry &rhs);
+};
+
+//////////////////////////////////////////////////////////////////////
+// class SparseMatrix
+//////////////////////////////////////////////////////////////////////
+
+// Row-compressed sparse matrix.  All stored entries live in one array and
+// each row is addressed through a pointer into that array.
+template<class T>
+class SparseMatrix
+{
+ // array of "entries" stored elements, rows laid out one after another
+ SparseMatrixEntry<T> *data;
+ // rows+1 pointers into data; row i occupies [row_ptrs[i], row_ptrs[i+1])
+ SparseMatrixEntry<T> **row_ptrs;
+ int rows, cols, entries;
+ // value treated as "zero"; the dense-array constructors skip elements equal to it
+ T zero;
+
+public:
+ // tag type selecting the transposing copy constructor
+ enum ConversionType { TRANSPOSE };
+
+ // create an empty sparse matrix without allocating memory
+ SparseMatrix();
+
+ // create empty sparse matrix, using row_sizes[] to determine the number of entries allocated for each row
+ SparseMatrix(const std::vector<int> &row_sizes, int rows, int cols, T zero);
+
+ // create sparse matrix from a map of (row, column) pairs to values
+ SparseMatrix(const std::map<std::pair<int,int>, T> &source, int rows, int cols, T zero);
+
+ // create sparse matrix from dense rectangular array, using existing sparse matrix to determine sparsity pattern
+ SparseMatrix(const SparseMatrix<T> &mask, const T *source);
+
+ // create sparse matrix from dense rectangular array
+ SparseMatrix(const T *source, int rows, int cols, T zero);
+
+ // create sparse matrix from dense upper triangular array
+ SparseMatrix(const T *source, int size, T zero);
+
+ // copy constructor
+ SparseMatrix(const SparseMatrix &rhs);
+
+ // copy constructor, with transpose
+ SparseMatrix(const SparseMatrix &rhs, ConversionType);
+
+ // destructor
+ ~SparseMatrix();
+
+ // assignment operator
+ SparseMatrix &operator=(const SparseMatrix &rhs);
+
+ // accessors
+
+ // get number of rows
+ int GetNumRows() const;
+
+ // get number of columns
+ int GetNumCols() const;
+
+ // get number of non-zero entries
+ int GetNumEntries() const;
+
+ // get pointer to beginning of row
+ const SparseMatrixEntry<T> *GetRowBegin(int row) const;
+
+ // get pointer past end of row
+ const SparseMatrixEntry<T> *GetRowEnd(int row) const;
+
+ // get pointer to last element of row
+ const SparseMatrixEntry<T> *GetRowRBegin(int row) const;
+
+ // get pointer to element before beginning of row
+ const SparseMatrixEntry<T> *GetRowREnd(int row) const;
+
+ // get pointer to beginning of row
+ SparseMatrixEntry<T> *GetRowBegin(int row);
+
+ // get pointer past end of row
+ SparseMatrixEntry<T> *GetRowEnd(int row);
+
+ // get pointer to last element of row
+ SparseMatrixEntry<T> *GetRowRBegin(int row);
+
+ // get pointer to element before beginning of row
+ SparseMatrixEntry<T> *GetRowREnd(int row);
+
+ // retrieve an arbitrary element
+ T operator()(int row, int col) const;
+
+ // compute sum of all non-zero elements
+ T GetSum() const;
+
+ // return rows*cols length vector containing all entries
+ std::vector<T> GetUnsparse() const;
+
+ // return rows*cols length vector containing 1 for each non-zero entry
+ // and zero elsewhere
+ std::vector<T> GetUnsparseMask() const;
+
+ // print a sparse version of the matrix to a file
+ void PrintSparse(std::ostream &outfile) const;
+
+ // print a sparse version of the matrix, along with sequence letters, to a file
+ void PrintSparseBPSEQ(std::ostream &outfile, const std::string &s) const;
+};
+
+#include "SparseMatrix.ipp"
+
+#endif
diff --git a/src/SparseMatrix.ipp b/src/SparseMatrix.ipp
new file mode 100644
index 0000000..9601c03
--- /dev/null
+++ b/src/SparseMatrix.ipp
@@ -0,0 +1,627 @@
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix.cpp
+//////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrixEntry::SparseMatrixEntry()
+// SparseMatrixEntry::operator=
+//
+// Constructors and assignment operator.
+//////////////////////////////////////////////////////////////////////
+
+// Default constructor.  Gives both members a defined value (column 0 and a
+// value-initialized T) so that copying or assigning from a default-built
+// entry never reads an indeterminate column in the "column >= 0" assertions.
+template<class T>
+inline SparseMatrixEntry<T>::SparseMatrixEntry() :
+    column(0), value()
+{}
+
+// Build an entry from an explicit column index and value; the column index
+// is asserted to be nonnegative.
+template<class T>
+inline SparseMatrixEntry<T>::SparseMatrixEntry(int column, T value)
+    : column(column)
+    , value(value)
+{
+    Assert(column >= 0, "Column number must be nonnegative.");
+}
+
+// Copy both fields from rhs; the copied column index is asserted to be
+// nonnegative.
+template<class T>
+inline SparseMatrixEntry<T>::SparseMatrixEntry(const SparseMatrixEntry<T> &rhs)
+    : column(rhs.column)
+    , value(rhs.value)
+{
+    Assert(column >= 0, "Column number must be nonnegative.");
+}
+
+// Copy both fields from rhs (no-op on self-assignment) and assert that the
+// resulting column index is nonnegative.
+template<class T>
+inline SparseMatrixEntry<T> &SparseMatrixEntry<T>::operator=(const SparseMatrixEntry<T> &rhs)
+{
+    if (this == &rhs) return *this;
+
+    column = rhs.column;
+    value = rhs.value;
+    Assert(column >= 0, "Column number must be nonnegative.");
+    return *this;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::SparseMatrix()
+//
+// Default constructor.
+//////////////////////////////////////////////////////////////////////
+
+// Empty matrix: no storage, zero dimensions, and "zero" initialized from 0.
+template<class T>
+SparseMatrix<T>::SparseMatrix()
+    : data(NULL)
+    , row_ptrs(NULL)
+    , rows(0)
+    , cols(0)
+    , entries(0)
+    , zero(0)
+{
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::SparseMatrix()
+//
+// Constructor. Build sparse matrix from a vector of row sizes.
+//////////////////////////////////////////////////////////////////////
+
+// Allocate a matrix whose row i has room for row_sizes[i] entries.  Every
+// slot starts out as (column 0, zero).
+template<class T>
+SparseMatrix<T>::SparseMatrix(const std::vector<int> &row_sizes, int rows, int cols, T zero) :
+    rows(rows), cols(cols), entries(0), zero(zero)
+{
+    Assert(rows > 0, "Number of rows must be positive.");
+    Assert(cols > 0, "Number of colums must be positive.");
+    Assert(rows == int(row_sizes.size()), "Mismatch in number of rows.");
+
+    // total storage is the sum of the per-row capacities
+    for (std::vector<int>::const_iterator it = row_sizes.begin(); it != row_sizes.end(); ++it)
+        entries += *it;
+
+    data = new SparseMatrixEntry<T>[entries];
+    row_ptrs = new SparseMatrixEntry<T>*[rows+1];
+
+    // blank out every slot
+    const SparseMatrixEntry<T> blank(0, zero);
+    for (int k = 0; k < entries; ++k)
+        data[k] = blank;
+
+    // row i occupies [row_ptrs[i], row_ptrs[i+1])
+    SparseMatrixEntry<T> *cursor = data;
+    row_ptrs[0] = cursor;
+    for (size_t i = 0; i < row_sizes.size(); i++)
+    {
+        cursor += row_sizes[i];
+        row_ptrs[i+1] = cursor;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::SparseMatrix()
+//
+// Constructor. Build sparse matrix from a map of (row, column)
+// pairs to values.
+//////////////////////////////////////////////////////////////////////
+
+// Build the matrix from a map keyed by (row, column).  The map's iteration
+// order (row first, then column) is exactly the storage order, so entries
+// are appended one by one while the start of each row is recorded.
+template<class T>
+SparseMatrix<T>::SparseMatrix(const std::map<std::pair<int,int>, T> &source, int rows, int cols, T zero) :
+    rows(rows), cols(cols), entries(0), zero(zero)
+{
+    typedef typename std::map<std::pair<int,int>, T>::const_iterator SourceIter;
+
+    Assert(rows > 0, "Number of rows must be positive.");
+    Assert(cols > 0, "Number of colums must be positive.");
+
+    // one stored entry per map element
+    entries = int(source.size());
+
+    data = new SparseMatrixEntry<T>[entries];
+    row_ptrs = new SparseMatrixEntry<T>*[rows+1];
+
+    SparseMatrixEntry<T> *dest_ptr = data;
+    int next_row = 0;   // first row whose start pointer has not been set yet
+
+    for (SourceIter iter = source.begin(); iter != source.end(); ++iter)
+    {
+        Assert(0 <= iter->first.first && iter->first.first < rows, "Invalid row.");
+        Assert(0 <= iter->first.second && iter->first.second < cols, "Invalid column.");
+
+        // rows up to and including this entry's row start here
+        // (skipped rows end up empty)
+        for (; next_row <= iter->first.first; ++next_row)
+            row_ptrs[next_row] = dest_ptr;
+
+        dest_ptr->column = iter->first.second;
+        dest_ptr->value = iter->second;
+        ++dest_ptr;
+    }
+
+    // remaining rows, plus the one-past-the-end slot, point past the last entry
+    for (; next_row <= rows; ++next_row)
+        row_ptrs[next_row] = dest_ptr;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::SparseMatrix()
+//
+// Constructor. Create sparse matrix from dense rectangular
+// array, using existing sparse matrix to determine sparsity
+// pattern.
+//////////////////////////////////////////////////////////////////////
+
+// Build a matrix with the same sparsity pattern as mask, taking each stored
+// value from the dense row-major array source (element (i, j) is read from
+// source[i * cols + j]).  An unallocated mask yields an unallocated matrix.
+template<class T>
+SparseMatrix<T>::SparseMatrix(const SparseMatrix<T> &mask, const T *source) :
+    rows(mask.rows), cols(mask.cols), entries(mask.entries), zero(mask.zero)
+{
+    // nothing to copy from an empty mask
+    if (mask.data == NULL)
+    {
+        data = NULL;
+        row_ptrs = NULL;
+        return;
+    }
+
+    Assert(rows > 0, "Number of rows must be positive.");
+    Assert(cols > 0, "Number of colums must be positive.");
+    Assert(entries >= 0, "Number of entries must be nonnegative.");
+
+    data = new SparseMatrixEntry<T>[entries];
+    row_ptrs = new SparseMatrixEntry<T>*[rows+1];
+
+    // clone the mask's entries (this carries the column indices over), then
+    // re-express each of its row pointers as an offset into our own array
+    std::copy(mask.data, mask.data + entries, data);
+    for (int i = 0; i <= rows; i++)
+        row_ptrs[i] = data + (mask.row_ptrs[i] - mask.data);
+
+    // replace the copied values with the ones from the dense array
+    for (int i = 0; i < rows; i++)
+    {
+        SparseMatrixEntry<T> *const row_end = row_ptrs[i+1];
+        for (SparseMatrixEntry<T> *ptr = row_ptrs[i]; ptr != row_end; ++ptr)
+            ptr->value = source[i * cols + ptr->column];
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::SparseMatrix()
+//
+// Constructor. Build sparse matrix from a rectangular matrix.
+//////////////////////////////////////////////////////////////////////
+
+// Build the matrix from a dense row-major rows x cols array, storing only
+// the elements that differ from zero.
+template<class T>
+SparseMatrix<T>::SparseMatrix(const T *source, int rows, int cols, T zero) :
+    rows(rows), cols(cols), entries(0), zero(zero)
+{
+    Assert(rows > 0, "Number of rows must be positive.");
+    Assert(cols > 0, "Number of colums must be positive.");
+
+    // stored entries = all elements minus those equal to zero
+    entries = rows*cols - std::count(source, source + rows*cols, zero);
+
+    data = new SparseMatrixEntry<T>[entries];
+    row_ptrs = new SparseMatrixEntry<T>*[rows+1];
+
+    // walk the dense array row by row, appending the non-zero elements
+    SparseMatrixEntry<T> *dest_ptr = data;
+    for (int i = 0; i < rows; i++)
+    {
+        row_ptrs[i] = dest_ptr;
+
+        const T *dense_row = source + i * cols;
+        for (int j = 0; j < cols; j++)
+        {
+            if (dense_row[j] != zero)
+            {
+                dest_ptr->column = j;
+                dest_ptr->value = dense_row[j];
+                ++dest_ptr;
+            }
+        }
+    }
+
+    row_ptrs[rows] = data + entries;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::SparseMatrix()
+//
+// Constructor. Build sparse matrix from a triangular matrix.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+SparseMatrix<T>::SparseMatrix(const T *source, int size, T zero) :
+    rows(size), cols(size), entries(0), zero(zero)
+{
+    Assert(size > 0, "Size of triangular matrix must be positive.");
+
+    // source is read as a packed upper triangle: row r supplies the
+    // elements of columns r, r+1, ..., size-1, rows stored back to back
+
+    // first pass -- count the elements that are not equal to zero,
+    // since only those are stored
+
+    const T *cursor = source;
+    for (int r = 0; r < rows; r++)
+    {
+        for (int c = r; c < cols; c++, ++cursor)
+        {
+            if (*cursor != zero) ++entries;
+        }
+    }
+
+    // allocate memory
+
+    data = new SparseMatrixEntry<T>[entries];
+    row_ptrs = new SparseMatrixEntry<T>*[rows+1];
+
+    // second pass -- copy the stored elements and record where each
+    // row begins
+
+    cursor = source;
+    SparseMatrixEntry<T> *next_entry = data;
+    for (int r = 0; r < rows; r++)
+    {
+        row_ptrs[r] = next_entry;
+
+        for (int c = r; c < cols; c++, ++cursor)
+        {
+            if (*cursor != zero)
+            {
+                next_entry->column = c;
+                next_entry->value = *cursor;
+                ++next_entry;
+            }
+        }
+    }
+
+    // sentinel: one past the last entry of the last row
+
+    row_ptrs[rows] = data + entries;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::SparseMatrix()
+//
+// Constructor. Build sparse matrix from an existing sparse
+// matrix.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+SparseMatrix<T>::SparseMatrix(const SparseMatrix &rhs) :
+    rows(rhs.rows), cols(rhs.cols), entries(rhs.entries), zero(rhs.zero)
+{
+    // an empty matrix owns no storage, so both pointers stay NULL and
+    // there is nothing to copy
+
+    if (rhs.data == NULL)
+    {
+        data = NULL;
+        row_ptrs = NULL;
+        return;
+    }
+
+    Assert(rows > 0, "Number of rows must be positive.");
+    Assert(cols > 0, "Number of colums must be positive.");
+    Assert(entries >= 0, "Number of entries must be nonnegative.");
+
+    // deep-copy the entries into freshly allocated storage
+
+    data = new SparseMatrixEntry<T>[entries];
+    row_ptrs = new SparseMatrixEntry<T>*[rows+1];
+    std::copy(rhs.data, rhs.data + entries, data);
+
+    // rhs.row_ptrs point into rhs.data; rebase each one so that it
+    // points to the same offset inside our own data array
+
+    for (int r = 0; r <= rows; r++)
+        row_ptrs[r] = data + (rhs.row_ptrs[r] - rhs.data);
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::SparseMatrix()
+//
+// Constructor. Build transpose of sparse matrix from an
+// existing sparse matrix.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+SparseMatrix<T>::SparseMatrix(const SparseMatrix &rhs, ConversionType) :
+    rows(rhs.cols), cols(rhs.rows), entries(rhs.entries), zero(rhs.zero)
+{
+
+    Assert(rows > 0, "Number of rows must be positive.");
+    Assert(cols > 0, "Number of colums must be positive.");
+    Assert(entries >= 0, "Number of entries must be nonnegative.");
+
+    // allocate memory
+
+    data = new SparseMatrixEntry<T>[entries];
+    row_ptrs = new SparseMatrixEntry<T>*[rows+1];
+
+    // column c of rhs becomes row c of the transpose, so the size of
+    // each new row is the number of rhs entries lying in that column
+
+    std::vector<int> row_size(rows, 0);
+    for (int i = 0; i < rhs.rows; i++)
+    {
+        for (SparseMatrixEntry<T> *src = rhs.row_ptrs[i]; src != rhs.row_ptrs[i+1]; ++src)
+            ++row_size[src->column];
+    }
+
+    // lay the rows out back to back inside data: row r+1 starts
+    // where row r ends
+
+    row_ptrs[0] = data;
+    for (int r = 0; r < rows; r++)
+        row_ptrs[r+1] = row_ptrs[r] + row_size[r];
+
+    // next_slot[r] is the first position of row r that has not been
+    // filled yet; using separate cursors leaves row_ptrs untouched
+
+    std::vector<SparseMatrixEntry<T> *> next_slot(row_ptrs, row_ptrs + rows);
+
+    // scatter the entries of rhs; its rows are visited in increasing
+    // order, so the column indices written to each transposed row
+    // are increasing as well
+
+    for (int i = 0; i < rhs.rows; i++)
+    {
+        for (SparseMatrixEntry<T> *src = rhs.row_ptrs[i]; src != rhs.row_ptrs[i+1]; ++src)
+        {
+            SparseMatrixEntry<T> *&dest = next_slot[src->column];
+            dest->column = i;
+            dest->value = src->value;
+            ++dest;
+        }
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::~SparseMatrix()
+//
+// Destructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+SparseMatrix<T>::~SparseMatrix()
+{
+    delete [] row_ptrs;    // row index (pointers into data); NULL for an empty matrix
+    delete [] data;        // entry storage; NULL for an empty matrix
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::operator=()
+//
+// Assignment operator.  Copy the contents of an existing sparse
+// matrix.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+SparseMatrix<T> &SparseMatrix<T>::operator=(const SparseMatrix &rhs)
+{
+    // Replaces the contents of this matrix with a deep copy of rhs and
+    // returns *this.  Self-assignment is a no-op.
+    //
+    // Implemented as copy-and-swap so that the storage previously owned
+    // by this matrix is released instead of leaked: the copy
+    // constructor builds the new contents (with the same sanity
+    // checks), the members are exchanged, and the destructor of the
+    // temporary frees the old data and row_ptrs arrays.  If making
+    // the copy throws, this matrix is left unmodified.
+
+    if (this != &rhs)
+    {
+        SparseMatrix<T> copy(rhs);
+
+        // trade every member with the temporary
+
+        std::swap(rows, copy.rows);
+        std::swap(cols, copy.cols);
+        std::swap(entries, copy.entries);
+        std::swap(zero, copy.zero);
+        std::swap(data, copy.data);
+        std::swap(row_ptrs, copy.row_ptrs);
+
+        // copy goes out of scope here, taking the previous storage
+        // of this matrix with it
+    }
+
+    return *this;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::GetNumRows()
+// SparseMatrix::GetNumCols()
+// SparseMatrix::GetNumEntries()
+// SparseMatrix::GetRowBegin()
+// SparseMatrix::GetRowEnd()
+// SparseMatrix::GetRowRBegin()
+// SparseMatrix::GetRowREnd()
+// SparseMatrix::operator()
+// SparseMatrix::GetSum()
+//
+// Accessors.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+inline int SparseMatrix<T>::GetNumRows() const { return rows; } // number of rows
+
+template<class T>
+inline int SparseMatrix<T>::GetNumCols() const { return cols; } // number of columns
+
+template<class T>
+inline int SparseMatrix<T>::GetNumEntries() const { return entries; } // number of explicitly stored entries
+
+template<class T>
+inline SparseMatrixEntry<T> *SparseMatrix<T>::GetRowBegin(int row)
+{
+ Assert(row >= 0 && row < rows, "Invalid row.");
+ return row_ptrs[row]; // first stored entry of the row (equals GetRowEnd(row) if the row is empty)
+}
+
+template<class T>
+inline SparseMatrixEntry<T> *SparseMatrix<T>::GetRowEnd(int row)
+{
+ Assert(row >= 0 && row < rows, "Invalid row.");
+ return row_ptrs[row+1]; // one past the last stored entry of the row, i.e. where the next row starts
+}
+
+template<class T>
+inline SparseMatrixEntry<T> *SparseMatrix<T>::GetRowRBegin(int row)
+{
+ Assert(row >= 0 && row < rows, "Invalid row.");
+ return row_ptrs[row+1]-1; // last stored entry of the row (equals GetRowREnd(row) if the row is empty)
+}
+
+template<class T>
+inline SparseMatrixEntry<T> *SparseMatrix<T>::GetRowREnd(int row)
+{
+ Assert(row >= 0 && row < rows, "Invalid row.");
+ return row_ptrs[row]-1; // position just before the row's first entry: a sentinel, not to be dereferenced
+}
+
+template<class T>
+inline const SparseMatrixEntry<T> *SparseMatrix<T>::GetRowBegin(int row) const
+{
+ Assert(row >= 0 && row < rows, "Invalid row.");
+ return row_ptrs[row]; // first stored entry of the row (equals GetRowEnd(row) if the row is empty)
+}
+
+template<class T>
+inline const SparseMatrixEntry<T> *SparseMatrix<T>::GetRowEnd(int row) const
+{
+ Assert(row >= 0 && row < rows, "Invalid row.");
+ return row_ptrs[row+1]; // one past the last stored entry of the row, i.e. where the next row starts
+}
+
+template<class T>
+inline const SparseMatrixEntry<T> *SparseMatrix<T>::GetRowRBegin(int row) const
+{
+ Assert(row >= 0 && row < rows, "Invalid row.");
+ return row_ptrs[row+1]-1; // last stored entry of the row (equals GetRowREnd(row) if the row is empty)
+}
+
+template<class T>
+inline const SparseMatrixEntry<T> *SparseMatrix<T>::GetRowREnd(int row) const
+{
+ Assert(row >= 0 && row < rows, "Invalid row.");
+ return row_ptrs[row]-1; // position just before the row's first entry: a sentinel, not to be dereferenced
+}
+
+template<class T>
+inline T SparseMatrix<T>::operator()(int row, int col) const
+{
+    Assert(row >= 0 && row < rows, "Invalid row.");
+    Assert(col >= 0 && col < cols, "Invalid column.");
+
+    // binary search for col over the half-open range [first, last) of the row's entries, which
+    // must be sorted by increasing column; a position without a stored entry reads as zero
+    const SparseMatrixEntry<T> *first = row_ptrs[row];
+    const SparseMatrixEntry<T> *last = row_ptrs[row+1];
+    while (first < last)
+    {
+        const SparseMatrixEntry<T> *mid = first + (last - first) / 2;
+        if (mid->column < col)
+            first = mid+1;
+        else if (mid->column > col)
+            last = mid;
+        else
+            return mid->value;
+    }
+    return zero;
+}
+
+template<class T>
+inline T SparseMatrix<T>::GetSum() const
+{
+    T total = zero;    // sum of the stored values, accumulated in storage order starting from zero
+    for (const SparseMatrixEntry<T> *e = data; e != data + entries; ++e)
+        total += e->value;
+    return total;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::GetUnsparse()
+//
+// Return copy of matrix as a 2D array.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+std::vector<T> SparseMatrix<T>::GetUnsparse() const
+{
+    // dense row-major copy (rows * cols elements); positions without a stored
+    // entry hold zero, the same value operator() reports for them
+    std::vector<T> ret(rows * cols, zero);
+    for (int i = 0; i < rows; i++)
+        for (const SparseMatrixEntry<T> *p = row_ptrs[i]; p != row_ptrs[i+1]; ++p)
+            ret[i * cols + p->column] = p->value;
+    return ret;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::GetUnsparseMask()
+//
+// Return 2D array of present positions.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+std::vector<T> SparseMatrix<T>::GetUnsparseMask() const
+{
+    // dense row-major indicator array: T(1) wherever an entry is stored,
+    // a value-initialized T everywhere else
+    std::vector<T> mask(rows * cols);
+    for (int r = 0; r < rows; r++)
+        for (const SparseMatrixEntry<T> *e = row_ptrs[r]; e != row_ptrs[r+1]; ++e)
+            mask[r * cols + e->column] = T(1);
+    return mask;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::PrintSparse()
+//
+// Print in sparse matrix format.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+void SparseMatrix<T>::PrintSparse(std::ostream &outfile) const
+{
+    for (int r = 0; r < rows; r++)
+    {
+        SparseMatrixEntry<T> *e = row_ptrs[r], *const row_end = row_ptrs[r+1];
+        if (e == row_end) continue;    // rows without entries are omitted
+        outfile << r;                  // line format: "<row> <column>:<value> ..."
+        for (; e != row_end; ++e) outfile << ' ' << e->column << ':' << e->value;
+        outfile << std::endl;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// SparseMatrix::PrintSparseBPSEQ()
+//
+// Print BPSEQ posteriors in sparse matrix format.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+void SparseMatrix<T>::PrintSparseBPSEQ(std::ostream &outfile, const std::string &s) const
+{
+    Assert(int(s.length()) == rows, "Sequence length does not match sparse matrix size.");
+    for (int pos = 1; pos < rows; pos++)    // row 0 and s[0] are skipped; empty rows are still printed
+    {
+        outfile << pos << ' ' << s[pos];    // line format: "<row> <letter> <column>:<value> ..."
+        SparseMatrixEntry<T> *const row_end = row_ptrs[pos+1];
+        for (SparseMatrixEntry<T> *e = row_ptrs[pos]; e != row_end; ++e) outfile << ' ' << e->column << ':' << e->value;
+        outfile << std::endl;
+    }
+}
diff --git a/src/SubgradientMethod.hpp b/src/SubgradientMethod.hpp
new file mode 100644
index 0000000..799b1ac
--- /dev/null
+++ b/src/SubgradientMethod.hpp
@@ -0,0 +1,50 @@
+//////////////////////////////////////////////////////////////////////
+// SubgradientMethod.hpp
+//
+// This file contains an implementation of the subgradient
+// optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#ifndef SUBGRADIENTMETHOD_HPP
+#define SUBGRADIENTMETHOD_HPP
+
+#include <vector>
+#include "Utilities.hpp"
+
+//////////////////////////////////////////////////////////////////////
+// SubgradientMethod()
+//
+// Implementation of subgradient optimization routine.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+class SubgradientMethod
+{
+ const int MAX_ITERATIONS; // number of iterations Minimize() runs
+ const RealT PARAMETER_NORM_BOUND; // radius of the ball Minimize() projects the iterate back onto
+ const RealT GRADIENT_NORM_BOUND; // stored, but not read by Minimize() in SubgradientMethod.ipp
+ const RealT CURVATURE; // added to the running curvature sum once per iteration
+
+public:
+ SubgradientMethod
+ (
+ const int MAX_ITERATIONS = 1000, // maximum number of iterations to run subgradient method
+ const RealT PARAMETER_NORM_BOUND = RealT(1e-5), // maximum parameter vector norm
+ const RealT GRADIENT_NORM_BOUND = RealT(1e-5), // maximum gradient vector norm
+ const RealT CURVATURE = RealT(0) // strong convexity constant: f_t(v) >= f_t(w) + g'*(v - w) + 0.5 * CURVATURE * ||v - w||^2
+ );
+
+ virtual ~SubgradientMethod() {}
+ // runs the optimization from x0; x0 is overwritten with the best point found and its function value is returned
+ RealT Minimize(std::vector<RealT> &x0);
+ // callbacks that a derived class must supply
+ virtual RealT ComputeFunction(const std::vector<RealT> &x) = 0; // objective value at x
+ virtual void ComputeSubgradient(std::vector<RealT> &g, const std::vector<RealT> &x) = 0; // writes a subgradient at x into g
+ virtual void Report(int iteration, const std::vector<RealT> &x, RealT f, const std::vector<RealT> &g,
+ RealT norm_bound, RealT step_size) = 0; // periodic progress: Minimize() passes the best point/value/subgradient so far
+ virtual void Report(const std::string &s) = 0; // free-form status message
+};
+
+#include "SubgradientMethod.ipp"
+
+#endif
diff --git a/src/SubgradientMethod.ipp b/src/SubgradientMethod.ipp
new file mode 100644
index 0000000..f00505c
--- /dev/null
+++ b/src/SubgradientMethod.ipp
@@ -0,0 +1,226 @@
+//////////////////////////////////////////////////////////////////////
+// SubgradientMethod.ipp
+//
+// This file contains an implementation of an improved
+// Pegasos style subgradient optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+#define DOUBLY_ADAPTIVE 0 // step-size rule switches for Minimize(): every enabled rule
+#define PROXIMAL_ADAPTIVE 1 // declares its own eta, so exactly one of these four
+#define BARTLETT_ADAPTIVE 0 // should be set to 1
+#define DYNAMIC_STEPSIZE 0
+// the constants below are referenced only by the DYNAMIC_STEPSIZE code in Minimize()
+const double LOWER_BOUND = 0.0; // floor applied to the target function value
+const double TOLERANCE = 1e-4; // smallest acceptance interval around the target value
+const double ACCEPTANCE_RATIO = 0.15; // acceptance interval as a fraction of (f - target value)
+const double IMPROVEMENT_FRACTION = 0.75; // share of the accumulated decrease subtracted from the next target
+const double MULTIPLIER = 0.95; // scale factor in the step size (f - target) / ||g||^2
+const int MAX_INNER_STEPS = 75; // inner iterations allowed before the target value is relaxed
+const int MAX_NON_IMPROVEMENT_STEPS = 20; // consecutive non-improving steps allowed before the target value is relaxed
+
+//////////////////////////////////////////////////////////////////////
+// SubgradientMethod::SubgradientMethod()
+//
+// Constructor.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+SubgradientMethod<RealT>::SubgradientMethod
+(
+ const int MAX_ITERATIONS, // maximum number of iterations to run subgradient method
+ const RealT PARAMETER_NORM_BOUND, // maximum parameter vector norm (iterates are projected back onto this ball)
+ const RealT GRADIENT_NORM_BOUND, // maximum gradient vector norm
+ const RealT CURVATURE // strong convexity constant: f_t(v) >= f_t(w) + g'*(v - w) + 0.5 * CURVATURE * ||v - w||^2
+) :
+ MAX_ITERATIONS(MAX_ITERATIONS),
+ PARAMETER_NORM_BOUND(PARAMETER_NORM_BOUND),
+ GRADIENT_NORM_BOUND(GRADIENT_NORM_BOUND),
+ CURVATURE(CURVATURE)
+{} // nothing else to do: the arguments are only stored in the const members of the same name
+
+//////////////////////////////////////////////////////////////////////
+// SubgradientMethod::Minimize()
+//
+// Implementation of an improved Pegasos style subgradient
+// optimization algorithm.
+//////////////////////////////////////////////////////////////////////
+
+template<class RealT>
+RealT SubgradientMethod<RealT>::Minimize(std::vector<RealT> &x)
+{
+    // x holds the starting point on entry and the best point found on exit
+    std::vector<RealT> g;
+    ComputeSubgradient(g, x);
+    RealT f = ComputeFunction(x);
+
+#if DOUBLY_ADAPTIVE || PROXIMAL_ADAPTIVE || BARTLETT_ADAPTIVE
+    RealT bound = 0;
+    RealT sigma_sum = 0;
+    RealT tau_sum = 0;
+#endif
+#if DOUBLY_ADAPTIVE
+    RealT gamma_sum = 0;
+#endif
+
+    // check early termination criteria
+    // (x is returned unmodified in that case; f is cast because %lf expects a double)
+    if (f >= RealT(1e20))
+    {
+        Report(SPrintF("Termination before optimization: function value too big (%lf > %lf)", double(f), 1e20));
+        return f;
+    }
+
+    // keep track of best parameter vector
+
+    RealT best_f = f;
+    std::vector<RealT> best_x = x;
+    std::vector<RealT> best_g = g;
+
+#if DYNAMIC_STEPSIZE
+    RealT target_value = std::max(LOWER_BOUND, f - DotProduct(g, g) / 2.0);
+    RealT outer_acceptance_interval = std::max((best_f - target_value) * ACCEPTANCE_RATIO, TOLERANCE);
+    RealT path_length = 0;
+    int inner_counter = 0;
+    int non_improvement_counter = 0;
+#endif
+
+    // run optimization algorithm
+
+    for (int epoch = 1; epoch <= MAX_ITERATIONS; epoch++)
+    {
+        // compute learning rate
+        // (exactly one of the rules below must be enabled: each of them declares eta)
+#if DOUBLY_ADAPTIVE
+        RealT At = 0.5 * Pow(PARAMETER_NORM_BOUND + Norm(x), RealT(2));
+        RealT Bt = 0.5 * PARAMETER_NORM_BOUND * PARAMETER_NORM_BOUND + 0.5 * DotProduct(x, x);
+
+        sigma_sum += CURVATURE;
+        RealT sum = sigma_sum + tau_sum + gamma_sum;
+        RealT tau = (-sum + Sqrt(sum * sum + 4 * (1 + At/Bt) * DotProduct(g, g) / At)) / (2 * (1 + At/Bt));
+        RealT gamma = tau * At/Bt;
+        tau_sum += tau;
+        gamma_sum += gamma;
+
+        RealT eta = 1.0 / (sigma_sum + tau_sum + gamma_sum);
+        bound += tau * At + gamma * Bt + DotProduct(g, g) * eta;
+        g += gamma * x;
+#endif
+
+#if PROXIMAL_ADAPTIVE
+        sigma_sum += CURVATURE;
+        RealT tau = 0.5 * (-(sigma_sum + tau_sum) + Sqrt(Pow(sigma_sum + tau_sum, RealT(2)) + RealT(4) * DotProduct(g, g) / Pow(PARAMETER_NORM_BOUND + Norm(x), RealT(2))));
+        tau_sum += tau;
+        RealT eta = 1.0 / (sigma_sum + tau_sum);
+        bound += 0.5 * tau * Pow(PARAMETER_NORM_BOUND + Norm(x), RealT(2)) + 0.5 * DotProduct(g, g) / (sigma_sum + tau_sum);
+#endif
+
+#if BARTLETT_ADAPTIVE
+        sigma_sum += CURVATURE;
+        RealT tau = 0.5 * (-(sigma_sum + tau_sum) + Sqrt(Pow(sigma_sum + tau_sum, RealT(2)) + RealT(8) * DotProduct(g, g) / (3*PARAMETER_NORM_BOUND*PARAMETER_NORM_BOUND)));
+        tau_sum += tau;
+
+        RealT eta = 1.0 / (sigma_sum + tau_sum);
+        bound += 0.5 * tau * PARAMETER_NORM_BOUND * PARAMETER_NORM_BOUND + 0.5 * Pow(Norm(g) + tau*PARAMETER_NORM_BOUND, 2.0) / (sigma_sum + tau_sum);
+#endif
+
+#if DYNAMIC_STEPSIZE
+        RealT eta = MULTIPLIER * (f - target_value) / std::max(DotProduct(g, g), 1e-10);
+#endif
+        // take a step
+
+        x -= eta * g;
+
+        // project back to ball
+
+        RealT norm = Norm(x);
+        if (norm > PARAMETER_NORM_BOUND)
+        {
+            x *= PARAMETER_NORM_BOUND / norm;
+        }
+
+        // compute new subgradient and function
+
+        ComputeSubgradient(g, x);
+        f = ComputeFunction(x);
+
+#if DYNAMIC_STEPSIZE
+        ++inner_counter;
+
+        if (f < best_f)
+        {
+            non_improvement_counter = 0;
+            path_length += best_f - f;
+
+            // outer loop success
+
+            if (f <= target_value + outer_acceptance_interval)
+            {
+                target_value = std::max(LOWER_BOUND, f - outer_acceptance_interval - IMPROVEMENT_FRACTION * path_length);
+                outer_acceptance_interval = std::max((f - target_value) * ACCEPTANCE_RATIO, TOLERANCE);
+                path_length = 0;
+                Report(SPrintF("Outer loop success after %d inner iterations: new target value = %lf, best f = %lf", inner_counter, double(target_value), double(best_f)));
+                inner_counter = 0;
+            }
+
+            // outer loop failure
+
+            else if (inner_counter >= MAX_INNER_STEPS)
+            {
+                target_value = std::max(LOWER_BOUND, (f - outer_acceptance_interval + target_value) / 2);
+                outer_acceptance_interval = std::max((f - target_value) * ACCEPTANCE_RATIO, TOLERANCE);
+                path_length = 0;
+                Report(SPrintF("Outer loop failure after %d inner iterations: new target value = %lf, best f = %lf", inner_counter, double(target_value), double(best_f)));
+                inner_counter = 0;
+            }
+        }
+        else
+        {
+            ++non_improvement_counter;
+
+            // outer loop failure
+
+            if (inner_counter >= MAX_INNER_STEPS || non_improvement_counter >= MAX_NON_IMPROVEMENT_STEPS)
+            {
+                target_value = std::max(LOWER_BOUND, (best_f - outer_acceptance_interval + target_value) / 2);
+                outer_acceptance_interval = std::max((best_f - target_value) * ACCEPTANCE_RATIO, TOLERANCE);
+                path_length = 0;
+                Report(SPrintF("Outer loop failure after %d inner iterations: new target value = %lf, best f = %lf", inner_counter, double(target_value), double(best_f)));
+                inner_counter = 0;
+                non_improvement_counter = 0;
+            }
+        }
+#endif
+
+        // update best parameter values
+        // (an all-zero x is never recorded as best, and an all-zero best_x is always replaced)
+        if (Norm(x) != 0 && (f < best_f || Norm(best_x) == 0))
+        {
+            best_f = f;
+            best_g = g;
+            best_x = x;
+        }
+
+        // print updates
+        // (about 100 reports over the whole run, at least one every iteration count of 1)
+        const int update_frequency = std::max(1, MAX_ITERATIONS / 100);
+        if (epoch % update_frequency == 0)
+        {
+            Report(epoch, best_x, best_f, best_g, PARAMETER_NORM_BOUND, eta);
+        }
+
+        // check convergence criteria
+
+        if (epoch >= MAX_ITERATIONS)
+        {
+            Report("Termination condition: maximum number of iterations reached");
+            break;
+        }
+    }
+
+#if DOUBLY_ADAPTIVE || PROXIMAL_ADAPTIVE || BARTLETT_ADAPTIVE
+    Report(SPrintF("Cumulative regret bound: %lf", double(bound)));
+#endif
+
+    x = best_x;
+    return best_f;
+}
diff --git a/src/Utilities.cpp b/src/Utilities.cpp
new file mode 100644
index 0000000..6c69576
--- /dev/null
+++ b/src/Utilities.cpp
@@ -0,0 +1,420 @@
+//////////////////////////////////////////////////////////////////////
+// Utilities.cpp
+//////////////////////////////////////////////////////////////////////
+
+#include "Utilities.hpp"
+
+bool toggle_error = false; // set by Error() right before exit(); while set, _ASSERT_FAILED() returns without aborting
+
+//////////////////////////////////////////////////////////////////////
+// _ASSERT_FAILED()
+//
+// Print error message for a failed assertion and terminate.
+//////////////////////////////////////////////////////////////////////
+
+int _ASSERT_FAILED(const char *fmt, ...)
+{
+    if (toggle_error) return 0;     // Error() is already terminating the program: do nothing
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);    // print the caller-supplied message
+    va_end(args);
+    abort();
+    return 0;                       // not reached; present for the int return type
+}
+
+//////////////////////////////////////////////////////////////////////
+// Error()
+//
+// Print error message for a user error and terminate.
+//////////////////////////////////////////////////////////////////////
+
+void Error(const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    fputs("ERROR: ", stderr);
+    vfprintf(stderr, fmt, args);
+    fputc('\n', stderr);
+    va_end(args);
+    toggle_error = true;    // from here on a failed assertion no longer aborts
+    exit(1);
+}
+
+//////////////////////////////////////////////////////////////////////
+// Warning()
+//
+// Print warning message for a user error without terminating.
+//////////////////////////////////////////////////////////////////////
+
+void Warning(const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    fputs("WARNING: ", stderr);
+    vfprintf(stderr, fmt, args);    // the message itself, formatted printf-style
+    fputc('\n', stderr);
+    va_end(args);
+}
+
+//////////////////////////////////////////////////////////////////////
+// ConvertToNumber()
+//
+// Attempts to parse a number from the character string given.
+// Returns true only if no parsing error occurs.
+//////////////////////////////////////////////////////////////////////
+
+bool ConvertToNumber(const std::string &s, int &val)
+{
+    const char *const text = s.c_str();
+    char *stop = NULL;
+    errno = 0;
+    const long int parsed = strtol(text, &stop, 10);
+    // reject overflow of long, any other conversion error, and input without digits
+    const bool overflowed = (errno == ERANGE && (parsed == LONG_MIN || parsed == LONG_MAX));
+    if (overflowed || (parsed == 0 && errno != 0) || stop == text) return false;
+    if (parsed < INT_MIN || parsed > INT_MAX) return false;    // does not fit in an int
+    val = int(parsed);    // val is only written on success
+    return true;
+}
+
+bool ConvertToNumber(const std::string &s, unsigned int &val)
+{
+    // strtoul (not strtol): on overflow it saturates at ULONG_MAX, which the checks below rely on
+    char *end_ptr;
+    errno = 0;
+    const unsigned long int temp = strtoul(s.c_str(), &end_ptr, 10);
+    if (errno == ERANGE && temp == ULONG_MAX) return false;    // does not fit in unsigned long
+    if (temp == 0 && errno != 0) return false;                 // other conversion error
+    if (s.c_str() == end_ptr) return false;                    // no digits were consumed
+    if (temp > UINT_MAX) return false;                         // does not fit in unsigned int
+    val = (unsigned int)(temp);                                // val is only written on success
+    return true;
+}
+
+bool ConvertToNumber(const std::string &s, long int &val)
+{
+    const char *const text = s.c_str();
+    char *stop = NULL;
+    errno = 0;
+    val = strtol(text, &stop, 10);    // note: val is written even when parsing fails
+    const bool out_of_range = (errno == ERANGE && (val == LONG_MIN || val == LONG_MAX));
+    const bool failed = out_of_range || (val == 0 && errno != 0) || stop == text;    // stop == text: no digits
+    return !failed;
+}
+
+bool ConvertToNumber(const std::string &s, unsigned long int &val)
+{
+    const char *const text = s.c_str();
+    char *stop = NULL;
+    errno = 0;
+    val = strtoul(text, &stop, 10);    // note: val is written even when parsing fails
+    const bool out_of_range = (errno == ERANGE && val == ULONG_MAX);
+    const bool failed = out_of_range || (val == 0 && errno != 0) || stop == text;    // stop == text: no digits
+    return !failed;
+}
+
+#if defined(LLONG_MIN) && defined(LLONG_MAX)
+
+bool ConvertToNumber(const std::string &s, long long int &val)
+{
+    const char *const text = s.c_str();
+    char *stop = NULL;
+    errno = 0;
+    val = strtoll(text, &stop, 10);    // note: val is written even when parsing fails
+    const bool out_of_range = (errno == ERANGE && (val == LLONG_MIN || val == LLONG_MAX));
+    const bool failed = out_of_range || (val == 0 && errno != 0) || stop == text;    // stop == text: no digits
+    return !failed;
+}
+
+#endif
+
+#if defined(ULLONG_MAX)
+
+bool ConvertToNumber(const std::string &s, unsigned long long int &val)
+{
+    const char *const text = s.c_str();
+    char *stop = NULL;
+    errno = 0;
+    val = strtoull(text, &stop, 10);    // note: val is written even when parsing fails
+    const bool out_of_range = (errno == ERANGE && val == ULLONG_MAX);
+    const bool failed = out_of_range || (val == 0 && errno != 0) || stop == text;    // stop == text: no digits
+    return !failed;
+}
+
+
+#endif
+
+bool ConvertToNumber(const std::string &s, float &val)
+{
+    char *end_ptr;
+    errno = 0;
+    val = strtof(s.c_str(), &end_ptr);    // note: val is written even when parsing fails
+    if (errno == ERANGE && (val == HUGE_VALF || val == -HUGE_VALF)) return false;    // overflow, either sign
+    if (val == 0 && errno != 0) return false;                                        // e.g. underflow to zero
+    if (s.c_str() == end_ptr) return false;                                          // nothing was parsed
+    return true;
+}
+
+bool ConvertToNumber(const std::string &s, double &val)
+{
+    const char *const text = s.c_str();
+    char *stop = NULL;
+    errno = 0;
+    val = strtod(text, &stop);    // note: val is written even when parsing fails
+    const bool overflowed = (errno == ERANGE && (val == HUGE_VAL || val == -HUGE_VAL));
+    const bool failed = overflowed || (val == 0 && errno != 0) || stop == text;    // stop == text: nothing parsed
+    return !failed;
+}
+
+bool ConvertToNumber(const std::string &s, long double &val)
+{
+    // strtold() is called directly, as strtof() is in the float
+    // overload.  It is a function, not a macro, so guarding it with
+    // #ifdef always selected a strtod() fallback, which limits the
+    // result to the range and precision of double.
+    char *end_ptr;
+    errno = 0;
+    val = strtold(s.c_str(), &end_ptr);    // note: val is written even when parsing fails
+    if (errno == ERANGE && (val == HUGE_VALL || val == -HUGE_VALL)) return false;    // overflow, either sign
+    if (val == 0 && errno != 0) return false;                                        // e.g. underflow to zero
+    if (s.c_str() == end_ptr) return false;                                          // nothing was parsed
+    return true;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ConvertToUpperCase()
+//
+// Converts lowercase letters in a string to uppercase.
+//////////////////////////////////////////////////////////////////////
+
+std::string ConvertToUpperCase(const std::string &s)
+{
+    std::string t(s);    // work on a copy; characters that are not lowercase letters pass through unchanged
+    for (size_t i = 0; i < t.length(); i++)    // cast: toupper() is only defined for unsigned char values (and EOF)
+        t[i] = char(toupper(static_cast<unsigned char>(t[i])));
+    return t;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ConvertToLowerCase()
+//
+// Converts uppercase letters in a string to lowercase.
+//////////////////////////////////////////////////////////////////////
+
+std::string ConvertToLowerCase(const std::string &s)
+{
+    std::string t(s);    // work on a copy; characters that are not uppercase letters pass through unchanged
+    for (size_t i = 0; i < t.length(); i++)    // cast: tolower() is only defined for unsigned char values (and EOF)
+        t[i] = char(tolower(static_cast<unsigned char>(t[i])));
+    return t;
+}
+
+////////////////////////////////////////////////////////////
+// Trim()
+//
+// Remove whitespace from either end of a string.
+////////////////////////////////////////////////////////////
+
+std::string Trim(const std::string &s){
+    size_t left = 0, right = s.length();    // [left, right) is the part that is kept
+    while (left < right && std::isspace(static_cast<unsigned char>(s[left]))) left++;       // cast: isspace() needs an
+    while (left < right && std::isspace(static_cast<unsigned char>(s[right-1]))) right--;   // unsigned char value
+    return s.substr(left, right-left);
+}
+
+//////////////////////////////////////////////////////////////////////
+// RemoveGaps()
+//
+// Remove gap characters from a string.
+//////////////////////////////////////////////////////////////////////
+
+std::string RemoveGaps(const std::string &s)
+{
+    // keep every character except the two gap symbols '.' and '-'
+
+    std::string ungapped;
+    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
+    {
+        const bool is_gap = (*it == '.' || *it == '-');
+        if (!is_gap) ungapped.push_back(*it);
+    }
+    return ungapped;
+}
+
+//////////////////////////////////////////////////////////////////////
+// SPrintF()
+//
+// Same as sprintf but returns a string.
+//////////////////////////////////////////////////////////////////////
+
+std::string SPrintF(const char *fmt, ...){
+    // Formats like sprintf() but returns the text as a std::string of
+    // whatever length is needed.  Returns an empty string if the
+    // formatting itself fails (vsnprintf() reports a negative count).
+
+    // 1024 bytes suffice for most messages; longer ones trigger a retry
+
+    std::vector<char> buffer(1024);
+
+    while (true)
+    {
+        // print text to buffer; the argument list has to be restarted
+        // for every attempt because vsnprintf() consumes it
+
+        va_list ap;
+        va_start(ap, fmt);
+        const int num_written = vsnprintf(&buffer[0], buffer.size(), fmt, ap);
+        va_end(ap);
+
+        // formatting error: the buffer contents are unspecified, so
+        // do not return them
+
+        if (num_written < 0) return std::string();
+
+        // the text and its terminating NUL fit: return text
+
+        if (size_t(num_written) < buffer.size())
+            return std::string(&buffer[0]);
+
+        // otherwise num_written is the length the text needs, so
+        // grow the buffer to exactly that plus room for the NUL and
+        // format again (the truncated contents are simply rewritten,
+        // hence no need to copy them over)
+
+        buffer.resize(size_t(num_written) + 1);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// WriteProgressMessage()
+//
+// Write progress message to console (stderr) and return to
+// beginning of line. Wipes out any previous message.
+//////////////////////////////////////////////////////////////////////
+
+void WriteProgressMessage(const std::string &message)
+{
+    static int old_length = 0;    // length of the message written by the previous call
+    std::cerr << '\r' << message;
+    for (int pad = old_length - int(message.length()); pad > 0; --pad) std::cerr << ' ';    // blank out leftovers
+    std::cerr << '\r';
+    old_length = int(message.length());
+}
+
+//////////////////////////////////////////////////////////////////////
+// GetSystemTime()
+//
+// Retrieve system time in seconds past the Epoch.
+//////////////////////////////////////////////////////////////////////
+
+double GetSystemTime()
+{
+    timeval now;
+    if (gettimeofday(&now, NULL) != 0) Error("Failed to obtain system time.");
+    return double(now.tv_sec) + double(now.tv_usec) * 1e-6;    // whole seconds plus microseconds
+}
+
+//////////////////////////////////////////////////////////////////////
+// MakeDirectory()
+//
+// Make a directory if one doesn't exist.
+//////////////////////////////////////////////////////////////////////
+
+void MakeDirectory(const std::string &directory)
+{
+    if (directory != "" && system(("mkdir -p " + directory).c_str()))    // NOTE(review): directory reaches the shell unquoted
+        Error("Unable to create directory \"%s\"", directory.c_str());   // name passed as an argument, never as the format
+}
+
+//////////////////////////////////////////////////////////////////////
+// MakeTempDirectory()
+//
+// Make a temporary directory that will automatically be deleted
+// once the program is complete.
+//////////////////////////////////////////////////////////////////////
+
+std::string MakeTempDirectory()
+{
+    // mkdtemp() rewrites the trailing XXXXXX of the template in place
+    // and creates the directory; the template is relative, so the
+    // directory is made under the current working directory.
+    // NOTE(review): nothing in this function arranges for the
+    // directory to be deleted later -- confirm that callers do so.
+
+    char *temp_dir_name = new char[10000];
+    Assert(temp_dir_name, "Failed to allocate memory.");
+    strcpy(temp_dir_name, "temp_XXXXXX");
+    const bool created = (mkdtemp(temp_dir_name) != NULL);
+
+    // take a copy of the name (empty on failure) so that the buffer
+    // can be released before any error is reported
+
+    const std::string res(created ? temp_dir_name : "");
+    delete [] temp_dir_name;
+
+    if (!created) Error("Unable to create temp directory!");
+    return res;
+}
+
+//////////////////////////////////////////////////////////////////////
+// GetSequencePositions()
+//
+// Return an array whose ith element is the index of the ith
+// letter in the input string. The 0th element is always zero,
+// since there is no 0-th character in the input string.
+//////////////////////////////////////////////////////////////////////
+
+std::vector<int> GetSequencePositions(const std::string &s)
+{
+    std::vector<int> ret(1);    // element 0 is a placeholder (0): letters are numbered starting from 1
+    for (size_t i = 0; i < s.length(); i++)    // cast below: isalpha() is only defined for unsigned char values
+        if (isalpha(static_cast<unsigned char>(s[i]))) ret.push_back(int(i));
+    return ret;
+}
+
+//////////////////////////////////////////////////////////////////////
+// GetSequenceMapping()
+//
+// Return an array mapping from positions in a gapped sequence to
+// indices of letters in an ungapped sequence. Positions in
+// the array corresponding to gaps are given a mapping of 0.
+//////////////////////////////////////////////////////////////////////
+
+std::vector<int> GetSequenceMapping(const std::string &s)
+{
+    // ret[i] is the 1-based rank of s[i] among the letters found at
+    // positions 1 and up, or 0 if s[i] is not a letter; s[0] is never
+    // examined and always maps to 0
+
+    std::vector<int> ret(1);
+    int ct = 0;
+    for (size_t i = 1; i < s.length(); i++)
+    {
+        // cast: isalpha() is only defined for unsigned char values (and EOF)
+        const bool is_letter = isalpha(static_cast<unsigned char>(s[i])) != 0;
+        if (is_letter) ++ct;
+        ret.push_back(is_letter ? ct : 0);
+    }
+    return ret;
+}
+
+//////////////////////////////////////////////////////////////////////
+// GetDirName()
+// GetBaseName()
+//
+// Retrieve directory and base file name for a given full path.
+//////////////////////////////////////////////////////////////////////
+
+std::string GetDirName(const std::string &filename)
+{
+    const std::string::size_type cut = filename.find_last_of(DIR_SEPARATOR_CHAR);
+    if (cut == std::string::npos) return std::string("");    // no separator: no directory part
+    const std::string head = filename.substr(0, cut);        // everything before the last separator
+    return head.substr(0, head.find_last_not_of(DIR_SEPARATOR_CHAR) + 1);    // minus any trailing separators
+}
+
+std::string GetBaseName(const std::string &filename)
+{
+    const std::string::size_type cut = filename.find_last_of(DIR_SEPARATOR_CHAR);
+    return ((cut != std::string::npos) ? filename.substr(cut + 1) : filename);    // text after the last separator, or all of it
+}
diff --git a/src/Utilities.hpp b/src/Utilities.hpp
new file mode 100644
index 0000000..7e05001
--- /dev/null
+++ b/src/Utilities.hpp
@@ -0,0 +1,216 @@
+//////////////////////////////////////////////////////////////////////
+// Utilities.hpp
+//////////////////////////////////////////////////////////////////////
+
+#ifndef UTILITIES_HPP
+#define UTILITIES_HPP
+
+#include <algorithm>
+#include <cerrno>
+#include <cmath>
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <map>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <sys/time.h>
+#include <vector>
+
+typedef unsigned char BYTE;
+const char DIR_SEPARATOR_CHAR = '/';
+
+// helper macros: build the "file, line" message prefix used by Assert()
+#define _level_2_str(s) _level_1_str(s) // extra level so that s is macro-expanded before being stringized
+#define _level_1_str(s) #s
+#define __LINESTR__ _level_2_str(__LINE__) // current line number as a string literal
+#define __FL__ "Assertion failed in file \"" __FILE__ "\", line " __LINESTR__
+int _ASSERT_FAILED(const char *fmt, ...); // invoked by Assert() when its test is false
+
+// print error message for a user error and terminate
+void Error(const char *fmt, ...);
+
+// print error message for a user error but do not terminate
+void Warning(const char *fmt, ...);
+
+// print error message for a failed assertion and terminate
+#ifdef NDEBUG
+#define Assert(test,fmt,...)
+#else
+#define Assert(test,fmt,...) (test ? 0 : _ASSERT_FAILED(__FL__ ": " fmt "\n", ## __VA_ARGS__))
+#endif
+
+// attempt to parse a number from the character string given; return
+// true only if no parsing error occurs.
+bool ConvertToNumber(const std::string &s, int &val);
+bool ConvertToNumber(const std::string &s, unsigned int &val);
+bool ConvertToNumber(const std::string &s, long int &val);
+bool ConvertToNumber(const std::string &s, unsigned long int &val);
+#if defined(LLONG_MIN) && defined(LLONG_MAX)
+bool ConvertToNumber(const std::string &s, long long int &val);
+#endif
+#if defined(ULLONG_MAX)
+bool ConvertToNumber(const std::string &s, unsigned long long int &val);
+#endif
+bool ConvertToNumber(const std::string &s, float &val);
+bool ConvertToNumber(const std::string &s, double &val);
+bool ConvertToNumber(const std::string &s, long double &val);
+
+// convert lowercase/uppercase letters in a string to uppercase/lowercase
+std::string ConvertToUpperCase(const std::string &s);
+std::string ConvertToLowerCase(const std::string &s);
+
+// remove whitespace from either end of a string
+std::string Trim(const std::string &s);
+
+// remove gap characters from a string
+std::string RemoveGaps(const std::string &s);
+
+// same as sprintf but returns a string
+std::string SPrintF(const char *fmt, ...);
+
+// write progress message to console (stderr) and return to
+// beginning of line; wipes out any previous message on current line
+void WriteProgressMessage(const std::string &message);
+
+// retrieve system time in seconds past the Epoch
+double GetSystemTime();
+
+// make a directory if one doesn't exist
+void MakeDirectory(const std::string &directory);
+
+// make temporary directory
+std::string MakeTempDirectory();
+
+// return an array whose ith element (i >= 1) is the index of the ith
+// letter in the input string; element 0 is a placeholder set to 0.
+std::vector<int> GetSequencePositions(const std::string &s);
+
+// return an array mapping positions in a gapped sequence to
+// positions in the ungapped sequence (gap positions map to 0)
+std::vector<int> GetSequenceMapping(const std::string &s);
+
+// indicator function: yields 1 when the condition holds and 0 otherwise
+inline int Ind(bool condition){ if (condition) return 1; return 0; }
+
+// struct triple: three-member analogue of std::pair with public fields first, second, third
+template<typename T1, typename T2, typename T3>
+struct triple {
+ T1 first;
+ T2 second;
+ T3 third;
+
+ // constructors: default, member-wise, and copy (defined in Utilities.ipp)
+ triple();
+ triple(const T1 &first, const T2 &second, const T3 &third);
+ triple(const triple &rhs);
+};
+
+// comparators
+template<typename T1, typename T2, typename T3> inline bool operator==(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y);
+template<typename T1, typename T2, typename T3> inline bool operator<(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y);
+template<typename T1, typename T2, typename T3> inline bool operator!=(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y);
+template<typename T1, typename T2, typename T3> inline bool operator>(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y);
+template<typename T1, typename T2, typename T3> inline bool operator<=(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y);
+template<typename T1, typename T2, typename T3> inline bool operator>=(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y);
+
+// utility function for making triples
+template<typename T1, typename T2, typename T3> inline triple<T1,T2,T3> make_triple(T1 first, T2 second, T3 third);
+
+// printing pairs and triples
+template<typename T1, typename T2> std::ostream &operator<<(std::ostream &out, const std::pair<T1,T2> &x);
+template<typename T1, typename T2, typename T3> std::ostream &operator<<(std::ostream &out, const triple<T1,T2,T3> &x);
+
+// math operators
+template<typename T> T Sqrt(const T x);
+template<typename T> T Exp(const T x);
+template<typename T> T Log(const T x);
+template<typename T> T Pow(const T x, const T p);
+template<typename T> T Tanh(const T x);
+template<typename T> T Floor(const T x);
+template<typename T> T Ceil(const T x);
+template<typename T> T Abs(const T x);
+template<typename T> T Sign(const T x);
+template<typename T> T Clip(const T x, const T lower, const T upper);
+
+// standard linear algebra
+template<typename T> T DotProduct(const std::vector<T> &x, const std::vector<T> &y);
+template<typename T> T Norm(const std::vector<T> &x);
+template<typename T> std::vector<T> Sqrt(const std::vector<T> &x);
+template<typename T> std::vector<T> Exp(const std::vector<T> &x);
+template<typename T> std::vector<T> Log(const std::vector<T> &x);
+template<typename T> std::vector<T> Pow(const std::vector<T> &x, const T p);
+template<typename T> std::vector<T> Tanh(const std::vector<T> &x);
+template<typename T> std::vector<T> Abs(const std::vector<T> &x);
+template<typename T> std::vector<T> Sign(const std::vector<T> &x);
+template<typename T, typename P> std::vector<T> Test(const std::vector<T> &x, P pred);
+template<typename T> T Sum(const std::vector<T> &x);
+template<typename T> T Prod(const std::vector<T> &x);
+template<typename T> const std::vector<T> Min(const std::vector<T> &x, const T &y);
+template<typename T> const std::vector<T> Max(const std::vector<T> &x, const T &y);
+template<typename T> const std::vector<T> Clip(const std::vector<T> &x, const T &lower, const T &upper);
+template<typename T> const std::vector<T> Min(const T &x, const std::vector<T> &y);
+template<typename T> const std::vector<T> Max(const T &x, const std::vector<T> &y);
+template<typename T> T Min(const std::vector<T> &x);
+template<typename T> T Max(const std::vector<T> &x);
+template<typename T> int ArgMin(const std::vector<T> &x);
+template<typename T> int ArgMax(const std::vector<T> &x);
+template<typename T> const std::vector<T> operator-(const std::vector<T> &x);
+template<typename T> const std::vector<T> operator*(const std::vector<T> &x, const std::vector<T> &y);
+template<typename T> const std::vector<T> operator/(const std::vector<T> &x, const std::vector<T> &y);
+template<typename T> const std::vector<T> operator+(const std::vector<T> &x, const std::vector<T> &y);
+template<typename T> const std::vector<T> operator-(const std::vector<T> &x, const std::vector<T> &y);
+template<typename T> const std::vector<T> operator*(const std::vector<T> &x, const T &y);
+template<typename T> const std::vector<T> operator/(const std::vector<T> &x, const T &y);
+template<typename T> const std::vector<T> operator+(const std::vector<T> &x, const T &y);
+template<typename T> const std::vector<T> operator-(const std::vector<T> &x, const T &y);
+template<typename T> const std::vector<T> operator*(const T &x, const std::vector<T> &y);
+template<typename T> const std::vector<T> operator/(const T &x, const std::vector<T> &y);
+template<typename T> const std::vector<T> operator+(const T &x, const std::vector<T> &y);
+template<typename T> const std::vector<T> operator-(const T &x, const std::vector<T> &y);
+template<typename T> std::vector<T> &operator*=(std::vector<T> &x, const std::vector<T> &y);
+template<typename T> std::vector<T> &operator/=(std::vector<T> &x, const std::vector<T> &y);
+template<typename T> std::vector<T> &operator+=(std::vector<T> &x, const std::vector<T> &y);
+template<typename T> std::vector<T> &operator-=(std::vector<T> &x, const std::vector<T> &y);
+template<typename T> std::vector<T> &operator*=(std::vector<T> &x, const T &y);
+template<typename T> std::vector<T> &operator/=(std::vector<T> &x, const T &y);
+template<typename T> std::vector<T> &operator+=(std::vector<T> &x, const T &y);
+template<typename T> std::vector<T> &operator-=(std::vector<T> &x, const T &y);
+template<typename T> std::ostream &operator<<(std::ostream &out, const std::vector<T> &x);
+template<typename T, typename U> std::vector<T> ConvertVector(const std::vector<U> &x);
+template<typename T> std::vector<T> Concatenate(const std::vector<T> &u, const std::vector<T> &v);
+template<typename T> std::vector<T> Transpose(const std::vector<T> &m, const int rows, const int cols);
+
+// expand matrix by adding blank rows and columns
+template<class T>
+std::vector<T> ExpandMatrix(const std::vector<T> &mat,
+ const int new_rows,
+ const int new_cols,
+ const std::vector<int> &positions_rows,
+ const std::vector<int> &positions_cols);
+
+// expand vector by adding blank entries
+template<class T>
+std::vector<T> ExpandVector(const std::vector<T> &v,
+ const int new_length,
+ const std::vector<int> &positions);
+
+// check if two nucleotides are complementary (AU, CG, GU)
+inline bool IsComplementary(char c, char d);
+
+// retrieve directory and basename for a given full path
+std::string GetDirName(const std::string &path);
+std::string GetBaseName(const std::string &path);
+
+
+#include "Utilities.ipp"
+
+#endif
diff --git a/src/Utilities.ipp b/src/Utilities.ipp
new file mode 100644
index 0000000..5583892
--- /dev/null
+++ b/src/Utilities.ipp
@@ -0,0 +1,599 @@
+//////////////////////////////////////////////////////////////////////
+// Utilities.ipp
+//////////////////////////////////////////////////////////////////////
+
+template<typename T1, typename T2, typename T3>   // default: value-initialise all three members
+inline triple<T1,T2,T3>::triple()
+    : first(), second(), third()
+{}
+
+template<typename T1, typename T2, typename T3>   // member-wise: copy each argument into its field
+inline triple<T1,T2,T3>::triple(const T1 &a, const T2 &b, const T3 &c)
+    : first(a), second(b), third(c)
+{}
+
+template<typename T1, typename T2, typename T3>   // copy: duplicate the other triple field by field
+inline triple<T1,T2,T3>::triple(const triple<T1,T2,T3> &other)
+    : first(other.first), second(other.second), third(other.third)
+{}
+
+// Equality: two triples are equal when their first, second and third
+// members all compare equal with operator==.
+template<typename T1, typename T2, typename T3>
+inline bool operator==(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y)
+{
+    if (!(x.first == y.first)) return false;
+    if (!(x.second == y.second)) return false;
+    return x.third == y.third;
+}
+
+// Lexicographic ordering: compare first, then second, then third, moving
+// on to the next member only when neither value is less than the other.
+template<typename T1, typename T2, typename T3>
+inline bool operator<(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y)
+{
+    if (x.first < y.first) return true;
+    if (y.first < x.first) return false;
+    if (x.second < y.second) return true;
+    if (y.second < x.second) return false;
+    return x.third < y.third;
+}
+
+template<typename T1, typename T2, typename T3> inline bool operator!=(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y)
+{   // inequality is defined as the negation of operator==
+    if (x == y) return false;
+    return true;
+}
+
+template<typename T1, typename T2, typename T3> inline bool operator>(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y)
+{   // greater-than is operator< with the operands swapped
+    const bool y_before_x = (y < x);
+    return y_before_x;
+}
+
+template<typename T1, typename T2, typename T3> inline bool operator<=(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y)
+{   // x <= y holds exactly when y is not less than x
+    if (y < x) return false;
+    return true;
+}
+
+template<typename T1, typename T2, typename T3> inline bool operator>=(const triple<T1,T2,T3> &x, const triple<T1,T2,T3> &y)
+{   // x >= y holds exactly when x is not less than y
+    if (x < y) return false;
+    return true;
+}
+
+template<typename T1, typename T2, typename T3> inline triple<T1,T2,T3> make_triple(T1 first, T2 second, T3 third)
+{   // convenience factory: the member types are deduced from the arguments
+    triple<T1,T2,T3> made(first, second, third);
+    return made;
+}
+
+// Write a std::pair to the stream in the form "(first,second)".
+template<typename T1, typename T2> std::ostream &operator<<(std::ostream &out, const std::pair<T1,T2> &x)
+{
+    // each insertion hands back the stream, so the chain itself is the return value
+    return out << '(' << x.first << ',' << x.second << ')';
+}
+
+// Write a triple to the stream in the form "(first,second,third)".
+template<typename T1, typename T2, typename T3> std::ostream &operator<<(std::ostream &out, const triple<T1,T2,T3> &x)
+{
+    // each insertion hands back the stream, so the chain itself is the return value
+    return out << '(' << x.first << ',' << x.second << ',' << x.third << ')';
+}
+
+#ifndef sqrtl // fallbacks: each long double math name below is emulated through its double counterpart
+#define sqrtl(x) (static_cast<long double>(sqrt(double(x)))) // the argument is narrowed to double first
+#endif
+#ifndef expl // NOTE(review): #ifndef only detects macros; if these names are real functions the fallbacks still replace them -- confirm intent
+#define expl(x) (static_cast<long double>(exp(double(x))))
+#endif
+#ifndef logl
+#define logl(x) (static_cast<long double>(log(double(x))))
+#endif
+#ifndef powl
+#define powl(x,y) (static_cast<long double>(pow(double(x),double(y))))
+#endif
+#ifndef tanhl
+#define tanhl(x) (static_cast<long double>(tanh(double(x))))
+#endif
+#ifndef floorl
+#define floorl(x) (static_cast<long double>(floor(double(x))))
+#endif
+#ifndef ceill
+#define ceill(x) (static_cast<long double>(ceil(double(x))))
+#endif
+
+template<> inline float Sqrt(const float x) { return std::sqrt(x); } // square root; the std:: overload matching
+template<> inline double Sqrt(const double x) { return std::sqrt(x); } // the argument type is used, so long double is
+template<> inline long double Sqrt(const long double x) { return std::sqrt(x); } // not narrowed by the sqrtl fallback macro
+
+template<> inline float Exp(const float x) { return std::exp(x); } // e raised to x; the std:: overload matching
+template<> inline double Exp(const double x) { return std::exp(x); } // the argument type is used, so long double is
+template<> inline long double Exp(const long double x) { return std::exp(x); } // not narrowed by the expl fallback macro
+
+template<> inline float Log(const float x) { return std::log(x); } // natural logarithm; the std:: overload matching
+template<> inline double Log(const double x) { return std::log(x); } // the argument type is used, so long double is
+template<> inline long double Log(const long double x) { return std::log(x); } // not narrowed by the logl fallback macro
+
+template<> inline float Pow(const float x, const float p) { return std::pow(x,p); } // x raised to p; the std:: overload matching
+template<> inline double Pow(const double x, const double p) { return std::pow(x,p); } // the argument type is used, so long double is
+template<> inline long double Pow(const long double x, const long double p) { return std::pow(x,p); } // not narrowed by the powl fallback macro
+
+template<> inline float Tanh(const float x) { return std::tanh(x); } // hyperbolic tangent; the std:: overload matching
+template<> inline double Tanh(const double x) { return std::tanh(x); } // the argument type is used, so long double is
+template<> inline long double Tanh(const long double x) { return std::tanh(x); } // not narrowed by the tanhl fallback macro
+
+template<> inline float Floor(const float x) { return std::floor(x); } // round towards -infinity; the std:: overload matching
+template<> inline double Floor(const double x) { return std::floor(x); } // the argument type is used, so long double is
+template<> inline long double Floor(const long double x) { return std::floor(x); } // not narrowed by the floorl fallback macro
+
+template<> inline float Ceil(const float x) { return std::ceil(x); } // round towards +infinity; the std:: overload matching
+template<> inline double Ceil(const double x) { return std::ceil(x); } // the argument type is used, so long double is
+template<> inline long double Ceil(const long double x) { return std::ceil(x); } // not narrowed by the ceill fallback macro
+
+template<typename T> inline T Clip(const T x, const T lower, const T upper)
+{   // same as min(max(x, lower), upper): raise x to lower first, then cap the result at upper
+    return (x < lower) ? ((upper < lower) ? upper : lower) : ((upper < x) ? upper : x);
+}
+
+// Sum of the element-wise products x[i] * y[i]; y must supply at least x.size() elements.
+template<typename T> T DotProduct(const std::vector<T> &x, const std::vector<T> &y)
+{
+    T start = 0;
+    return std::inner_product(x.begin(), x.end(), y.begin(), start);
+}
+
+template<typename T> T Abs(const T x)
+{   // absolute value: negate x only when it compares below zero
+    if (x < 0) return -x;
+    return x;
+}
+
+template<typename T> T Sign(const T x)
+{   // sign of x: -1 when x is negative, 1 when it is positive, 0 otherwise
+    if (x < 0) return -1;
+    return 0 < x ? 1 : 0;
+}
+
+template<typename T> T Norm(const std::vector<T> &x)
+{   // Euclidean length: square root of the dot product of x with itself
+    const T squared_length = DotProduct(x, x);
+    return Sqrt(squared_length);
+}
+
+// Element-wise square root: returns a copy of x with Sqrt applied to every entry.
+template<typename T> std::vector<T> Sqrt(const std::vector<T> &x)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = Sqrt(*it);
+    return out;
+}
+
+// Element-wise exponential: returns a copy of x with Exp applied to every entry.
+template<typename T> std::vector<T> Exp(const std::vector<T> &x)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = Exp(*it);
+    return out;
+}
+
+// Element-wise logarithm: returns a copy of x with Log applied to every entry.
+template<typename T> std::vector<T> Log(const std::vector<T> &x)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = Log(*it);
+    return out;
+}
+
+// Element-wise power: returns a copy of x with every entry raised to the exponent p.
+template<typename T> std::vector<T> Pow(const std::vector<T> &x, const T p)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = Pow(*it, p);
+    return out;
+}
+
+// Element-wise hyperbolic tangent: returns a copy of x with Tanh applied to every entry.
+template<typename T> std::vector<T> Tanh(const std::vector<T> &x)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = Tanh(*it);
+    return out;
+}
+
+// Element-wise absolute value: returns a copy of x with Abs applied to every entry.
+template<typename T> std::vector<T> Abs(const std::vector<T> &x)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = Abs(*it);
+    return out;
+}
+
+// Element-wise sign: returns a copy of x with Sign applied to every entry.
+template<typename T> std::vector<T> Sign(const std::vector<T> &x)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = Sign(*it);
+    return out;
+}
+
+template<typename T, typename P> std::vector<T> Test(const std::vector<T> &x, P pred)
+{   // evaluate pred on every element of x: the result holds T(1) where it is true, T(0) elsewhere
+    std::vector<T> flags;
+    flags.reserve(x.size());
+    for (typename std::vector<T>::const_iterator it = x.begin(); it != x.end(); ++it) flags.push_back(pred(*it) ? T(1) : T(0));
+    return flags;
+}
+
+// Add up all elements of x, starting from zero and proceeding front to back
+// (an empty vector therefore sums to 0).
+template<typename T> T Sum(const std::vector<T> &x)
+{
+    T total = 0;
+    return std::accumulate(x.begin(), x.end(), total);
+}
+
+// Multiply all elements of x together; the product of an empty vector is 1.
+template<typename T> T Prod(const std::vector<T> &x)
+{
+    T product = 1;
+    for (typename std::vector<T>::const_iterator it = x.begin(); it != x.end(); ++it) product *= *it;
+    return product;
+}
+
+// Element-wise minimum of a vector and a scalar: every entry larger than y is replaced by y.
+template<typename T> const std::vector<T> Min(const std::vector<T> &x, const T &y)
+{
+    std::vector<T> capped(x);
+    for (typename std::vector<T>::iterator it = capped.begin(); it != capped.end(); ++it) *it = std::min(*it, y);
+    return capped;
+}
+
+// Element-wise maximum of a vector and a scalar: every entry smaller than y is replaced by y.
+template<typename T> const std::vector<T> Max(const std::vector<T> &x, const T &y)
+{
+    std::vector<T> raised(x);
+    for (typename std::vector<T>::iterator it = raised.begin(); it != raised.end(); ++it) *it = std::max(*it, y);
+    return raised;
+}
+
+// Element-wise clipping: returns a copy of x with every entry limited to the range [lower, upper].
+template<typename T> const std::vector<T> Clip(const std::vector<T> &x, const T &lower, const T &upper)
+{
+    Assert(lower <= upper, "Invalid clipping range.");
+    std::vector<T> clipped(x);
+    for (typename std::vector<T>::iterator it = clipped.begin(); it != clipped.end(); ++it) *it = Clip(*it, lower, upper);
+    return clipped;
+}
+
+// Element-wise minimum of a scalar and a vector: every entry of y larger than x is replaced by x.
+template<typename T> const std::vector<T> Min(const T &x, const std::vector<T> &y)
+{
+    std::vector<T> capped(y);
+    for (typename std::vector<T>::iterator it = capped.begin(); it != capped.end(); ++it) *it = std::min(x, *it);
+    return capped;
+}
+
+// Element-wise maximum of a scalar and a vector: every entry of y smaller than x is replaced by x.
+template<typename T> const std::vector<T> Max(const T &x, const std::vector<T> &y)
+{
+    std::vector<T> raised(y);
+    for (typename std::vector<T>::iterator it = raised.begin(); it != raised.end(); ++it) *it = std::max(x, *it);
+    return raised;
+}
+
+// Smallest element of x, found by comparing elements with operator<.
+// The first element is read unconditionally, so x must not be empty.
+template<typename T> T Min(const std::vector<T> &x)
+{
+    // std::min_element keeps the earliest of several equally small elements
+    return *std::min_element(x.begin(), x.end());
+}
+
+// Largest element of x, found by comparing elements with operator<.
+// The first element is read unconditionally, so x must not be empty.
+template<typename T> T Max(const std::vector<T> &x)
+{
+    // std::max_element keeps the earliest of several equally large elements
+    return *std::max_element(x.begin(), x.end());
+}
+
+// Index of the smallest element of x; ties go to the lowest index and
+// an empty vector yields 0.
+template<typename T> int ArgMin(const std::vector<T> &x)
+{
+    // distance from begin() to the first minimal element (0 when begin() == end())
+    return int(std::min_element(x.begin(), x.end()) - x.begin());
+}
+
+// Index of the largest element of x; ties go to the lowest index and
+// an empty vector yields 0.
+template<typename T> int ArgMax(const std::vector<T> &x)
+{
+    // distance from begin() to the first maximal element (0 when begin() == end())
+    return int(std::max_element(x.begin(), x.end()) - x.begin());
+}
+
+// Unary minus: returns a copy of x with every entry negated.
+template<typename T> const std::vector<T> operator-(const std::vector<T> &x)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = -*it;
+    return out;
+}
+
+// Element-wise product of two vectors; y must supply at least x.size() elements.
+template<typename T> const std::vector<T> operator*(const std::vector<T> &x, const std::vector<T> &y)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it *= y[it - out.begin()];
+    return out;
+}
+
+// Element-wise quotient of two vectors; y must supply at least x.size() elements.
+template<typename T> const std::vector<T> operator/(const std::vector<T> &x, const std::vector<T> &y)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it /= y[it - out.begin()];
+    return out;
+}
+
+// Element-wise sum of two vectors; y must supply at least x.size() elements.
+template<typename T> const std::vector<T> operator+(const std::vector<T> &x, const std::vector<T> &y)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it += y[it - out.begin()];
+    return out;
+}
+
+// Element-wise difference of two vectors; y must supply at least x.size() elements.
+template<typename T> const std::vector<T> operator-(const std::vector<T> &x, const std::vector<T> &y)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it -= y[it - out.begin()];
+    return out;
+}
+
+// Vector times scalar: returns a copy of x with every entry multiplied by y.
+template<typename T> const std::vector<T> operator*(const std::vector<T> &x, const T &y)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it *= y;
+    return out;
+}
+
+// Vector divided by scalar: returns a copy of x with every entry divided by y.
+template<typename T> const std::vector<T> operator/(const std::vector<T> &x, const T &y)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it /= y;
+    return out;
+}
+
+// Vector plus scalar: returns a copy of x with y added to every entry.
+template<typename T> const std::vector<T> operator+(const std::vector<T> &x, const T &y)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it += y;
+    return out;
+}
+
+// Vector minus scalar: returns a copy of x with y subtracted from every entry.
+template<typename T> const std::vector<T> operator-(const std::vector<T> &x, const T &y)
+{
+    std::vector<T> out(x);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it -= y;
+    return out;
+}
+
+// Scalar times vector: returns a copy of y with every entry multiplied by x.
+template<typename T> const std::vector<T> operator*(const T &x, const std::vector<T> &y)
+{
+    std::vector<T> out(y);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it *= x;
+    return out;
+}
+
+// Scalar divided by vector: entry i of the result is x / y[i].
+template<typename T> const std::vector<T> operator/(const T &x, const std::vector<T> &y)
+{
+    std::vector<T> out(y);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = x / *it;
+    return out;
+}
+
+// Scalar plus vector: returns a copy of y with x added to every entry.
+template<typename T> const std::vector<T> operator+(const T &x, const std::vector<T> &y)
+{
+    std::vector<T> out(y);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it += x;
+    return out;
+}
+
+// Scalar minus vector: entry i of the result is x - y[i].
+template<typename T> const std::vector<T> operator-(const T &x, const std::vector<T> &y)
+{
+    std::vector<T> out(y);
+    for (typename std::vector<T>::iterator it = out.begin(); it != out.end(); ++it) *it = x - *it;
+    return out;
+}
+
+// In-place element-wise product; y must supply at least x.size() elements. Returns x.
+template<typename T> std::vector<T> &operator*=(std::vector<T> &x, const std::vector<T> &y)
+{
+    for (typename std::vector<T>::iterator it = x.begin(); it != x.end(); ++it) *it *= y[it - x.begin()];
+    return x;
+}
+
+// In-place element-wise quotient; y must supply at least x.size() elements. Returns x.
+template<typename T> std::vector<T> &operator/=(std::vector<T> &x, const std::vector<T> &y)
+{
+    for (typename std::vector<T>::iterator it = x.begin(); it != x.end(); ++it) *it /= y[it - x.begin()];
+    return x;
+}
+
+// In-place element-wise sum; y must supply at least x.size() elements. Returns x.
+template<typename T> std::vector<T> &operator+=(std::vector<T> &x, const std::vector<T> &y)
+{
+    for (typename std::vector<T>::iterator it = x.begin(); it != x.end(); ++it) *it += y[it - x.begin()];
+    return x;
+}
+
+// In-place element-wise difference; y must supply at least x.size() elements. Returns x.
+template<typename T> std::vector<T> &operator-=(std::vector<T> &x, const std::vector<T> &y)
+{
+    for (typename std::vector<T>::iterator it = x.begin(); it != x.end(); ++it) *it -= y[it - x.begin()];
+    return x;
+}
+
+// In-place scaling: multiplies every entry of x by y, front to back. Returns x.
+template<typename T> std::vector<T> &operator*=(std::vector<T> &x, const T &y)
+{
+    for (typename std::vector<T>::iterator it = x.begin(); it != x.end(); ++it) *it *= y;
+    return x;
+}
+
+// In-place division: divides every entry of x by y, front to back. Returns x.
+template<typename T> std::vector<T> &operator/=(std::vector<T> &x, const T &y)
+{
+    for (typename std::vector<T>::iterator it = x.begin(); it != x.end(); ++it) *it /= y;
+    return x;
+}
+
+// In-place shift: adds y to every entry of x, front to back. Returns x.
+template<typename T> std::vector<T> &operator+=(std::vector<T> &x, const T &y)
+{
+    for (typename std::vector<T>::iterator it = x.begin(); it != x.end(); ++it) *it += y;
+    return x;
+}
+
+// In-place shift: subtracts y from every entry of x, front to back. Returns x.
+template<typename T> std::vector<T> &operator-=(std::vector<T> &x, const T &y)
+{
+    for (typename std::vector<T>::iterator it = x.begin(); it != x.end(); ++it) *it -= y;
+    return x;
+}
+
+// Print the elements of x between square brackets, separated by single spaces.
+template<typename T> std::ostream &operator<<(std::ostream &out, const std::vector<T> &x)
+{
+    out << '[';
+    for (typename std::vector<T>::const_iterator it = x.begin(); it != x.end(); ++it)
+        out << (it == x.begin() ? "" : " ") << *it;
+    out << ']';
+    return out;
+}
+
+// Build a vector of T from a vector of U by converting each element in
+// turn; the order and the number of elements are preserved.
+template<typename T, typename U> std::vector<T> ConvertVector(const std::vector<U> &x)
+{
+    std::vector<T> converted;
+    converted.reserve(x.size());
+    converted.insert(converted.end(), x.begin(), x.end());
+    return converted;
+}
+
+// Return a new vector holding the elements of u followed by the elements of v.
+template<typename T> std::vector<T> Concatenate(const std::vector<T> &u, const std::vector<T> &v)
+{
+    std::vector<T> joined(u.begin(), u.end());
+    joined.insert(joined.end(), v.begin(), v.end());
+    return joined;
+}
+
+// Transpose a row-major rows x cols matrix stored flat in m; the result is the row-major cols x rows matrix.
+template<typename T> std::vector<T> Transpose(const std::vector<T> &m, const int rows, const int cols)
+{
+    Assert(rows * cols == int(m.size()), "Dimension mismatch.");
+
+    std::vector<T> flipped(m.size());
+    for (int c = 0; c < cols; c++)
+        for (int r = 0; r < rows; r++)
+            flipped[c*rows+r] = m[r*cols+c];   // source entry (r,c) becomes target entry (c,r)
+
+    return flipped;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ExpandMatrix()
+// Scatter a row-major matrix into a larger new_rows x new_cols matrix: source
+// entry (i,j) lands at (positions_rows[i], positions_cols[j]); the rest stays blank.
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+std::vector<T> ExpandMatrix(const std::vector<T> &mat,
+                            const int new_rows,
+                            const int new_cols,
+                            const std::vector<int> &positions_rows,
+                            const std::vector<int> &positions_cols)
+{
+    Assert(new_rows >= 0, "Invalid length.");
+    Assert(new_cols >= 0, "Invalid length.");
+    Assert(positions_rows.size() > 0, "positions_rows should contain at least one element.");
+    Assert(positions_cols.size() > 0, "positions_cols should contain at least one element.");
+    Assert(positions_rows.size() * positions_cols.size() == mat.size(), "Dimension mismatch.");
+
+    const int cols = int(positions_cols.size());   // row length of the source matrix
+    std::vector<T> res(new_rows * new_cols);       // every entry starts value-initialised
+
+    for (size_t r = 0; r < positions_rows.size(); r++)
+    {
+        Assert(0 <= positions_rows[r] && positions_rows[r] < new_rows, "Index out-of-range.");
+        const int row_base = positions_rows[r] * new_cols;   // flat offset of the target row
+        for (size_t c = 0; c < positions_cols.size(); c++)
+        {
+            Assert(0 <= positions_cols[c] && positions_cols[c] < new_cols, "Index out-of-range.");
+            res[row_base + positions_cols[c]] = mat[r * cols + c];
+        }
+    }
+    return res;
+}
+
+//////////////////////////////////////////////////////////////////////
+// ExpandVector()
+// Scatter v into a longer vector of new_length entries: v[i] is stored at
+// index positions[i]; every other entry stays blank (value-initialised).
+//////////////////////////////////////////////////////////////////////
+
+template<class T>
+std::vector<T> ExpandVector(const std::vector<T> &v,
+                            const int new_length,
+                            const std::vector<int> &positions)
+{
+    Assert(new_length > 0, "Invalid length.");
+    Assert(positions.size() > 0, "positions should contain at least one element.");
+    Assert(positions.size() == v.size(), "Dimension mismatch.");
+
+    std::vector<T> res(new_length);
+    for (size_t k = 0; k != positions.size(); ++k)
+    {
+        const int target = positions[k];   // destination index of v[k]
+        Assert(0 <= target && target < new_length, "Index out-of-range.");
+        res[target] = v[k];
+    }
+
+    return res;
+}
+
+//////////////////////////////////////////////////////////////////////
+// IsComplementary()
+// Check if a base-pairing is one of AU, CG, or GU, in either order;
+// the letters a-z are treated like their upper-case forms.
+//////////////////////////////////////////////////////////////////////
+
+inline bool IsComplementary(char c, char d)
+{
+    if (c >= 'a' && c <= 'z') c -= 'a' - 'A';
+    if (d >= 'a' && d <= 'z') d -= 'a' - 'A';
+    switch (c)   // pick the partner(s) that the first base accepts
+    {
+        case 'A': return d == 'U';
+        case 'C': return d == 'G';
+        case 'G': return d == 'C' || d == 'U';
+        case 'U': return d == 'A' || d == 'G';
+        default:  return false;
+    }
+}
diff --git a/src/contrafold.params.complementary b/src/contrafold.params.complementary
new file mode 100644
index 0000000..90283e9
--- /dev/null
+++ b/src/contrafold.params.complementary
@@ -0,0 +1,708 @@
+base_pair_AA 0
+base_pair_AC 0
+base_pair_AG 0
+base_pair_AU 0.59791199
+base_pair_CC 0
+base_pair_CG 1.544290641
+base_pair_CU 0
+base_pair_GG 0
+base_pair_GU -0.01304754992
+base_pair_UU 0
+terminal_mismatch_AAAA 0
+terminal_mismatch_AAAC 0
+terminal_mismatch_AAAG 0
+terminal_mismatch_AAAU 0
+terminal_mismatch_AACA 0
+terminal_mismatch_AACC 0
+terminal_mismatch_AACG 0
+terminal_mismatch_AACU 0
+terminal_mismatch_AAGA 0
+terminal_mismatch_AAGC 0
+terminal_mismatch_AAGG 0
+terminal_mismatch_AAGU 0
+terminal_mismatch_AAUA 0
+terminal_mismatch_AAUC 0
+terminal_mismatch_AAUG 0
+terminal_mismatch_AAUU 0
+terminal_mismatch_ACAA 0
+terminal_mismatch_ACAC 0
+terminal_mismatch_ACAG 0
+terminal_mismatch_ACAU 0
+terminal_mismatch_ACCA 0
+terminal_mismatch_ACCC 0
+terminal_mismatch_ACCG 0
+terminal_mismatch_ACCU 0
+terminal_mismatch_ACGA 0
+terminal_mismatch_ACGC 0
+terminal_mismatch_ACGG 0
+terminal_mismatch_ACGU 0
+terminal_mismatch_ACUA 0
+terminal_mismatch_ACUC 0
+terminal_mismatch_ACUG 0
+terminal_mismatch_ACUU 0
+terminal_mismatch_AGAA 0
+terminal_mismatch_AGAC 0
+terminal_mismatch_AGAG 0
+terminal_mismatch_AGAU 0
+terminal_mismatch_AGCA 0
+terminal_mismatch_AGCC 0
+terminal_mismatch_AGCG 0
+terminal_mismatch_AGCU 0
+terminal_mismatch_AGGA 0
+terminal_mismatch_AGGC 0
+terminal_mismatch_AGGG 0
+terminal_mismatch_AGGU 0
+terminal_mismatch_AGUA 0
+terminal_mismatch_AGUC 0
+terminal_mismatch_AGUG 0
+terminal_mismatch_AGUU 0
+terminal_mismatch_AUAA -0.184546064
+terminal_mismatch_AUAC -0.1181844187
+terminal_mismatch_AUAG -0.4461469607
+terminal_mismatch_AUAU -0.6175254495
+terminal_mismatch_AUCA 0.004788458708
+terminal_mismatch_AUCC 0.08319395146
+terminal_mismatch_AUCG -0.2249479995
+terminal_mismatch_AUCU -0.3981327204
+terminal_mismatch_AUGA 0.5191110288
+terminal_mismatch_AUGC -0.3524119307
+terminal_mismatch_AUGG -0.4056429433
+terminal_mismatch_AUGU -0.7733932162
+terminal_mismatch_AUUA -0.01574403519
+terminal_mismatch_AUUC 0.268570042
+terminal_mismatch_AUUG -0.0934388741
+terminal_mismatch_AUUU 0.3373711531
+terminal_mismatch_CAAA 0
+terminal_mismatch_CAAC 0
+terminal_mismatch_CAAG 0
+terminal_mismatch_CAAU 0
+terminal_mismatch_CACA 0
+terminal_mismatch_CACC 0
+terminal_mismatch_CACG 0
+terminal_mismatch_CACU 0
+terminal_mismatch_CAGA 0
+terminal_mismatch_CAGC 0
+terminal_mismatch_CAGG 0
+terminal_mismatch_CAGU 0
+terminal_mismatch_CAUA 0
+terminal_mismatch_CAUC 0
+terminal_mismatch_CAUG 0
+terminal_mismatch_CAUU 0
+terminal_mismatch_CCAA 0
+terminal_mismatch_CCAC 0
+terminal_mismatch_CCAG 0
+terminal_mismatch_CCAU 0
+terminal_mismatch_CCCA 0
+terminal_mismatch_CCCC 0
+terminal_mismatch_CCCG 0
+terminal_mismatch_CCCU 0
+terminal_mismatch_CCGA 0
+terminal_mismatch_CCGC 0
+terminal_mismatch_CCGG 0
+terminal_mismatch_CCGU 0
+terminal_mismatch_CCUA 0
+terminal_mismatch_CCUC 0
+terminal_mismatch_CCUG 0
+terminal_mismatch_CCUU 0
+terminal_mismatch_CGAA 0.08386423535
+terminal_mismatch_CGAC -0.2520716816
+terminal_mismatch_CGAG -0.6711841881
+terminal_mismatch_CGAU -0.3816350028
+terminal_mismatch_CGCA 0.1117852189
+terminal_mismatch_CGCC -0.1704393624
+terminal_mismatch_CGCG -0.2179987732
+terminal_mismatch_CGCU -0.459267635
+terminal_mismatch_CGGA 0.8520640313
+terminal_mismatch_CGGC -0.9332488517
+terminal_mismatch_CGGG -0.3289551692
+terminal_mismatch_CGGU -0.7778822056
+terminal_mismatch_CGUA -0.2422339958
+terminal_mismatch_CGUC -0.03780509247
+terminal_mismatch_CGUG -0.4322334143
+terminal_mismatch_CGUU -0.2419976114
+terminal_mismatch_CUAA 0
+terminal_mismatch_CUAC 0
+terminal_mismatch_CUAG 0
+terminal_mismatch_CUAU 0
+terminal_mismatch_CUCA 0
+terminal_mismatch_CUCC 0
+terminal_mismatch_CUCG 0
+terminal_mismatch_CUCU 0
+terminal_mismatch_CUGA 0
+terminal_mismatch_CUGC 0
+terminal_mismatch_CUGG 0
+terminal_mismatch_CUGU 0
+terminal_mismatch_CUUA 0
+terminal_mismatch_CUUC 0
+terminal_mismatch_CUUG 0
+terminal_mismatch_CUUU 0
+terminal_mismatch_GAAA 0
+terminal_mismatch_GAAC 0
+terminal_mismatch_GAAG 0
+terminal_mismatch_GAAU 0
+terminal_mismatch_GACA 0
+terminal_mismatch_GACC 0
+terminal_mismatch_GACG 0
+terminal_mismatch_GACU 0
+terminal_mismatch_GAGA 0
+terminal_mismatch_GAGC 0
+terminal_mismatch_GAGG 0
+terminal_mismatch_GAGU 0
+terminal_mismatch_GAUA 0
+terminal_mismatch_GAUC 0
+terminal_mismatch_GAUG 0
+terminal_mismatch_GAUU 0
+terminal_mismatch_GCAA -0.1703136025
+terminal_mismatch_GCAC -0.09154056357
+terminal_mismatch_GCAG -0.2522413002
+terminal_mismatch_GCAU -0.8520314799
+terminal_mismatch_GCCA 0.04763224188
+terminal_mismatch_GCCC -0.2428654283
+terminal_mismatch_GCCG -0.2079275061
+terminal_mismatch_GCCU -0.1874270053
+terminal_mismatch_GCGA 0.6540033983
+terminal_mismatch_GCGC -0.7823988605
+terminal_mismatch_GCGG 0.1995898255
+terminal_mismatch_GCGU -0.4432169392
+terminal_mismatch_GCUA -0.1736921762
+terminal_mismatch_GCUC 0.288494362
+terminal_mismatch_GCUG -0.01638238057
+terminal_mismatch_GCUU 0.6757988971
+terminal_mismatch_GGAA 0
+terminal_mismatch_GGAC 0
+terminal_mismatch_GGAG 0
+terminal_mismatch_GGAU 0
+terminal_mismatch_GGCA 0
+terminal_mismatch_GGCC 0
+terminal_mismatch_GGCG 0
+terminal_mismatch_GGCU 0
+terminal_mismatch_GGGA 0
+terminal_mismatch_GGGC 0
+terminal_mismatch_GGGG 0
+terminal_mismatch_GGGU 0
+terminal_mismatch_GGUA 0
+terminal_mismatch_GGUC 0
+terminal_mismatch_GGUG 0
+terminal_mismatch_GGUU 0
+terminal_mismatch_GUAA -0.4871607613
+terminal_mismatch_GUAC 0.1105031953
+terminal_mismatch_GUAG 0.363373916
+terminal_mismatch_GUAU -0.6193199348
+terminal_mismatch_GUCA 0.3451056056
+terminal_mismatch_GUCC 0.0314944976
+terminal_mismatch_GUCG -0.3799172956
+terminal_mismatch_GUCU -0.03222973182
+terminal_mismatch_GUGA 0.4948638637
+terminal_mismatch_GUGC -0.2821952552
+terminal_mismatch_GUGG -0.2702227211
+terminal_mismatch_GUGU -0.06658395291
+terminal_mismatch_GUUA -0.4306154451
+terminal_mismatch_GUUC -0.09497863465
+terminal_mismatch_GUUG -0.3130794485
+terminal_mismatch_GUUU -0.2283242981
+terminal_mismatch_UAAA 0.0115363879
+terminal_mismatch_UAAC -0.3923408221
+terminal_mismatch_UAAG 0.05661063599
+terminal_mismatch_UAAU -0.1251485388
+terminal_mismatch_UACA -0.06545074758
+terminal_mismatch_UACC -0.3167200568
+terminal_mismatch_UACG 0.002258383981
+terminal_mismatch_UACU -0.422217724
+terminal_mismatch_UAGA 0.5458416646
+terminal_mismatch_UAGC -0.2085887954
+terminal_mismatch_UAGG -0.1971766062
+terminal_mismatch_UAGU -0.4722410132
+terminal_mismatch_UAUA -0.1779642496
+terminal_mismatch_UAUC 0.1643454344
+terminal_mismatch_UAUG -0.5005617032
+terminal_mismatch_UAUU 0.1333867679
+terminal_mismatch_UCAA 0
+terminal_mismatch_UCAC 0
+terminal_mismatch_UCAG 0
+terminal_mismatch_UCAU 0
+terminal_mismatch_UCCA 0
+terminal_mismatch_UCCC 0
+terminal_mismatch_UCCG 0
+terminal_mismatch_UCCU 0
+terminal_mismatch_UCGA 0
+terminal_mismatch_UCGC 0
+terminal_mismatch_UCGG 0
+terminal_mismatch_UCGU 0
+terminal_mismatch_UCUA 0
+terminal_mismatch_UCUC 0
+terminal_mismatch_UCUG 0
+terminal_mismatch_UCUU 0
+terminal_mismatch_UGAA 0.1218741278
+terminal_mismatch_UGAC 0.1990260141
+terminal_mismatch_UGAG 0.04681893928
+terminal_mismatch_UGAU 0.3256264491
+terminal_mismatch_UGCA 0.1186812326
+terminal_mismatch_UGCC -0.1851065102
+terminal_mismatch_UGCG -0.04311512683
+terminal_mismatch_UGCU -0.6150608139
+terminal_mismatch_UGGA 0.754933218
+terminal_mismatch_UGGC -0.3150708483
+terminal_mismatch_UGGG 0.1569582926
+terminal_mismatch_UGGU -0.514970007
+terminal_mismatch_UGUA -0.2926246029
+terminal_mismatch_UGUC 0.1373068149
+terminal_mismatch_UGUG -0.05422333363
+terminal_mismatch_UGUU 0.03086776921
+terminal_mismatch_UUAA 0
+terminal_mismatch_UUAC 0
+terminal_mismatch_UUAG 0
+terminal_mismatch_UUAU 0
+terminal_mismatch_UUCA 0
+terminal_mismatch_UUCC 0
+terminal_mismatch_UUCG 0
+terminal_mismatch_UUCU 0
+terminal_mismatch_UUGA 0
+terminal_mismatch_UUGC 0
+terminal_mismatch_UUGG 0
+terminal_mismatch_UUGU 0
+terminal_mismatch_UUUA 0
+terminal_mismatch_UUUC 0
+terminal_mismatch_UUUG 0
+terminal_mismatch_UUUU 0
+hairpin_length_at_least_0 -5.993180158
+hairpin_length_at_least_1 -3.108105762
+hairpin_length_at_least_2 0.4168976347
+hairpin_length_at_least_3 2.205419066
+hairpin_length_at_least_4 1.926749692
+hairpin_length_at_least_5 -0.5873245329
+hairpin_length_at_least_6 -0.0827571778
+hairpin_length_at_least_7 0.5783889844
+hairpin_length_at_least_8 -0.7220883372
+hairpin_length_at_least_9 -0.1725874624
+hairpin_length_at_least_10 -0.3025089867
+hairpin_length_at_least_11 -0.0296315939
+hairpin_length_at_least_12 -0.9268995948
+hairpin_length_at_least_13 -0.03157753978
+hairpin_length_at_least_14 -0.1022472101
+hairpin_length_at_least_15 0.1901407346
+hairpin_length_at_least_16 -0.09280909826
+hairpin_length_at_least_17 0.1690448408
+hairpin_length_at_least_18 -0.08172566471
+hairpin_length_at_least_19 -0.3445939031
+hairpin_length_at_least_20 -0.109150294
+hairpin_length_at_least_21 -0.2903523693
+hairpin_length_at_least_22 -0.3393713667
+hairpin_length_at_least_23 -0.1915364117
+hairpin_length_at_least_24 -0.05019209379
+hairpin_length_at_least_25 -0.03874620924
+hairpin_length_at_least_26 0.04751470752
+hairpin_length_at_least_27 0.06744321926
+hairpin_length_at_least_28 0.09721875726
+hairpin_length_at_least_29 0.1673131733
+hairpin_length_at_least_30 0.2329937249
+internal_explicit_1_1 -0.1754591076
+internal_explicit_1_2 0.03083787104
+internal_explicit_1_3 -0.171565435
+internal_explicit_1_4 -0.2294680983
+internal_explicit_2_2 -0.1304072693
+internal_explicit_2_3 -0.07730329553
+internal_explicit_2_4 0.2782767264
+internal_explicit_3_3 -0.02898949617
+internal_explicit_3_4 0.3112350694
+internal_explicit_4_4 -0.3226348245
+bulge_length_at_least_1 -2.399548472
+bulge_length_at_least_2 -0.8945183117
+bulge_length_at_least_3 -0.9088550909
+bulge_length_at_least_4 -0.8412474755
+bulge_length_at_least_5 -0.4365479343
+bulge_length_at_least_6 -0.5699187801
+bulge_length_at_least_7 0.2002834224
+bulge_length_at_least_8 0.7538761358
+bulge_length_at_least_9 -0.6045045455
+bulge_length_at_least_10 -0.7200948098
+bulge_length_at_least_11 -0.5136721921
+bulge_length_at_least_12 -0.3614726679
+bulge_length_at_least_13 -0.2614454392
+bulge_length_at_least_14 -0.1593926893
+bulge_length_at_least_15 -0.08624668281
+bulge_length_at_least_16 -0.03107090996
+bulge_length_at_least_17 -0.01097222032
+bulge_length_at_least_18 0.03001220283
+bulge_length_at_least_19 0.04759123789
+bulge_length_at_least_20 -0.04296172065
+bulge_length_at_least_21 -0.01791899662
+bulge_length_at_least_22 -0.07800551522
+bulge_length_at_least_23 -0.0709932643
+bulge_length_at_least_24 -0.05767952896
+bulge_length_at_least_25 -0.04633794681
+bulge_length_at_least_26 -0.03559420456
+bulge_length_at_least_27 -0.02674934394
+bulge_length_at_least_28 -0.01818957972
+bulge_length_at_least_29 -0.01052300732
+bulge_length_at_least_30 -0.005153626846
+internal_length_at_least_2 -0.429061443
+internal_length_at_least_3 -0.3532111501
+internal_length_at_least_4 -0.3963797535
+internal_length_at_least_5 -0.3111199175
+internal_length_at_least_6 -0.2551945472
+internal_length_at_least_7 -0.05149116898
+internal_length_at_least_8 -0.04319002407
+internal_length_at_least_9 0.001985489485
+internal_length_at_least_10 -0.1761513136
+internal_length_at_least_11 -0.2639686207
+internal_length_at_least_12 -0.3460613577
+internal_length_at_least_13 -0.2926603079
+internal_length_at_least_14 -0.03624250307
+internal_length_at_least_15 -0.1199953761
+internal_length_at_least_16 -0.04354771926
+internal_length_at_least_17 -0.08209293135
+internal_length_at_least_18 -0.007113226038
+internal_length_at_least_19 0.02354824852
+internal_length_at_least_20 0.03066973571
+internal_length_at_least_21 -0.06618241094
+internal_length_at_least_22 -0.1316092383
+internal_length_at_least_23 -0.1407995514
+internal_length_at_least_24 -0.06600291862
+internal_length_at_least_25 -0.07779204744
+internal_length_at_least_26 -0.05084201265
+internal_length_at_least_27 -0.04139875601
+internal_length_at_least_28 0.003276583405
+internal_length_at_least_29 0.00592458284
+internal_length_at_least_30 0.006875738004
+internal_symmetric_length_at_least_1 -0.5467082599
+internal_symmetric_length_at_least_2 -0.3854701647
+internal_symmetric_length_at_least_3 -0.2588466401
+internal_symmetric_length_at_least_4 -0.2340836745
+internal_symmetric_length_at_least_5 0.1450577765
+internal_symmetric_length_at_least_6 -0.6562932515
+internal_symmetric_length_at_least_7 -0.3021088369
+internal_symmetric_length_at_least_8 -0.03032275267
+internal_symmetric_length_at_least_9 -0.3517944058
+internal_symmetric_length_at_least_10 -0.2159132506
+internal_symmetric_length_at_least_11 -0.1228270454
+internal_symmetric_length_at_least_12 -0.1552208595
+internal_symmetric_length_at_least_13 -0.08541120743
+internal_symmetric_length_at_least_14 -0.04592109799
+internal_symmetric_length_at_least_15 -0.02232234236
+internal_asymmetry_at_least_1 -2.105646719
+internal_asymmetry_at_least_2 -0.5520140431
+internal_asymmetry_at_least_3 -0.577070767
+internal_asymmetry_at_least_4 -0.6136667847
+internal_asymmetry_at_least_5 -0.3057156841
+internal_asymmetry_at_least_6 -0.1155052001
+internal_asymmetry_at_least_7 -0.2105612231
+internal_asymmetry_at_least_8 -0.314574313
+internal_asymmetry_at_least_9 -0.3148961681
+internal_asymmetry_at_least_10 -0.09018189492
+internal_asymmetry_at_least_11 -0.2200026794
+internal_asymmetry_at_least_12 -0.1406483243
+internal_asymmetry_at_least_13 -0.2162411259
+internal_asymmetry_at_least_14 -0.1725531435
+internal_asymmetry_at_least_15 -0.1558911866
+internal_asymmetry_at_least_16 -0.1040858663
+internal_asymmetry_at_least_17 -0.06967684228
+internal_asymmetry_at_least_18 -0.04105977494
+internal_asymmetry_at_least_19 -0.01570624316
+internal_asymmetry_at_least_20 0.01382000639
+internal_asymmetry_at_least_21 0.04131988563
+internal_asymmetry_at_least_22 0.0359418595
+internal_asymmetry_at_least_23 0.02822186282
+internal_asymmetry_at_least_24 0.01636585874
+internal_asymmetry_at_least_25 0.02550056175
+internal_asymmetry_at_least_26 0.03348032793
+internal_asymmetry_at_least_27 0.03971924412
+internal_asymmetry_at_least_28 -0.002545113932
+bulge_0x1_nucleotides_A -0.1216861662
+bulge_0x1_nucleotides_C -0.07111241127
+bulge_0x1_nucleotides_G 0.008947026647
+bulge_0x1_nucleotides_U -0.002685763742
+internal_1x1_nucleotides_AA 0.2944404686
+internal_1x1_nucleotides_AC 0.08641360967
+internal_1x1_nucleotides_AG -0.3664197228
+internal_1x1_nucleotides_AU -0.2053107048
+internal_1x1_nucleotides_CC -0.1582543624
+internal_1x1_nucleotides_CG 0.4175273724
+internal_1x1_nucleotides_CU 0.1368762582
+internal_1x1_nucleotides_GG -0.1193514754
+internal_1x1_nucleotides_GU -0.4188101413
+internal_1x1_nucleotides_UU 0.147140653
+helix_stacking_AAAA 0
+helix_stacking_AAAC 0
+helix_stacking_AAAG 0
+helix_stacking_AAAU 0
+helix_stacking_AACA 0
+helix_stacking_AACC 0
+helix_stacking_AACG 0
+helix_stacking_AACU 0
+helix_stacking_AAGA 0
+helix_stacking_AAGC 0
+helix_stacking_AAGG 0
+helix_stacking_AAGU 0
+helix_stacking_AAUA 0
+helix_stacking_AAUC 0
+helix_stacking_AAUG 0
+helix_stacking_AAUU 0
+helix_stacking_ACAC 0
+helix_stacking_ACAG 0
+helix_stacking_ACAU 0
+helix_stacking_ACCA 0
+helix_stacking_ACCC 0
+helix_stacking_ACCG 0
+helix_stacking_ACCU 0
+helix_stacking_ACGA 0
+helix_stacking_ACGC 0
+helix_stacking_ACGG 0
+helix_stacking_ACGU 0
+helix_stacking_ACUA 0
+helix_stacking_ACUC 0
+helix_stacking_ACUG 0
+helix_stacking_ACUU 0
+helix_stacking_AGAC 0
+helix_stacking_AGAG 0
+helix_stacking_AGAU 0
+helix_stacking_AGCC 0
+helix_stacking_AGCG 0
+helix_stacking_AGCU 0
+helix_stacking_AGGA 0
+helix_stacking_AGGC 0
+helix_stacking_AGGG 0
+helix_stacking_AGGU 0
+helix_stacking_AGUA 0
+helix_stacking_AGUC 0
+helix_stacking_AGUG 0
+helix_stacking_AGUU 0
+helix_stacking_AUAC 0
+helix_stacking_AUAG 0
+helix_stacking_AUAU 0.1482005248
+helix_stacking_AUCC 0
+helix_stacking_AUCG 0.4343497127
+helix_stacking_AUCU 0
+helix_stacking_AUGC 0.7079642577
+helix_stacking_AUGG 0
+helix_stacking_AUGU -0.1010777582
+helix_stacking_AUUA 0.243256656
+helix_stacking_AUUC 0
+helix_stacking_AUUG 0.1623654243
+helix_stacking_AUUU 0
+helix_stacking_CAAC 0
+helix_stacking_CAAG 0
+helix_stacking_CAAU 0
+helix_stacking_CACC 0
+helix_stacking_CACG 0
+helix_stacking_CACU 0
+helix_stacking_CAGC 0
+helix_stacking_CAGG 0
+helix_stacking_CAGU 0
+helix_stacking_CAUC 0
+helix_stacking_CAUG 0
+helix_stacking_CAUU 0
+helix_stacking_CCAG 0
+helix_stacking_CCAU 0
+helix_stacking_CCCC 0
+helix_stacking_CCCG 0
+helix_stacking_CCCU 0
+helix_stacking_CCGC 0
+helix_stacking_CCGG 0
+helix_stacking_CCGU 0
+helix_stacking_CCUC 0
+helix_stacking_CCUG 0
+helix_stacking_CCUU 0
+helix_stacking_CGAG 0
+helix_stacking_CGAU 0.4878707793
+helix_stacking_CGCG 0.8481320247
+helix_stacking_CGCU 0
+helix_stacking_CGGC 0.4784248478
+helix_stacking_CGGG 0
+helix_stacking_CGGU -0.1811268205
+helix_stacking_CGUC 0
+helix_stacking_CGUG 0.4849351028
+helix_stacking_CGUU 0
+helix_stacking_CUAG 0
+helix_stacking_CUAU 0
+helix_stacking_CUCG 0
+helix_stacking_CUCU 0
+helix_stacking_CUGG 0
+helix_stacking_CUGU 0
+helix_stacking_CUUC 0
+helix_stacking_CUUG 0
+helix_stacking_CUUU 0
+helix_stacking_GAAG 0
+helix_stacking_GAAU 0
+helix_stacking_GACG 0
+helix_stacking_GACU 0
+helix_stacking_GAGG 0
+helix_stacking_GAGU 0
+helix_stacking_GAUG 0
+helix_stacking_GAUU 0
+helix_stacking_GCAU 0.5551785831
+helix_stacking_GCCG 0.5008324248
+helix_stacking_GCCU 0
+helix_stacking_GCGG 0
+helix_stacking_GCGU 0.2165962476
+helix_stacking_GCUG 0.4864603589
+helix_stacking_GCUU 0
+helix_stacking_GGAU 0
+helix_stacking_GGCU 0
+helix_stacking_GGGG 0
+helix_stacking_GGGU 0
+helix_stacking_GGUG 0
+helix_stacking_GGUU 0
+helix_stacking_GUAU -0.04665365028
+helix_stacking_GUCU 0
+helix_stacking_GUGU 0.1833447295
+helix_stacking_GUUG -0.2858970755
+helix_stacking_GUUU 0
+helix_stacking_UAAU 0.3897593783
+helix_stacking_UACU 0
+helix_stacking_UAGU -0.1157333764
+helix_stacking_UAUU 0
+helix_stacking_UCCU 0
+helix_stacking_UCGU 0
+helix_stacking_UCUU 0
+helix_stacking_UGGU 0.120296538
+helix_stacking_UGUU 0
+helix_stacking_UUUU 0
+helix_closing_AA 0
+helix_closing_AC 0
+helix_closing_AG 0
+helix_closing_AU -0.9770893163
+helix_closing_CA 0
+helix_closing_CC 0
+helix_closing_CG -0.4574650937
+helix_closing_CU 0
+helix_closing_GA 0
+helix_closing_GC -0.8265995623
+helix_closing_GG 0
+helix_closing_GU -1.051678928
+helix_closing_UA -0.9246140521
+helix_closing_UC 0
+helix_closing_UG -0.3698708172
+helix_closing_UU 0
+multi_base -1.199055076
+multi_unpaired -0.1983300391
+multi_paired -0.9253883752
+dangle_left_AAA 0
+dangle_left_AAC 0
+dangle_left_AAG 0
+dangle_left_AAU 0
+dangle_left_ACA 0
+dangle_left_ACC 0
+dangle_left_ACG 0
+dangle_left_ACU 0
+dangle_left_AGA 0
+dangle_left_AGC 0
+dangle_left_AGG 0
+dangle_left_AGU 0
+dangle_left_AUA -0.1251037681
+dangle_left_AUC 0.0441606708
+dangle_left_AUG -0.02541879082
+dangle_left_AUU 0.00785098466
+dangle_left_CAA 0
+dangle_left_CAC 0
+dangle_left_CAG 0
+dangle_left_CAU 0
+dangle_left_CCA 0
+dangle_left_CCC 0
+dangle_left_CCG 0
+dangle_left_CCU 0
+dangle_left_CGA 0.07224381372
+dangle_left_CGC 0.05279281874
+dangle_left_CGG 0.1009554299
+dangle_left_CGU -0.1515059013
+dangle_left_CUA 0
+dangle_left_CUC 0
+dangle_left_CUG 0
+dangle_left_CUU 0
+dangle_left_GAA 0
+dangle_left_GAC 0
+dangle_left_GAG 0
+dangle_left_GAU 0
+dangle_left_GCA -0.1829535099
+dangle_left_GCC 0.03393000394
+dangle_left_GCG 0.1335339061
+dangle_left_GCU -0.1604274506
+dangle_left_GGA 0
+dangle_left_GGC 0
+dangle_left_GGG 0
+dangle_left_GGU 0
+dangle_left_GUA -0.06517511341
+dangle_left_GUC -0.04250882422
+dangle_left_GUG 0.02875971806
+dangle_left_GUU -0.04359727428
+dangle_left_UAA -0.03373847659
+dangle_left_UAC -0.005070324324
+dangle_left_UAG -0.1186861149
+dangle_left_UAU -0.01162357727
+dangle_left_UCA 0
+dangle_left_UCC 0
+dangle_left_UCG 0
+dangle_left_UCU 0
+dangle_left_UGA -0.08047139148
+dangle_left_UGC 0.001608000669
+dangle_left_UGG 0.1016272216
+dangle_left_UGU -0.09200842832
+dangle_left_UUA 0
+dangle_left_UUC 0
+dangle_left_UUG 0
+dangle_left_UUU 0
+dangle_right_AAA 0
+dangle_right_AAC 0
+dangle_right_AAG 0
+dangle_right_AAU 0
+dangle_right_ACA 0
+dangle_right_ACC 0
+dangle_right_ACG 0
+dangle_right_ACU 0
+dangle_right_AGA 0
+dangle_right_AGC 0
+dangle_right_AGG 0
+dangle_right_AGU 0
+dangle_right_AUA 0.03232578201
+dangle_right_AUC -0.09096819493
+dangle_right_AUG -0.0740750973
+dangle_right_AUU -0.01621157379
+dangle_right_CAA 0
+dangle_right_CAC 0
+dangle_right_CAG 0
+dangle_right_CAU 0
+dangle_right_CCA 0
+dangle_right_CCC 0
+dangle_right_CCG 0
+dangle_right_CCU 0
+dangle_right_CGA 0.2133964379
+dangle_right_CGC -0.06234810991
+dangle_right_CGG -0.07008531041
+dangle_right_CGU -0.2141912285
+dangle_right_CUA 0
+dangle_right_CUC 0
+dangle_right_CUG 0
+dangle_right_CUU 0
+dangle_right_GAA 0
+dangle_right_GAC 0
+dangle_right_GAG 0
+dangle_right_GAU 0
+dangle_right_GCA 0.01581957549
+dangle_right_GCC 0.005644320058
+dangle_right_GCG -0.00943297687
+dangle_right_GCU -0.2597793095
+dangle_right_GGA 0
+dangle_right_GGC 0
+dangle_right_GGG 0
+dangle_right_GGU 0
+dangle_right_GUA -0.04480271781
+dangle_right_GUC -0.07321213002
+dangle_right_GUG 0.01270494867
+dangle_right_GUU -0.05717033985
+dangle_right_UAA -0.1631918513
+dangle_right_UAC 0.06769304994
+dangle_right_UAG -0.08789074414
+dangle_right_UAU -0.05525570007
+dangle_right_UCA 0
+dangle_right_UCC 0
+dangle_right_UCG 0
+dangle_right_UCU 0
+dangle_right_UGA 0.04105458185
+dangle_right_UGC -0.008136642572
+dangle_right_UGG -0.03808592022
+dangle_right_UGU -0.08629373429
+dangle_right_UUA 0
+dangle_right_UUC 0
+dangle_right_UUG 0
+dangle_right_UUU 0
+external_unpaired -0.00972883093
+external_paired -0.0009674111431
diff --git a/src/contrafold.params.noncomplementary b/src/contrafold.params.noncomplementary
new file mode 100644
index 0000000..5fa2226
--- /dev/null
+++ b/src/contrafold.params.noncomplementary
@@ -0,0 +1,708 @@
+base_pair_AA -1.455381052
+base_pair_AC -1.681172954
+base_pair_AG -1.47494575
+base_pair_AU -0.08766530555
+base_pair_CC -1.452235075
+base_pair_CG 0.840264982
+base_pair_CU -1.745991973
+base_pair_GG -1.303406306
+base_pair_GU -0.3219267076
+base_pair_UU -1.119676882
+terminal_mismatch_AAAA 0.03548650731
+terminal_mismatch_AAAC -0.1530635317
+terminal_mismatch_AAAG -0.1376895893
+terminal_mismatch_AAAU -0.09760810471
+terminal_mismatch_AACA -0.09015626933
+terminal_mismatch_AACC -0.01734775691
+terminal_mismatch_AACG -0.05119129104
+terminal_mismatch_AACU -0.02762762058
+terminal_mismatch_AAGA 0.1157343804
+terminal_mismatch_AAGC 0.04805456983
+terminal_mismatch_AAGG -0.1768981931
+terminal_mismatch_AAGU -0.04130010973
+terminal_mismatch_AAUA -0.05642123443
+terminal_mismatch_AAUC 0.04026275765
+terminal_mismatch_AAUG -0.113140336
+terminal_mismatch_AAUU 0.2857400342
+terminal_mismatch_ACAA -0.03174949786
+terminal_mismatch_ACAC -0.05584099215
+terminal_mismatch_ACAG -0.1095104674
+terminal_mismatch_ACAU 0.0788614779
+terminal_mismatch_ACCA 0.2040702031
+terminal_mismatch_ACCC 0.02426094662
+terminal_mismatch_ACCG -0.06732210613
+terminal_mismatch_ACCU -0.03376257973
+terminal_mismatch_ACGA 0.01447581865
+terminal_mismatch_ACGC -0.01816554143
+terminal_mismatch_ACGG -0.01232449334
+terminal_mismatch_ACGU 0.02205380402
+terminal_mismatch_ACUA -0.09543463449
+terminal_mismatch_ACUC 0.02494558334
+terminal_mismatch_ACUG -0.05188159098
+terminal_mismatch_ACUU -0.09276149434
+terminal_mismatch_AGAA -0.08482318485
+terminal_mismatch_AGAC -0.0794464582
+terminal_mismatch_AGAG -0.1037895444
+terminal_mismatch_AGAU -0.09496046339
+terminal_mismatch_AGCA -0.04202348127
+terminal_mismatch_AGCC 0.04341317385
+terminal_mismatch_AGCG 0.2132451101
+terminal_mismatch_AGCU 0.1819992194
+terminal_mismatch_AGGA -0.1623879358
+terminal_mismatch_AGGC -0.08883186313
+terminal_mismatch_AGGG -0.08290758092
+terminal_mismatch_AGGU 0.05949558674
+terminal_mismatch_AGUA -0.06117321899
+terminal_mismatch_AGUC -0.06128521785
+terminal_mismatch_AGUG -0.1054431644
+terminal_mismatch_AGUU 0.06949356391
+terminal_mismatch_AUAA -0.00458510664
+terminal_mismatch_AUAC -0.07878887429
+terminal_mismatch_AUAG -0.02619268423
+terminal_mismatch_AUAU -0.3376671294
+terminal_mismatch_AUCA 0.1407209183
+terminal_mismatch_AUCC 0.1963991331
+terminal_mismatch_AUCG 0.09828889449
+terminal_mismatch_AUCU -0.3588639465
+terminal_mismatch_AUGA 0.4005052091
+terminal_mismatch_AUGC 0.01781958322
+terminal_mismatch_AUGG 0.04946655024
+terminal_mismatch_AUGU -0.28478324
+terminal_mismatch_AUUA 0.008263168506
+terminal_mismatch_AUUC 0.1196292682
+terminal_mismatch_AUUG -0.1635291659
+terminal_mismatch_AUUU 0.2247224398
+terminal_mismatch_CAAA -0.1898713443
+terminal_mismatch_CAAC -0.03335667292
+terminal_mismatch_CAAG 0.08723167906
+terminal_mismatch_CAAU -0.07172816945
+terminal_mismatch_CACA -0.02198504015
+terminal_mismatch_CACC -0.05537866644
+terminal_mismatch_CACG -0.09701470969
+terminal_mismatch_CACU -0.0890545223
+terminal_mismatch_CAGA 0.1678518209
+terminal_mismatch_CAGC -0.06352318718
+terminal_mismatch_CAGG 0.0618238264
+terminal_mismatch_CAGU -0.04902670029
+terminal_mismatch_CAUA -0.06131882561
+terminal_mismatch_CAUC -0.0792933062
+terminal_mismatch_CAUG -0.1723035626
+terminal_mismatch_CAUU 0.1880000212
+terminal_mismatch_CCAA -0.0017250768
+terminal_mismatch_CCAC -0.1354308286
+terminal_mismatch_CCAG 0.1624126758
+terminal_mismatch_CCAU -0.06927944251
+terminal_mismatch_CCCA -0.153728639
+terminal_mismatch_CCCC -0.0734554444
+terminal_mismatch_CCCG 0.006861414241
+terminal_mismatch_CCCU -0.07739617052
+terminal_mismatch_CCGA 0.1364828673
+terminal_mismatch_CCGC -0.0516673223
+terminal_mismatch_CCGG 0.2693659476
+terminal_mismatch_CCGU 0.1065688311
+terminal_mismatch_CCUA -0.102127533
+terminal_mismatch_CCUC -0.04706919179
+terminal_mismatch_CCUG 0.04531457991
+terminal_mismatch_CCUU -0.1436397983
+terminal_mismatch_CGAA 0.3029696819
+terminal_mismatch_CGAC -0.09414037153
+terminal_mismatch_CGAG -0.1515946813
+terminal_mismatch_CGAU -0.2189502915
+terminal_mismatch_CGCA 0.2069876839
+terminal_mismatch_CGCC 0.02945773892
+terminal_mismatch_CGCG 0.1520809123
+terminal_mismatch_CGCU 0.2517091925
+terminal_mismatch_CGGA 0.4215461402
+terminal_mismatch_CGGC -0.3878585773
+terminal_mismatch_CGGG 0.1308663325
+terminal_mismatch_CGGU -0.4961206727
+terminal_mismatch_CGUA -0.2549457084
+terminal_mismatch_CGUC -0.1386448896
+terminal_mismatch_CGUG -0.1523039446
+terminal_mismatch_CGUU 0.08793909966
+terminal_mismatch_CUAA 0.06387837984
+terminal_mismatch_CUAC -0.07812174584
+terminal_mismatch_CUAG 0.1052978704
+terminal_mismatch_CUAU -0.0824793182
+terminal_mismatch_CUCA -0.07458530212
+terminal_mismatch_CUCC -0.1149551661
+terminal_mismatch_CUCG -0.07463817896
+terminal_mismatch_CUCU 0.003704199146
+terminal_mismatch_CUGA -0.05614849468
+terminal_mismatch_CUGC -0.07976137157
+terminal_mismatch_CUGG 0.05290883652
+terminal_mismatch_CUGU -0.09613211571
+terminal_mismatch_CUUA 0.1260507476
+terminal_mismatch_CUUC 0.05963271555
+terminal_mismatch_CUUG 0.03250637507
+terminal_mismatch_CUUU -0.05500743975
+terminal_mismatch_GAAA 0.3319532212
+terminal_mismatch_GAAC -0.02325408406
+terminal_mismatch_GAAG -0.07888481876
+terminal_mismatch_GAAU -0.0491212286
+terminal_mismatch_GACA -0.1480164013
+terminal_mismatch_GACC 0.09719881598
+terminal_mismatch_GACG -0.08194170058
+terminal_mismatch_GACU -0.03071808267
+terminal_mismatch_GAGA 0.1596878404
+terminal_mismatch_GAGC -0.03130712334
+terminal_mismatch_GAGG -0.1694026596
+terminal_mismatch_GAGU -0.02930651499
+terminal_mismatch_GAUA -0.0635010819
+terminal_mismatch_GAUC -0.02132569977
+terminal_mismatch_GAUG 0.003164806792
+terminal_mismatch_GAUU 0.04144462938
+terminal_mismatch_GCAA -0.01074565221
+terminal_mismatch_GCAC -0.2344372673
+terminal_mismatch_GCAG 0.123536825
+terminal_mismatch_GCAU -0.3240115044
+terminal_mismatch_GCCA -0.2463240629
+terminal_mismatch_GCCC 0.08129935832
+terminal_mismatch_GCCG -0.3418477741
+terminal_mismatch_GCCU 0.1141264232
+terminal_mismatch_GCGA 0.4502405743
+terminal_mismatch_GCGC -0.1295241897
+terminal_mismatch_GCGG 0.197683769
+terminal_mismatch_GCGU -0.5725821693
+terminal_mismatch_GCUA -0.2527075422
+terminal_mismatch_GCUC 0.1861584031
+terminal_mismatch_GCUG 0.1973216517
+terminal_mismatch_GCUU 0.5585036196
+terminal_mismatch_GGAA -0.04470277309
+terminal_mismatch_GGAC -0.01724329831
+terminal_mismatch_GGAG -0.1217482982
+terminal_mismatch_GGAU -0.1684999018
+terminal_mismatch_GGCA 0.02801781956
+terminal_mismatch_GGCC -0.01674223925
+terminal_mismatch_GGCG -0.03504232734
+terminal_mismatch_GGCU -0.03276120569
+terminal_mismatch_GGGA -0.0133016361
+terminal_mismatch_GGGC 0.01047297248
+terminal_mismatch_GGGG 0.3582150361
+terminal_mismatch_GGGU -0.03010618888
+terminal_mismatch_GGUA -0.1529902536
+terminal_mismatch_GGUC -0.07708990024
+terminal_mismatch_GGUG -0.02019770319
+terminal_mismatch_GGUU 0.05855880277
+terminal_mismatch_GUAA -0.08257997955
+terminal_mismatch_GUAC 0.1861922864
+terminal_mismatch_GUAG 0.03008963566
+terminal_mismatch_GUAU -0.3405347249
+terminal_mismatch_GUCA 0.3747115441
+terminal_mismatch_GUCC 0.192335074
+terminal_mismatch_GUCG -0.1968330458
+terminal_mismatch_GUCU 0.07064619539
+terminal_mismatch_GUGA -0.1505860398
+terminal_mismatch_GUGC -0.131095968
+terminal_mismatch_GUGG 0.06882613182
+terminal_mismatch_GUGU -0.03101679698
+terminal_mismatch_GUUA -0.08824004968
+terminal_mismatch_GUUC 0.1283837462
+terminal_mismatch_GUUG -0.138559096
+terminal_mismatch_GUUU -0.0300234821
+terminal_mismatch_UAAA 0.02283082369
+terminal_mismatch_UAAC -0.1983025729
+terminal_mismatch_UAAG 0.09746999761
+terminal_mismatch_UAAU -0.1433175754
+terminal_mismatch_UACA 0.1996635373
+terminal_mismatch_UACC 0.1225010573
+terminal_mismatch_UACG 0.2514404349
+terminal_mismatch_UACU -0.04351517952
+terminal_mismatch_UAGA 0.2185566232
+terminal_mismatch_UAGC -0.006725144059
+terminal_mismatch_UAGG -0.04907016091
+terminal_mismatch_UAGU -0.2602507314
+terminal_mismatch_UAUA 0.02185572153
+terminal_mismatch_UAUC 0.1683684793
+terminal_mismatch_UAUG -0.197115384
+terminal_mismatch_UAUU 0.09991604638
+terminal_mismatch_UCAA 0.04754397008
+terminal_mismatch_UCAC -0.1076421324
+terminal_mismatch_UCAG 0.000546295272
+terminal_mismatch_UCAU -0.09075003872
+terminal_mismatch_UCCA -0.009929594847
+terminal_mismatch_UCCC 0.08625049436
+terminal_mismatch_UCCG 0.01016387199
+terminal_mismatch_UCCU -0.03884690353
+terminal_mismatch_UCGA 0.07937791677
+terminal_mismatch_UCGC 0.0733326335
+terminal_mismatch_UCGG -0.05794509908
+terminal_mismatch_UCGU -0.06967546609
+terminal_mismatch_UCUA -0.01744925571
+terminal_mismatch_UCUC -0.154353948
+terminal_mismatch_UCUG -0.09646566966
+terminal_mismatch_UCUU 0.1799949698
+terminal_mismatch_UGAA 0.2216946906
+terminal_mismatch_UGAC 0.2816462956
+terminal_mismatch_UGAG 0.3821923258
+terminal_mismatch_UGAU -0.1799939484
+terminal_mismatch_UGCA -0.02591640495
+terminal_mismatch_UGCC 0.09656780653
+terminal_mismatch_UGCG 0.1574431837
+terminal_mismatch_UGCU -0.1959233487
+terminal_mismatch_UGGA 0.4898105659
+terminal_mismatch_UGGC -0.3450962473
+terminal_mismatch_UGGG -0.01842665281
+terminal_mismatch_UGGU -0.3829115707
+terminal_mismatch_UGUA -0.3351997341
+terminal_mismatch_UGUC 0.1524349141
+terminal_mismatch_UGUG -0.1513850177
+terminal_mismatch_UGUU 0.04317792483
+terminal_mismatch_UUAA 0.02769865739
+terminal_mismatch_UUAC -0.09165961571
+terminal_mismatch_UUAG 0.06675012635
+terminal_mismatch_UUAU -0.1342800896
+terminal_mismatch_UUCA -0.02187885406
+terminal_mismatch_UUCC -0.09522061904
+terminal_mismatch_UUCG -0.02018630332
+terminal_mismatch_UUCU -0.06427815389
+terminal_mismatch_UUGA 0.1835869429
+terminal_mismatch_UUGC -0.04369228327
+terminal_mismatch_UUGG 0.08954340923
+terminal_mismatch_UUGU -0.03453485593
+terminal_mismatch_UUUA 0.00588187155
+terminal_mismatch_UUUC -0.05924264311
+terminal_mismatch_UUUG -0.09576486096
+terminal_mismatch_UUUU 0.09512802721
+hairpin_length_at_least_0 -7.001893834
+hairpin_length_at_least_1 -0.810882712
+hairpin_length_at_least_2 0.9459053555
+hairpin_length_at_least_3 1.943166338
+hairpin_length_at_least_4 0.9552748326
+hairpin_length_at_least_5 -0.3580276865
+hairpin_length_at_least_6 -0.09582316887
+hairpin_length_at_least_7 -0.09645793924
+hairpin_length_at_least_8 -0.2889275791
+hairpin_length_at_least_9 -0.2894783109
+hairpin_length_at_least_10 0.1273574641
+hairpin_length_at_least_11 -0.2948182011
+hairpin_length_at_least_12 -0.7491577273
+hairpin_length_at_least_13 0.04397809432
+hairpin_length_at_least_14 -0.1619802369
+hairpin_length_at_least_15 -0.1588823981
+hairpin_length_at_least_16 -0.04186935212
+hairpin_length_at_least_17 0.1708840763
+hairpin_length_at_least_18 0.156566303
+hairpin_length_at_least_19 -0.5294077573
+hairpin_length_at_least_20 0.03758501541
+hairpin_length_at_least_21 -0.1840024096
+hairpin_length_at_least_22 -0.4869328207
+hairpin_length_at_least_23 -0.2177204558
+hairpin_length_at_least_24 -0.0871135986
+hairpin_length_at_least_25 -0.2041734367
+hairpin_length_at_least_26 -0.2576012642
+hairpin_length_at_least_27 -0.1732091363
+hairpin_length_at_least_28 0.003030468289
+hairpin_length_at_least_29 0.1243949244
+hairpin_length_at_least_30 0.332670531
+internal_explicit_1_1 0.0303202931
+internal_explicit_1_2 -0.1078180662
+internal_explicit_1_3 0.02032624142
+internal_explicit_1_4 -0.0583460487
+internal_explicit_2_2 -0.05045983481
+internal_explicit_2_3 0.06753030947
+internal_explicit_2_4 0.09555257267
+internal_explicit_3_3 0.08245934476
+internal_explicit_3_4 0.1746772079
+internal_explicit_4_4 -0.06004526272
+bulge_length_at_least_1 -1.089067125
+bulge_length_at_least_2 -1.456312872
+bulge_length_at_least_3 -0.8642668045
+bulge_length_at_least_4 -0.7684476691
+bulge_length_at_least_5 -0.7987608753
+bulge_length_at_least_6 -0.4559018916
+bulge_length_at_least_7 0.1718975607
+bulge_length_at_least_8 0.1122889416
+bulge_length_at_least_9 -0.009052840127
+bulge_length_at_least_10 -0.3528021103
+bulge_length_at_least_11 -0.6210992298
+bulge_length_at_least_12 -0.4667844678
+bulge_length_at_least_13 -0.3545731643
+bulge_length_at_least_14 -0.1869129881
+bulge_length_at_least_15 -0.1029032471
+bulge_length_at_least_16 0.01243360939
+bulge_length_at_least_17 -0.05806294221
+bulge_length_at_least_18 0.03048664164
+bulge_length_at_least_19 0.06974626007
+bulge_length_at_least_20 -0.1053239465
+bulge_length_at_least_21 -0.00802761809
+bulge_length_at_least_22 -0.1693063411
+bulge_length_at_least_23 -0.1869480197
+bulge_length_at_least_24 -0.1338882236
+bulge_length_at_least_25 -0.08912941184
+bulge_length_at_least_26 -0.04449397991
+bulge_length_at_least_27 -0.004535602471
+bulge_length_at_least_28 -0.06387849235
+bulge_length_at_least_29 -0.04061029734
+bulge_length_at_least_30 -0.01966669104
+internal_length_at_least_2 -0.8384982243
+internal_length_at_least_3 -0.8145392134
+internal_length_at_least_4 -0.4274419136
+internal_length_at_least_5 -0.3196080489
+internal_length_at_least_6 -0.3482691295
+internal_length_at_least_7 -0.01467310233
+internal_length_at_least_8 -0.1704908718
+internal_length_at_least_9 -0.274749809
+internal_length_at_least_10 -0.2816993027
+internal_length_at_least_11 -0.05013094202
+internal_length_at_least_12 -0.1157513247
+internal_length_at_least_13 -0.3261911703
+internal_length_at_least_14 -0.0979074061
+internal_length_at_least_15 -0.2117025949
+internal_length_at_least_16 -0.2165291009
+internal_length_at_least_17 -0.07060279378
+internal_length_at_least_18 -0.02628945077
+internal_length_at_least_19 0.08180085115
+internal_length_at_least_20 0.102830986
+internal_length_at_least_21 -0.1224144603
+internal_length_at_least_22 -0.3109183699
+internal_length_at_least_23 -0.2718499691
+internal_length_at_least_24 -0.001046006366
+internal_length_at_least_25 -0.1323005654
+internal_length_at_least_26 -0.1094109113
+internal_length_at_least_27 -0.05415788664
+internal_length_at_least_28 0.03762837628
+internal_length_at_least_29 0.04247139017
+internal_length_at_least_30 0.06643629429
+internal_symmetric_length_at_least_1 -0.02915617376
+internal_symmetric_length_at_least_2 -0.05938423254
+internal_symmetric_length_at_least_3 -0.01009895933
+internal_symmetric_length_at_least_4 -0.09032555513
+internal_symmetric_length_at_least_5 -0.03183245324
+internal_symmetric_length_at_least_6 -0.03131585342
+internal_symmetric_length_at_least_7 -0.07880361719
+internal_symmetric_length_at_least_8 -0.0977018705
+internal_symmetric_length_at_least_9 -0.06983587385
+internal_symmetric_length_at_least_10 -0.03844463615
+internal_symmetric_length_at_least_11 -0.009379790458
+internal_symmetric_length_at_least_12 -0.01728393975
+internal_symmetric_length_at_least_13 -0.0137997314
+internal_symmetric_length_at_least_14 -0.01370426331
+internal_symmetric_length_at_least_15 -0.007916687601
+internal_asymmetry_at_least_1 -0.6248854515
+internal_asymmetry_at_least_2 -0.2130110176
+internal_asymmetry_at_least_3 -0.4388708483
+internal_asymmetry_at_least_4 -0.4577389008
+internal_asymmetry_at_least_5 -0.1073350387
+internal_asymmetry_at_least_6 -0.08152625833
+internal_asymmetry_at_least_7 -0.2498828148
+internal_asymmetry_at_least_8 -0.3261732738
+internal_asymmetry_at_least_9 -0.2648834833
+internal_asymmetry_at_least_10 -0.1413539627
+internal_asymmetry_at_least_11 -0.1611720656
+internal_asymmetry_at_least_12 -0.08403184675
+internal_asymmetry_at_least_13 -0.2796784443
+internal_asymmetry_at_least_14 -0.2315456117
+internal_asymmetry_at_least_15 -0.2250828215
+internal_asymmetry_at_least_16 -0.1790093214
+internal_asymmetry_at_least_17 -0.1394856705
+internal_asymmetry_at_least_18 -0.1088535885
+internal_asymmetry_at_least_19 -0.07752505474
+internal_asymmetry_at_least_20 -0.0595936208
+internal_asymmetry_at_least_21 -0.04142445114
+internal_asymmetry_at_least_22 -0.03155354561
+internal_asymmetry_at_least_23 -0.0228581872
+internal_asymmetry_at_least_24 -0.01676596118
+internal_asymmetry_at_least_25 -0.01097177419
+internal_asymmetry_at_least_26 -0.007109067914
+internal_asymmetry_at_least_27 -0.003497465027
+internal_asymmetry_at_least_28 -0.001670726436
+bulge_0x1_nucleotides_A -0.06085271607
+bulge_0x1_nucleotides_C 0.001052730792
+bulge_0x1_nucleotides_G 0.07919701898
+bulge_0x1_nucleotides_U -0.007885407258
+internal_1x1_nucleotides_AA -0.03984657055
+internal_1x1_nucleotides_AC 0.1799238798
+internal_1x1_nucleotides_AG -0.1776183674
+internal_1x1_nucleotides_AU 0.09643563353
+internal_1x1_nucleotides_CC 0.0111219169
+internal_1x1_nucleotides_CG 0.1783947299
+internal_1x1_nucleotides_CU -0.125960312
+internal_1x1_nucleotides_GG 0.01287639876
+internal_1x1_nucleotides_GU 0.00409050545
+internal_1x1_nucleotides_UU -0.1064232283
+helix_stacking_AAAA 0.3546221768
+helix_stacking_AAAC -0.04315446985
+helix_stacking_AAAG 0.5016203773
+helix_stacking_AAAU -0.530796649
+helix_stacking_AACA 0.2872436178
+helix_stacking_AACC 0.3691829843
+helix_stacking_AACG -0.9953433048
+helix_stacking_AACU 0.08087157122
+helix_stacking_AAGA 0.1677825197
+helix_stacking_AAGC -0.678265872
+helix_stacking_AAGG -0.09383669676
+helix_stacking_AAGU -0.5104866535
+helix_stacking_AAUA -0.8089949963
+helix_stacking_AAUC 0.03755925657
+helix_stacking_AAUG -0.5472344567
+helix_stacking_AAUU 0.5539306686
+helix_stacking_ACAC 0.2589964621
+helix_stacking_ACAG -0.3522259076
+helix_stacking_ACAU -0.3266392301
+helix_stacking_ACCA 0.1047725635
+helix_stacking_ACCC 0.03115102797
+helix_stacking_ACCG -0.5174541432
+helix_stacking_ACCU -0.001488212881
+helix_stacking_ACGA 0.406426376
+helix_stacking_ACGC -0.1867679593
+helix_stacking_ACGG 0.3704147668
+helix_stacking_ACGU -0.04475096326
+helix_stacking_ACUA -0.4204688848
+helix_stacking_ACUC -0.08147109792
+helix_stacking_ACUG -0.2525676388
+helix_stacking_ACUU 0.1202721996
+helix_stacking_AGAC -0.05436023834
+helix_stacking_AGAG 0.1845945852
+helix_stacking_AGAU -0.6334949651
+helix_stacking_AGCC 0.164129666
+helix_stacking_AGCG -0.2877376098
+helix_stacking_AGCU 0.4098605924
+helix_stacking_AGGA -0.1055221936
+helix_stacking_AGGC -0.2921646674
+helix_stacking_AGGG 0.01018764286
+helix_stacking_AGGU 0.1931504062
+helix_stacking_AGUA -0.4943545079
+helix_stacking_AGUC 0.06201686305
+helix_stacking_AGUG -0.2403228316
+helix_stacking_AGUU 0.2978012208
+helix_stacking_AUAC -0.1929017687
+helix_stacking_AUAG -0.3754001328
+helix_stacking_AUAU 0.8097237953
+helix_stacking_AUCC -0.4891632689
+helix_stacking_AUCG 1.132030335
+helix_stacking_AUCU -0.2122122383
+helix_stacking_AUGC 1.130963415
+helix_stacking_AUGG -0.4156413885
+helix_stacking_AUGU 0.1489146395
+helix_stacking_AUUA 0.9217793712
+helix_stacking_AUUC -0.520976219
+helix_stacking_AUUG 0.4958919356
+helix_stacking_AUUU -0.6967771576
+helix_stacking_CAAC 0.3516155114
+helix_stacking_CAAG -0.2006481968
+helix_stacking_CAAU -0.2214284327
+helix_stacking_CACC -0.08007197634
+helix_stacking_CACG -0.2421687057
+helix_stacking_CACU -0.08214918555
+helix_stacking_CAGC -0.4295873116
+helix_stacking_CAGG -0.1981385431
+helix_stacking_CAGU -0.1313833772
+helix_stacking_CAUC 0.1792984608
+helix_stacking_CAUG -0.04612680611
+helix_stacking_CAUU 0.194875416
+helix_stacking_CCAG 0.01571465697
+helix_stacking_CCAU -0.4249477318
+helix_stacking_CCCC -0.1256736814
+helix_stacking_CCCG -0.8566085239
+helix_stacking_CCCU -0.1141455454
+helix_stacking_CCGC -0.8614197122
+helix_stacking_CCGG 0.5770731595
+helix_stacking_CCGU -0.0133855091
+helix_stacking_CCUC 0.06065031698
+helix_stacking_CCUG 0.01087238715
+helix_stacking_CCUU 0.08193995381
+helix_stacking_CGAG -0.6764832054
+helix_stacking_CGAU 1.024511436
+helix_stacking_CGCG 1.57803623
+helix_stacking_CGCU -0.3538203962
+helix_stacking_CGGC 1.080098392
+helix_stacking_CGGG -0.8315727806
+helix_stacking_CGGU 0.2329343463
+helix_stacking_CGUC -0.3528944225
+helix_stacking_CGUG 0.8098234766
+helix_stacking_CGUU -0.6244259092
+helix_stacking_CUAG 0.3519082557
+helix_stacking_CUAU -0.6877454743
+helix_stacking_CUCG -0.6612901629
+helix_stacking_CUCU 0.1733793766
+helix_stacking_CUGG 0.2222988309
+helix_stacking_CUGU -0.113850913
+helix_stacking_CUUC 0.4351153974
+helix_stacking_CUUG -0.1156703626
+helix_stacking_CUUU 0.06735956828
+helix_stacking_GAAG 1.019661253
+helix_stacking_GAAU -0.5917556439
+helix_stacking_GACG -0.9041064326
+helix_stacking_GACU -0.03361631028
+helix_stacking_GAGG 0.1934827478
+helix_stacking_GAGU -0.2383785167
+helix_stacking_GAUG 0.02359602987
+helix_stacking_GAUU 0.1306743273
+helix_stacking_GCAU 1.194356152
+helix_stacking_GCCG 1.11343121
+helix_stacking_GCCU -0.172229872
+helix_stacking_GCGG -0.6672644747
+helix_stacking_GCGU 0.7230014783
+helix_stacking_GCUG 0.8610183195
+helix_stacking_GCUU -0.5428076909
+helix_stacking_GGAU -0.5509451391
+helix_stacking_GGCU 0.210715382
+helix_stacking_GGGG 0.06960247404
+helix_stacking_GGGU -0.1636600385
+helix_stacking_GGUG -0.2597428596
+helix_stacking_GGUU -0.02447239452
+helix_stacking_GUAU 0.3621158768
+helix_stacking_GUCU -0.2341237246
+helix_stacking_GUGU 0.3424878938
+helix_stacking_GUUG 0.1039838924
+helix_stacking_GUUU -0.499537785
+helix_stacking_UAAU 1.012974086
+helix_stacking_UACU -0.47263992
+helix_stacking_UAGU 0.5340581688
+helix_stacking_UAUU -0.5309098207
+helix_stacking_UCCU 0.1344187062
+helix_stacking_UCGU -0.4078546373
+helix_stacking_UCUU 0.3822876695
+helix_stacking_UGGU 0.334079314
+helix_stacking_UGUU -0.6872220771
+helix_stacking_UUUU 0.4541580212
+helix_closing_AA -1.97703412
+helix_closing_AC -1.559634114
+helix_closing_AG -1.528906666
+helix_closing_AU -1.361601514
+helix_closing_CA -1.459461559
+helix_closing_CC -1.555753304
+helix_closing_CG -1.103608648
+helix_closing_CU -1.623084781
+helix_closing_GA -2.003860326
+helix_closing_GC -1.254327447
+helix_closing_GG -1.594937359
+helix_closing_GU -1.596953265
+helix_closing_UA -1.337125303
+helix_closing_UC -1.580368218
+helix_closing_UG -1.113836351
+helix_closing_UU -1.84763533
+multi_base -0.4432287566
+multi_unpaired -0.1661807958
+multi_paired -1.039351046
+dangle_left_AAA 0.03080526899
+dangle_left_AAC -0.01557812237
+dangle_left_AAG 0.005733379308
+dangle_left_AAU -0.03027159081
+dangle_left_ACA 0.03212878555
+dangle_left_ACC 0.004479095383
+dangle_left_ACG -0.01329286796
+dangle_left_ACU -0.04757639295
+dangle_left_AGA 0.04869146121
+dangle_left_AGC 0.06101118253
+dangle_left_AGG 0.02340476123
+dangle_left_AGU -0.05415946356
+dangle_left_AUA -0.1555170799
+dangle_left_AUC -0.1584186333
+dangle_left_AUG 0.1049449411
+dangle_left_AUU 0.05887206858
+dangle_left_CAA 0.1688104162
+dangle_left_CAC -0.03515655397
+dangle_left_CAG 0.004093977101
+dangle_left_CAU -0.05951539547
+dangle_left_CCA -0.0227679388
+dangle_left_CCC -0.01248555321
+dangle_left_CCG -0.01867105725
+dangle_left_CCU -0.01003146169
+dangle_left_CGA 0.135601271
+dangle_left_CGC 0.07220319428
+dangle_left_CGG -0.05993462651
+dangle_left_CGU -0.1386561816
+dangle_left_CUA 0.01929374685
+dangle_left_CUC -0.0121379923
+dangle_left_CUG 0.02490464873
+dangle_left_CUU -0.04542963587
+dangle_left_GAA 0.03369772989
+dangle_left_GAC -0.02764565386
+dangle_left_GAG -0.05575621631
+dangle_left_GAU -0.02556515177
+dangle_left_GCA -0.03311971263
+dangle_left_GCC -0.03921251092
+dangle_left_GCG 0.155539817
+dangle_left_GCU -0.1536314435
+dangle_left_GGA -0.02420162504
+dangle_left_GGC 0.03626403997
+dangle_left_GGG -0.01007514186
+dangle_left_GGU -0.02169863834
+dangle_left_GUA -0.01352068634
+dangle_left_GUC -0.08374768846
+dangle_left_GUG -0.004632326489
+dangle_left_GUU 0.04715719551
+dangle_left_UAA 0.05907682094
+dangle_left_UAC -0.001278066597
+dangle_left_UAG -0.1736001705
+dangle_left_UAU -0.1902719183
+dangle_left_UCA 0.00215982961
+dangle_left_UCC -0.01234608674
+dangle_left_UCG -0.0206367473
+dangle_left_UCU 0.007781429081
+dangle_left_UGA -0.0664420362
+dangle_left_UGC -0.02389170256
+dangle_left_UGG 0.15745342
+dangle_left_UGU -0.2585948344
+dangle_left_UUA 0.07429357557
+dangle_left_UUC -0.03175447289
+dangle_left_UUG -0.06579589475
+dangle_left_UUU 0.01213199242
+dangle_right_AAA -0.0143044921
+dangle_right_AAC 0.04831926343
+dangle_right_AAG -0.05126763791
+dangle_right_AAU -0.01037013332
+dangle_right_ACA -0.01743102026
+dangle_right_ACC -0.03150531944
+dangle_right_ACG -0.01372262757
+dangle_right_ACU -0.006550228968
+dangle_right_AGA -0.002106495322
+dangle_right_AGC 0.03122820169
+dangle_right_AGG -0.02073876328
+dangle_right_AGU 0.07957971108
+dangle_right_AUA 0.1347319363
+dangle_right_AUC -0.08134494835
+dangle_right_AUG 0.02017547447
+dangle_right_AUU -0.2190612231
+dangle_right_CAA 0.06980706882
+dangle_right_CAC -0.00397133095
+dangle_right_CAG 0.003180980761
+dangle_right_CAU 0.003885266667
+dangle_right_CCA -0.03507234721
+dangle_right_CCC -0.004352404793
+dangle_right_CCG 0.004283786874
+dangle_right_CCU -0.03083811358
+dangle_right_CGA 0.05145563403
+dangle_right_CGC -0.06416589045
+dangle_right_CGG -0.07528397081
+dangle_right_CGU -0.2239441833
+dangle_right_CUA -0.04695534908
+dangle_right_CUC -0.03913439462
+dangle_right_CUG 0.03078720785
+dangle_right_CUU -0.0122169923
+dangle_right_GAA 0.02627632554
+dangle_right_GAC -0.04581478599
+dangle_right_GAG -0.05128913992
+dangle_right_GAU -0.0206673532
+dangle_right_GCA 0.01152777323
+dangle_right_GCC 0.06552675211
+dangle_right_GCG -0.1032959984
+dangle_right_GCU -0.1965599296
+dangle_right_GGA -0.04966311608
+dangle_right_GGC -0.01420877212
+dangle_right_GGG 0.05901570375
+dangle_right_GGU -0.01692533326
+dangle_right_GUA 0.1957498321
+dangle_right_GUC -0.1568392687
+dangle_right_GUG -0.08120000376
+dangle_right_GUU -0.1048127001
+dangle_right_UAA -0.1921468549
+dangle_right_UAC 0.06599085738
+dangle_right_UAG 0.0875506286
+dangle_right_UAU -0.2545133106
+dangle_right_UCA -0.04399379124
+dangle_right_UCC 0.05367978815
+dangle_right_UCG -0.04923752956
+dangle_right_UCU -0.01563493306
+dangle_right_UGA 0.04136216618
+dangle_right_UGC 0.1933397566
+dangle_right_UGG -0.2310365581
+dangle_right_UGU -0.2098640295
+dangle_right_UUA 0.01167557578
+dangle_right_UUC 0.02826979568
+dangle_right_UUG -0.01949416153
+dangle_right_UUU -0.04598079783
+external_unpaired -0.04912022704
+external_paired -0.02669537752
diff --git a/src/contrafold.params.profile b/src/contrafold.params.profile
new file mode 100644
index 0000000..55e92a3
--- /dev/null
+++ b/src/contrafold.params.profile
@@ -0,0 +1,708 @@
+base_pair_AA -3.403516504
+base_pair_AC -3.862432698
+base_pair_AG -3.503299585
+base_pair_AU 1.55371372
+base_pair_CC -2.720683579
+base_pair_CG 2.980236517
+base_pair_CU -3.15431334
+base_pair_GG -2.954515074
+base_pair_GU 0.3889900071
+base_pair_UU -3.330146848
+terminal_mismatch_AAAA -0.01904970265
+terminal_mismatch_AAAC -0.00171917935
+terminal_mismatch_AAAG 0.006356620738
+terminal_mismatch_AAAU 0.006867574624
+terminal_mismatch_AACA -0.005178300248
+terminal_mismatch_AACC -0.0004470034307
+terminal_mismatch_AACG 0.001951057499
+terminal_mismatch_AACU 0.00874769454
+terminal_mismatch_AAGA -0.007667069844
+terminal_mismatch_AAGC -0.004455086168
+terminal_mismatch_AAGG -0.003017906685
+terminal_mismatch_AAGU 0.0008812458173
+terminal_mismatch_AAUA -0.007924263765
+terminal_mismatch_AAUC 0.0007074860281
+terminal_mismatch_AAUG -0.005550468339
+terminal_mismatch_AAUU 0.0268638911
+terminal_mismatch_ACAA 0.01283774155
+terminal_mismatch_ACAC 0.001532725957
+terminal_mismatch_ACAG 0.005102035955
+terminal_mismatch_ACAU 0.009836723594
+terminal_mismatch_ACCA 0.01582784926
+terminal_mismatch_ACCC -0.003590171583
+terminal_mismatch_ACCG -0.00176008426
+terminal_mismatch_ACCU -0.002668964324
+terminal_mismatch_ACGA 0.008920693142
+terminal_mismatch_ACGC -0.001596747805
+terminal_mismatch_ACGG 0.01783069364
+terminal_mismatch_ACGU 0.01031251309
+terminal_mismatch_ACUA 0.008948457519
+terminal_mismatch_ACUC 0.001483118683
+terminal_mismatch_ACUG -0.007812721348
+terminal_mismatch_ACUU -0.003786602155
+terminal_mismatch_AGAA -0.01097479284
+terminal_mismatch_AGAC 0.008806985634
+terminal_mismatch_AGAG -0.003935476784
+terminal_mismatch_AGAU -0.01062123666
+terminal_mismatch_AGCA 0.001172715616
+terminal_mismatch_AGCC -0.001576525909
+terminal_mismatch_AGCG 0.004290528068
+terminal_mismatch_AGCU 0.003329183868
+terminal_mismatch_AGGA -0.009168502704
+terminal_mismatch_AGGC -0.002223655317
+terminal_mismatch_AGGG -0.001697505571
+terminal_mismatch_AGGU -0.002164665551
+terminal_mismatch_AGUA 0.005292106172
+terminal_mismatch_AGUC 0.005945069686
+terminal_mismatch_AGUG -0.009909498007
+terminal_mismatch_AGUU -0.005338025794
+terminal_mismatch_AUAA -0.133945917
+terminal_mismatch_AUAC 0.02666467332
+terminal_mismatch_AUAG -0.1274129643
+terminal_mismatch_AUAU -0.1310489237
+terminal_mismatch_AUCA 0.02888943484
+terminal_mismatch_AUCC -0.139105181
+terminal_mismatch_AUCG -0.01911874829
+terminal_mismatch_AUCU -0.0518206712
+terminal_mismatch_AUGA 0.03327202154
+terminal_mismatch_AUGC 0.005351360542
+terminal_mismatch_AUGG 0.05716183472
+terminal_mismatch_AUGU -0.04484004378
+terminal_mismatch_AUUA 0.002769299783
+terminal_mismatch_AUUC -0.01750335543
+terminal_mismatch_AUUG -0.03002986312
+terminal_mismatch_AUUU 0.1241301829
+terminal_mismatch_CAAA -0.007741274346
+terminal_mismatch_CAAC -0.01113526737
+terminal_mismatch_CAAG 0.01063378712
+terminal_mismatch_CAAU -0.001203223422
+terminal_mismatch_CACA 0.001306282015
+terminal_mismatch_CACC 0.003551482589
+terminal_mismatch_CACG 0.01610024043
+terminal_mismatch_CACU -0.0006937631325
+terminal_mismatch_CAGA -0.00833709303
+terminal_mismatch_CAGC -0.008163367969
+terminal_mismatch_CAGG 0.000916138201
+terminal_mismatch_CAGU -0.003732567462
+terminal_mismatch_CAUA -0.002506289552
+terminal_mismatch_CAUC 0.01170235943
+terminal_mismatch_CAUG -0.0007243619174
+terminal_mismatch_CAUU 0.01849755925
+terminal_mismatch_CCAA -0.00400912659
+terminal_mismatch_CCAC -0.009622061283
+terminal_mismatch_CCAG 0.005033557866
+terminal_mismatch_CCAU -0.0003158384523
+terminal_mismatch_CCCA -0.000384390207
+terminal_mismatch_CCCC -0.001815581833
+terminal_mismatch_CCCG -0.009696141122
+terminal_mismatch_CCCU 0.006271185798
+terminal_mismatch_CCGA -0.003471419719
+terminal_mismatch_CCGC -0.001642766049
+terminal_mismatch_CCGG 0.007719178216
+terminal_mismatch_CCGU 0.0004860521455
+terminal_mismatch_CCUA 0.003356420707
+terminal_mismatch_CCUC -0.002062876824
+terminal_mismatch_CCUG -0.002847608394
+terminal_mismatch_CCUU -0.003059392051
+terminal_mismatch_CGAA 0.1257942724
+terminal_mismatch_CGAC -0.09090938709
+terminal_mismatch_CGAG -0.1530280212
+terminal_mismatch_CGAU -0.07397889602
+terminal_mismatch_CGCA -0.05019618903
+terminal_mismatch_CGCC -0.05982744949
+terminal_mismatch_CGCG 0.0280456704
+terminal_mismatch_CGCU 0.07792988472
+terminal_mismatch_CGGA -0.09212300878
+terminal_mismatch_CGGC -0.1428512254
+terminal_mismatch_CGGG 0.07800688961
+terminal_mismatch_CGGU -0.1724080214
+terminal_mismatch_CGUA -0.1867967474
+terminal_mismatch_CGUC -0.02773657641
+terminal_mismatch_CGUG -0.1486660582
+terminal_mismatch_CGUU 0.1957108536
+terminal_mismatch_CUAA -0.01255252954
+terminal_mismatch_CUAC -0.002874975485
+terminal_mismatch_CUAG -0.0002115997942
+terminal_mismatch_CUAU 0.001356316347
+terminal_mismatch_CUCA -0.002328472154
+terminal_mismatch_CUCC -0.002560410815
+terminal_mismatch_CUCG 0.0004526657554
+terminal_mismatch_CUCU 0.004176132057
+terminal_mismatch_CUGA -2.603859596e-05
+terminal_mismatch_CUGC 0.01024095619
+terminal_mismatch_CUGG -0.006260918446
+terminal_mismatch_CUGU -0.002439898104
+terminal_mismatch_CUUA -0.001507656597
+terminal_mismatch_CUUC -0.008256890902
+terminal_mismatch_CUUG 0.0006283881029
+terminal_mismatch_CUUU -0.007735637532
+terminal_mismatch_GAAA 0.001129550339
+terminal_mismatch_GAAC 0.02345002039
+terminal_mismatch_GAAG -0.00676809037
+terminal_mismatch_GAAU 0.004688453632
+terminal_mismatch_GACA -0.004052966162
+terminal_mismatch_GACC 0.001412062596
+terminal_mismatch_GACG 0.005366585871
+terminal_mismatch_GACU -0.002973731173
+terminal_mismatch_GAGA 0.006028218368
+terminal_mismatch_GAGC 0.00147822049
+terminal_mismatch_GAGG -0.001869123533
+terminal_mismatch_GAGU 0.001653985367
+terminal_mismatch_GAUA -0.002159857187
+terminal_mismatch_GAUC -0.005951368194
+terminal_mismatch_GAUG 0.001225245586
+terminal_mismatch_GAUU -0.003480004281
+terminal_mismatch_GCAA -0.07896130717
+terminal_mismatch_GCAC -0.2472491533
+terminal_mismatch_GCAG -0.0542143859
+terminal_mismatch_GCAU -0.1537956801
+terminal_mismatch_GCCA -0.2045648428
+terminal_mismatch_GCCC -0.1627529082
+terminal_mismatch_GCCG -0.1987055124
+terminal_mismatch_GCCU 0.01175816564
+terminal_mismatch_GCGA -0.03508500143
+terminal_mismatch_GCGC 0.02900176709
+terminal_mismatch_GCGG 0.12264211
+terminal_mismatch_GCGU -0.2183733722
+terminal_mismatch_GCUA -0.1588111192
+terminal_mismatch_GCUC 0.01092596754
+terminal_mismatch_GCUG 0.2014929454
+terminal_mismatch_GCUU 0.1266862807
+terminal_mismatch_GGAA 0.01192290232
+terminal_mismatch_GGAC -0.0009572411777
+terminal_mismatch_GGAG -0.004135025659
+terminal_mismatch_GGAU -0.004845408792
+terminal_mismatch_GGCA -0.001607902434
+terminal_mismatch_GGCC -0.005317240945
+terminal_mismatch_GGCG -0.004503156102
+terminal_mismatch_GGCU -0.01171637414
+terminal_mismatch_GGGA -0.009493388427
+terminal_mismatch_GGGC 0.005465305634
+terminal_mismatch_GGGG 0.0001936640516
+terminal_mismatch_GGGU 0.002182498107
+terminal_mismatch_GGUA 0.002660674381
+terminal_mismatch_GGUC -0.004200489991
+terminal_mismatch_GGUG 0.008943375152
+terminal_mismatch_GGUU -0.002894025063
+terminal_mismatch_GUAA -0.08514923274
+terminal_mismatch_GUAC -0.03475473344
+terminal_mismatch_GUAG -0.04382034099
+terminal_mismatch_GUAU -0.09602988776
+terminal_mismatch_GUCA 0.1353368763
+terminal_mismatch_GUCC -0.01651385023
+terminal_mismatch_GUCG -0.0149790406
+terminal_mismatch_GUCU -0.0008423759273
+terminal_mismatch_GUGA -0.09187091731
+terminal_mismatch_GUGC -0.04089469373
+terminal_mismatch_GUGG -0.02815524114
+terminal_mismatch_GUGU -0.04522288593
+terminal_mismatch_GUUA -0.02444865752
+terminal_mismatch_GUUC 0.04690078148
+terminal_mismatch_GUUG -0.06055497218
+terminal_mismatch_GUUU -0.03537665706
+terminal_mismatch_UAAA 0.008673507679
+terminal_mismatch_UAAC -0.08593833572
+terminal_mismatch_UAAG -0.05055088176
+terminal_mismatch_UAAU -0.07986703842
+terminal_mismatch_UACA -0.02127756772
+terminal_mismatch_UACC -0.1184051629
+terminal_mismatch_UACG 0.04655718888
+terminal_mismatch_UACU -0.03605388855
+terminal_mismatch_UAGA -0.07960278009
+terminal_mismatch_UAGC -0.02617770477
+terminal_mismatch_UAGG -0.01086805047
+terminal_mismatch_UAGU -0.07833794555
+terminal_mismatch_UAUA -0.08146724852
+terminal_mismatch_UAUC -0.0197234843
+terminal_mismatch_UAUG -0.07955118333
+terminal_mismatch_UAUU 0.08109358027
+terminal_mismatch_UCAA -0.0006883735319
+terminal_mismatch_UCAC -0.005403061813
+terminal_mismatch_UCAG -0.002246329499
+terminal_mismatch_UCAU -0.002443610985
+terminal_mismatch_UCCA 0.002483869597
+terminal_mismatch_UCCC -0.00295603996
+terminal_mismatch_UCCG 0.002005734214
+terminal_mismatch_UCCU 0.0113499652
+terminal_mismatch_UCGA -0.004600799269
+terminal_mismatch_UCGC 0.002228500089
+terminal_mismatch_UCGG -0.001815960518
+terminal_mismatch_UCGU -0.002488681926
+terminal_mismatch_UCUA -0.003119396207
+terminal_mismatch_UCUC -0.00033559435
+terminal_mismatch_UCUG -0.002712057975
+terminal_mismatch_UCUU -0.001060199394
+terminal_mismatch_UGAA -0.02929531066
+terminal_mismatch_UGAC 0.08182989129
+terminal_mismatch_UGAG 0.08038292615
+terminal_mismatch_UGAU -0.05853769779
+terminal_mismatch_UGCA 0.005149870556
+terminal_mismatch_UGCC -0.0003422258805
+terminal_mismatch_UGCG 0.0416446163
+terminal_mismatch_UGCU -0.01773701562
+terminal_mismatch_UGGA 0.08194601547
+terminal_mismatch_UGGC -0.01172291947
+terminal_mismatch_UGGG -0.07717791497
+terminal_mismatch_UGGU -0.1379654806
+terminal_mismatch_UGUA -0.04444127245
+terminal_mismatch_UGUC 0.04114673413
+terminal_mismatch_UGUG -0.07033025997
+terminal_mismatch_UGUU 0.08620481104
+terminal_mismatch_UUAA 0.007881276297
+terminal_mismatch_UUAC 0.003143096691
+terminal_mismatch_UUAG -0.004170043288
+terminal_mismatch_UUAU 0.0007948167433
+terminal_mismatch_UUCA -0.01262934341
+terminal_mismatch_UUCC -0.00703034962
+terminal_mismatch_UUCG 0.003995517129
+terminal_mismatch_UUCU 0.01405893229
+terminal_mismatch_UUGA -0.004213478999
+terminal_mismatch_UUGC 0.002645455816
+terminal_mismatch_UUGG 0.006500918067
+terminal_mismatch_UUGU 0.004717395272
+terminal_mismatch_UUUA 0.02046924368
+terminal_mismatch_UUUC -0.006520735394
+terminal_mismatch_UUUG -0.003545775749
+terminal_mismatch_UUUU -0.0099112907
+hairpin_length_at_least_0 -11.27454044
+hairpin_length_at_least_1 -0.7049874787
+hairpin_length_at_least_2 1.65840785
+hairpin_length_at_least_3 2.548888739
+hairpin_length_at_least_4 0.9944638313
+hairpin_length_at_least_5 0.1114930516
+hairpin_length_at_least_6 -0.08164018938
+hairpin_length_at_least_7 -0.2314032079
+hairpin_length_at_least_8 0.08419016627
+hairpin_length_at_least_9 -0.4135353798
+hairpin_length_at_least_10 -0.05450407482
+hairpin_length_at_least_11 0.1352513403
+hairpin_length_at_least_12 0.05005382422
+hairpin_length_at_least_13 -0.2420180411
+hairpin_length_at_least_14 -0.1932403432
+hairpin_length_at_least_15 -0.4459876615
+hairpin_length_at_least_16 0.07401798451
+hairpin_length_at_least_17 1.145386731
+hairpin_length_at_least_18 -0.15527476
+hairpin_length_at_least_19 -1.122154071
+hairpin_length_at_least_20 -0.459612545
+hairpin_length_at_least_21 0.8845817922
+hairpin_length_at_least_22 -0.479883715
+hairpin_length_at_least_23 0.2232373651
+hairpin_length_at_least_24 0.352874964
+hairpin_length_at_least_25 -0.6952888987
+hairpin_length_at_least_26 -0.12415393
+hairpin_length_at_least_27 -0.2405423305
+hairpin_length_at_least_28 -0.009526529813
+hairpin_length_at_least_29 0.3154128371
+hairpin_length_at_least_30 0.9702676486
+internal_explicit_1_1 -0.100214157
+internal_explicit_1_2 -0.1388267427
+internal_explicit_1_3 -0.0690085675
+internal_explicit_1_4 -0.07626927213
+internal_explicit_2_2 0.1272754652
+internal_explicit_2_3 0.08452336463
+internal_explicit_2_4 0.1113771719
+internal_explicit_3_3 0.03720826788
+internal_explicit_3_4 0.1607708252
+internal_explicit_4_4 -0.04944809363
+bulge_length_at_least_1 -4.258153757
+bulge_length_at_least_2 -1.816492919
+bulge_length_at_least_3 -0.3008950692
+bulge_length_at_least_4 -1.037575805
+bulge_length_at_least_5 -1.054161193
+bulge_length_at_least_6 -0.7583329783
+bulge_length_at_least_7 0.06164215921
+bulge_length_at_least_8 0.6588945757
+bulge_length_at_least_9 0.8794927911
+bulge_length_at_least_10 -0.1800467105
+bulge_length_at_least_11 -0.8482388288
+bulge_length_at_least_12 -0.4888415235
+bulge_length_at_least_13 -0.07405856845
+bulge_length_at_least_14 0.1024246688
+bulge_length_at_least_15 -0.3298852888
+bulge_length_at_least_16 -0.1539144958
+bulge_length_at_least_17 -0.06871760118
+bulge_length_at_least_18 -0.008601887319
+bulge_length_at_least_19 0.04936216855
+bulge_length_at_least_20 0.09539329507
+bulge_length_at_least_21 0.1569528946
+bulge_length_at_least_22 -0.00690463143
+bulge_length_at_least_23 0.03502788268
+bulge_length_at_least_24 0.1004819585
+bulge_length_at_least_25 0.131705365
+bulge_length_at_least_26 0.1748203998
+bulge_length_at_least_27 0.2117574286
+bulge_length_at_least_28 0.0800126973
+bulge_length_at_least_29 -0.0375842188
+bulge_length_at_least_30 -0.01536004101
+internal_length_at_least_2 -2.578541102
+internal_length_at_least_3 -1.732113021
+internal_length_at_least_4 -0.4426925225
+internal_length_at_least_5 -0.4436652728
+internal_length_at_least_6 -0.4464306446
+internal_length_at_least_7 0.5028354199
+internal_length_at_least_8 -0.254430029
+internal_length_at_least_9 -0.3709535245
+internal_length_at_least_10 -0.1894424283
+internal_length_at_least_11 -0.1533676121
+internal_length_at_least_12 0.05007542403
+internal_length_at_least_13 -0.4638130633
+internal_length_at_least_14 0.3620488764
+internal_length_at_least_15 -0.3155004838
+internal_length_at_least_16 0.1757922455
+internal_length_at_least_17 -0.1475837316
+internal_length_at_least_18 -0.3435837159
+internal_length_at_least_19 -0.2127962066
+internal_length_at_least_20 0.3934874237
+internal_length_at_least_21 -0.128500225
+internal_length_at_least_22 -0.162627261
+internal_length_at_least_23 0.03725117415
+internal_length_at_least_24 0.2345360224
+internal_length_at_least_25 -0.4777910787
+internal_length_at_least_26 -0.218498122
+internal_length_at_least_27 0.003525297415
+internal_length_at_least_28 0.1565818883
+internal_length_at_least_29 0.2632979177
+internal_length_at_least_30 -0.08663258666
+internal_symmetric_length_at_least_1 0.01246041311
+internal_symmetric_length_at_least_2 0.09042993953
+internal_symmetric_length_at_least_3 0.006257055853
+internal_symmetric_length_at_least_4 -0.01830782773
+internal_symmetric_length_at_least_5 0.01861494979
+internal_symmetric_length_at_least_6 -0.07590502612
+internal_symmetric_length_at_least_7 -0.04549833943
+internal_symmetric_length_at_least_8 -0.06124585502
+internal_symmetric_length_at_least_9 -0.0554688099
+internal_symmetric_length_at_least_10 -0.03643553967
+internal_symmetric_length_at_least_11 -0.02835526064
+internal_symmetric_length_at_least_12 -0.01997837824
+internal_symmetric_length_at_least_13 -0.006504409801
+internal_symmetric_length_at_least_14 -0.01020991605
+internal_symmetric_length_at_least_15 -0.00664329209
+internal_asymmetry_at_least_1 -0.3398337864
+internal_asymmetry_at_least_2 -0.2327646341
+internal_asymmetry_at_least_3 -0.2742544636
+internal_asymmetry_at_least_4 -0.2714034775
+internal_asymmetry_at_least_5 -0.1066829219
+internal_asymmetry_at_least_6 -0.07034134421
+internal_asymmetry_at_least_7 -0.1077694713
+internal_asymmetry_at_least_8 -0.1191531781
+internal_asymmetry_at_least_9 -0.11919056
+internal_asymmetry_at_least_10 -0.06019955797
+internal_asymmetry_at_least_11 -0.07287171151
+internal_asymmetry_at_least_12 -0.03864921778
+internal_asymmetry_at_least_13 -0.1195306559
+internal_asymmetry_at_least_14 -0.07461967
+internal_asymmetry_at_least_15 -0.06945050817
+internal_asymmetry_at_least_16 -0.05012396434
+internal_asymmetry_at_least_17 -0.03861141122
+internal_asymmetry_at_least_18 -0.02974023788
+internal_asymmetry_at_least_19 -0.009502340874
+internal_asymmetry_at_least_20 0.0008804241189
+internal_asymmetry_at_least_21 0.008010169511
+internal_asymmetry_at_least_22 0.01194670322
+internal_asymmetry_at_least_23 0.01490318782
+internal_asymmetry_at_least_24 0.01710155162
+internal_asymmetry_at_least_25 0.01876858833
+internal_asymmetry_at_least_26 0.02007099557
+internal_asymmetry_at_least_27 0.01705426821
+internal_asymmetry_at_least_28 0.0192648588
+bulge_0x1_nucleotides_A -0.1607307825
+bulge_0x1_nucleotides_C -0.143024997
+bulge_0x1_nucleotides_G -0.1132349177
+bulge_0x1_nucleotides_U -0.07388844373
+internal_1x1_nucleotides_AA -0.746330951
+internal_1x1_nucleotides_AC -0.5734758932
+internal_1x1_nucleotides_AG -0.7056134096
+internal_1x1_nucleotides_AU 0.5549172931
+internal_1x1_nucleotides_CC -0.7817824419
+internal_1x1_nucleotides_CG 1.861370626
+internal_1x1_nucleotides_CU -0.8061017588
+internal_1x1_nucleotides_GG -0.3909273675
+internal_1x1_nucleotides_GU -0.2337333892
+internal_1x1_nucleotides_UU -0.5427783767
+helix_stacking_AAAA 0.001166516955
+helix_stacking_AAAC 0.002640627995
+helix_stacking_AAAG -0.001966788093
+helix_stacking_AAAU -0.005569153322
+helix_stacking_AACA -0.003746246574
+helix_stacking_AACC 0.002250387165
+helix_stacking_AACG -0.005825121723
+helix_stacking_AACU 0.002419278275
+helix_stacking_AAGA 0.002935327081
+helix_stacking_AAGC -0.01784828725
+helix_stacking_AAGG 0.0002872312893
+helix_stacking_AAGU 0.001398737276
+helix_stacking_AAUA -0.02203838213
+helix_stacking_AAUC 0.0008496010418
+helix_stacking_AAUG 0.008215304107
+helix_stacking_AAUU -0.001680255351
+helix_stacking_ACAC 0.0003182844922
+helix_stacking_ACAG 0.002331586969
+helix_stacking_ACAU -0.01155958526
+helix_stacking_ACCA 0.002139541531
+helix_stacking_ACCC -0.0001087924628
+helix_stacking_ACCG -0.0155477107
+helix_stacking_ACCU 0.001630003086
+helix_stacking_ACGA 0.001888597783
+helix_stacking_ACGC -0.02743524558
+helix_stacking_ACGG 0.0003814573155
+helix_stacking_ACGU -0.001830555874
+helix_stacking_ACUA -0.009872353501
+helix_stacking_ACUC 0.0005562193645
+helix_stacking_ACUG -0.003445097333
+helix_stacking_ACUU 0.003371835915
+helix_stacking_AGAC 0.002170897985
+helix_stacking_AGAG 0.004000321243
+helix_stacking_AGAU 0.001368212691
+helix_stacking_AGCC 0.001572834204
+helix_stacking_AGCG -0.01442489314
+helix_stacking_AGCU 0.0008485224385
+helix_stacking_AGGA -0.0003380933883
+helix_stacking_AGGC -0.002325688802
+helix_stacking_AGGG 0.001605197321
+helix_stacking_AGGU 0.001190540044
+helix_stacking_AGUA -0.004320452863
+helix_stacking_AGUC 1.235074087e-05
+helix_stacking_AGUG 0.002492314277
+helix_stacking_AGUU 0.0001247494973
+helix_stacking_AUAC -0.02159667672
+helix_stacking_AUAG -0.00352554765
+helix_stacking_AUAU 0.05180273901
+helix_stacking_AUCC -0.005022903864
+helix_stacking_AUCG 0.1459431996
+helix_stacking_AUCU 0.003399080434
+helix_stacking_AUGC 0.06120942172
+helix_stacking_AUGG -0.003101215038
+helix_stacking_AUGU -0.1303183556
+helix_stacking_AUUA 0.009453478524
+helix_stacking_AUUC 0.0004138046651
+helix_stacking_AUUG -0.01465500338
+helix_stacking_AUUU -0.02786985724
+helix_stacking_CAAC 0.004635911403
+helix_stacking_CAAG 0.002682963005
+helix_stacking_CAAU 0.004509195205
+helix_stacking_CACC 0.001485352303
+helix_stacking_CACG -0.02236581278
+helix_stacking_CACU -0.0004636246858
+helix_stacking_CAGC -0.03152878951
+helix_stacking_CAGG -0.002140716215
+helix_stacking_CAGU 0.002434240103
+helix_stacking_CAUC -0.004548166551
+helix_stacking_CAUG 0.004684994508
+helix_stacking_CAUU 0.001206644417
+helix_stacking_CCAG -0.00241355473
+helix_stacking_CCAU -0.01439924213
+helix_stacking_CCCC -0.0002994047694
+helix_stacking_CCCG -0.006806145811
+helix_stacking_CCCU -0.0009760748952
+helix_stacking_CCGC -0.00933609321
+helix_stacking_CCGG 0.0003024764046
+helix_stacking_CCGU -0.008054339909
+helix_stacking_CCUC 0.002671500763
+helix_stacking_CCUG -0.002291596204
+helix_stacking_CCUU 0.002098533237
+helix_stacking_CGAG -0.0002736467059
+helix_stacking_CGAU -0.03616102611
+helix_stacking_CGCG 0.250088377
+helix_stacking_CGCU -0.01454879146
+helix_stacking_CGGC 0.007850673173
+helix_stacking_CGGG -0.006491144276
+helix_stacking_CGGU -0.1026917617
+helix_stacking_CGUC -0.005086747975
+helix_stacking_CGUG 0.06362037742
+helix_stacking_CGUU -0.03865409317
+helix_stacking_CUAG -0.0002186996893
+helix_stacking_CUAU -0.01719550423
+helix_stacking_CUCG 0.0001908407612
+helix_stacking_CUCU 0.003964705559
+helix_stacking_CUGG 0.004913454503
+helix_stacking_CUGU 0.004097747681
+helix_stacking_CUUC 0.006232678623
+helix_stacking_CUUG -0.005277009052
+helix_stacking_CUUU 0.008478246413
+helix_stacking_GAAG 0.003957736664
+helix_stacking_GAAU -0.004151259479
+helix_stacking_GACG -0.02363719758
+helix_stacking_GACU 0.001072088742
+helix_stacking_GAGG 0.005896380229
+helix_stacking_GAGU -0.003343022298
+helix_stacking_GAUG 0.004563031462
+helix_stacking_GAUU 0.001197771843
+helix_stacking_GCAU 0.1377929899
+helix_stacking_GCCG -0.07321146825
+helix_stacking_GCCU -0.006184992889
+helix_stacking_GCGG -0.02978685293
+helix_stacking_GCGU 0.03148823858
+helix_stacking_GCUG 0.08533193713
+helix_stacking_GCUU -0.01165246231
+helix_stacking_GGAU 0.00278144632
+helix_stacking_GGCU 0.002483233682
+helix_stacking_GGGG 0.00206269238
+helix_stacking_GGGU -0.001753270937
+helix_stacking_GGUG -0.003032109534
+helix_stacking_GGUU 0.004603269298
+helix_stacking_GUAU 0.004142264094
+helix_stacking_GUCU -0.005597156097
+helix_stacking_GUGU -0.004856561393
+helix_stacking_GUUG -0.01114103029
+helix_stacking_GUUU 0.007214731297
+helix_stacking_UAAU -0.02305434503
+helix_stacking_UACU 0.01472008304
+helix_stacking_UAGU 0.1366622997
+helix_stacking_UAUU -0.0009717172193
+helix_stacking_UCCU 0.001034401284
+helix_stacking_UCGU -0.007302085369
+helix_stacking_UCUU 0.007617286222
+helix_stacking_UGGU -0.04287023482
+helix_stacking_UGUU -0.003793070343
+helix_stacking_UUUU 0.008697148972
+helix_closing_AA 0.01112907317
+helix_closing_AC 0.02702474303
+helix_closing_AG -0.002842332091
+helix_closing_AU 0.03901896238
+helix_closing_CA 0.02117014154
+helix_closing_CC -0.001354586062
+helix_closing_CG 0.01765975296
+helix_closing_CU -0.004887034652
+helix_closing_GA 0.002451173347
+helix_closing_GC -0.1668369921
+helix_closing_GG -0.004529632936
+helix_closing_GU -0.03706297394
+helix_closing_UA -0.07476031148
+helix_closing_UC -0.004251593783
+helix_closing_UG 0.1652654756
+helix_closing_UU 0.01240189133
+multi_base -1.743095925
+multi_unpaired -0.04301817108
+multi_paired -1.991318697
+dangle_left_AAA 0.005948458627
+dangle_left_AAC -0.004933634398
+dangle_left_AAG 0.02041455689
+dangle_left_AAU -0.01282665112
+dangle_left_ACA -0.002708719105
+dangle_left_ACC 0.01190995264
+dangle_left_ACG -0.004359427094
+dangle_left_ACU -0.001459221394
+dangle_left_AGA 0.002031856768
+dangle_left_AGC -0.004641636381
+dangle_left_AGG 0.002595685998
+dangle_left_AGU 0.0004582145506
+dangle_left_AUA -0.03995502829
+dangle_left_AUC -0.03933324463
+dangle_left_AUG 0.008497626198
+dangle_left_AUU 0.02854093626
+dangle_left_CAA -0.0008038517881
+dangle_left_CAC -0.001828561915
+dangle_left_CAG 0.004305690221
+dangle_left_CAU -0.0006153585637
+dangle_left_CCA -0.008511989971
+dangle_left_CCC 0.00882250202
+dangle_left_CCG -0.002863744141
+dangle_left_CCU -0.00308598011
+dangle_left_CGA 0.1969440519
+dangle_left_CGC -0.05597758517
+dangle_left_CGG -0.07167283418
+dangle_left_CGU -0.06014062109
+dangle_left_CUA 0.008756986609
+dangle_left_CUC -0.001769071646
+dangle_left_CUG -7.177615665e-05
+dangle_left_CUU -0.005352487069
+dangle_left_GAA 0.001340321821
+dangle_left_GAC -0.001450023194
+dangle_left_GAG -0.004492228298
+dangle_left_GAU -0.004931844598
+dangle_left_GCA -0.02814467781
+dangle_left_GCC -0.1196813055
+dangle_left_GCG 0.02760933335
+dangle_left_GCU -0.1151969255
+dangle_left_GGA 0.006786363176
+dangle_left_GGC -0.00778984838
+dangle_left_GGG -0.002536557236
+dangle_left_GGU -0.002259744556
+dangle_left_GUA 0.01776118039
+dangle_left_GUC 0.01870049588
+dangle_left_GUG -0.001531917163
+dangle_left_GUU 0.0173535396
+dangle_left_UAA 0.07872204824
+dangle_left_UAC 0.03453970753
+dangle_left_UAG -0.0912635895
+dangle_left_UAU -0.1141568474
+dangle_left_UCA -0.001542188356
+dangle_left_UCC 0.0001140509121
+dangle_left_UCG -0.006496398844
+dangle_left_UCU -0.0006285922569
+dangle_left_UGA 0.06298230028
+dangle_left_UGC -0.04145486281
+dangle_left_UGG 0.1677199451
+dangle_left_UGU -0.05515100259
+dangle_left_UUA 0.01188133916
+dangle_left_UUC -0.005791934709
+dangle_left_UUG -0.009911314024
+dangle_left_UUU 0.002964834408
+dangle_right_AAA -0.008652668487
+dangle_right_AAC 0.006510064597
+dangle_right_AAG -0.002000829541
+dangle_right_AAU 0.0005196107101
+dangle_right_ACA -0.01104633436
+dangle_right_ACC 0.003472792341
+dangle_right_ACG 0.004701113106
+dangle_right_ACU 0.002358174685
+dangle_right_AGA 0.008485654228
+dangle_right_AGC -0.0008570778903
+dangle_right_AGG -0.002019488661
+dangle_right_AGU -0.003861597426
+dangle_right_AUA 0.09553023142
+dangle_right_AUC -0.04069216123
+dangle_right_AUG 0.0204608955
+dangle_right_AUU -0.1201069678
+dangle_right_CAA 0.001994214581
+dangle_right_CAC 0.002212520881
+dangle_right_CAG -0.001730096395
+dangle_right_CAU 0.001679929093
+dangle_right_CCA 0.00742644786
+dangle_right_CCC -0.004122247133
+dangle_right_CCG -0.0004970750237
+dangle_right_CCU -0.005940314564
+dangle_right_CGA 0.03496663954
+dangle_right_CGC 0.008493434784
+dangle_right_CGG 0.02187193828
+dangle_right_CGU -0.1189782197
+dangle_right_CUA 8.363852213e-06
+dangle_right_CUC -0.004354871147
+dangle_right_CUG 0.002273849752
+dangle_right_CUU -0.003594518195
+dangle_right_GAA -0.002619444421
+dangle_right_GAC -0.006356273177
+dangle_right_GAG -0.005979019
+dangle_right_GAU 0.001594382639
+dangle_right_GCA 0.02299918126
+dangle_right_GCC -0.03739676075
+dangle_right_GCG -0.1860843714
+dangle_right_GCU -0.03762747579
+dangle_right_GGA -0.006192272441
+dangle_right_GGC 0.007767462616
+dangle_right_GGG -0.006800439465
+dangle_right_GGU -0.003501642696
+dangle_right_GUA 0.06866928967
+dangle_right_GUC -0.02231872052
+dangle_right_GUG 0.004417236043
+dangle_right_GUU -0.04199756619
+dangle_right_UAA -0.115257733
+dangle_right_UAC 0.0430268951
+dangle_right_UAG 0.1803824889
+dangle_right_UAU -0.1840398136
+dangle_right_UCA -0.00446716353
+dangle_right_UCC 0.005541930581
+dangle_right_UCG -0.003985196114
+dangle_right_UCU -0.005178007618
+dangle_right_UGA 0.156927665
+dangle_right_UGC 0.08885932329
+dangle_right_UGG -0.1080537799
+dangle_right_UGU 0.03131806836
+dangle_right_UUA 0.003855960903
+dangle_right_UUC 0.006581081943
+dangle_right_UUG -0.006346979344
+dangle_right_UUU -0.01098815016
+external_unpaired 0.0388346167
+external_paired -0.2577193192
diff --git a/src/roc_area.pl b/src/roc_area.pl
new file mode 100755
index 0000000..eec9b13
--- /dev/null
+++ b/src/roc_area.pl
@@ -0,0 +1,171 @@
+#!/usr/bin/perl
+
+use strict;
+use Cwd qw(realpath);
+use File::Basename;
+
+# Globals: the command used to run score_directory.pl from this script's own
+# directory, and the directory names collected from the command line.
+my $SCORE_DIRECTORY = dirname(realpath($0))."/score_directory.pl";
+my @dir_names = ();
+
+############################################################
+# Version()
+#
+# Report name, version and author on STDERR, then exit(0).
+############################################################
+
+sub Version {
+    print STDERR
+        "ROCarea version 1.00 - Compute ROC area for a set of RNA secondary structure predictions\n",
+        "\n",
+        "Written by Chuong B. Do\n";
+    exit(0);
+}
+
+############################################################
+# Usage()
+#
+# Print the command-line synopsis to STDERR and exit(1).
+############################################################
+
+sub Usage {
+    print STDERR
+        "\n",
+        "Usage: ".basename(realpath($0))." [protein|rna] [OPTION]... REF_DIR TEST_DIR\n",
+        "\n",
+        " where [OPTION]... is a list of zero or more optional arguments\n",
+        " REF_DIR is the name of the reference directory\n",
+        " TEST_DIR is the name of the test directory\n",
+        "\n",
+        "Miscellaneous arguments:\n",
+        " --version display program version information\n",
+        "\n";
+    exit(1);
+}
+
+############################################################
+# CompareDirectoryNames()
+#
+# sort() comparator: orders the names in $a and $b by the
+# number that follows "gamma=", ascending; returns -1, 0 or 1.
+# A name with no (or an empty) "gamma=" value counts as 0.
+############################################################
+
+sub CompareDirectoryNames
+{
+    # Capture through list assignment rather than reading $1 afterwards:
+    # a failed match leaves $1 at its previous value, so a name without
+    # "gamma=" would silently inherit the other name's number.
+    my ($gamma_a) = $a =~ /gamma=([.0-9]*)/;
+    my ($gamma_b) = $b =~ /gamma=([.0-9]*)/;
+
+    $gamma_a = 0 unless defined($gamma_a) && length($gamma_a);
+    $gamma_b = 0 unless defined($gamma_b) && length($gamma_b);
+
+    return $gamma_a <=> $gamma_b;
+}
+
+############################################################
+# ParseParameters()
+#
+# Parse @ARGV.  "--version" anywhere on the command line prints
+# the version and exits.  Otherwise the first argument must be
+# "protein" or "rna" (it is appended to $SCORE_DIRECTORY) and
+# the remaining arguments must be exactly two directory names,
+# which are stored in @dir_names (REF_DIR, then TEST_DIR).
+# Any other command line prints a message and exits with 1.
+############################################################
+
+sub ParseParameters
+{
+    # Handle --version before counting arguments; with the count
+    # check first, "prog --version" could only ever reach Usage().
+    if (grep { $_ eq "--version" } @ARGV)
+    {
+        Version();
+    }
+
+    if (@ARGV < 3)
+    {
+        Usage();
+    }
+
+    # The first argument is forwarded to score_directory.pl.
+    my ($type, @rest) = @ARGV;
+    if ($type ne "protein" && $type ne "rna")
+    {
+        print STDERR "ERROR: First argument should be \"protein\" or \"rna\".\n";
+        exit(1);
+    }
+    $SCORE_DIRECTORY .= " ".$type;
+
+    # Everything after the first argument is a directory name.
+    @dir_names = @rest;
+    if (@dir_names != 2)
+    {
+        print STDERR "ERROR: Incorrect number of directory names.\n";
+        exit(1);
+    }
+}
+
+############################################################
+# main()
+#
+# Score every entry of TEST_DIR against REF_DIR, list the
+# results by increasing PPV and print the area under them.
+############################################################
+
+ParseParameters();
+
+my $ref_dir_name = $dir_names[0];
+my $test_dir_name = $dir_names[1];
+
+# Single-quote a word so the shell passes directory names through unchanged.
+my $shell_quote = sub { (my $word = $_[0]) =~ s/'/'\\''/g; return "'$word'"; };
+
+# List a directory in-process (sorted, dot files skipped, like "ls").
+my $list_dir = sub {
+    my ($dir) = @_;
+    opendir(my $dh, $dir) or do { warn "ERROR: Unable to read \"$dir\": $!\n"; return; };
+    my @entries = sort grep { !/^\./ } readdir($dh);
+    closedir($dh);
+    return @entries;
+};
+
+# compute score for each subdirectory
+my $ref_arg = $shell_quote->($ref_dir_name);
+my @points = ();
+foreach my $subdir ($list_dir->($test_dir_name))
+{
+    print ".";
+    my $test_arg = $shell_quote->("$test_dir_name/$subdir");
+    my @ret = `$SCORE_DIRECTORY $ref_arg $test_arg`;
+    # Capture in list context: a failed match yields nothing, whereas
+    # $1/$2 would still hold the previous subdirectory's scores.
+    my ($sens, $ppv) = (@ret ? $ret[0] : "") =~ /sens=([e\.0-9+-]+); ppv=([e\.0-9+-]+)/;
+    unless (defined($ppv))
+    {
+        print STDERR "ERROR: Unable to score \"$test_dir_name/$subdir\"; skipping it.\n";
+        next;
+    }
+    push(@points, { name => $subdir, sens => $sens, ppv => $ppv });
+}
+print "\n";
+
+# sort scores by increasing PPV (and decreasing sensitivity)
+@points = sort { $a->{ppv} <=> $b->{ppv} || $b->{sens} <=> $a->{sens} } @points;
+
+# compute ROC area (trapezoid rule over the sorted points)
+my $area = 0;
+my $prev_sens = @points ? $points[0]{sens} : 0;
+my $prev_ppv = 0;
+foreach my $point (@points)
+{
+    $area += ($point->{ppv} - $prev_ppv) * ($prev_sens + $point->{sens}) / 2.0;
+    ($prev_sens, $prev_ppv) = ($point->{sens}, $point->{ppv});
+    my $count = () = $list_dir->("$test_dir_name/$point->{name}");
+    print "ref=$ref_dir_name; test=$test_dir_name/$point->{name}; n=$count; sens=$point->{sens}; ppv=$point->{ppv};\n";
+}
+
+print "\nroc=$area;\n";
diff --git a/src/score_directory.pl b/src/score_directory.pl
new file mode 100755
index 0000000..10c26b6
--- /dev/null
+++ b/src/score_directory.pl
@@ -0,0 +1,147 @@
+#!/usr/bin/perl
+
+use strict;
+use Cwd qw(realpath);
+use File::Basename;
+
+# Globals: the command used to run score_prediction from this script's own
+# directory, and the directory names collected from the command line.
+my $SCORE_PREDICTION = dirname(realpath($0))."/score_prediction";
+my @dir_names = ();
+
+############################################################
+# Version()
+#
+# Report name, version and author on STDERR, then exit(0).
+############################################################
+
+sub Version {
+    print STDERR
+        "ScoreDirectory version 1.00 - Compare two directories of RNA secondary structure predictions\n",
+        "\n",
+        "Written by Chuong B. Do\n";
+    exit(0);
+}
+
+############################################################
+# Usage()
+#
+# Print the command-line synopsis to STDERR and exit(1).
+############################################################
+
+sub Usage {
+    print STDERR
+        "\n",
+        "Usage: ".basename(realpath($0))." [protein|rna] [OPTION]... REF_DIR TEST_DIR\n",
+        "\n",
+        " where [OPTION]... is a list of zero or more optional arguments\n",
+        " REF_DIR is the name of the reference directory\n",
+        " TEST_DIR is the name of the test directory\n",
+        "\n",
+        "Miscellaneous arguments:\n",
+        " --version display program version information\n",
+        "\n";
+    exit(1);
+}
+
+############################################################
+# ParseParameters()
+#
+# Parse @ARGV.  "--version" anywhere on the command line prints
+# the version and exits.  Otherwise the first argument must be
+# "protein" or "rna"; it is appended to $SCORE_PREDICTION, as is
+# every "--core" option.  The remaining arguments must be exactly
+# two directory names, stored in @dir_names (REF_DIR, TEST_DIR).
+# Any other command line prints a message and exits with 1.
+############################################################
+
+sub ParseParameters
+{
+    # Handle --version before counting arguments; with the count
+    # check first, "prog --version" could only ever reach Usage().
+    Version() if grep { $_ eq "--version" } @ARGV;
+
+    Usage() if @ARGV < 3;
+
+    my ($type, @rest) = @ARGV;
+    if ($type ne "protein" && $type ne "rna")
+    {
+        print STDERR "ERROR: First argument should be \"protein\" or \"rna\".\n";
+        exit(1);
+    }
+    $SCORE_PREDICTION .= " ".$type;
+
+    @dir_names = ();
+    foreach my $arg (@rest)
+    {
+        if ($arg eq "--core")
+        {
+            $SCORE_PREDICTION .= " --core";
+        }
+        else
+        {
+            push(@dir_names, $arg);
+        }
+    }
+
+    if (@dir_names != 2)
+    {
+        print STDERR "ERROR: Incorrect number of directory names.\n";
+        exit(1);
+    }
+}
+
+############################################################
+# main()
+#
+# Run score_prediction on each file of TEST_DIR that also
+# exists in REF_DIR and print the averaged Q, fM, sens, ppv.
+############################################################
+
+ParseParameters();
+
+my $ref_dir_name = $dir_names[0];
+my $test_dir_name = $dir_names[1];
+
+# Single-quote a word so the shell passes file names through unchanged.
+my $shell_quote = sub { (my $word = $_[0]) =~ s/'/'\\''/g; return "'$word'"; };
+
+# Read the test directory in-process (sorted, dot files skipped, like "ls").
+opendir(my $dh, $test_dir_name) or die "ERROR: Unable to read directory \"$test_dir_name\": $!\n";
+my @test_filenames = sort grep { !/^\./ } readdir($dh);
+closedir($dh);
+
+my ($count, $Q_total, $fM_total, $sens_total, $ppv_total) = (0, 0, 0, 0, 0);
+foreach my $test_file (@test_filenames)
+{
+    # check for reference file
+    unless (-e "$ref_dir_name/$test_file")
+    {
+        print STDERR "ERROR: Unable to find test file \"$test_file\" in reference directory.\n";
+        next;
+    }
+
+    my $ref_arg = $shell_quote->("$ref_dir_name/$test_file");
+    my $test_arg = $shell_quote->("$test_dir_name/$test_file");
+    my @ret = `$SCORE_PREDICTION $ref_arg $test_arg`;
+    # Capture in list context: a failed match yields nothing, whereas
+    # $1..$4 would still hold the previous file's scores.
+    my @scores = (@ret ? $ret[0] : "") =~ /Q=([e\.0-9+-]+); fM=([e\.0-9+-]+); sens=([e\.0-9+-]+); ppv=([e\.0-9+-]+)/;
+    unless (@scores == 4)
+    {
+        print STDERR "ERROR: Unable to score test file \"$test_file\".\n";
+        next;
+    }
+    $count++;
+    $Q_total += $scores[0];
+    $fM_total += $scores[1];
+    $sens_total += $scores[2];
+    $ppv_total += $scores[3];
+}
+
+# Averaging over zero scored files would be a division by zero.
+die "ERROR: No files could be scored.\n" if $count == 0;
+$_ /= $count for ($Q_total, $fM_total, $sens_total, $ppv_total);
+
+# print results
+print "ref=$ref_dir_name; test=$test_dir_name; n=$count; Q=$Q_total; fM=$fM_total; sens=$sens_total; ppv=$ppv_total;\n";
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/contrafold.git
More information about the debian-med-commit
mailing list