[med-svn] [Git][med-team/hyphy][upstream] New upstream version 2.5.32+dfsg
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Wed Aug 25 15:23:13 BST 2021
Nilesh Patra pushed to branch upstream at Debian Med / hyphy
Commits:
d482898d by Nilesh Patra at 2021-08-25T19:32:00+05:30
New upstream version 2.5.32+dfsg
- - - - -
29 changed files:
- CMakeLists.txt
- res/TemplateBatchFiles/AddABias.ibf
- + res/TemplateBatchFiles/DirectionalREL.bf
- res/TemplateBatchFiles/GARD.bf
- res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
- res/TemplateBatchFiles/SelectionAnalyses/FADE.bf
- res/TemplateBatchFiles/SelectionAnalyses/FEL.bf
- res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf
- res/TemplateBatchFiles/SelectionAnalyses/FitMultiModel.bf
- res/TemplateBatchFiles/SelectionAnalyses/MEME.bf
- res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf
- res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
- res/TemplateBatchFiles/SelectionAnalyses/SLAC.bf
- res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf
- res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf
- res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf
- res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
- res/TemplateBatchFiles/files.lst
- res/TemplateBatchFiles/libv3/UtilityFunctions.bf
- res/TemplateBatchFiles/libv3/all-terms.bf
- res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf
- res/TemplateBatchFiles/libv3/tasks/estimators.bf
- src/core/global_things.cpp
- src/core/include/likefunc.h
- src/core/likefunc.cpp
- src/core/matrix.cpp
- src/core/simplelist.cpp
- src/core/topology.cpp
- src/mains/unix.cpp
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -307,21 +307,16 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
#-------------------------------------------------------------------------------
find_package(CURL)
if(${CURL_FOUND})
- #if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
- # set(DEFAULT_LIBRARIES " crypto curl ssl")
- #else(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
- # set(DEFAULT_LIBRARIES "dl crypto curl ssl")
- #endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
message(${CURL_LIBRARIES})
- string(APPEND DEFAULT_LIBRARIES " " ${CURL_LIBRARIES})
+ list(APPEND DEFAULT_LIBRARIES ${CURL_LIBRARIES})
add_definitions (-D__HYPHYCURL__)
endif(${CURL_FOUND})
if(NOT NOZLIB)
find_package(ZLIB 1.2.9)
if(${ZLIB_FOUND})
- string(APPEND DEFAULT_LIBRARIES " " ${ZLIB_LIBRARIES})
- include_directories(${ZLIB_INCLUDE_DIRS})
+ list(APPEND DEFAULT_LIBRARIES ${ZLIB_LIBRARIES})
+ include_directories(${ZLIB_INCLUDE_DIRS})
add_definitions (-D__ZLIB__)
endif(${ZLIB_FOUND})
endif(NOT NOZLIB)
@@ -637,6 +632,6 @@ add_test (CONTRAST-FEL HYPHYMP tests/hbltests/libv3/CFEL.wbf)
add_test (GARD HYPHYMP tests/hbltests/libv3/GARD.wbf)
add_test (FADE HYPHYMP tests/hbltests/libv3/FADE.wbf)
add_test (ABSREL HYPHYMP tests/hbltests/libv3/ABSREL.wbf)
-add_test (FMM HYPHYMP tests/hbltests/libv3/FMM.wbf)
+#add_test (FMM HYPHYMP tests/hbltests/libv3/FMM.wbf)
#add_test (NAME ABSREL-MH COMMAND HYPHYMP tests/hbltests/libv3/ABSREL-MH.wbf)
=====================================
res/TemplateBatchFiles/AddABias.ibf
=====================================
@@ -47,7 +47,7 @@ function AddABiasREL (ModelMatrixName&, ModelMatrixName2&, biasedBase)
t = 1; /* branch length, local parameter */
c = 1; /* rate variation */
- _numericRateMatrix = ModelMatrixName;
+ _numericRateMatrix = Eval ("ModelMatrixName");
/* the probability that site is undergoing biased substitution rates */
global P_bias = 0.1; P_bias :< 0.5;
@@ -55,25 +55,29 @@ function AddABiasREL (ModelMatrixName&, ModelMatrixName2&, biasedBase)
category catVar = (2,{{1-P_bias,P_bias}},MEAN,,{{0,1}},0,1);
- for (ri = 0; ri < 20; ri = ri+1)
- {
- for (ci = ri+1; ci < 20; ci = ci+1)
- {
+ for (ri = 0; ri < 20; ri += 1) {
+ // make sure the row is not all 0
+ max_row = Max (_numericRateMatrix[ri][-1], 0);
+ //fprintf (stdout, "\n", ri, ":", max_row, "\t");
+ if (max_row < 1e-10) {
+ for (ci = 0; ci < 20; ci+=1) {
+ _numericRateMatrix[ci][ri] = 1e-6;
+ _numericRateMatrix[ri][ci] = 1e-6;
+ }
+ }
+ for (ci = ri+1; ci < 20; ci += 1) {
ModelMatrixName2[ri][ci] := _numericRateMatrix__[ri__][ci__] * t * c;
ModelMatrixName2[ci][ri] := _numericRateMatrix__[ri__][ci__] * t * c;
-
}
}
- if (biasedBase < 20)
- {
+ if (biasedBase < 20) {
global rateBiasTo = 5.0;
global rateBiasFrom := 1/rateBiasTo;
rateBiasTo :>1;
relBias :>1; /* UNUSED ?!? */
- for (ri = 0; ri < 20; ri = ri+1)
- {
+ for (ri = 0; ri < 20; ri += 1) {
if (ri != biasedBase)
{
ModelMatrixName2[ri][biasedBase] := _numericRateMatrix__[ri__][biasedBase__] * t * c * ((catVar==1)*rateBiasTo+(catVar==0));
@@ -81,7 +85,7 @@ function AddABiasREL (ModelMatrixName&, ModelMatrixName2&, biasedBase)
}
}
}
-
+
return 1;
}
=====================================
res/TemplateBatchFiles/DirectionalREL.bf
=====================================
@@ -0,0 +1,592 @@
+
+/* define an associative array with key = amino acid and value = integer
+ index for AA in alphabetical order */
+AAString = "ACDEFGHIKLMNPQRSTVWY";
+
+AACharToIdx = {}; /* this is never used :-P */
+for (k=0; k<20; k=k+1)
+{
+ AACharToIdx [AAString[k]] = k;
+}
+
+SKIP_MODEL_PARAMETER_LIST = 0;
+
+#include "AddABias.ibf";
+#include "Utility/GrabBag.bf";
+
+test_p_values = {20,2};
+
+/*--------------------------------------------------------------------------------------------*/
+/*
+ returns stationary distribution vector for amino acids
+ UNUSED?!?
+*/
+function GetEqFreqs (ModelMatrixName&, baseFreqs)
+{
+ t = 1;
+
+ /* duplicate instantaneous rate parameterization from template model */
+ numRateMx = ModelMatrixName;
+
+ for (ri = 0; ri < 20; ri = ri+1)
+ {
+ for (ci = 0; ci < 20; ci = ci+1)
+ {
+ if (ri != ci)
+ {
+ /* multiply entries by base frequencies */
+ numRateMx[ri][ci] = numRateMx[ri][ci] * baseFreqs[ci];
+
+ /* diagonal entry is sum of off-diagonal row entries (Markov process) */
+ numRateMx[ri][ri] = numRateMx[ri][ri] - numRateMx[ri][ci];
+ }
+ }
+ }
+
+
+ for (ri = 0; ri < 20; ri = ri+1)
+ {
+ numRateMx [ri][19] = 1;
+ }
+ numRateMxI = Inverse (numRateMx);
+ return numRateMxI [19][-1];
+}
+
+/*--------------------------------------------------------------------------------------------*/
+
+SetDialogPrompt ("Protein file to examine:");
+
+ChoiceList (reloadFlag, "Reload/New", 1, SKIP_NONE, "New analysis","Start a new analysis",
+ "Reload","Reload a baseline protein fit.",
+ "Reprocess the results","Regenerate sites from model fits.");
+
+
+ACCEPT_ROOTED_TREES = 1;
+
+if (reloadFlag == 0) {
+ /* new analysis, fit baseline model */
+
+ /* this should be a protein alignment */
+ DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
+ basePath = LAST_FILE_PATH;
+ DataSetFilter filteredData = CreateFilter (ds,1);
+
+ GetDataInfo (checkCharacters, filteredData, "CHARACTERS");
+ if (Columns (checkCharacters) != 20)
+ {
+ fprintf (stdout, "ERROR: please ensure that the input alignment contains protein sequences");
+ return 0;
+ }
+
+ /* from AddABias.ibf; select from one of several AA rate matrices estimated from
+ curated alignments (e.g., HIV within); by default, set rate variation to
+ adaptive 4-bin beta-gamma -- argument 0 skips dialog to set [pickATarget] */
+
+ promptModel (0);
+
+
+ ExecuteAFile (HYPHY_LIB_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "queryTree.bf");
+ ChoiceList (fixFB, "Fix Branch", 1, SKIP_NONE, "Unknown root","The character at the root of the tree is drawn from the stationary distribution",
+ "Fix 1st sequence as root","The 1st sequence in the file (assumed to be a direct descendant of the root) is used to populate the root sequences.");
+
+
+ /* check if the tree is rooted */
+
+ treeAVL = givenTree^0;
+ rootNode = treeAVL[(treeAVL[0])["Root"]];
+ if (Abs(rootNode["Children"]) != 2) {
+ fprintf (stdout, "ERROR: please ensure that the tree is rooted");
+ return 0;
+ }
+ root_left = "givenTree." + (treeAVL[(rootNode["Children"])[0]])["Name"] + ".t";
+ root_right = "givenTree." + (treeAVL[(rootNode["Children"])[1]])["Name"] + ".t";
+
+ if (fixFB>0) {
+ /* branch to first sequence is collapsed to length zero;
+ enforcing identifiability with root sequence */
+ ExecuteCommands ("givenTree."+TipName(givenTree,0)+".t:=0");
+ }
+ else
+ {
+ if (fixFB < 0)
+ {
+ return 0;
+ }
+ }
+
+ /* only the sum of branch lengths leading to two immediate descendants
+ of root can be estimated; this measure prevents one of the branches
+ from collapsing to zero length */
+ ExecuteCommands (root_left + ":=" + root_right);
+
+ LikelihoodFunction lf = (filteredData, givenTree);
+ fprintf (stdout, "[PHASE 0.1] Standard model fit\n");
+
+
+ VERBOSITY_LEVEL = 1;
+ AUTO_PARALLELIZE_OPTIMIZE = 1;
+ Optimize (res0,lf);
+ AUTO_PARALLELIZE_OPTIMIZE = 0;
+ VERBOSITY_LEVEL = -1;
+
+
+ /* export baseline model LF */
+ LIKELIHOOD_FUNCTION_OUTPUT = 7;
+ outPath = basePath + ".base";
+ fprintf (outPath, CLEAR_FILE, lf);
+
+ baselineLogL = res0[1][0];
+
+}
+else {
+ /* import baseline LF from file */
+ modelNameString = "_customAAModelMatrix";
+ SetDialogPrompt ("Locate an existing fit:");
+ ExecuteAFile (PROMPT_FOR_FILE);
+ GetString (lfInfo,lf,-1);
+ if ((lfInfo["Models"])[0] == "mtREVModel")
+ {
+ modelNameString = "mtREVMatrix";
+ }
+ bpSplit = splitFilePath (LAST_FILE_PATH);
+ basePath = bpSplit["DIRECTORY"] + bpSplit["FILENAME"];
+ outPath = basePath + ".base";
+ treeString = Format (givenTree,0,0); /* topology only, no branch lengths */
+
+ treeAVL = givenTree^0;
+ rootNode = treeAVL[(treeAVL[0])["Root"]];
+ if (Abs(rootNode["Children"]) != 2)
+ {
+ fprintf (stdout, "ERROR: please ensure that the tree is rooted");
+ return 0;
+ }
+
+
+ ChoiceList (fixFB, "Fix Branch", 1, SKIP_NONE, "Unknown root","The character at the root of the tree is drawn from the stationary distribution",
+ "Fix 1st sequence as root","The 1st sequence in the file (assumed to be a direct descendant of the root) is used to populate the root sequences.");
+ if (fixFB>0)
+ {
+ ExecuteCommands ("givenTree."+TipName(givenTree,0)+".t:=0");
+ }
+ else
+ {
+ if (fixFB < 0)
+ {
+ return 0;
+ }
+ }
+
+ LFCompute (lf,LF_START_COMPUTE);
+ LFCompute (lf,baselineLogL);
+ LFCompute (lf,LF_DONE_COMPUTE);
+}
+
+
+/* when the heck does biasedTree get called? It's tucked inside runAFit() in AddABias.ibf... */
+root_left = "biasedTree." + (treeAVL[(rootNode["Children"])[0]])["Name"] + ".t";
+root_right = "biasedTree." + (treeAVL[(rootNode["Children"])[1]])["Name"] + ".t";
+
+
+/* vector of branch lengths for entire tree */
+baselineBL = BranchLength (givenTree,-1);
+
+referenceL = (baselineBL * (Transpose(baselineBL)["1"]))[0];
+
+summaryPath = basePath+".summary";
+substitutionsPath = basePath+"_subs.csv";
+siteReportMap = basePath+"_bysite.csv";
+fprintf (summaryPath, CLEAR_FILE, KEEP_OPEN);
+fprintf (stdout, "[PHASE 0.2] Standard model fit. Log-L = ",baselineLogL,". Tree length = ",referenceL, " subs/site \n");
+fprintf (summaryPath, "[PHASE 0.2] Standard model fit. Log-L = ",baselineLogL,". Tree length = ",referenceL, " subs/site \n");
+
+
+ExecuteAFile (HYPHY_LIB_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "Utility" + DIRECTORY_SEPARATOR + "GrabBag.bf");
+ExecuteAFile (HYPHY_LIB_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "Utility" + DIRECTORY_SEPARATOR + "AncestralMapper.bf");
+
+fixGlobalParameters ("lf");
+
+byResidueSummary = {};
+bySiteSummary = {};
+
+/*------------------------------------------------------------------------------*/
+
+if (MPI_NODE_COUNT > 1)
+{
+ MPINodeStatus = {MPI_NODE_COUNT-1,1}["-1"];
+}
+
+
+
+for (residue = 0; residue < 20; residue = residue + 1)
+{
+ if (reloadFlag == 2) /* regenerate sites from results files */
+ {
+ lfb_MLES = {2,1};
+ ExecuteAFile (basePath + "." + AAString[residue]);
+ LFCompute (lfb,LF_START_COMPUTE);
+ LFCompute (lfb,res);
+ LFCompute (lfb,LF_DONE_COMPUTE);
+ lfb_MLES [1][0] = res;
+ DoResults (residue);
+ }
+ else
+ {
+ AddABiasREL (modelNameString,"biasedMatrix",residue); /* returns biasedMatrix object */
+ global P_bias2 := 1;
+ global relBias := 1;
+
+ /* vectorOfFrequencies comes from Custom_AA_empirical.mdl, in turn imported from a file such as "HIVWithin" */
+ /* rate matrix is multiplied by this vector (third argument) */
+ Model biasedModel = (biasedMatrix, vectorOfFrequencies, 1);
+ Tree biasedTree = treeString;
+ global treeScaler = 1;
+
+ /* constrains tree to be congruent to that estimated under baseline model */
+ ReplicateConstraint ("this1.?.?:=treeScaler*this2.?.?__",biasedTree,givenTree);
+ ExecuteCommands (root_left + "=" + root_left);
+ ExecuteCommands (root_right + "=" + root_right);
+ LikelihoodFunction lfb = (filteredData, biasedTree);
+
+
+ if (MPI_NODE_COUNT > 1)
+ {
+ SendAJob (residue);
+ }
+ else
+ {
+ Optimize (lfb_MLES,lfb);
+ DoResults (residue);
+ }
+ }
+}
+
+/*------------------------------------------------------------------------------*/
+
+if (MPI_NODE_COUNT > 1)
+{
+ jobsLeft = ({1,MPI_NODE_COUNT-1}["1"] * MPINodeStatus["_MATRIX_ELEMENT_VALUE_>=0"])[0];
+
+ for (nodeID = 0; nodeID < jobsLeft; nodeID = nodeID + 1)
+ {
+ MPIReceive (-1, fromNode, theJob);
+ oldRes = MPINodeStatus[fromNode-1];
+ ExecuteCommands (theJob);
+ DoResults (oldRes);
+ }
+}
+
+/*------------------------------------------------------------------------------*/
+
+fprintf (substitutionsPath, CLEAR_FILE, KEEP_OPEN, "Site,From,To,Count");
+fprintf (siteReportMap, CLEAR_FILE, KEEP_OPEN, "Site");
+for (k=0; k<20; k=k+1)
+{
+ fprintf (siteReportMap, ",", AAString[k]);
+}
+fprintf (siteReportMap, "\nLRT p-value");
+
+test_p_values = test_p_values % 0;
+rejectedHypotheses = {};
+
+for (k=0; k<20; k=k+1)
+{
+ pv = (byResidueSummary[AAString[k]])["p"];
+ fprintf (siteReportMap, ",", pv);
+}
+
+fprintf (stdout, "\nResidues (and p-values) for which there is evidence of directional selection\n");
+fprintf (summaryPath, "\nResidues (and p-values) for which there is evidence of directional selection");
+
+for (k=0; k<20; k=k+1)
+{
+ if (test_p_values[k][0] < (0.05/(20-k)))
+ {
+ rejectedHypotheses [test_p_values[k][1]] = 1;
+ rejectedHypotheses [AAString[test_p_values[k][1]]] = 1;
+ fprintf (stdout, "\n\t", AAString[test_p_values[k][1]], " : ",test_p_values[k][0] );
+ fprintf (summaryPath, "\n\t", AAString[test_p_values[k][1]], " : ",test_p_values[k][0] );
+ }
+ else
+ {
+ break;
+ }
+}
+
+fprintf (stdout, "\n");
+fprintf (summaryPath, "\n");
+
+
+ancCacheID = _buildAncestralCache ("lf", 0);
+outputcount = 0;
+
+for (k=0; k<filteredData.sites; k=k+1)
+{
+ thisSite = _substitutionsBySite (ancCacheID,k);
+
+ for (char1 = 0; char1 < 20; char1 = char1+1)
+ {
+ for (char2 = 0; char2 < 20; char2 = char2+1)
+ {
+ if (char1 != char2 && (thisSite["COUNTS"])[char1][char2])
+ {
+ ccount = (thisSite["COUNTS"])[char1][char2];
+ fprintf (substitutionsPath, "\n", k+1, ",", AAString[char1], ",", AAString[char2], "," , ccount);
+ }
+ }
+ }
+
+
+ if (Abs(bySiteSummary[k]))
+ {
+ fprintf (siteReportMap, "\n", k+1);
+
+ didSomething = 0;
+ pv = 0;
+ for (k2=0; k2<20; k2=k2+1)
+ {
+ if (Abs((byResidueSummary[AAString[k2]])["BFs"]) == 0 || rejectedHypotheses[k2] == 0)
+ {
+ fprintf (siteReportMap, ",N/A");
+ }
+ else
+ {
+ thisSitePV = ((byResidueSummary[AAString[k2]])["BFs"])[k];
+ pv = Max(pv,thisSitePV);
+ fprintf (siteReportMap, ",", thisSitePV);
+ if (pv > 100)
+ {
+ didSomething = 1;
+ }
+ }
+ }
+
+ if (!didSomething)
+ {
+ continue;
+ }
+
+ if (outputcount == 0)
+ {
+ outputcount = 1;
+ fprintf (stdout, "\nThe list of sites which show evidence of directional selection (Bayes Factor > 20)\n",
+ "together with the target residues and inferred substitution counts\n");
+ fprintf (summaryPath, "\nThe list of sites which show evidence of directional selection (Bayes Factor > 20)\n",
+ "together with the target residues and inferred substitution counts\n");
+ }
+ fprintf (stdout, "\nSite ", Format (k+1,8,0), " (max BF = ", pv, ")\n Preferred residues: ");
+ fprintf (summaryPath, "\nSite ", Format (k+1,8,0), " (max BF = ", pv, ")\n Preferred residues: ");
+
+
+ for (k2 = 0; k2 < Abs (bySiteSummary[k]); k2=k2+1)
+ {
+ thisChar = (bySiteSummary[k])[k2];
+ if (rejectedHypotheses[thisChar])
+ {
+ fprintf (stdout, thisChar);
+ fprintf (summaryPath, thisChar);
+ }
+ }
+
+ fprintf (stdout, "\n Substitution counts:");
+ fprintf (summaryPath, "\n Substitution counts:");
+
+ for (char1 = 0; char1 < 20; char1 = char1+1)
+ {
+ for (char2 = char1+1; char2 < 20; char2 = char2+1)
+ {
+ ccount = (thisSite["COUNTS"])[char1][char2];
+ ccount2 = (thisSite["COUNTS"])[char2][char1];
+ if (ccount+ccount2)
+ {
+ fprintf (stdout, "\n\t", AAString[char1], "->", AAString[char2], ":", Format (ccount, 5, 0), "/",
+ AAString[char2], "->", AAString[char1], ":", Format (ccount2, 5, 0));
+ fprintf (summaryPath, "\n\t", AAString[char1], "->", AAString[char2], ":", Format (ccount, 5, 0), "/",
+ AAString[char2], "->", AAString[char1], ":", Format (ccount2, 5, 0));
+ }
+ }
+ }
+
+ }
+}
+
+_destroyAncestralCache (ancCacheID);
+fprintf (substitutionsPath, CLOSE_FILE);
+fprintf (summaryPath, CLOSE_FILE);
+fprintf (siteReportMap, CLOSE_FILE);
+fprintf (stdout, "\n");
+
+/*--------------------------------------------------------------------------------------------*/
+/*
+ Compute the difference vector between the stationary distribution at the root (efv) and the expected
+ distribution of residues after time t0.
+*/
+function computeDelta (ModelMatrixName&, efv, t_0, which_cat)
+{
+ t = t_0;
+ c = 1;
+ catVar = which_cat;
+ rmx = ModelMatrixName;
+ for (r=0; r<20; r=r+1)
+ {
+ diag = 0;
+ for (c=0; c<20; c=c+1)
+ {
+ rmx[r][c] = rmx[r][c] * efv[c];
+ diag = diag - rmx[r][c];
+ }
+ rmx[r][r] = diag;
+ }
+ return Transpose(efv)*(Exp (rmx) - {20,20}["_MATRIX_ELEMENT_ROW_==_MATRIX_ELEMENT_COLUMN_"]);
+}
+
+/*------------------------------------------------------------------------------*/
+
+function SendAJob (residueIn)
+{
+ for (nodeID = 0; nodeID < MPI_NODE_COUNT -1; nodeID = nodeID + 1)
+ {
+ if (MPINodeStatus[nodeID] < 0) /* semaphore indicates available node */
+ {
+ MPINodeStatus[nodeID] = residueIn;
+ MPISend (nodeID+1,lfb);
+ break;
+ }
+ }
+ if (nodeID == MPI_NODE_COUNT - 1) /* looped through semaphore with no idle nodes */
+ {
+ MPIReceive (-1, fromNode, theJob); /* wait until one of the node completes its job */
+ oldRes = MPINodeStatus[fromNode-1];
+ MPISend (fromNode,lfb);
+ MPINodeStatus[fromNode-1] = residueIn;
+ ExecuteCommands (theJob); /* load serialized LF into master node memory */
+ DoResults (oldRes);
+ }
+ return 0;
+}
+
+/*------------------------------------------------------------------------------*/
+
+function DoResults (residueIn)
+{
+ residueC = AAString[residueIn];
+ fprintf (stdout, "[PHASE ",residueIn+1,".1] Model biased for ",residueC,"\n");
+ fprintf (summaryPath, "[PHASE ",residueIn+1,".1] Model biased for ",residueC,"\n");
+
+ pv = 1-CChi2(2(lfb_MLES[1][0]-baselineLogL),3); /* approximate p-value */
+ fprintf (stdout, "[PHASE ",residueIn+1,".2] Finished with the model biased for ",residueC,". Log-L = ",Format(lfb_MLES[1][0],8,3),"\n");
+ fprintf (summaryPath, "[PHASE ",residueIn+1,".2] Finished with the model biased for ",residueC,". Log-L = ",Format(lfb_MLES[1][0],8,3),"\n");
+
+ fr1 = P_bias;
+
+ rateAccel1 = (computeDelta("biasedMatrix",vectorOfFrequencies,referenceL,1))[residueIn];
+
+ fprintf (stdout, "\n\tBias term = ", Format(rateBiasTo,8,3),
+ "\n\tproportion = ", Format(fr1,8,3),
+ "\n\tExp freq increase = ", Format(rateAccel1*100,8,3), "%",
+ "\n\tp-value = ", Format(pv,8,3),"\n");
+
+ fprintf (summaryPath, "\n\tBias term = ", Format(rateBiasTo,8,3),
+ "\n\tproportion = ", Format(fr1,8,3),
+ "\n\tExp freq increase = ", Format(rateAccel1*100,8,3), "%",
+ "\n\tp-value = ", Format(pv,8,3),"\n");
+ if (reloadFlag != 2)
+ {
+ LIKELIHOOD_FUNCTION_OUTPUT = 7;
+ outPath = basePath + "." + residueC;
+ fprintf (outPath, CLEAR_FILE, lfb);
+ }
+
+ byResidueSummary [residueC] = {};
+ (byResidueSummary [residueC])["p"] = pv;
+
+ test_p_values [residueIn][0] = pv;
+ test_p_values [residueIn][1] = residueIn;
+
+ /*if (pv < 0.0025)*/
+ {
+ (byResidueSummary [residueC])["sites"] = {};
+ (byResidueSummary [residueC])["BFs"] = {};
+
+ ConstructCategoryMatrix (mmx,lfb,COMPLETE);
+ GetInformation (catOrder, lfb);
+ dim = Columns (mmx);
+ _MARGINAL_MATRIX_ = {2, dim};
+
+ GetInformation (cInfo, c);
+ GetInformation (_CATEGORY_VARIABLE_CDF_, catVar);
+
+ ccc = Columns (cInfo);
+
+ _CATEGORY_VARIABLE_CDF_ = _CATEGORY_VARIABLE_CDF_[1][-1];
+ if (catOrder [0] == "c")
+ {
+ for (k=0; k<dim; k=k+1)
+ {
+ for (k2 = 0; k2 < ccc; k2=k2+1)
+ {
+ _MARGINAL_MATRIX_ [0][k] = _MARGINAL_MATRIX_ [0][k] + mmx[2*k2][k] *cInfo[1][k2];
+ _MARGINAL_MATRIX_ [1][k] = _MARGINAL_MATRIX_ [1][k] + mmx[2*k2+1][k]*cInfo[1][k2];
+ }
+ }
+ }
+ else
+ {
+ for (k=0; k<dim; k=k+1)
+ {
+ for (k2 = 0; k2 < ccc; k2=k2+1)
+ {
+ _MARGINAL_MATRIX_ [0][k] = _MARGINAL_MATRIX_ [0][k] + mmx[k2][k]*cInfo[1][k2];
+ _MARGINAL_MATRIX_ [1][k] = _MARGINAL_MATRIX_ [1][k] + mmx[ccc+k2][k]*cInfo[1][k2];
+ }
+ }
+ }
+
+ _sites = Columns (_MARGINAL_MATRIX_);
+ _classes = Rows (_MARGINAL_MATRIX_);
+
+ _siteVector = {1, _classes};
+
+ STASH_MARGINAL_MATRIX = _MARGINAL_MATRIX_;
+
+ _post :< 1e300;
+ _norm :< 1e300;
+
+ for (_counter = 0; _counter < _sites; _counter = _counter + 1) {
+ _norm = 0;
+ for (_counter2 = 0; _counter2 < _classes; _counter2 = _counter2 + 1)
+ {
+ _post = _MARGINAL_MATRIX_[_counter2][_counter] * _CATEGORY_VARIABLE_CDF_ [0][_counter2];
+ _norm = _norm + _post;
+ _siteVector [_counter2] = _post;
+ }
+ _siteVector = _siteVector * (1/_norm);
+
+ for (_counter2 = 0; _counter2 < _classes; _counter2 = _counter2 + 1)
+ {
+ _MARGINAL_MATRIX_[_counter2][_counter] = _siteVector [_counter2];
+ }
+ }
+
+
+ prior = (_CATEGORY_VARIABLE_CDF_[1])/(1-_CATEGORY_VARIABLE_CDF_[1]);
+
+ for (k=0; k<dim; k=k+1)
+ {
+ bayesF = _MARGINAL_MATRIX_[1][k]/_MARGINAL_MATRIX_[0][k]/prior;
+ ((byResidueSummary [residueC])["BFs"])[k] = bayesF;
+ if (bayesF > 100)
+ {
+ ((byResidueSummary [residueC])["sites"])[Abs((byResidueSummary [residueC])["sites"])] = k+1;
+ if (Abs(bySiteSummary[k]) == 0)
+ {
+ bySiteSummary[k] = {};
+ }
+ (bySiteSummary[k])[Abs(bySiteSummary[k])] = residueC;
+ }
+ }
+
+ }
+ return 0;
+}
+
+
=====================================
res/TemplateBatchFiles/GARD.bf
=====================================
@@ -248,6 +248,7 @@ for (_pattern_; in; alignments.Extract_site_patterns ("gard.filter")) {
gard.variableSiteMap = Transpose (utility.DictToArray (gard.variableSiteMap)) % 0; // sort by 1st column
gard.variableSites = Rows (gard.variableSiteMap);
+
gard.inverseVariableSiteMap = {};
for (index, pattern; in; gard.variableSiteMap) {
gard.inverseVariableSiteMap[pattern] = index;
=====================================
res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
=====================================
@@ -671,6 +671,10 @@ console.log ( "Likelihood ratio test for episodic diversifying positive selectio
selection.io.stopTimer (busted.json [terms.json.timers], "Overall");
+GetString (_hpv,HYPHY_VERSION,0);
+busted.json[terms.json.runtime] = _hpv;
+
+
io.SpoolJSON (busted.json, busted.codon_data_info [terms.json.json]);
return busted.json;
=====================================
res/TemplateBatchFiles/SelectionAnalyses/FADE.bf
=====================================
@@ -811,6 +811,9 @@ utility.ForEachPair (fade.sites_found_summary, "_residue_", "_count_",
selection.io.stopTimer (fade.json [terms.json.timers], "Overall");
+GetString (_hpv,HYPHY_VERSION,0);
+fade.json[terms.json.runtime] = _hpv;
+
io.SpoolJSON (fade.json, fade.alignment_info[terms.json.json]);
// HELPER FUNCTIONS GO HERE
=====================================
res/TemplateBatchFiles/SelectionAnalyses/FEL.bf
=====================================
@@ -93,25 +93,12 @@ KeywordArgument ("tree", "A phylogenetic tree (optionally annotated with {})", n
KeywordArgument ("branches", "Branches to test", "All");
KeywordArgument ("srv", "Include synonymous rate variation in the model", "Yes");
KeywordArgument ("pvalue", "The p-value threshold to use when testing for selection", "0.1");
+KeywordArgument ("ci", "Compute profile likelihood confidence intervals for each variable site", "No");
// One additional KeywordArgument ("output") is called below after namespace fel.
-fel.table_headers = {{"alpha", "Synonymous substitution rate at a site"}
- {"beta", "Non-synonymous substitution rate at a site"}
- {"alpha=beta", "The rate estimate under the neutral model"}
- {"LRT", "Likelihood ration test statistic for beta = alpha, versus beta &neq; alpha"}
- {"p-value", "Asymptotic p-value for evidence of selection, i.e. beta &neq; alpha"}
- {"Total branch length", "The total length of branches contributing to inference at this site, and used to scale dN-dS"}};
-/**
-This table is meant for HTML rendering in the results web-app; can use HTML characters, the second column
-is 'pop-over' explanation of terms. This is ONLY saved to the JSON file. For Markdown screen output see
-the next set of variables.
-*/
-
-fel.table_screen_output = {{"Codon", "Partition", "alpha", "beta", "LRT", "Selection detected?"}};
-fel.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width: 16, terms.table_options.align : "center"};
namespace fel {
LoadFunctionLibrary ("modules/shared-load-file.bf");
@@ -130,9 +117,48 @@ if (fel.srv == "Yes"){
/* Prompt for p value threshold */
fel.pvalue = io.PromptUser ("\n>Select the p-value threshold to use when testing for selection",0.1,0,1,FALSE);
+fel.ci = "Yes" == io.SelectAnOption( {
+ {"Yes", "Compute profile likelihood confidence intervals for dN/dS at each site (~2x slower)."},
+ {"No", "[Default] Do not compute likelihood confidence intervals"}},
+ "Compute profile likelihood intervals for site-level dN/dS");
+
+KeywordArgument ("resample", "[Advanced setting, will result in MUCH SLOWER run time] Perform parametric bootstrap resampling to derive site-level null LRT distributions up to this many replicates per site. Recommended use for small to medium (<30 sequences) datasets", "0");
+fel.resample = io.PromptUser ("\n>[Advanced setting, will result in MUCH SLOWER run time] Perform parametric bootstrap resampling to derive site-level null LRT distributions up to this many replicates per site. Recommended use for small to medium (<30 sequences) datasets",50,0,1000,TRUE);
+
KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'FEL.json')", fel.codon_data_info [terms.json.json]);
fel.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to");
+
+if (^"fel.ci") {
+ fel.table_headers = {
+ {"alpha", "Synonymous substitution rate at a site"}
+ {"beta", "Non-synonymous substitution rate at a site"}
+ {"alpha=beta", "The rate estimate under the neutral model"}
+ {"LRT", "Likelihood ration test statistic for beta = alpha, versus beta &neq; alpha"}
+ {"p-value", "Asymptotic p-value for evidence of selection, i.e. beta &neq; alpha"}
+ {"Total branch length", "The total length of branches contributing to inference at this site, and used to scale dN-dS"}
+ {"dN/dS LB", "95% profile likelihood CI lower bound for dN/dS (if available)"}
+ {"dN/dS MLE", "Point estimate for site dN/dS"}
+ {"dN/dS UB", "95% profile likelihood CI upper bound for dN/dS (if available)"}
+ };
+ fel.table_screen_output = {{"Codon", "Partition", "alpha", "beta", "LRT", "Selection detected?","dN/dS with confidence intervals"}};
+ fel.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width: 16, terms.table_options.align : "center"};
+
+} else {
+ fel.table_headers = {{"alpha", "Synonymous substitution rate at a site"}
+ {"beta", "Non-synonymous substitution rate at a site"}
+ {"alpha=beta", "The rate estimate under the neutral model"}
+ {"LRT", "Likelihood ration test statistic for beta = alpha, versus beta &neq; alpha"}
+ {"p-value", "Asymptotic p-value for evidence of selection, i.e. beta &neq; alpha"}
+ {"Total branch length", "The total length of branches contributing to inference at this site, and used to scale dN-dS"}};
+ fel.table_screen_output = {{"Codon", "Partition", "alpha", "beta", "LRT", "Selection detected?"}};
+ fel.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width: 16, terms.table_options.align : "center"};
+
+}
+
+
+
+
fel.partition_count = Abs (fel.filter_specification);
io.ReportProgressMessageMD('FEL', 'selector', 'Branches to include in the FEL analysis');
@@ -304,16 +330,14 @@ model.generic.AddGlobal (fel.site.mg_rev, "fel.beta_scaler_nuisance", fel.site_b
parameters.DeclareGlobal (fel.scalers, {});
-
//----------------------------------------------------------------------------------------
-lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, model_mapping) {
-
-
-
+lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, model_mapping, sim_mode) {
+
GetString (lfInfo, ^lf,-1);
- ExecuteCommands (filter_data);
+ ExecuteCommands (filter_data);
__make_filter ((lfInfo["Datafilters"])[0]);
+
utility.SetEnvVariable ("USE_LAST_RESULTS", TRUE);
if (^"fel.srv"){
@@ -382,7 +406,6 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
^"fel.beta_scaler_test" = 1;
^"fel.beta_scaler_nuisance" = 1;
- //utility.SetEnvVariable ("VERBOSITY_LEVEL", 10);
Optimize (results, ^lf
@@ -393,30 +416,71 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
}
);
- //assert (0);
- //Optimize (results, ^lf);
-
- //alternative = estimators.ExtractMLEs (lf, model_mapping);
- alternative = estimators.ExtractMLEsOptions (lf, model_mapping, {^"terms.globals_only" : TRUE});
- alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
+
+ ci = None;
+ if (^"fel.ci") {
+ if (^"fel.srv") {
+ lf_stash = estimators.TakeLFStateSnapshot (lf);
+ global omega_ratio_for_ci;
+ if (^"fel.alpha_scaler" == 0.0) { // has no syn_variation
+ ^"fel.alpha_scaler" = 1e-8;
+ }
+ omega_ratio_for_ci :< 10000;
+ omega_ratio_for_ci :>0;
+ omega_ratio_for_ci = ^"fel.beta_scaler_test" / ^"fel.alpha_scaler";
+
+ ^"fel.beta_scaler_test" := omega_ratio_for_ci * ^"fel.alpha_scaler";
+ ci = parameters.GetProfileCI (&omega_ratio_for_ci, lf, 0.95);
+ estimators.RestoreLFStateFromSnapshot (lf, lf_stash);
+ } else {
+ ci = parameters.GetProfileCI ("fel.beta_scaler_test", lf, 0.95);
+ }
+ }
+
+ if (sim_mode) {
+ lrt = 2*results[1][0];
+ } else {
+ alternative = estimators.ExtractMLEsOptions (lf, model_mapping, {^"terms.globals_only" : TRUE});
+ alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
+ }
^"fel.alpha_scaler" = (^"fel.alpha_scaler" + 3*^"fel.beta_scaler_test")/4;
parameters.SetConstraint ("fel.beta_scaler_test","fel.alpha_scaler", "");
- //Optimize (results, ^lf, {"OPTIMIZATION_METHOD" : "nedler-mead"});
Optimize (results, ^lf);
- //Null = estimators.ExtractMLEs (lf, model_mapping);
- Null = estimators.ExtractMLEsOptions (lf, model_mapping, {^"terms.globals_only" : TRUE});
+ if (sim_mode) {
+ return lrt - 2*results[1][0];
+ } else {
+ Null = estimators.ExtractMLEsOptions (lf, model_mapping, {^"terms.globals_only" : TRUE});
+ Null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
+ }
- Null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
+
+ if (!sim_mode) {
+ if (^"fel.resample") {
+ N = ^"fel.resample";
+ sims = {};
+ GetDataInfo (fi, ^((lfInfo["Datafilters"])[0]), "PARAMETERS");
+ //Export (lfe, ^lf);
+ //console.log (lfe);
+ for (i = 0; i < N; i+=1) {
+ DataSet null_sim = SimulateDataSet (^lf);
+ DataSetFilter null_filter = CreateFilter (null_sim,3,,,fi["EXCLUSIONS"]);
+ sims + alignments.serialize_site_filter (&null_filter, 0);
+ }
+ null_LRT = {N,1};
+ for (i = 0; i < N; i+=1) {
+ is = fel.handle_a_site (lf, sims[i], partition_index, pattern_info, model_mapping, TRUE);
+ null_LRT[i] = is;
+ }
+ return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null, utility.getGlobalValue("terms.simulated"): null_LRT};
+ }
+ }
- /*
- Export (lfs, ^lf);
- fprintf (MESSAGE_LOG, lfs);
- assert (0);
- */
- return {utility.getGlobalValue("terms.alternative") : alternative, utility.getGlobalValue("terms.Null"): Null};
+ return {utility.getGlobalValue("terms.alternative") : alternative,
+ utility.getGlobalValue("terms.Null"): Null,
+ utility.getGlobalValue("terms.confidence_interval"): ci};
}
/* echo to screen calls */
@@ -424,20 +488,38 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
fel.report.counts = {{0,0}};
+if (^"fel.ci") {
+ fel.report.positive_site = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]),
+ fel.report.partition + 1,
+ Format(fel.report.row[0],10,3),
+ Format(fel.report.row[1],10,3),
+ Format(fel.report.row[3],10,3),
+ "Pos. p = " + Format(fel.report.row[4],6,4),
+ Format(fel.report.row[7],7,3) + "(" + Format(fel.report.row[6],7,2) + "-" + Format(fel.report.row[8],7,2) + ")"}};
-fel.report.positive_site = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]),
- fel.report.partition + 1,
- Format(fel.report.row[0],10,3),
- Format(fel.report.row[1],10,3),
- Format(fel.report.row[3],10,3),
- "Pos. p = " + Format(fel.report.row[4],6,4)}};
+ fel.report.negative_site = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]),
+ fel.report.partition + 1,
+ Format(fel.report.row[0],10,3),
+ Format(fel.report.row[1],10,3),
+ Format(fel.report.row[3],10,3),
+ "Neg. p = " + Format(fel.report.row[4],6,4),
+ Format(fel.report.row[7],7,3) + "(" + Format(fel.report.row[6],7,2) + "-" + Format(fel.report.row[8],7,2) + ")"}};
-fel.report.negative_site = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]),
- fel.report.partition + 1,
- Format(fel.report.row[0],10,3),
- Format(fel.report.row[1],10,3),
- Format(fel.report.row[3],10,3),
- "Neg. p = " + Format(fel.report.row[4],6,4)}};
+} else {
+ fel.report.positive_site = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]),
+ fel.report.partition + 1,
+ Format(fel.report.row[0],10,3),
+ Format(fel.report.row[1],10,3),
+ Format(fel.report.row[3],10,3),
+ "Pos. p = " + Format(fel.report.row[4],6,4)}};
+
+ fel.report.negative_site = {{"" + (1+((fel.filter_specification[fel.report.partition])[terms.data.coverage])[fel.report.site]),
+ fel.report.partition + 1,
+ Format(fel.report.row[0],10,3),
+ Format(fel.report.row[1],10,3),
+ Format(fel.report.row[3],10,3),
+ "Neg. p = " + Format(fel.report.row[4],6,4)}};
+}
function fel.report.echo (fel.report.site, fel.report.partition, fel.report.row) {
@@ -477,21 +559,40 @@ lfunction fel.store_results (node, result, arguments) {
partition_index = arguments [2];
pattern_info = arguments [3];
+
+ if (^"fel.ci") {
+ result_row = { { 0, // alpha
+ 0, // beta
+ 0, // alpha==beta
+ 0, // LRT
+ 1, // p-value,
+ 0, // total branch length of tested branches
+ 0, // lower bound dN/dS
+ 0, // dN/dS MLE
+ 0 // upper bound dN/dS
+ } };
+ } else {
+ result_row = { { 0, // alpha
+ 0, // beta
+ 0, // alpha==beta
+ 0, // LRT
+ 1, // p-value,
+ 0, // total branch length of tested branches
+ } };
+ }
- result_row = { { 0, // alpha
- 0, // beta
- 0, // alpha==beta
- 0, // LRT
- 1, // p-value,
- 0 // total branch length of tested branches
- } };
if (None != result) { // not a constant site
-
+
lrt = math.DoLRT ((result[utility.getGlobalValue("terms.Null")])[utility.getGlobalValue("terms.fit.log_likelihood")],
(result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.fit.log_likelihood")],
1);
+
+ if (result / ^"terms.simulated") {
+ pv = +((result[^"terms.simulated"])["_MATRIX_ELEMENT_VALUE_>=" + lrt [utility.getGlobalValue("terms.LRT")]]);
+ lrt [utility.getGlobalValue("terms.p_value")] = (pv+1)/(1+^"fel.resample");
+ }
result_row [0] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], ^"fel.site_alpha");
result_row [1] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], ^"fel.site_beta");
result_row [2] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.Null")], ^"fel.site_beta");
@@ -499,7 +600,6 @@ lfunction fel.store_results (node, result, arguments) {
result_row [4] = lrt [utility.getGlobalValue("terms.p_value")];
-
sum = 0;
/*alternative_lengths = ((result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.branch_length")])[0];
@@ -511,6 +611,12 @@ lfunction fel.store_results (node, result, arguments) {
');*/
result_row [5] = sum;
+
+ if (None != result [^"terms.confidence_interval"]) {
+ result_row [6] = (result [^"terms.confidence_interval"])[^"terms.lower_bound"];
+ result_row [7] = (result [^"terms.confidence_interval"])[^"terms.fit.MLE"];
+ result_row [8] = (result [^"terms.confidence_interval"])[^"terms.upper_bound"];
+ }
}
@@ -588,8 +694,8 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par
fel.queue = mpi.CreateQueue ({"LikelihoodFunctions": {{"fel.site_likelihood"}},
"Models" : {{"fel.site.mg_rev"}},
- "Headers" : {{"libv3/all-terms.bf"}},
- "Variables" : {{"fel.srv"}}
+ "Headers" : {{"libv3/all-terms.bf","libv3/tasks/alignments.bf"}},
+ "Variables" : {{"fel.srv","fel.resample","fel.ci"}}
});
@@ -608,7 +714,8 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par
"1" : None,
"2" : fel.partition_index,
"3" : _pattern_info_,
- "4" : fel.site_model_mapping
+ "4" : fel.site_model_mapping,
+ "5" : 0
});
} else {
mpi.QueueJob (fel.queue, "fel.handle_a_site", {"0" : "fel.site_likelihood",
@@ -617,7 +724,8 @@ for (fel.partition_index = 0; fel.partition_index < fel.partition_count; fel.par
(_pattern_info_[utility.getGlobalValue("terms.data.sites")])[0]),
"2" : fel.partition_index,
"3" : _pattern_info_,
- "4" : fel.site_model_mapping
+ "4" : fel.site_model_mapping,
+ "5" : 0
},
"fel.store_results");
}
@@ -651,9 +759,12 @@ io.ReportProgressMessageMD ("fel", "results", "** Found _" + fel.report.counts[0
selection.io.stopTimer (fel.json [terms.json.timers], "Total time");
selection.io.stopTimer (fel.json [terms.json.timers], "FEL analysis");
+if (fel.resample) {
+    fel.json [terms.simulated] = fel.resample;
+}
-
-
+GetString (_hpv,HYPHY_VERSION,0);
+fel.json[terms.json.runtime] = _hpv;
io.SpoolJSON (fel.json, fel.codon_data_info[terms.json.json]);
=====================================
res/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf
=====================================
@@ -636,6 +636,9 @@ namespace fubar {
fubar.json [terms.fubar.grid] = fubar.grid_with_weights;
selection.io.stopTimer (fubar.json [terms.json.timers], "Overall");
+GetString (_hpv,HYPHY_VERSION,0);
+fubar.json[terms.json.runtime] = _hpv;
+
io.SpoolJSON (fubar.json, fubar.codon_data_info[terms.json.json]);
return fubar.json;
=====================================
res/TemplateBatchFiles/SelectionAnalyses/FitMultiModel.bf
=====================================
@@ -540,6 +540,10 @@ if (utility.Array1D (fitter.callout)) {
io.ReportProgressMessageMD ("fitter", "writing", "Writing detailed analysis report to \`" + fitter.codon_data_info [terms.json.json] + "\'");
+GetString (_hpv,HYPHY_VERSION,0);
+fitter.json[terms.json.runtime] = _hpv;
+
+
io.SpoolJSON (fitter.json, fitter.codon_data_info [terms.json.json]);
return fitter.json;
=====================================
res/TemplateBatchFiles/SelectionAnalyses/MEME.bf
=====================================
@@ -132,6 +132,10 @@ namespace meme {
meme.pvalue = io.PromptUser ("\n>Select the p-value threshold to use when testing for selection",meme.pvalue,0,1,FALSE);
+KeywordArgument ("resample", "[Advanced setting, will result in MUCH SLOWER run time] Perform parametric bootstrap resampling to derive site-level null LRT distributions up to this many replicates per site. Recommended use for small to medium (<30 sequences) datasets", "0");
+meme.resample = io.PromptUser ("\n>[Advanced setting, will result in MUCH SLOWER run time] Perform parametric bootstrap resampling to derive site-level null LRT distributions up to this many replicates per site. Recommended use for small to medium (<30 sequences) datasets",50,0,1000,TRUE);
+
+
KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'MEME.json')", meme.codon_data_info [terms.json.json]);
meme.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to");
@@ -433,7 +437,7 @@ for (meme.partition_index = 0; meme.partition_index < meme.partition_count; meme
meme.queue = mpi.CreateQueue ({terms.mpi.LikelihoodFunctions: {{"meme.site_likelihood","meme.site_likelihood_bsrel"}},
terms.mpi.Models : {{"meme.site.background_fel","meme.site.bsrel"}},
terms.mpi.Headers : utility.GetListOfLoadedModules ("libv3/"),
- terms.mpi.Variables : {{"meme.selected_branches","meme.branch_mixture","meme.pairwise_counts","meme.codon_data_info"}},
+ terms.mpi.Variables : {{"meme.selected_branches","meme.branch_mixture","meme.pairwise_counts","meme.codon_data_info","meme.resample"}},
terms.mpi.Functions : {{"meme.compute_branch_EBF"}}
});
@@ -452,7 +456,8 @@ for (meme.partition_index = 0; meme.partition_index < meme.partition_count; meme
"2" : None,
"3" : meme.partition_index,
"4" : _pattern_info_,
- "5" : meme.site_model_mapping
+ "5" : meme.site_model_mapping,
+ "6" : 0
});
} else {
mpi.QueueJob (meme.queue, "meme.handle_a_site", {"0" : "meme.site_likelihood",
@@ -462,7 +467,8 @@ for (meme.partition_index = 0; meme.partition_index < meme.partition_count; meme
(_pattern_info_[utility.getGlobalValue("terms.data.sites")])[0]),
"3" : meme.partition_index,
"4" : _pattern_info_,
- "5" : meme.site_model_mapping
+ "5" : meme.site_model_mapping,
+ "6" : 0
},
"meme.store_results");
}
@@ -496,6 +502,10 @@ io.ReportProgressMessageMD ("MEME", "results", "** Found _" + meme.report.count[
selection.io.stopTimer (meme.json [terms.json.timers], "Total time");
selection.io.stopTimer (meme.json [terms.json.timers], "MEME analysis");
+GetString (_hpv,HYPHY_VERSION,0);
+meme.json[terms.json.runtime] = _hpv;
+
+
io.SpoolJSON (meme.json, meme.codon_data_info[terms.json.json]);
//----------------------------------------------------------------------------------------
@@ -571,7 +581,7 @@ lfunction meme.compute_branch_EBF (lf_id, tree_name, branch_name, baseline) {
}
//----------------------------------------------------------------------------------------
-lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pattern_info, model_mapping) {
+lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pattern_info, model_mapping, sim_mode) {
//console.log (pattern_info);
//#profile START;
@@ -706,92 +716,94 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa
//alternative = estimators.ExtractMLEs (lf_bsrel, model_mapping);
- alternative = estimators.ExtractMLEsOptions (lf_bsrel, model_mapping, {^"terms.globals_only" : TRUE});
- alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
-
- /*init_grid_best = ^"LF_INITIAL_GRID_MAXIMUM";
- if (Abs((results[1][0]-init_grid_best)/results[1][0]) > 0.05) {
- console.log (before_opt);
- console.log (^"LF_INITIAL_GRID_MAXIMUM_VALUE");
- console.log (after_opt);
- console.log ("" + results[1][0] + " vs " + init_grid_best);
-
- //fprintf (stdout, ^lf_bsrel, "\n");
- //utility.SetEnvVariable ("VERBOSITY_LEVEL",10);
-
+ if (sim_mode) {
+ lrt = 2*results[1][0];
+ } else {
+ alternative = estimators.ExtractMLEsOptions (lf_bsrel, model_mapping, {^"terms.globals_only" : TRUE});
+ alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
+ ancestral_info = ancestral.build (lf_bsrel,0,FALSE);
+ //TODO
+ branch_substitution_information = (ancestral.ComputeSubstitutionBySite (ancestral_info,0,None))[^"terms.substitutions"];
+ DeleteObject (ancestral_info);
+
+ branch_ebf = {};
+ branch_posterior = {};
}
-
-
- console.log ("--------");*/
+
- ancestral_info = ancestral.build (lf_bsrel,0,FALSE);
- //TODO
- branch_substitution_information = (ancestral.ComputeSubstitutionBySite (ancestral_info,0,None))[^"terms.substitutions"];
-
- DeleteObject (ancestral_info);
-
- branch_ebf = {};
- branch_posterior = {};
if (^"meme.site_beta_plus" > ^"meme.site_alpha" && ^"meme.site_mixture_weight" < 0.999999) {
-
- /*
- if (^"meme.site_tree_bsrel.aipysurusLaevis.alpha" > 10000) {
- Export (lfe, ^lf_bsrel);
- console.log (lfe);
- }
- */
-
- LFCompute (^lf_bsrel,LF_START_COMPUTE);
- LFCompute (^lf_bsrel,baseline);
+ if (!sim_mode) {
+ LFCompute (^lf_bsrel,LF_START_COMPUTE);
+ LFCompute (^lf_bsrel,baseline);
- for (_node_name_; in; ^bsrel_tree_id) {
- if (((^"meme.selected_branches") [partition_index])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) {
- _node_name_res_ = meme.compute_branch_EBF (lf_bsrel, bsrel_tree_id, _node_name_, baseline);
- branch_ebf[_node_name_] = _node_name_res_[utility.getGlobalValue("terms.empirical_bayes_factor")];
- branch_posterior[_node_name_] = _node_name_res_[utility.getGlobalValue("terms.posterior")];
+ for (_node_name_; in; ^bsrel_tree_id) {
+ if (((^"meme.selected_branches") [partition_index])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) {
+ _node_name_res_ = meme.compute_branch_EBF (lf_bsrel, bsrel_tree_id, _node_name_, baseline);
+ branch_ebf[_node_name_] = _node_name_res_[utility.getGlobalValue("terms.empirical_bayes_factor")];
+ branch_posterior[_node_name_] = _node_name_res_[utility.getGlobalValue("terms.posterior")];
+ }
}
- }
- LFCompute (^lf_bsrel,LF_DONE_COMPUTE);
+ LFCompute (^lf_bsrel,LF_DONE_COMPUTE);
+ }
^"meme.site_beta_plus" := ^"meme.site_alpha";
Optimize (results, ^lf_bsrel, {"OPTIMIZATION_METHOD" : "nedler-mead"});
//io.SpoolLF (lf_bsrel, "/tmp/meme.debug", "MEME-null");
//Null = estimators.ExtractMLEs (lf_bsrel, model_mapping);
- Null = estimators.ExtractMLEsOptions (lf_bsrel, model_mapping, {^"terms.globals_only" : TRUE});
-
- Null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
-
+ if (sim_mode) {
+ return lrt - 2*results[1][0];
+ } else {
+ Null = estimators.ExtractMLEsOptions (lf_bsrel, model_mapping, {^"terms.globals_only" : TRUE});
+ Null [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
+ if (^"meme.resample") {
+ N = ^"meme.resample";
+ sims = {};
+ GetDataInfo (fi, ^((lfInfo["Datafilters"])[0]), "PARAMETERS");
+ for (i = 0; i < N; i+=1) {
+ DataSet null_sim = SimulateDataSet (^lf_bsrel);
+ DataSetFilter null_filter = CreateFilter (null_sim,3,,,fi["EXCLUSIONS"]);
+ sims + alignments.serialize_site_filter (&null_filter, 0);
+ }
+ null_LRT = {N,1};
+ for (i = 0; i < N; i+=1) {
+ is = meme.handle_a_site (lf_fel, lf_bsrel, sims[i], partition_index, pattern_info, model_mapping, TRUE);
+ null_LRT[i] = is;
+ }
+
+ return {"fel" : fel,
+ utility.getGlobalValue("terms.alternative") : alternative,
+ utility.getGlobalValue("terms.posterior") : branch_posterior,
+ utility.getGlobalValue("terms.empirical_bayes_factor") : branch_ebf,
+ utility.getGlobalValue("terms.branch_selection_attributes") : branch_substitution_information, //TODO: keep this attr?
+ utility.getGlobalValue("terms.Null"): Null,
+ utility.getGlobalValue("terms.simulated"): null_LRT
+ };
+ }
+ }
} else {
- Null = alternative;
+ if (!sim_mode) {
+ Null = alternative;
- for (_node_name_; in; ^bsrel_tree_id) {
- if (((^"meme.selected_branches") [partition_index])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) {
- branch_ebf[_node_name_] = 1.0;
- branch_posterior[_node_name_] = 0.0;
- }
- }
-
- /*
- utility.ForEach (^bsrel_tree_id, "_node_name_",
- '
- if ((meme.selected_branches [^"`&partition_index`"])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) {
- (^"`&branch_ebf`")[_node_name_] = 1.0;
- (^"`&branch_posterior`")[_node_name_] = 0.0;
+ for (_node_name_; in; ^bsrel_tree_id) {
+ if (((^"meme.selected_branches") [partition_index])[_node_name_] == utility.getGlobalValue("terms.tree_attributes.test")) {
+ branch_ebf[_node_name_] = 1.0;
+ branch_posterior[_node_name_] = 0.0;
+ }
}
- '
- );
- */
+ } else {
+ return 0;
+ }
}
//#profile collect;
@@ -913,6 +925,12 @@ lfunction meme.store_results (node, result, arguments) {
lrt = {utility.getGlobalValue("terms.LRT") : 2*((result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.fit.log_likelihood")]-(result[utility.getGlobalValue("terms.Null")])[utility.getGlobalValue("terms.fit.log_likelihood")])};
lrt [utility.getGlobalValue("terms.p_value")] = 2/3-2/3*(0.45*CChi2(lrt[utility.getGlobalValue("terms.LRT")],1)+0.55*CChi2(lrt[utility.getGlobalValue("terms.LRT")],2));
+
+ if (result / ^"terms.simulated") {
+
+ pv = +((result[^"terms.simulated"])["_MATRIX_ELEMENT_VALUE_>=" + lrt [utility.getGlobalValue("terms.LRT")]]);
+ lrt [utility.getGlobalValue("terms.p_value")] = (pv+1)/(1+^"meme.resample");
+ }
result_row [0] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], utility.getGlobalValue("meme.parameter_site_alpha"));
result_row [1] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], utility.getGlobalValue("meme.parameter_site_omega_minus")) * result_row[0];
=====================================
res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf
=====================================
@@ -537,6 +537,10 @@ for (key, value; in; prime.report.count) {
selection.io.stopTimer (prime.json [terms.json.timers], "Total time");
selection.io.stopTimer (prime.json [terms.json.timers], "PRIME analysis");
+GetString (_hpv,HYPHY_VERSION,0);
+prime.json[terms.json.runtime] = _hpv;
+
+
io.SpoolJSON (prime.json, prime.codon_data_info[terms.json.json]);
//----------------------------------------------------------------------------------------
=====================================
res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
=====================================
@@ -41,7 +41,8 @@ relax.analysis_description = {
Version 3 provides support for >2 branch sets.
Version 3.1 adds LHC + Nedler-Mead initial fit phase and keyword support.
Version 3.1.1 adds some bug fixes for better convergence.
- Version 4.0 adds support for synonymous rate variation.",
+ Version 4.0 adds support for synonymous rate variation.
+ Version 4.1 adds further support for multiple hit models",
                               terms.io.version : "4.1",
terms.io.reference : "RELAX: Detecting Relaxed Selection in a Phylogenetic Framework (2015). Mol Biol Evol 32 (3): 820-832",
terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group",
@@ -140,8 +141,6 @@ KeywordArgument ("test", "Branches to use as the test set", null, "Choose the s
KeywordArgument ("reference", "Branches to use as the reference set", null, "Choose the set of branches to use as the _reference_ set");
-
-
io.DisplayAnalysisBanner ( relax.analysis_description );
selection.io.startTimer (relax.json [terms.json.timers], "Overall", 0);
@@ -163,6 +162,15 @@ relax.N.initial_guesses = io.PromptUser ("The number of initial random guesses t
io.ReportProgressMessageMD('RELAX', 'selector', 'Branch sets for RELAX analysis');
+KeywordArgument ("multiple-hits", "Include support for multiple nucleotide substitutions", "None");
+
+relax.multi_hit = io.SelectAnOption ({
+ {"Double", "Include branch-specific rates for double nucleotide substitutions"}
+ {"Double+Triple", "Include branch-specific rates for double and triple nucleotide substitutions"}
+ {"None", "[Default] Use standard models which permit only single nucleotide changes to occur instantly"}
+ }, "Include support for multiple nucleotide substitutions");
+
+
if (relax.numbers_of_tested_groups == 2) {
relax.branch_sets = {relax.test_branches_name : "test",
@@ -313,32 +321,86 @@ utility.ForEachPair (relax.filter_specification, "_key_", "_value_",
-relax.model_generator = "models.codon.BS_REL.ModelDescription";
+if (relax.multi_hit == "None") {
+ relax.model_generator = "models.codon.BS_REL.ModelDescription";
-if (relax.do_srv) {
- if (relax.do_bs_srv) {
- relax.model_generator = "relax.model.with.GDD";
- relax.model_generator = "models.codon.BS_REL_SRV.ModelDescription";
- relax.rate_class_arguments = {{relax.synonymous_rate_classes__,relax.rate_classes__}};
- } else {
+ if (relax.do_srv) {
+ if (relax.do_bs_srv) {
+ relax.model_generator = "models.codon.BS_REL_SRV.ModelDescription";
+ relax.rate_class_arguments = {{relax.synonymous_rate_classes__,relax.rate_classes__}};
+ } else {
- lfunction relax.model.with.GDD (type, code, rates) {
- def = models.codon.BS_REL.ModelDescription (type, code, rates);
- options = {utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("relax.synonymous_rate_classes"),
- utility.getGlobalValue("terms._namespace") : "relax._shared_srv"};
+ lfunction relax.model.with.GDD (type, code, rates) {
+ def = models.codon.BS_REL.ModelDescription (type, code, rates);
+ options = {utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("relax.synonymous_rate_classes"),
+ utility.getGlobalValue("terms._namespace") : "relax._shared_srv"};
- if (utility.getGlobalValue("relax.do_srv_hmm")) {
- options [utility.getGlobalValue ("terms.rate_variation.HMM")] = "equal";
+ if (utility.getGlobalValue("relax.do_srv_hmm")) {
+ options [utility.getGlobalValue ("terms.rate_variation.HMM")] = "equal";
+ }
+ def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory (options);
+ return def;
}
- def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory (options);
- return def;
- }
- relax.model_generator = "relax.model.with.GDD";
+ relax.model_generator = "relax.model.with.GDD";
+ relax.rate_class_arguments = relax.rate_classes;
+ }
+ } else {
relax.rate_class_arguments = relax.rate_classes;
}
} else {
- relax.rate_class_arguments = relax.rate_classes;
+
+ lfunction relax.model.BS_REL_MH (type, code, rates) {
+ def = models.codon.BS_REL.ModelDescription (type, code, rates);
+ def [utility.getGlobalValue("terms.model.defineQ")] = "models.codon.BS_REL._DefineQ.MH";
+ if (^'relax.multi_hit' == "Double") {
+ def [utility.getGlobalValue("terms.model.rate_generator")] = "function rate_generator (fromChar, toChar, namespace, model_type, model) {
+ return models.codon.MG_REV_MH._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
+ 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ 'beta_' + component, terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
+ 'omega' + component, terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component),
+ 'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate')
+ );}";
+ } else {
+ def [utility.getGlobalValue("terms.model.rate_generator")] = "function rate_generator (fromChar, toChar, namespace, model_type, model) {
+ return models.codon.MG_REV_TRIP._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
+ 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ 'beta_' + component, terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
+ 'omega' + component, terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component),
+ 'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate'),
+ 'psi', utility.getGlobalValue('terms.parameters.triple_hit_rate'),
+                        'psi_islands', utility.getGlobalValue('terms.parameters.triple_hit_rate_syn')
+            );}"
+ }
+ return def;
+ }
+
+ relax.model_generator = "relax.model.BS_REL_MH";
+
+ if (relax.do_srv) {
+ if (relax.do_bs_srv) {
+ relax.model_generator = "models.codon.BS_REL_SRV.ModelDescription";
+ relax.rate_class_arguments = {{relax.synonymous_rate_classes__,relax.rate_classes__}};
+ } else {
+
+ lfunction relax.model.with.GDD (type, code, rates) {
+ def = relax.model.BS_REL_MH (type, code, rates);
+ options = {utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("relax.synonymous_rate_classes"),
+ utility.getGlobalValue("terms._namespace") : "relax._shared_srv"};
+
+ if (utility.getGlobalValue("relax.do_srv_hmm")) {
+ options [utility.getGlobalValue ("terms.rate_variation.HMM")] = "equal";
+ }
+ def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory (options);
+ return def;
+ }
+
+ relax.model_generator = "relax.model.with.GDD";
+ relax.rate_class_arguments = relax.rate_classes;
+ }
+ } else {
+ relax.rate_class_arguments = relax.rate_classes;
+ }
}
selection.io.stopTimer (relax.json [terms.json.timers], "Preliminary model fitting");
@@ -367,7 +429,6 @@ if (relax.model_set == "All") { // run all the models
relax.weight_multipliers = parameters.helper.stick_breaking (utility.SwapKeysAndValues(utility.MatrixToDict(relax.distribution["weights"])),None);
relax.constrain_parameters = parameters.ConstrainMeanOfSet(relax.distribution["rates"],relax.weight_multipliers,1,"relax");
-
relax.i = 0;
for (key, value; in; relax.constrain_parameters[terms.global]){
model.generic.AddGlobal (relax.ge.bsrel_model, value, key);
@@ -509,6 +570,9 @@ if (relax.model_set == "All") { // run all the models
"{terms.json.omega_ratio : relax.inferred_ge_distribution [_index_][0],
terms.json.proportion : relax.inferred_ge_distribution [_index_][1]}")
};
+
+
+    relax.report_multi_hit (relax.general_descriptive.fit, relax.distribution_for_json, "RELAX", "gd-mh");
selection.io.json_store_lf (relax.json,
relax.general_descriptive_name,
relax.general_descriptive.fit[terms.fit.log_likelihood],
@@ -571,7 +635,6 @@ if (relax.numbers_of_tested_groups == 2) {
relax.model_for_srv = "relax.test";
} else {
-
relax.model_to_group_name = {};
relax.model_namespaces = {};
utility.ForEachPair (relax.branch_sets, "_key_", "_value_", "
@@ -695,9 +758,6 @@ if (relax.do_srv) {
}
if (relax.model_set != "All") {
- /*
- relax.ge_guess = relax.DistributionGuess(((selection.io.extract_global_MLE_re (relax.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+`relax.test_branches_name`.+"))["0"])[terms.fit.MLE]);
- */
relax.do_lhc = TRUE;
relax.distribution = models.codon.BS_REL.ExtractMixtureDistribution(relax.model_object_map[relax.reference_model_namespace]);
relax.initial.test_mean = ((selection.io.extract_global_MLE_re (relax.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+`relax.test_branches_name`.+"))["0"])[terms.fit.MLE];
@@ -963,17 +1023,17 @@ function relax.FitMainTestPair () {
"{terms.json.omega_ratio : relax.inferred_distribution [_index_][0],
terms.json.proportion : relax.inferred_distribution [_index_][1]}"),
- relax.reference_branches_name : utility.Map (utility.Range (relax.rate_classes, 0, 1),
+ relax.reference_branches_name : utility.Map (utility.Range (relax.rate_classes, 0, 1),
"_index_",
"{terms.json.omega_ratio : relax.inferred_distribution_ref [_index_][0],
terms.json.proportion : relax.inferred_distribution_ref [_index_][1]}")
};
} else {
-
relax.report_multi_class_rates (relax.alternative_model.fit, TRUE);
}
-
+
+ relax.report_multi_hit (relax.alternative_model.fit, relax.distribution_for_json, "RELAX", "alt-mh");
relax._report_srv (relax.alternative_model.fit, FALSE);
KeywordArgument ("save-fit", "Save RELAX alternative model fit to this file (default is not to save)", "/dev/null");
@@ -1029,6 +1089,7 @@ function relax.FitMainTestPair () {
relax.report_multi_class_rates (None, FALSE);
}
+ relax.report_multi_hit (relax.null_model.fit, relax.distribution_for_json, "RELAX", "alt-mh-null");
relax._report_srv (relax.null_model.fit, TRUE);
selection.io.json_store_lf (relax.json,
@@ -1140,6 +1201,7 @@ if (relax.model_set == "All") {
} else {
relax.report_multi_class_rates (None, TRUE);
}
+ relax.report_multi_hit (relax.pe.fit, relax.distribution_for_json, "RELAX", "pe-mh");
selection.io.json_store_lf (relax.json,
relax.partitioned_descriptive_name,
@@ -1160,6 +1222,10 @@ if (relax.model_set == "All") {
selection.io.stopTimer (relax.json [terms.json.timers], "Overall");
+GetString (_hpv,HYPHY_VERSION,0);
+relax.json[terms.json.runtime] = _hpv;
+
+
io.SpoolJSON (relax.json, relax.codon_data_info [terms.json.json]);
return relax.json;
@@ -1253,6 +1319,103 @@ lfunction relax.DistributionGuess (mean) {
//------------------------------------------------------------------------------
+lfunction relax.BS_REL._GenerateRate.MH (fromChar, toChar, namespace, model_type, _tt, alpha, alpha_term, beta, beta_term, omega, omega_term, delta, delta_term, psi, psi_term, psi_s, psi_s_term) {
+
+ p = {};
+ _GenerateRate.diff = models.codon.diff.complete(fromChar, toChar);
+ diff_count = utility.Array1D (_GenerateRate.diff);
+
+ if (diff_count
+ ) {
+
+ p[model_type] = {};
+ p[utility.getGlobalValue("terms.global")] = {};
+
+ nuc_rate = "";
+
+ for (i = 0; i < diff_count; i += 1) {
+ if ((_GenerateRate.diff[i])[utility.getGlobalValue("terms.diff.from")] > (_GenerateRate.diff[i])[utility.getGlobalValue("terms.diff.to")]) {
+ nuc_p = "theta_" + (_GenerateRate.diff[i])[utility.getGlobalValue("terms.diff.to")] + (_GenerateRate.diff[i])[utility.getGlobalValue("terms.diff.from")];
+ } else {
+ nuc_p = "theta_" + (_GenerateRate.diff[i])[utility.getGlobalValue("terms.diff.from")] +(_GenerateRate.diff[i])[utility.getGlobalValue("terms.diff.to")];
+ }
+ nuc_p = parameters.ApplyNameSpace(nuc_p, namespace);
+ (p[utility.getGlobalValue("terms.global")])[terms.nucleotideRateReversible((_GenerateRate.diff[i])[utility.getGlobalValue("terms.diff.from")], (_GenerateRate.diff[i])[utility.getGlobalValue("terms.diff.to")])] = nuc_p;
+
+ nuc_rate = parameters.AppendMultiplicativeTerm (nuc_rate, nuc_p);
+ }
+
+
+ rate_entry = nuc_rate;
+
+ if (_tt[fromChar] != _tt[toChar]) {
+ if (model_type == utility.getGlobalValue("terms.global")) {
+ aa_rate = parameters.ApplyNameSpace(omega, namespace);
+ (p[model_type])[omega_term] = aa_rate;
+ utility.EnsureKey (p, utility.getGlobalValue("terms.local"));
+ (p[utility.getGlobalValue("terms.local")])[utility.getGlobalValue ("terms.relax.k")] = "k";
+ aa_rate += "^k";
+ } else {
+ aa_rate = beta;
+ (p[model_type])[beta_term] = aa_rate;
+ }
+ rate_entry += "*" + aa_rate;
+ } else {
+ if (model_type == utility.getGlobalValue("terms.local")) {
+ (p[model_type])[alpha_term] = alpha;
+ rate_entry += "*" + alpha;
+ } else {
+ p[utility.getGlobalValue("terms.model.rate_entry")] = nuc_rate;
+ }
+
+
+ }
+
+ if (diff_count == 2) {
+ if (model_type == utility.getGlobalValue("terms.global")) {
+ delta_rate = parameters.ApplyNameSpace(delta, namespace);
+ (p[model_type])[delta_term] = delta_rate;
+ rate_entry += "*" + delta_rate;
+ } else {
+ (p[model_type])[delta_term] = delta;
+ rate_entry += "*" + delta;
+ }
+ }
+
+
+ if (diff_count == 3 && ^'relax.multi_hit' == 'Double+Triple') {
+ if (_tt[fromChar] != _tt[toChar]) {
+ if (model_type == utility.getGlobalValue("terms.global")) {
+ psi_rate = parameters.ApplyNameSpace(psi, namespace);
+ (p[model_type])[psi_term] = psi_rate;
+ rate_entry += "*" + psi_rate;
+ } else {
+ (p[model_type])[psi_term] = psi;
+ rate_entry += "*" + psi;
+ }
+ } else {
+ //console.log (fromChar + " <-> " + toChar + " (" + _tt[fromChar] + ")");
+ if (model_type == utility.getGlobalValue("terms.global")) {
+ psi_rate = parameters.ApplyNameSpace(psi_s, namespace);
+ (p[model_type])[psi_s_term] = psi_rate;
+ rate_entry += "*" + psi_rate;
+ } else {
+ (p[model_type])[psi_s_term] = psi_s;
+ rate_entry += "*" + psi_s;
+ }
+
+ }
+ }
+
+
+ p[utility.getGlobalValue("terms.model.rate_entry")] = rate_entry;
+ }
+ return p;
+}
+
+//------------------------------------------------------------------------------
+
+
lfunction relax.BS_REL._GenerateRate (fromChar, toChar, namespace, model_type, _tt, alpha, alpha_term, beta, beta_term, omega, omega_term) {
@@ -1311,15 +1474,30 @@ lfunction relax.BS_REL._DefineQ (bs_rel, namespace) {
for (component = 1; component <= bs_rel[utility.getGlobalValue("terms.model.components")]; component += 1) {
key = "component_" + component;
- ExecuteCommands ("
- function rate_generator (fromChar, toChar, namespace, model_type, model) {
- return relax.BS_REL._GenerateRate (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
- 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
- 'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
- 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component));
- }"
- );
-
+
+ if (^'relax.multi_hit' == 'None') {
+ ExecuteCommands ("
+ function rate_generator (fromChar, toChar, namespace, model_type, model) {
+ return relax.BS_REL._GenerateRate (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
+ 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ 'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
+ 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component));
+ }"
+ );
+ } else {
+ ExecuteCommands ("
+ function rate_generator (fromChar, toChar, namespace, model_type, model) {
+ return relax.BS_REL._GenerateRate.MH (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
+ 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ 'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
+ 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component),
+ 'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate'),
+ 'psi', utility.getGlobalValue('terms.parameters.triple_hit_rate'),
+ 'psi', utility.getGlobalValue('terms.parameters.triple_hit_rate'));
+ }"
+ );
+ }
+
if ( component < bs_rel[utility.getGlobalValue("terms.model.components")]) {
model.generic.AddGlobal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component ));
parameters.DeclareGlobalWithRanges (_aux[component-1], 0.5, 0, 1);
@@ -1334,6 +1512,7 @@ lfunction relax.BS_REL._DefineQ (bs_rel, namespace) {
bs_rel[utility.getGlobalValue("terms.model.rate_matrix")] = rate_matrices;
parameters.SetConstraint(((bs_rel[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.global")])[terms.nucleotideRate("A", "G")], "1", "");
+
return bs_rel;
}
@@ -1516,3 +1695,48 @@ lfunction relax._renormalize (v, distro, mean) {
return v;
}
+
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+lfunction relax.report_multi_hit (model_fit, json, l1, l2) {
+ if (^'relax.multi_hit' != "None") {
+ io.ReportProgressMessageMD(l1, l2, 'Partition-level rates for multiple-hit substitutions\n');
+ params = selection.io.extract_global_MLE_re (model_fit, ^'terms.parameters.multiple_hit_rate');
+
+ double_rates = {};
+ for (mle; in; params) {
+ exp = regexp.FindSubexpressions (mle[^'terms.description'], "\[relax\.(.+)\]");
+ if (None == exp) {
+ exp = 'reference';
+ } else {
+ exp = exp[1];
+ }
+ double_rates [exp] = mle[^'terms.fit.MLE'];
+ io.ReportProgressMessageMD(l1, l2, '* 2H rate for **' + exp + '**: ' + Format (double_rates [exp], 8, 4));
+
+ }
+
+ json[^'terms.parameters.multiple_hit_rate'] = double_rates;
+
+ if (^'relax.multi_hit' != 'Double') {
+ params = selection.io.extract_global_MLE_re (model_fit, ^'terms.parameters.triple_hit_rate');
+
+ double_rates = {};
+ for (mle; in; params) {
+ exp = regexp.FindSubexpressions (mle[^'terms.description'], "\[relax\.(.+)\]");
+ if (None == exp) {
+ exp = 'reference';
+ } else {
+ exp = exp[1];
+ }
+ double_rates [exp] = mle[^'terms.fit.MLE'];
+ io.ReportProgressMessageMD(l1, l2, '* 3H rate for **' + exp + '**: ' + Format (double_rates [exp], 8, 4));
+
+ }
+
+ json[^'terms.parameters.triple_hit_rate'] = double_rates;
+ }
+ }
+}
\ No newline at end of file
=====================================
res/TemplateBatchFiles/SelectionAnalyses/SLAC.bf
=====================================
@@ -316,6 +316,8 @@ for (slac.i = 0; slac.i < Abs (slac.filter_specification); slac.i += 1) {
slac.json [terms.json.MLE ] = {terms.json.headers : slac.table_headers,
terms.json.content : slac.results };
+GetString (_hpv,HYPHY_VERSION,0);
+slac.json[terms.json.runtime] = _hpv;
io.SpoolJSON (slac.json, slac.codon_data_info[terms.json.json]);
selection.io.stopTimer (slac.json [terms.json.timers], "Primary SLAC analysis");
=====================================
res/TemplateBatchFiles/SelectionAnalyses/aBSREL.bf
=====================================
@@ -693,6 +693,10 @@ absrel.json [terms.json.test_results] = {
selection.io.stopTimer (absrel.json [terms.json.timers], "Overall");
utility.ToggleEnvVariable ("USE_LAST_RESULTS", None);
+
+GetString (_hpv,HYPHY_VERSION,0);
+absrel.json[terms.json.runtime] = _hpv;
+
io.SpoolJSON (absrel.json, absrel.codon_data_info [terms.json.json]);
=====================================
res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf
=====================================
@@ -556,6 +556,10 @@ fel.json [terms.json.MLE ] = {
selection.io.stopTimer (fel.json [terms.json.timers], "Total time");
selection.io.stopTimer (fel.json [terms.json.timers], "FEL analysis");
+GetString (_hpv,HYPHY_VERSION,0);
+fel.json[terms.json.runtime] = _hpv;
+
+
io.SpoolJSON (fel.json, fel.codon_data_info[terms.json.json]);
=====================================
res/TemplateBatchFiles/SelectionAnalyses/modules/io_functions.ibf
=====================================
@@ -292,6 +292,21 @@ function selection.io.report_fit (fit, xp, ss) {
//------------------------------------------------------------------------------
+lfunction selection.io.report_fit_secondary_stats (fit) {
+ p = utility.Array1D (fit[^"terms.branch_length"]);
+ blm = {p,1};
+ for (i,v; in; fit[^"terms.branch_length"]) {
+ BL = 0;
+ for (b,bl; in; v) {
+ BL += bl [^"terms.fit.MLE"];
+ }
+ blm[+i] = Format (BL, 6, 3);
+ }
+ return "" + p + " " + io.SingularOrPlural (p, "partition", "partitions") + ". Total tree length by partition (subs/site) " + Join (", ", blm);
+}
+
+//------------------------------------------------------------------------------
+
function selection.io.report_lrt (lrt) {
return "p-value = " +
=====================================
res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
=====================================
@@ -412,6 +412,10 @@ function doGTR (prefix) {
io.ReportProgressMessageMD (prefix, "nuc-fit", "* " +
selection.io.report_fit (gtr_results, 0, 3*(^"`prefix`.sample_size")));
+ io.ReportProgressMessageMD (prefix, "nuc-fit", "* " +
+ selection.io.report_fit_secondary_stats (gtr_results));
+
+
/* Store nucleotide fit */
gtr_rates = utility.Map(
@@ -435,6 +439,7 @@ function doGTR (prefix) {
/* Store branch lengths */
gtr_bls_over_10 = 0;
+ gtr_bl_sum = 0;
for (partition_index = 0; partition_index < Abs(filter_specification); partition_index += 1) {
selection.io.json_store_branch_attribute(json, utility.getGlobalValue ("terms.json.nucleotide_gtr"), utility.getGlobalValue ("terms.branch_length"), display_orders[terms.json.nucleotide_gtr],
@@ -523,6 +528,9 @@ function doPartitionedMG (prefix, keep_lf) {
io.ReportProgressMessageMD("`prefix`", "codon-fit", "* " + selection.io.report_fit (partitioned_mg_results, 0, (^"`prefix`.codon_data_info")[utility.getGlobalValue ("terms.data.sample_size")]));
+ io.ReportProgressMessageMD (prefix, "nuc-fit", "* " +
+ selection.io.report_fit_secondary_stats (partitioned_mg_results));
+
global_dnds = selection.io.extract_global_MLE_re (partitioned_mg_results, "^" + utility.getGlobalValue("terms.parameters.omega_ratio"));
utility.ForEach (global_dnds, "_value_", 'io.ReportProgressMessageMD ("`prefix`", "codon-fit", "* " + _value_[utility.getGlobalValue("terms.description")] + " = " + Format (_value_[utility.getGlobalValue("terms.fit.MLE")],8,4));');
=====================================
res/TemplateBatchFiles/files.lst
=====================================
@@ -54,6 +54,7 @@
"RMV","Remove sequences with stop codons from the data.","CleanStopCodons.bf";
"","Miscellaneous items.","!Miscellaneous";
+"DEPS","Analyze Directional Evolution in Protein Sequences (DEPS)","DirectionalREL.bf";
"FAA","Fit a multiple fitness class model to amino acid data.","FitnessAAModels.bf";
"KH","Perform a Kishino-Hasegawa test on two competing phylogenies","KHTest.bf";
"LZ","Compute Lempel-Ziv complexity and entropy of (possibly unaligned) sequences","LZ_Complexity.bf";
=====================================
res/TemplateBatchFiles/libv3/UtilityFunctions.bf
=====================================
@@ -198,7 +198,9 @@ lfunction utility.MatrixToDict (matrix) {
*/
lfunction utility.DictToArray (object) {
result = {1,Abs (object)};
- utility.ForEachPair(object, "key", "_value_", "(`&result`)[+key] = _value_");
+ for (key,_value_; in; object) {
+ result[+key] = _value_;
+ }
return result;
}
=====================================
res/TemplateBatchFiles/libv3/all-terms.bf
=====================================
@@ -58,6 +58,7 @@ namespace terms{
branch_length = "branch length";
alternative = "alternative";
Null = "null";
+ simulated = "simulated";
LRT = "LRT";
p_value = "p-value";
@@ -68,6 +69,7 @@ namespace terms{
global_mg94xrev = "Global MG94xREV";
+ confidence_interval = "confidence interval";
lower_bound = "LB";
upper_bound = "UB";
range01 = {
@@ -363,6 +365,7 @@ namespace terms{
defineQ = "defineQ";
q_ij = "q_ij";
rate_matrix = "Q";
+ rate_generator = "rate-generator";
matrix_id = "matrix-id";
rate_entry = "rate-entry";
empirical_rates = "empirical-rates";
=====================================
res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf
=====================================
@@ -4,6 +4,8 @@ LoadFunctionLibrary("../parameters.bf");
LoadFunctionLibrary("../frequencies.bf");
LoadFunctionLibrary("../../UtilityFunctions.bf");
LoadFunctionLibrary("MG_REV.bf");
+LoadFunctionLibrary("MG_REV_TRIP.bf");
+LoadFunctionLibrary("MG_REV_MH.bf");
LoadFunctionLibrary("../../convenience/math.bf");
/** @module models.codon.BS_REL
@@ -415,3 +417,147 @@ function models.codon.BS_REL.get_branch_length(model, tree, node) {
}
return bl;
}
+
+/**
+ * @name models.codon.BS_REL._DefineQ.MH
+ * @param {Dict} model
+ * @param {String} global namespace
+*/
+
+lfunction models.codon.BS_REL._DefineQ.MH (bs_rel, namespace) {
+
+
+ rate_matrices = {};
+
+ rg = "function rate_generator (fromChar, toChar, namespace, model_type, model) {
+ return models.codon.MG_REV_TRIP._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
+ 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ 'beta_' + component, terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component__),
+ 'omega' + component, terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component__),
+ 'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate'),
+ 'psi', utility.getGlobalValue('terms.parameters.triple_hit_rate'),
+ 'psi_islands', utility.getGlobalValue('terms.parameters.triple_hit_rate_syn')
+ );
+ }";
+
+ if (bs_rel[utility.getGlobalValue("terms.model.rate_generator")]) {
+ rg = bs_rel[utility.getGlobalValue("terms.model.rate_generator")];
+ }
+
+ bs_rel [utility.getGlobalValue("terms.model.q_ij")] = &rate_generator;
+ bs_rel [utility.getGlobalValue("terms.mixture.mixture_components")] = {};
+
+ _aux = parameters.GenerateSequentialNames (namespace + ".bsrel_mixture_aux", bs_rel[utility.getGlobalValue("terms.model.components")] - 1, "_");
+ _wts = parameters.helper.stick_breaking (_aux, None);
+
+
+ for (component = 1; component <= bs_rel[utility.getGlobalValue("terms.model.components")]; component += 1) {
+ key = "component_" + component;
+ ExecuteCommands (rg);
+
+ if ( component < bs_rel[utility.getGlobalValue("terms.model.components")]) {
+ model.generic.AddGlobal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component ));
+ parameters.DeclareGlobalWithRanges (_aux[component-1], 0.5, 0, 1);
+ }
+ models.codon.generic.DefineQMatrix(bs_rel, namespace);
+ rate_matrices [key] = bs_rel[utility.getGlobalValue("terms.model.rate_matrix")];
+ (bs_rel [^'terms.mixture.mixture_components'])[key] = _wts [component-1];
+ }
+
+ bs_rel[utility.getGlobalValue("terms.model.rate_matrix")] = rate_matrices;
+ parameters.SetConstraint(((bs_rel[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.global")])[terms.nucleotideRate("A", "G")], "1", "");
+
+ return bs_rel;
+}
+
+/**
+ * @name models.codon.BS_REL_SRV._DefineQ.MH
+ * @param {Dict} model
+ * @param {String} global namespace
+*/
+
+lfunction models.codon.BS_REL_SRV._DefineQ.MH (bs_rel, namespace) {
+
+ rate_matrices = {};
+
+ rg = "function rate_generator (fromChar, toChar, namespace, model_type, model) {
+ return models.codon.MG_REV_TRIP._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
+ 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ 'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
+ 'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component),
+ 'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate'),
+ 'psi', utility.getGlobalValue('terms.parameters.triple_hit_rate'),
+ 'psi_islands', utility.getGlobalValue('terms.parameters.triple_hit_rate_syn')
+ );
+ }";
+
+ if (bs_rel[utility.getGlobalValue("terms.model.rate_generator")]) {
+ rg = bs_rel[utility.getGlobalValue("terms.model.rate_generator")];
+ }
+
+
+ bs_rel [utility.getGlobalValue("terms.model.q_ij")] = &rate_generator;
+ bs_rel [utility.getGlobalValue("terms.model.time")] = &rate_multiplier;
+
+ bs_rel [utility.getGlobalValue("terms.mixture.mixture_components")] = {};
+
+ ns_components = (bs_rel[(utility.getGlobalValue("terms.model.components"))])[1];
+ syn_components = (bs_rel[(utility.getGlobalValue("terms.model.components"))])[0];
+
+
+ _aux = parameters.GenerateSequentialNames (namespace + ".bsrel_mixture_aux", ns_components - 1, "_");
+ _wts = parameters.helper.stick_breaking (_aux, None);
+
+ _aux_srv = parameters.GenerateSequentialNames (namespace + ".bsrel_mixture_aux_srv", syn_components - 1, "_");
+ _wts_srv = parameters.helper.stick_breaking (_aux_srv, None);
+
+ _alphas = parameters.GenerateSequentialNames (namespace + ".alpha", syn_components, "_");
+
+
+ mixture = {};
+
+
+ for (s_component = 1; s_component <= syn_components; s_component += 1) {
+
+ ExecuteCommands ("
+ function rate_multiplier (option) {
+ return {
+ ^'terms.global' : {
+ terms.AddCategory (utility.getGlobalValue('terms.parameters.synonymous_rate'), `s_component`) : '`_alphas[s_component-1]`'
+ },
+ ^'terms.local' : {
+ ^'terms.parameters.synonymous_rate' : models.DNA.generic.Time (null)
+ },
+ ^'terms.model.rate_entry' : '`_alphas[s_component-1]`*' + models.DNA.generic.Time (null)
+ };
+ }
+ ");
+
+ for (component = 1; component <= ns_components; component += 1) {
+ key = "component_" + s_component + "_" + component;
+ ExecuteCommands (rg);
+ if ( component < ns_components) {
+ model.generic.AddGlobal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component ));
+ parameters.DeclareGlobalWithRanges (_aux[component-1], 0.5, 0, 1);
+ }
+ if ( s_component < syn_components) {
+ model.generic.AddGlobal ( bs_rel, _aux_srv[s_component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), "SRV " + s_component ));
+ parameters.DeclareGlobalWithRanges (_aux_srv[s_component-1], 0.5, 0, 1);
+ }
+ models.codon.generic.DefineQMatrix(bs_rel, namespace);
+ rate_matrices [key] = bs_rel[utility.getGlobalValue("terms.model.rate_matrix")];
+ (bs_rel [^'terms.mixture.mixture_components'])[key] = parameters.AppendMultiplicativeTerm (_wts [component-1], _wts_srv[s_component- 1]);
+ }
+ }
+
+ __rp = parameters.ConstrainMeanOfSet (_alphas, _wts_srv, 1, namespace);
+
+ parameters.DeclareGlobal (__rp[^'terms.global'], null);
+ parameters.helper.copy_definitions (bs_rel[^'terms.parameters'], __rp);
+
+
+ bs_rel[utility.getGlobalValue("terms.model.rate_matrix")] = rate_matrices;
+ parameters.SetConstraint(((bs_rel[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.global")])[terms.nucleotideRate("A", "G")], "1", "");
+
+ return bs_rel;
+}
=====================================
res/TemplateBatchFiles/libv3/tasks/estimators.bf
=====================================
@@ -754,7 +754,7 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o
lf_id = &likelihoodFunction;
utility.ExecuteInGlobalNamespace ("LikelihoodFunction `lf_id` = (`&lf_components`)");
-
+
df = 0;
=====================================
src/core/global_things.cpp
=====================================
@@ -122,7 +122,7 @@ namespace hy_global {
kErrorStringDatasetRefIndexError ("Dataset index reference out of range"),
kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"),
kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"),
- kHyPhyVersion = _String ("2.5.31"),
+ kHyPhyVersion = _String ("2.5.32"),
kNoneToken = "None",
kNullToken = "null",
=====================================
src/core/include/likefunc.h
=====================================
@@ -202,6 +202,8 @@ public:
virtual
_Matrix* Optimize (_AssociativeList const* options = nil);
_Matrix* ConstructCategoryMatrix (const _SimpleList&, unsigned, bool = true, _String* = nil);
+ const _String
+ GetMyName (void) const;
hyFloat SimplexMethod (hyFloat& precision, unsigned long max_iterations = 100000UL, unsigned long max_evals = 0xFFFFFF);
void Anneal (hyFloat& precision);
=====================================
src/core/likefunc.cpp
=====================================
@@ -793,7 +793,7 @@ void _LikelihoodFunction::Rebuild (bool rescan_parameters) {
}
}
catch (unsigned long code) {
- ReportWarning (_String ("Likelihood function cleared because partition index '") & (long) code & "' points to invalid components");
+ ReportWarning (_String ("Likelihood function ") & GetMyName().Enquote() & _String(" cleared because partition index '") & (long) code & "' points to invalid components. " & ignored_error);
Clear();
return;
}
@@ -1524,7 +1524,15 @@ void _LikelihoodFunction::MPI_LF_Compute (long, bool)
}
-
+//_______________________________________________________________________________________
+const _String _LikelihoodFunction::GetMyName (void) const {
+ long i = hyphy_global_objects::FindLikeFuncIndex((void*)this);
+ if (i < 0) {
+ return kEmptyString;
+ }
+ return *hyphy_global_objects::GetObjectNameByType (HY_BL_LIKELIHOOD_FUNCTION, i, false);
+}
+
//_______________________________________________________________________________________
_Matrix* _LikelihoodFunction::ConstructCategoryMatrix (const _SimpleList& whichParts, unsigned runMode, bool remap, _String* storageID) {
long hDim = 1,
=====================================
src/core/matrix.cpp
=====================================
@@ -2028,6 +2028,7 @@ bool _Matrix::IsValidTransitionMatrix() const {
long idx = 0L;
const hyFloat tolerance = kMachineEpsilon * 10.;
for (long r = 0L; r < d; r++) {
+ sums [r] = 0.;
for (long c = 0L; c < d; c++, idx++) {
hyFloat term = theData[idx];
if (term < 0.0 || term > 1.0) {
@@ -7628,11 +7629,11 @@ void _Matrix::RecursiveIndexSort (long from, long to, _SimpleList* index) {
if (middle)
- while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) >= 0L) {
+ while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) > 0L) {
bottommove++;
}
if (from<to)
- while (middle+topmove<=to && CompareRows (middle+topmove,middle) <= 0L) {
+ while (middle+topmove<=to && CompareRows (middle+topmove,middle) < 0L) {
topmove++;
}
@@ -7642,7 +7643,7 @@ void _Matrix::RecursiveIndexSort (long from, long to, _SimpleList* index) {
index->Swap(middle-bottommove,i);
bottommove++;
- while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) >= 0L) {
+ while (middle-bottommove>=from && CompareRows (middle-bottommove, middle) > 0L) {
bottommove++;
}
}
@@ -7654,7 +7655,7 @@ void _Matrix::RecursiveIndexSort (long from, long to, _SimpleList* index) {
index->Swap(i, middle+topmove);
topmove++;
- while (middle+topmove<=to && CompareRows (middle+topmove,middle) <= 0L) {
+ while (middle+topmove<=to && CompareRows (middle+topmove,middle) < 0L) {
topmove++;
}
}
@@ -7760,6 +7761,13 @@ HBLObjectRef _Matrix::SortMatrixOnColumn (HBLObjectRef mp, HBLObjectRef ca
}
theColumn.RecursiveIndexSort (0, hDim-1, &idx);
+ /*for (long i = 1; i < hDim; i++) {
+ if (theColumn.theData[i-1] > theColumn.theData[i]) {
+ HandleApplicationError("Resulting matrix is not properly sorted");
+ }
+ }*/
+
+
_Matrix *result = (_Matrix*)_returnMatrixOrUseCache(hDim, vDim, _NUMERICAL_TYPE, theIndex, cache);
if (theIndex) {
=====================================
src/core/simplelist.cpp
=====================================
@@ -1327,12 +1327,12 @@ void _SimpleList::RecursiveIndexSort (long from, long to, _SimpleList* index) {
bottommove = 1, topmove = 1, temp,i, imiddleV = (*index)(middle);
if (middle)
- while ((middle-bottommove>=from)&&(Compare(middle-bottommove,middle) != kCompareLess)) {
+ while ((middle-bottommove>=from)&&(Compare(middle-bottommove,middle) == kCompareGreater)) {
bottommove++;
}
if (from<to)
- while ((middle+topmove<=to)&&(Compare(middle+topmove,middle) != kCompareGreater)) {
+ while ((middle+topmove<=to)&&(Compare(middle+topmove,middle) == kCompareLess)) {
topmove++;
}
@@ -1342,7 +1342,7 @@ void _SimpleList::RecursiveIndexSort (long from, long to, _SimpleList* index) {
Exchange (list_data[middle-bottommove], list_data[i]);
Exchange (index->list_data[middle-bottommove], index->list_data[i]);
bottommove++;
- while ((middle-bottommove>=from)&&(Compare(middle-bottommove,middle)!= kCompareLess)) {
+ while ((middle-bottommove>=from)&&(Compare(middle-bottommove,middle)== kCompareGreater)) {
bottommove++;
}
}
@@ -1353,7 +1353,7 @@ void _SimpleList::RecursiveIndexSort (long from, long to, _SimpleList* index) {
Exchange (list_data[middle+topmove], list_data[i]);
Exchange (index->list_data[middle+topmove], index->list_data[i]);
topmove++;
- while ((middle+topmove<=to)&&(Compare(middle+topmove,middle)!= kCompareGreater)) {
+ while ((middle+topmove<=to)&&(Compare(middle+topmove,middle) == kCompareLess)) {
topmove++;
}
}
=====================================
src/core/topology.cpp
=====================================
@@ -1346,7 +1346,7 @@ HBLObjectRef _TreeTopology::MaximumParsimony (HBLObjectRef parameters,HBLObjectR
if (!iterator->is_leaf () && label >= 0) { // labeled node
- long my_label;
+ long my_label = 0L;
hyFloat best_score = 1.e100;
if (!iterator->parent || has_labels.get (iterator->parent->in_object) < 0) {
// either a root or not a part of the labeled tree
=====================================
src/mains/unix.cpp
=====================================
@@ -80,8 +80,6 @@ const char hy_help_message [] =
" -c calculator mode; causes HyPhy to drop into an expression evaluation until 'exit' is typed\n"
" -d debug mode; causes HyPhy to drop into an expression evaluation mode upon script error\n"
" -i interactive mode; causes HyPhy to always prompt the user for analysis options, even when defaults are available\n"
-" -p postprocessor mode; drops HyPhy into an interactive mode where general post-processing scripts can be selected\n"
-" upon analysis completion\n\n"
" -m write diagnostic messages to messages.log\n"
"optional global arguments:\n"
" BASEPATH=directory path defines the base directory for all path operations (default is pwd)\n"
@@ -179,7 +177,6 @@ void ReadInTemplateFiles (void);
long DisplayListOfChoices (void);
void ProcessConfigStr (_String const&);
void ProcessKWStr (_String const& arg, _String const& arg2, _AssociativeList& kwargs);
-void ReadInPostFiles (void);
long DisplayListOfPostChoices (void);
_String getLibraryPath (void);
@@ -187,8 +184,7 @@ _String getLibraryPath (void);
-bool usePostProcessors = false,
- calculatorMode = false,
+bool calculatorMode = false,
updateMode = false,
pipeMode = false,
logInputMode = false,
@@ -344,69 +340,6 @@ void ReadInTemplateFiles(void) {
}
}
-//__________________________________________________________________________________
-void ReadInPostFiles(void) {
- static _String kSeparator ("SEPARATOR");
- //if (!likeFuncList.lLength)
- // return;
-
- _String dir_sep (get_platform_directory_char());
-
- _String fileIndex = libArgDir & hy_standard_library_directory & dir_sep & "postprocessors.lst";
- FILE* modelList = fopen (fileIndex.get_str(),"r");
-
- if (!modelList) {
- return;
- }
-
- _String theData (modelList);
- fclose (modelList);
-
- if (theData.length()) {
- _ElementaryCommand::ExtractConditions(theData,0,availablePostProcessors);
-
- for (unsigned long i = 0; i<availablePostProcessors.countitems(); i++) {
- _String* thisString = (_String*)availablePostProcessors(i);
- _List thisFile;
- _ElementaryCommand::ExtractConditions(*thisString,thisString->FirstNonSpaceIndex(),thisFile,',');
- if (thisFile.lLength!=3) {
- availablePostProcessors.Delete(i);
- i--;
- continue;
- }
- for (long j = 0; j<3; j++) {
- ((_String*)thisFile(j))->StripQuotes();
- }
- if ( *(_String*)thisFile(0) != kSeparator) {
- fileIndex = *((_String*)pathNames(0)) & hy_standard_library_directory & dir_sep & *(_String*)thisFile(1);
- FILE* dummyFile = fopen (fileIndex,"r");
- if (!dummyFile) {
- fileIndex =libArgDir& hy_standard_library_directory & dir_sep & *(_String*)thisFile(1);
- dummyFile = fopen (fileIndex,"r");
- }
- if (dummyFile) {
- fclose (dummyFile);
- _String* condition = (_String*)thisFile(2);
- if (condition->nonempty()) {
- _Formula condCheck (*condition,nil);
- HBLObjectRef condCheckRes = condCheck.Compute();
- if ((!condCheckRes)||(condCheckRes->Value()<.5)) {
- availablePostProcessors.Delete(i);
- i--;
- continue;
- }
- }
- *(_String*)thisFile(1) = fileIndex;
- availablePostProcessors.Replace(i,&thisFile,true);
- continue;
- }
- }
- availablePostProcessors.Delete(i);
- i--;
- }
-
- }
-}
//__________________________________________________________________________________
long DisplayListOfChoices (void) {
@@ -579,12 +512,7 @@ void ProcessConfigStr (_String const & conf) {
}
break;
- case 'p':
- case 'P': {
- usePostProcessors = true;
- break;
- }
- case 'c':
+ case 'c':
case 'C': {
calculatorMode = true;
break;
@@ -1026,39 +954,7 @@ int main (int argc, char* argv[]) {
ex.Execute();
- if (usePostProcessors && (!updateMode)) {
- ReadInPostFiles();
- printf ("\n\n**********Continue with result processing (y/n)?");
- _String c_str (StringFromConsole());
-
- if (logInputMode) {
- loggedUserInputs && & c_str;
- }
-
- if (c_str.get_char(0) !='n' && c_str.get_char(0)!='N' ) {
- long choice = DisplayListOfPostChoices();
- while (choice != -1) {
- _ExecutionList postEx;
- argFile = *(_String*)(*(_List*)availablePostProcessors(choice-1))(1);
- PushFilePath (argFile);
- ReadBatchFile (argFile, postEx);
- postEx.Execute();
- PopFilePath ();
- printf ("\n\n**********Continue with result processing (y/n)?");
-
- c_str = StringFromConsole();
- if (logInputMode) {
- loggedUserInputs && & c_str;
- }
-
- if (c_str.get_char(0)=='n' || c_str.get_char(0)=='N' ) {
- break;
- }
-
- choice = DisplayListOfPostChoices();
- }
- }
- }
+
#ifdef __HYPHYMPI__
}
ReportWarning (_String ("Node ") & (long)rank & " is shutting down\n");
View it on GitLab: https://salsa.debian.org/med-team/hyphy/-/commit/d482898df35710ff14413712f36f8cac7cd945b5
--
View it on GitLab: https://salsa.debian.org/med-team/hyphy/-/commit/d482898df35710ff14413712f36f8cac7cd945b5
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210825/b2f678ca/attachment-0001.htm>
More information about the debian-med-commit
mailing list