[med-svn] [Git][med-team/hyphy][upstream] New upstream version 2.5.38+dfsg
Andreas Tille (@tille)
gitlab at salsa.debian.org
Mon May 2 12:50:09 BST 2022
Andreas Tille pushed to branch upstream at Debian Med / hyphy
Commits:
2147996c by Andreas Tille at 2022-05-02T12:18:06+02:00
New upstream version 2.5.38+dfsg
- - - - -
19 changed files:
- res/TemplateBatchFiles/LEISR.bf
- res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf
- res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
- res/TemplateBatchFiles/files.lst
- res/TemplateBatchFiles/libv3/all-terms.bf
- res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf
- res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf
- res/TemplateBatchFiles/libv3/models/codon/MG_REV_PROPERTIES.bf
- res/TemplateBatchFiles/libv3/models/codon/MSS.bf
- res/TemplateBatchFiles/libv3/models/model_functions.bf
- res/TemplateBatchFiles/libv3/models/parameters.bf
- res/TemplateBatchFiles/libv3/tasks/ancestral.bf
- src/core/fstring.cpp
- src/core/global_things.cpp
- src/core/include/function_templates.h
- src/core/include/matrix.h
- src/core/matrix.cpp
- src/core/tree_evaluator.cpp
- src/mains/unix.cpp
Changes:
=====================================
res/TemplateBatchFiles/LEISR.bf
=====================================
@@ -1,4 +1,4 @@
-RequireVersion("2.3.5");
+RequireVersion("2.5.25");
LoadFunctionLibrary("libv3/UtilityFunctions.bf");
@@ -31,7 +31,7 @@ utility.ToggleEnvVariable ("NORMALIZE_SEQUENCE_NAMES", 1);
leisr.analysis_description = {
terms.io.info: "LEISR (Likelihood Estimation of Individual Site Rates) infer relative amino-acid or nucleotide rates from a fixed nucleotide or amino-acid alignment and tree, with possibility for partitions. Relative site-specific substitution rates are
inferred by first optimizing alignment-wide branch lengths, and then inferring a site-specific uniform tree scaler.",
- terms.io.version: "0.4",
+ terms.io.version: "0.5",
terms.io.reference: "Spielman, S.J. and Kosakovsky Pond, S.L. (2018). Relative evolutionary rate inference in HyPhy with PeerJ 6:e4339. DOI 10.7717/peerj.4339 ; Pupko, T., Bell, R. E., Mayrose, I., Glaser, F. & Ben-Tal, N. Rate4Site: an algorithmic tool for the identification of functional regions in proteins by surface mapping of evolutionary determinants within their homologues. Bioinformatics 18, S71–S77 (2002).",
terms.io.authors: "Sergei L Kosakovsky Pond and Stephanie J Spielman",
terms.io.contact: "{spond,stephanie.spielman}@temple.edu"
@@ -42,7 +42,11 @@ io.DisplayAnalysisBanner(leisr.analysis_description);
/***************************************** LOAD DATASET **********************************************************/
-SetDialogPrompt ("Specify a multiple sequence alignment file");
+
+KeywordArgument ("alignment", "A multiple sequence alignment file in one of the formats supported by HyPhy");
+KeywordArgument ("tree", "A phylogenetic tree (optionally annotated with {})", null, "Please select a tree file for the data:");
+
+SetDialogPrompt ("A multiple sequence alignment file");
leisr.alignment_info = alignments.ReadNucleotideDataSet ("leisr.dataset", None);
leisr.name_mapping = leisr.alignment_info[utility.getGlobalValue("terms.data.name_mapping")];
@@ -57,7 +61,6 @@ leisr.partition_count = Abs (leisr.partitions_and_trees);
leisr.filter_specification = alignments.DefineFiltersForPartitions (leisr.partitions_and_trees, "leisr.dataset" , "leisr.filter.", leisr.alignment_info);
-
io.ReportProgressMessageMD ("relative_rates", "Data", "Input alignment description");
io.ReportProgressMessageMD ("relative_rates", "Data", "Loaded **" +
leisr.alignment_info [terms.data.sequences] + "** sequences, **" +
@@ -71,11 +74,15 @@ io.ReportProgressMessageMD ("relative_rates", "Data", "Loaded **" +
leisr.protein_type = "Protein";
leisr.nucleotide_type = "Nucleotide";
+
+KeywordArgument ("type", "Data type (`leisr.protein_type` or `leisr.nucleotide_type`)", leisr.nucleotide_type);
+
leisr.analysis_type = io.SelectAnOption ({{leisr.protein_type , "Infer relative rates from a protein (amino-acid) alignment"}, {leisr.nucleotide_type, "Infer relative rates from a nucleotide alignment"}},
"Select your analysis type:");
+KeywordArgument ("model", "Substitution model");
-if (leisr.analysis_type == leisr.protein_type) {
+if (leisr.analysis_type == leisr.protein_type) {
leisr.baseline_model = io.SelectAnOption (models.protein.empirical_models, "Select a protein model:");
leisr.generators = models.protein.empirical.plusF_generators;
}
@@ -85,6 +92,9 @@ else {
leisr.generators = models.DNA.generators;
}
+KeywordArgument ("rate-variation", "Optimize branch lengths with rate variation?", "No");
+
+
leisr.use_rate_variation = io.SelectAnOption( {{"Gamma", "Use a four-category discrete gamma distribution when optimizing branch lengths."},
{"GDD", "Use a four-category general discrete distribution when optimizing branch lengths."},
{"No", "Do not consider rate variation when optimizing branch lengths."}
@@ -92,8 +102,7 @@ leisr.use_rate_variation = io.SelectAnOption( {{"Gamma", "Use a four-category di
"Optimize branch lengths with rate variation?");
function leisr.Baseline.ModelDescription(type){
- def = Call( leisr.generators[leisr.baseline_model], type);
- return def;
+ return Call( leisr.generators[leisr.baseline_model], type);
}
function leisr.Baseline.ModelDescription.withGamma(type){
@@ -365,8 +374,11 @@ selection.io.json_store_key_value_pair (leisr.json_content, None, utility.getGlo
leisr.filter_specification);
-
-io.SpoolJSON (leisr.json_content, leisr.alignment_info[terms.data.file] + ".LEISR.json");
+KeywordArgument ("output", "Write the analysis report file to", leisr.alignment_info[terms.data.file] + ".LEISR.json");
+leisr.json_content [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to");
+
+
+io.SpoolJSON (leisr.json_content, leisr.json_content [terms.json.json]);
//----------------------------------------------------------------------------------------
@@ -388,12 +400,12 @@ lfunction leisr.handle_a_site (lf, filter_data, partition_index, pattern_info, m
utility.SetEnvVariable ("USE_LAST_RESULTS", TRUE);
parameters.SetValue (^"leisr.site_model_scaler_name", 1);
- /*
+ /*
- SLKP 20171210
- RESET the global rate value to a default avoid weird optimization issues because of bad starting conditions
- (e.g. if the previous site was saturated)
- */
+ SLKP 20171210
+ RESET the global rate value to a default avoid weird optimization issues because of bad starting conditions
+ (e.g. if the previous site was saturated)
+ */
if (pattern_info [utility.getGlobalValue("terms.data.is_constant")]) {
@@ -404,13 +416,37 @@ lfunction leisr.handle_a_site (lf, filter_data, partition_index, pattern_info, m
results[1][0] = estimators.ComputeLF (lf);
} else {
-
- ^(utility.getGlobalValue("leisr.site_model_scaler_name")) = 1;
- Optimize (results, ^lf);
+ scaler = utility.getGlobalValue("leisr.site_model_scaler_name");
+ start.grid = {
+ "0" : {
+ scaler : 0.01
+ },
+ "1" : {
+ scaler : 0.1
+ },
+ "2" : {
+ scaler : 0.5
+ },
+ "3" : {
+ scaler : 1
+ },
+ "4" : {
+ scaler : 2
+ },
+ "5" : {
+ scaler : 10
+ },
+ };
+
+ Optimize (results, ^lf, {
+ "OPTIMIZATION_PRECISION" : 1e-5,
+ "OPTIMIZATION_START_GRID" : start.grid
+ });
+
+
}
-
- profile = parameters.GetProfileCI (utility.getGlobalValue("leisr.site_model_scaler_name"), lf, 0.95);
+ profile = parameters.GetProfileCI (utility.getGlobalValue("leisr.site_model_scaler_name"), lf, 0.95);
profile[utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
return profile;
=====================================
res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf
=====================================
@@ -1,4 +1,4 @@
-RequireVersion("2.5.7");
+RequireVersion("2.5.37");
/*------------------------------------------------------------------------------
Load library files
@@ -22,8 +22,6 @@ LoadFunctionLibrary("modules/selection_lib.ibf");
-
-
debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT");
/*------------------------------------------------------------------------------
Display analysis information
@@ -40,7 +38,7 @@ prime.analysis_description = {
inferred -- the non-synonymous rate on branches NOT selected for testing. Multiple partitions within a NEXUS file are also supported
for recombination - aware analysis.
",
- terms.io.version: "0.0.1",
+ terms.io.version: "0.1.0",
terms.io.reference: "TBD",
terms.io.authors: "Sergei L. Kosakovsky Pond",
terms.io.contact: "spond at temple.edu",
@@ -74,7 +72,7 @@ prime.pvalue = 0.1;
prime.json = {
terms.json.analysis: prime.analysis_description,
terms.json.fits: {},
- terms.json.timers: {},
+ terms.json.timers: {}
};
prime.display_orders = {
@@ -110,19 +108,21 @@ the next set of variables.
prime.site.composition.string = "";
-prime.table_screen_output = {{"Codon", "Partition", "alpha", "beta", "Property Description", "Importance", "p-value","Most common codon substitutions at this site"}};
+prime.table_screen_output = {{"Codon", "Partition", "#subs", "#AA", "alpha", "beta", "Property Description", "Importance", "p-value","Most common codon substitutions at this site"}};
prime.table_output_options = { terms.table_options.header : TRUE,
terms.table_options.minimum_column_width: 12,
terms.table_options.align : "center",
terms.table_options.column_widths: {
"0" : 12,
"1" : 12,
- "2" : 12,
- "3" : 12,
- "4" : 40,
+ "2" : 8,
+ "3" : 8,
+ "4" : 12,
"5" : 12,
- "6" : 12,
- "7" : 70}
+ "6" : 40,
+ "7" : 12,
+ "8" : 12,
+ "9" : 70}
};
@@ -142,6 +142,12 @@ prime.property_set = io.SelectAnOption (
},
"The set of properties to use in the model");
+if (prime.property_set == "Custom") {
+ KeywordArgument ("property-file", "JSON file which defines amino-acid properties");
+ prime.property_set = io.ParseJSON(io.PromptUserForFilePathRead ("JSON file which defines amino-acid properties"));
+ console.log (">Loaded a set of `Abs(prime.property_set)` properties");
+}
+
KeywordArgument ("impute-states", "Use site-level model fits to impute likely character states for each sequence", "No");
prime.impute_states = io.SelectAnOption (
@@ -294,6 +300,10 @@ prime.site.property_model = model.generic.DefineModel("models.codon.MG_REV_PROP
},
prime.filter_names,
None);
+
+
+prime.json [terms.model.residue_properties] = prime.site.property_model [terms.model.residue_properties];
+prime.json [terms.translation_table] = prime.site.property_model [terms.translation_table];
prime.properties = prime.site.property_model [terms.model.residue_properties];
@@ -309,14 +319,18 @@ prime.lambda_range = {
-prime.table_headers = { 6 + 3*prime.properties.N, 2};
+prime.table_headers = { 10 + 3*prime.properties.N, 2};
-prime.table_headers[0][0] = "alpha;"; prime.table_headers[0][1] = "Synonymous substitution rate at a site";
-prime.table_headers[1][0] = "β"; prime.table_headers[1][1] = "Log of property independent non-synonymous rate a site";
-prime.table_headers[2][0] = "Total branch length"; prime.table_headers[2][1] = "The total length of branches contributing to inference at this site, and used to scale dN-dS";
-prime.table_headers[3][0] = "PRIME LogL"; prime.table_headers[3][1] = "Site Log-likelihood under the PRIME model";
-prime.table_headers[4][0] = "FEL LogL"; prime.table_headers[4][1] = "Site Log-likelihood under the FEL model";
-prime.table_headers[5][0] = "p-value"; prime.table_headers[5][1] = "Omnibus p-value (any property is important)";
+prime.table_headers[0][0] = "α"; prime.table_headers[0][1] = "Synonymous substitution rate at a site";
+prime.table_headers[1][0] = "β"; prime.table_headers[1][1] = "Property independent non-synonymous rate a site";
+prime.table_headers[2][0] = "FEL α"; prime.table_headers[2][1] = "Synonymous substitution rate at a site (FEL model)";
+prime.table_headers[3][0] = "FEL β"; prime.table_headers[3][1] = "Log of property independent non-synonymous rate a site";
+prime.table_headers[4][0] = "Total branch length"; prime.table_headers[4][1] = "The total length of branches contributing to inference at this site, and used to scale dN-dS";
+prime.table_headers[5][0] = "# subs"; prime.table_headers[5][1] = "The number of amino-acid substitutions that occurred at this site";
+prime.table_headers[6][0] = "# aa"; prime.table_headers[6][1] = "The number of unique amino-acids occurring at that site";
+prime.table_headers[7][0] = "PRIME LogL"; prime.table_headers[7][1] = "Site Log-likelihood under the PRIME model";
+prime.table_headers[8][0] = "FEL LogL"; prime.table_headers[8][1] = "Site Log-likelihood under the FEL model";
+prime.table_headers[9][0] = "p-value"; prime.table_headers[9][1] = "Omnibus p-value (any property is important)";
prime.lambdas = {};
@@ -325,7 +339,7 @@ io.ReportProgressMessageMD ("PRIME", "property-description", "Using the followin
prime.i = 1;
prime.local_to_property_name = {};
-prime.p_value_indices = {"Overall" : 5};
+prime.p_value_indices = {"Overall" : 9};
prime.report.count = {"Overall" : 0};
@@ -333,9 +347,9 @@ for (key, value; in; prime.properties ) {
io.ReportProgressMessageMD ("PRIME", "property-description", "* " + key);
prime.p = prime.property_variable_prefix + (prime.site.property_model[terms.model.residue_name_map])[key];
- prime.table_headers [3+prime.i*3][0] = "λ" + prime.i; prime.table_headers [3 + prime.i*3][1] = "Importance for " + key;
- prime.table_headers [4+prime.i*3][0] = "p" + prime.i; prime.table_headers [4 + prime.i*3][1] = "p-value for non-zero effect of " + key;
- prime.table_headers [5+prime.i*3][0] = "LogL" + prime.i; prime.table_headers [5 + prime.i*3][1] = "Log likelihood when there is no effect of " + key;
+ prime.table_headers [7+prime.i*3][0] = "λ" + prime.i; prime.table_headers [7 + prime.i*3][1] = "Importance for " + key;
+ prime.table_headers [8+prime.i*3][0] = "p" + prime.i; prime.table_headers [8 + prime.i*3][1] = "p-value for non-zero effect of " + key;
+ prime.table_headers [9+prime.i*3][0] = "LogL" + prime.i; prime.table_headers [9 + prime.i*3][1] = "Log likelihood when there is no effect of " + key;
model.generic.AddGlobal (prime.site.property_model, prime.p , key);
parameters.DeclareGlobal ( prime.p, {});
@@ -346,12 +360,13 @@ for (key, value; in; prime.properties ) {
prime.lambdas [parameter.local_lambda] = prime.p;
prime.local_to_property_name [parameter.local_lambda] = key;
prime.property_to_index [key] = prime.i-1;
- prime.p_value_indices[key] = 4+prime.i*3;
+ prime.p_value_indices[key] = 8+prime.i*3;
prime.report.count[key] = 0;
prime.i += 1;
}
+
model.generic.AddGlobal (prime.site.property_model, prime.baseline_non_syn_rate , prime.parameter_site_beta);
parameters.DeclareGlobal ( prime.baseline_non_syn_rate, {});
//parameters.SetRange ( prime.baseline_non_syn_rate, prime.lambda_range);
@@ -384,6 +399,8 @@ function prime.rate_to_screen (name, value) {
prime.report.significant_site = {{"" + (1+((prime.filter_specification[prime.report.partition])[terms.data.coverage])[prime.report.site]),
prime.report.partition + 1,
+ Format(prime.report.row[5],6,0), // #subs
+ Format(prime.report.row[6],6,0), // #AA
Format(prime.report.row[0],7,3), // alpha
Format(prime.report.row[1],7,3), // beta
prime.property_report_name, // property name
@@ -395,6 +412,7 @@ prime.report.significant_site = {{"" + (1+((prime.filter_specification[prime.rep
prime.site_results = {};
prime.imputed_leaf_states = {};
+prime.sub_mapping = {};
for (prime.partition_index = 0; prime.partition_index < prime.partition_count; prime.partition_index += 1) {
@@ -479,9 +497,8 @@ for (prime.partition_index = 0; prime.partition_index < prime.partition_count; p
/* run the main loop over all unique site pattern combinations */
prime.pattern_count = 1;
- utility.ForEachPair (prime.site_patterns, "_pattern_", "_pattern_info_",
- '
- io.ReportProgressBar("", "Working on site pattern " + (prime.pattern_count) + "/" + Abs (prime.site_patterns));
+ for (_pattern_, _pattern_info_; in; prime.site_patterns) {
+ io.ReportProgressBar("", "Working on site pattern " + (prime.pattern_count) + "/" + Abs (prime.site_patterns));
if (_pattern_info_[utility.getGlobalValue("terms.data.is_constant")]) {
prime.store_results (-1,None,{"0" : "prime.site_likelihood",
"1" : "prime.site_likelihood_property",
@@ -503,19 +520,16 @@ for (prime.partition_index = 0; prime.partition_index < prime.partition_count; p
"prime.store_results");
}
prime.pattern_count += 1;
- '
- );
+ }
mpi.QueueComplete (prime.queue);
prime.partition_matrix = {Abs (prime.site_results[prime.partition_index]), Rows (prime.table_headers)};
-
- utility.ForEachPair (prime.site_results[prime.partition_index], "_key_", "_value_",
- '
+
+ for (_key_, _value_; in; prime.site_results[prime.partition_index]) {
for (prime.index = 0; prime.index < Rows (prime.table_headers); prime.index += 1) {
prime.partition_matrix [0+_key_][prime.index] = _value_[prime.index];
}
- '
- );
+ }
prime.site_results[prime.partition_index] = prime.partition_matrix;
@@ -530,6 +544,9 @@ if (prime.impute_states) {
(prime.json [terms.json.MLE])[terms.prime_imputed_states] = prime.imputed_leaf_states;
}
+(prime.json [terms.json.MLE])[terms.substitutions] = prime.sub_mapping;
+
+
for (key, value; in; prime.report.count) {
io.ReportProgressMessageMD ("PRIME", "results", "** Found _" + value + "_ sites where " + key + " property was important at p <= " + prime.pvalue + "**");
}
@@ -586,7 +603,7 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
//console.log (pattern_info);
GetString (lfInfo, ^lf_fel,-1);
- //utility.SetEnvVariable ("VERBOSITY_LEVEL", 10);
+ //utility.SetEnvVariable ("VERBOSITY_LEVEL", 1);
//TODO Datafilters hardcode, Trees hardcode.
ExecuteCommands (filter_data);
@@ -607,7 +624,7 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
^"prime.site_beta" :> 0;
Optimize (results, ^lf_fel
- , {"OPTIMIZATION_METHOD" : "nedler-mead", OPTIMIZATION_PRECISION: 1e-4}
+ , {"OPTIMIZATION_METHOD" : "nedler-mead", "OPTIMIZATION_PRECISION": 1e-4}
);
fel = estimators.ExtractMLEs (lf_fel, model_mapping);
fel[utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
@@ -628,37 +645,119 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
parameters.SetConstraint ("prime.site_beta", Eval(^"prime.site_beta"),"");
+
start_grid = {};
propN = utility.Array1D (^"prime.lambdas");
- for (sp = 0; sp < 100; sp += 1) {
- point = {};
- point ["prime.site_alpha"] = Random (0.75, 1.25);
- point ["prime.site_beta"] = Random (0.5, 1.5);
- point ["prime.site_beta_nuisance"] = Random (0.5, 1.5);
+ point = {};
+ point ["prime.site_alpha"] = ^"prime.site_alpha";
+ point ["prime.site_beta"] = ^"prime.site_beta";
+ save_beta = ^"prime.site_beta";
+ point ["prime.site_beta_nuisance"] = ^"prime.site_beta_nuisance";
+
+ for (l; in; ^"prime.lambdas") {
+ point [l] = 0;
+ }
+ start_grid + point;
+ propNames = utility.Values (^"prime.lambdas");
+ ranges = {};
+ r_unit = {
+ ^"terms.lower_bound" : -1,
+ ^"terms.upper_bound" : 1,
+ };
+
+ for (k,l; in; ^"prime.lambdas") {
+ ranges[l] = r_unit;
+ }
+
+
+ for (sp = 0; sp < 20; sp += 1) {
for (l; in; ^"prime.lambdas") {
- point [l] = Random (-0.5,0.5);
+ point [l] = 0.0;
}
+ point [propNames [Random (0, propN) $ 1]] = Random (-1,1);
start_grid + point;
}
+
+ for (sp; in; estimators.LHC (ranges, 40)) {
+ point = {};
+
+ point ["prime.site_alpha"] = Random (^"prime.site_alpha" * 0.5, ^"prime.site_alpha" * 2.0);
+ point ["prime.site_beta"] = Random (^"prime.site_beta" * 0.5, ^"prime.site_beta" * 2.0);
+
+ for (l,v; in; sp) {
+ point [l] = v [^"terms.fit.MLE"];
+ }
+ start_grid + point;
+
+ point ["prime.site_alpha"] = ^"prime.site_alpha";
+ point ["prime.site_beta"] = ^"prime.site_beta";
+
+ start_grid + point;
+
+ }
//console.log (start_grid);
- //Export (lfe, ^lf_prop);
- //fprintf ("/Users/sergei/Desktop/prime." + ^"MPI_NODE_ID" + ".bf",CLEAR_FILE,lfe);
-
+ // Export (lfe, ^lf_prop);
+ // fprintf ("/Users/sergei/Desktop/PRIME/site." + (pattern_info["sites"])[0] + ".bf",CLEAR_FILE,lfe);
+
Optimize (results, ^lf_prop, {
"OPTIMIZATION_METHOD" : "nedler-mead",
//"OPTIMIZATION_METHOD" : "gradient-descent",
- "OPTIMIZATION_START_GRID" : start_grid
+ "OPTIMIZATION_START_GRID" : start_grid,
+ "OPTIMIZATION_PRECISION": 1e-4
});
+
+ //console.log ("\n" + ^"LF_INITIAL_GRID_MAXIMUM_VALUE" + "\n"+ ^"LF_INITIAL_GRID_MAXIMUM" + " : " + results[1][0] + "\n");
+ Optimize (results, ^lf_prop);
+
+ //console.log ("\n" + results[1][0] + "\n");
- //console.log ("\n" + ^"LF_INITIAL_GRID_MAXIMUM" + " : " + results[1][0] + "\n");
+ altL = results[1][0];
+
+ // fit all of the nulls
+
+ constrained_models = {};
+
+ alternative = estimators.ExtractMLEsOptions (lf_prop, model_mapping, {});
+
+ while (TRUE) {
+ done = TRUE;
+ for (k,l; in; ^"prime.lambdas") {
+ ^l = 0;
+ // FORCE UPDATE DEPENDENT VARIABLES
+ LFCompute (^lf_prop,LF_START_COMPUTE);
+ LFCompute (^lf_prop,results);
+ LFCompute (^lf_prop,LF_DONE_COMPUTE);
+ ^l := 0;
+ Optimize (results, ^lf_prop);
+ //console.log (k + " => " + (- results[1][0] + altL));
+ if (results[1][0] - altL > 1e-2) {
+ done = FALSE;
+ break;
+ }
+ constrained_models[k] = estimators.ExtractMLEsOptions (lf_prop, model_mapping, {^"terms.globals_only" : TRUE});
+ (constrained_models[k])[utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
+ ^l = 0;
+ estimators.ApplyExistingEstimates (lf_prop, model_mapping, alternative, ^"terms.globals_only");
+ }
+ if (done) {
+ break;
+ } else {
+ ^l = 0;
+ Optimize (results, ^lf_prop);
+ altL = results[1][0];
+ //console.log ("REOPTIMIZED : " + altL);
+ alternative = estimators.ExtractMLEsOptions (lf_prop, model_mapping, {});
+
+ }
+ }
character_map = None;
if (^"prime.impute_states") {
DataSet anc = ReconstructAncestors ( ^lf_prop, {{0}}, MARGINAL, DOLEAVES);
@@ -679,32 +778,12 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
ancestral_info = ancestral.build (lf_prop,0,FALSE);
branch_substitution_information = (ancestral.ComputeSubstitutionBySite (ancestral_info,0,None))[^"terms.substitutions"];
+ branch_mapping = ancestral.ComputeCompressedSubstitutionsBySite (ancestral_info,0);
DeleteObject (ancestral_info);
- alternative = estimators.ExtractMLEsOptions (lf_prop, model_mapping, {});
- alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
- //console.log (results[1][0]);
-
-
- // fit all of the nulls
-
- constrained_models = {};
+ //console.log (branch_substitution_information);
- for (k,l; in; ^"prime.lambdas") {
- ^l = 0;
- // FORCE UPDATE DEPENDENT VARIABLES
- LFCompute (^lf_prop,LF_START_COMPUTE);
- LFCompute (^lf_prop,results);
- LFCompute (^lf_prop,LF_DONE_COMPUTE);
- ^l := 0;
- Optimize (results, ^lf_prop);
- //console.log (results[1][0]);
- constrained_models[k] = estimators.ExtractMLEsOptions (lf_prop, model_mapping, {^"terms.globals_only" : TRUE});
- (constrained_models[k])[utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
- ^l = 0;
- estimators.ApplyExistingEstimates (lf_prop, model_mapping, alternative, ^"terms.globals_only");
- }
-
+ alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = altL;
return {
@@ -712,6 +791,7 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
utility.getGlobalValue("terms.alternative") : alternative,
utility.getGlobalValue("terms.Null"): Null,
utility.getGlobalValue("terms.model.residue_properties") : constrained_models,
+ utility.getGlobalValue("terms.substitutions") : branch_mapping,
utility.getGlobalValue("terms.branch_selection_attributes") : branch_substitution_information,
utility.getGlobalValue("terms.prime_imputed_states") : character_map
};
@@ -761,27 +841,45 @@ function prime.report.echo (prime.report.site, prime.report.partition, prime.rep
lfunction prime.store_results (node, result, arguments) {
sub_profile = result[utility.getGlobalValue("terms.branch_selection_attributes")];
-
-
-
- if (None != sub_profile) {
-
- total_sub_count = 0;
+ sub_map = result[utility.getGlobalValue("terms.substitutions")];
+ pattern_info = arguments [4];
-
+ if (None != sub_profile) {
+ total_sub_count = 0;
sub_by_count = {"Overall" : {}};
for (p,v; in; ^"prime.properties") {
sub_by_count[p] = {};
}
+
+ sub_counts = 0;
+ aa_counts = {};
+
+ partition_index = arguments [3];
+
for (i, v; in; sub_profile) {
for (j, b; in; v) {
c = Abs (b);
total_sub_count += c;
ai = (^"prime.codon_table")[i];
aj = (^"prime.codon_table")[j];
+
+
+
+ if (Abs (ai) && Abs (aj)) {
+ if (ai != aj) {
+ for (idx, br; in; b) {
+ if (((^"prime.selected_branches")[partition_index])[br] == ^"terms.tree_attributes.test") {
+ sub_counts += 1;
+ }
+ }
+ }
+ aa_counts [ai] = 1;
+ aa_counts [aj] = 1;
+ }
+
if (Abs (ai) == 0) {ai = "?";}
if (Abs (aj) == 0) {aj = "?";}
@@ -804,7 +902,7 @@ lfunction prime.store_results (node, result, arguments) {
}
}
}
-
+
sub_profile = {};
for (p,count; in; sub_by_count) {
@@ -825,17 +923,15 @@ lfunction prime.store_results (node, result, arguments) {
}
- partition_index = arguments [3];
- pattern_info = arguments [4];
- nrows = 6 + 3*^"prime.properties.N";
+ nrows = 10 + 3*^"prime.properties.N";
result_row = { nrows, 1 };
for (i; in; ^"prime.p_value_indices") {
result_row [i] = 1.;
}
-
+
if (None != result) { // not a constant site
omninbus_ll = (result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.fit.log_likelihood")];
lrt = {utility.getGlobalValue("terms.LRT") : 2*(omninbus_ll-(result["fel"])[utility.getGlobalValue("terms.fit.log_likelihood")])};
@@ -843,8 +939,12 @@ lfunction prime.store_results (node, result, arguments) {
result_row [0] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], utility.getGlobalValue("prime.parameter_site_alpha"));
result_row [1] = estimators.GetGlobalMLE (result[utility.getGlobalValue("terms.alternative")], utility.getGlobalValue("prime.parameter_site_beta"));
- result_row [3] = (result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.fit.log_likelihood")];
- result_row [4] = (result["fel"])[utility.getGlobalValue("terms.fit.log_likelihood")];
+ result_row [2] = estimators.GetGlobalMLE (result["fel"], utility.getGlobalValue("prime.parameter_site_alpha"));
+ result_row [3] = estimators.GetGlobalMLE (result["fel"], utility.getGlobalValue("prime.parameter_site_beta"));
+ result_row [5] = sub_counts;
+ result_row [6] = utility.Array1D (aa_counts);
+ result_row [7] = (result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.fit.log_likelihood")];
+ result_row [8] = (result["fel"])[utility.getGlobalValue("terms.fit.log_likelihood")];
p_values = {"Overall" : lrt [utility.getGlobalValue("terms.p_value")]};
//result_row [5] = lrt [utility.getGlobalValue("terms.p_value")];
@@ -857,38 +957,44 @@ lfunction prime.store_results (node, result, arguments) {
sum += (alternative_lengths[_node_])[utility.getGlobalValue("terms.json.MLE")];
}
}
- result_row [2] = sum;
+ result_row [4] = sum;
+
for (key, prop; in; result[utility.getGlobalValue ("terms.model.residue_properties")]) {
- property_index = ((^"prime.property_to_index")[(^"prime.local_to_property_name")[key]])*3+6;
+ property_index = ((^"prime.property_to_index")[(^"prime.local_to_property_name")[key]])*3+10;
ll = prop[utility.getGlobalValue("terms.fit.log_likelihood")];
pv = 1-CChi2 (2*(omninbus_ll-ll),1);
rate = (^"prime.local_to_property_name")[key];
rate = ((((result[utility.getGlobalValue("terms.alternative")])[utility.getGlobalValue("terms.global")])[rate])[utility.getGlobalValue("terms.fit.MLE")]);
+ //console.log (omninbus_ll);
result_row [property_index][0] = rate;
p_values [(^"prime.local_to_property_name")[key]] = pv;
result_row [property_index+2][0] = ll;
}
+
p_values = math.HolmBonferroniCorrection(p_values);
- result_row[5] = p_values["Overall"];
+ result_row[(^"prime.p_value_indices")["Overall"]] = p_values["Overall"];
for (key, prop; in; result[utility.getGlobalValue ("terms.model.residue_properties")]) {
- property_index = ((^"prime.property_to_index")[(^"prime.local_to_property_name")[key]])*3+6;
- result_row [property_index+1] = p_values[(^"prime.local_to_property_name")[key]];
+ property_index = ((^"prime.p_value_indices")[(^"prime.local_to_property_name")[key]]);
+ result_row [property_index] = p_values[(^"prime.local_to_property_name")[key]];
}
}
+
+ //console.log (result_row);
utility.EnsureKey (^"prime.site_results", partition_index);
utility.EnsureKey (^"prime.imputed_leaf_states", partition_index);
-
+ utility.EnsureKey (^"prime.sub_mapping", partition_index);
sites_mapping_to_pattern = pattern_info[utility.getGlobalValue("terms.data.sites")];
sites_mapping_to_pattern.count = utility.Array1D (sites_mapping_to_pattern);
-
+
for (i = 0; i < sites_mapping_to_pattern.count; i+=1) {
site_index = sites_mapping_to_pattern[i];
((^"prime.site_results")[partition_index])[site_index] = result_row;
+ ((^"prime.sub_mapping")[partition_index])[site_index] = sub_map;
((^"prime.imputed_leaf_states")[partition_index])[site_index] = result[^"terms.prime_imputed_states"];
prime.report.echo (site_index, partition_index, result_row);
}
=====================================
res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
=====================================
@@ -45,7 +45,7 @@ relax.analysis_description = {
Version 4.1 adds further support for multiple hit models",
terms.io.version : "3.1.1",
terms.io.reference : "RELAX: Detecting Relaxed Selection in a Phylogenetic Framework (2015). Mol Biol Evol 32 (3): 820-832",
- terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group",
+ terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group",
terms.io.contact : "spond at temple.edu",
terms.io.requirements : "in-frame codon alignment and a phylogenetic tree, with at least two groups of branches defined using the {} notation (one group can be defined as all unlabeled branches)"
};
@@ -63,6 +63,7 @@ relax.synonymous_rate_classes = 3;
relax.SRV = "Synonymous site-to-site rates";
relax.json.srv_posteriors = "Synonymous site-posteriors";
relax.json.srv_viterbi = "Viterbi synonymous rate path";
+relax.kGroupMode = "Group mode";
relax.initial_ranges = {};
@@ -172,7 +173,8 @@ relax.multi_hit = io.SelectAnOption ({
-if (relax.numbers_of_tested_groups == 2) {
+
+if (relax.analysis_run_mode != relax.kGroupMode) {
relax.branch_sets = {relax.test_branches_name : "test",
relax.reference_branches_name : "reference"};
} else {
@@ -193,14 +195,15 @@ function relax.echo_group (group, description) {
utility.ForEachPair (relax.selected_branches, "_partition_", "_selection_",
"_selection_ = utility.Filter (_selection_, '_selector_', '_selector_ == group');
relax.group_choices[group] = 'Set ' + group + ' with ' + utility.Array1D (_selection_) + ' branches';
- io.ReportProgressMessageMD('RELAX', 'selector', '* Selected ' + Abs(_selection_) + ' branches as the _`group`_ set: \\\`' + Join (', ',utility.Keys(_selection_)) + '\\\`')");
+ io.ReportProgressMessageMD('RELAX', 'selector', '\n* Selected ' + Abs(_selection_) + ' branches as the _`group`_ set: \\\`' + Join (', ',utility.Keys(_selection_)) + '\\\`')");
};
+
relax.branch_sets ["relax.echo_group"][""];
-if (relax.numbers_of_tested_groups > 2) {
+if (relax.analysis_run_mode == relax.kGroupMode) {
KeywordArgument ("reference-group", "Branches to use as the reference group", null);
relax.reference_set_name = io.SelectAnOption (relax.group_choices, "Choose the set of branches to use as the _reference_ group");
if (relax.branch_sets [relax.reference_set_name] > 0) {
@@ -615,7 +618,7 @@ if (relax.model_set == "All") { // run all the models
/* now fit the two main models for RELAX */
-if (relax.numbers_of_tested_groups == 2) {
+if (relax.analysis_run_mode == relax.kGroupMode) {
io.ReportProgressMessageMD ("RELAX", "alt", "Fitting the alternative model to test K != 1");
} else {
io.ReportProgressMessageMD ("RELAX", "alt", "Fitting the alternative model with individual K parameters for " + relax.numbers_of_tested_groups + " branch groups");
@@ -627,7 +630,7 @@ selection.io.startTimer (relax.json [terms.json.timers], "RELAX alternative mode
relax.model_object_map = {};
relax.model_to_relax_parameter = {};
-if (relax.numbers_of_tested_groups == 2) {
+if (relax.analysis_run_mode != relax.kGroupMode) {
relax.model_object_map ["relax.reference"] = None;
relax.reference_model_namespace = "relax.reference";
relax.model_object_map ["relax.test"] = None;
@@ -941,7 +944,10 @@ function relax.FitMainTestPair () {
relax.alternative_model.fit = estimators.FitLF (relax.filter_names, relax.trees, { "0" : relax.model_map}, relax.general_descriptive.fit, relax.model_object_map, {terms.run_options.retain_lf_object: TRUE});
io.ReportProgressMessageMD("RELAX", "alt", "* " + selection.io.report_fit (relax.alternative_model.fit, 9, relax.codon_data_info[terms.data.sample_size]));
-
+ KeywordArgument ("save-fit", "Save RELAX alternative model fit to this file (default is not to save)", "/dev/null");
+ relax.save_fit_path = io.PromptUserForFilePath ("Save RELAX model fit to this file ['/dev/null' to skip]");
+ io.SpoolLFToPath(relax.alternative_model.fit[terms.likelihood_function], relax.save_fit_path);
+
if (relax.numbers_of_tested_groups == 2) {
relax.fitted.K = estimators.GetGlobalMLE (relax.alternative_model.fit,terms.relax.k);
@@ -1011,6 +1017,7 @@ function relax.FitMainTestPair () {
selection.io.report_dnds (relax.inferred_distribution_ref);
relax.alternative_model.fit = relax.alternative_model.fit.take2;
+ io.SpoolLFToPath(relax.alternative_model.fit.take2[terms.likelihood_function], relax.save_fit_path);
}
DeleteObject (relax.alternative_model.fit.take2);
@@ -1038,9 +1045,7 @@ function relax.FitMainTestPair () {
relax.report_multi_hit (relax.alternative_model.fit, relax.distribution_for_json, "RELAX", "alt-mh");
relax._report_srv (relax.alternative_model.fit, FALSE);
- KeywordArgument ("save-fit", "Save RELAX alternative model fit to this file (default is not to save)", "/dev/null");
- io.SpoolLFToPath(relax.alternative_model.fit[terms.likelihood_function], io.PromptUserForFilePath ("Save RELAX model fit to this file ['/dev/null' to skip]"));
-
+
selection.io.json_store_lf (relax.json,
relax.alternative_name,
relax.alternative_model.fit[terms.fit.log_likelihood],
@@ -1523,7 +1528,7 @@ lfunction relax.BS_REL._DefineQ (bs_rel, namespace) {
//------------------------------------------------------------------------------
lfunction relax.select_branches(partition_info) {
- kGroupMode = "Group mode";
+ kGroupMode = ^"relax.kGroupMode";
io.CheckAssertion("utility.Array1D (`&partition_info`) == 1", "RELAX only works on a single partition dataset");
available_models = {};
@@ -1535,7 +1540,8 @@ lfunction relax.select_branches(partition_info) {
utility.ForEach (tree_for_analysis[utility.getGlobalValue("terms.trees.model_map")], "_value_", "`&available_models`[_value_] += 1");
list_models = utility.sortStrings(utility.Keys(available_models)); // get keys
option_count = Abs (available_models);
-
+
+
io.CheckAssertion ("`&option_count` >= 2", "RELAX requires at least one designated set of branches in the tree.");
nontrivial_groups = option_count;
@@ -1546,20 +1552,25 @@ lfunction relax.select_branches(partition_info) {
run_mode = None;
- if (nontrivial_groups >= 3) { // could run as a group set
+
+
+ if (nontrivial_groups >= 2) { // could run as a group set
run_mode = io.SelectAnOption ({
{kGroupMode, "Run the test for equality of selective regimes among " + nontrivial_groups + " groups of branches"}
{"Classic mode", "Select one test and one reference group of branches, with the rest of the branches treated as unclassified"}
}, "Group test mode");
if (run_mode == kGroupMode) {
utility.SetEnvVariable ("relax.numbers_of_tested_groups", nontrivial_groups);
- utility.ForEachPair (tree_for_analysis[utility.getGlobalValue("terms.trees.model_map")], "_key_", "_value_", "
- if ('' == _value_ ) {
- `&tree_configuration`[_key_] = utility.getGlobalValue('relax.unclassified_branches_name');
- } else {
- `&tree_configuration`[_key_] = _value_;
- }
- ");
+
+ for (_key_, _value_; in; tree_for_analysis[utility.getGlobalValue("terms.trees.model_map")]) {
+ if ('' == _value_ ) {
+ tree_configuration[_key_] = utility.getGlobalValue('relax.unclassified_branches_name');
+ } else {
+ tree_configuration[_key_] = _value_;
+ }
+ }
+
+ utility.SetEnvVariable ("relax.analysis_run_mode", kGroupMode);
}
}
=====================================
res/TemplateBatchFiles/files.lst
=====================================
@@ -92,7 +92,6 @@
"BST","Use the improved branch-site REL method of Yang and Nielsen (2005) to look for episodic selection in sequences.","YangNielsenBranchSite2005.bf";
"BT","Test whether a branch (or branches) in the tree evolves under different dN and dS than the rest of the tree.","TestBranchDNDS.bf";
"NY","Test for positive selection using the approach of Nielsen and Yabg, by sampling global dN/dS from an array of distributions, and using Bayesian posterior to identify the sites with dN/dS>1.","NielsenYang.bf";
-"PSM","Test for positive selection using the approach of Nielsen and Yang, by sampling global dN/dS from an array of distributions, and using Bayesian posterior to identify the sites with dN/dS>1. Multiple subsets of one data set with shared dN/dS.","MFPositiveSelection.bf";
"","Test for recombination.","!Recombination";
"GARD","[GARD] Screen an alignment using GARD (requires an MPI environment).","GARD.bf";
=====================================
res/TemplateBatchFiles/libv3/all-terms.bf
=====================================
@@ -397,6 +397,8 @@ namespace terms{
data = "data";
residue_properties = "residue_properties";
argument_collector = "prompt_and_define";
+
+ nonsyn_multipliers = "non-synonymous multipliers";
}
=====================================
res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf
=====================================
@@ -14,6 +14,8 @@ LoadFunctionLibrary("../../convenience/math.bf");
*/
+models.codon.BS_REL.rate_term = "alpha";
+
/**
* @name models.codon.BS_REL.ModelDescription
* @param {String} type
@@ -45,7 +47,7 @@ lfunction models.codon.BS_REL.ModelDescription(type, code, components) {
utility.getGlobalValue("terms.model.constrain_branch_length"): "models.codon.BS_REL.constrain_branch_length",
utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.empirical.corrected.CF3x4",
utility.getGlobalValue("terms.model.q_ij"): "", // set for individual components in models.codon.BS_REL._DefineQ
- utility.getGlobalValue("terms.model.time"): "models.DNA.generic.Time",
+ utility.getGlobalValue("terms.model.time"): "models.BS_REL.Time",
utility.getGlobalValue("terms.model.defineQ"): "models.codon.BS_REL._DefineQ",
utility.getGlobalValue("terms.model.post_definition"): "models.codon.BS_REL.post_definition"
};
@@ -113,7 +115,7 @@ lfunction models.codon.BS_REL_Per_Branch_Mixing._DefineQ(bs_rel, namespace) {
ExecuteCommands ("
function rate_generator (fromChar, toChar, namespace, model_type, model) {
return models.codon.MG_REV._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
- 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ ^'models.codon.BS_REL.rate_term', utility.getGlobalValue('terms.parameters.synonymous_rate'),
'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component));
}"
@@ -140,6 +142,8 @@ lfunction models.codon.BS_REL_Per_Branch_Mixing._DefineQ(bs_rel, namespace) {
*/
lfunction models.codon.BS_REL.ExtractMixtureDistribution (bs_rel) {
+
+
count = bs_rel [utility.getGlobalValue ("terms.model.components")];
if (Type (count) == "Matrix") {
@@ -149,14 +153,23 @@ lfunction models.codon.BS_REL.ExtractMixtureDistribution (bs_rel) {
rates = {count, 1};
weights = {count-1, 1};
- for (i = 1; i <= count; i+=1) {
- rates [i-1] = ((bs_rel[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.global")])[terms.AddCategory (utility.getGlobalValue ("terms.parameters.omega_ratio"), i)];
- if (i < count ) {
- weights [i-1] = ((bs_rel[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.global")])[terms.AddCategory (utility.getGlobalValue ("terms.mixture.mixture_aux_weight"), i )];
+ if (bs_rel[^"terms.model.type"] == ^"terms.local") {
+ for (i = 1; i <= count; i+=1) {
+ rates [i-1] = ((bs_rel[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.local")])[terms.AddCategory (utility.getGlobalValue ("terms.parameters.nonsynonymous_rate"), i)];
+ if (i < count ) {
+ weights [i-1] = ((bs_rel[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.local")])[terms.AddCategory (utility.getGlobalValue ("terms.mixture.mixture_aux_weight"), i )];
+ }
+ }
+ return {"rates" : rates, "weights" : weights, ^"terms.parameters.synonymous_rate" : ((bs_rel[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.parameters.synonymous_rate")]};
+ } else {
+ for (i = 1; i <= count; i+=1) {
+ rates [i-1] = ((bs_rel[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.global")])[terms.AddCategory (utility.getGlobalValue ("terms.parameters.omega_ratio"), i)];
+ if (i < count ) {
+ weights [i-1] = ((bs_rel[utility.getGlobalValue ("terms.parameters")])[utility.getGlobalValue ("terms.global")])[terms.AddCategory (utility.getGlobalValue ("terms.mixture.mixture_aux_weight"), i )];
+ }
}
}
-
return {"rates" : rates, "weights" : weights };
}
@@ -239,7 +252,7 @@ lfunction models.codon.BS_REL._DefineQ(bs_rel, namespace) {
ExecuteCommands ("
function rate_generator (fromChar, toChar, namespace, model_type, model) {
return models.codon.MG_REV._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
- 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ ^'models.codon.BS_REL.rate_term', utility.getGlobalValue('terms.parameters.synonymous_rate'),
'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component));
}"
@@ -397,7 +410,7 @@ function models.codon.BS_REL.post_definition(model) {
*/
function models.codon.BS_REL.get_branch_length(model, tree, node) {
-
+
parameters.SetLocalModelParameters (model, tree, node);
parameters.SetCategoryVariables (model);
bl = utility.GetEnvVariable ("BRANCH_LENGTH_STENCIL");
@@ -431,7 +444,7 @@ lfunction models.codon.BS_REL._DefineQ.MH (bs_rel, namespace) {
rg = "function rate_generator (fromChar, toChar, namespace, model_type, model) {
return models.codon.MG_REV_TRIP._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
- 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ ^'models.codon.BS_REL.rate_term', utility.getGlobalValue('terms.parameters.synonymous_rate'),
'beta_' + component, terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component__),
'omega' + component, terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component__),
'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate'),
@@ -482,7 +495,7 @@ lfunction models.codon.BS_REL_SRV._DefineQ.MH (bs_rel, namespace) {
rg = "function rate_generator (fromChar, toChar, namespace, model_type, model) {
return models.codon.MG_REV_TRIP._GenerateRate_generic (fromChar, toChar, namespace, model_type, model[utility.getGlobalValue('terms.translation_table')],
- 'alpha', utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ ^'models.codon.BS_REL.rate_term', utility.getGlobalValue('terms.parameters.synonymous_rate'),
'beta_`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.nonsynonymous_rate'), component),
'omega`component`', terms.AddCategory (utility.getGlobalValue('terms.parameters.omega_ratio'), component),
'delta', utility.getGlobalValue('terms.parameters.multiple_hit_rate'),
@@ -561,3 +574,10 @@ lfunction models.codon.BS_REL_SRV._DefineQ.MH (bs_rel, namespace) {
return bs_rel;
}
+
+function models.BS_REL.Time (options) {
+ if (options == terms.global) {
+ return models.DNA.generic.Time (options);
+ }
+ return models.codon.BS_REL.rate_term;
+}
=====================================
res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf
=====================================
@@ -148,7 +148,6 @@ function models.codon.MG_REV.set_branch_length(model, value, parameter) {
if ( null != models.codon.MG_REV.set_branch_length.beta && null != models.codon.MG_REV.set_branch_length.alpha) {
-
models.codon.MG_REV.set_branch_length.alpha.p = parameter + "." + models.codon.MG_REV.set_branch_length.alpha;
models.codon.MG_REV.set_branch_length.beta.p = parameter + "." + models.codon.MG_REV.set_branch_length.beta;
@@ -209,8 +208,7 @@ function models.codon.MG_REV.set_branch_length(model, value, parameter) {
parameters.RemoveConstraint(models.codon.MG_REV.set_branch_length.beta);
Eval ("`models.codon.MG_REV.set_branch_length.beta.p` =" + Eval(models.codon.MG_REV.set_branch_length.beta));
} else { // beta IS constrained
- //parameters.SetConstraint (models.codon.MG_REV.set_branch_length.beta, parameters.GetConstraint (models.codon.MG_REV.set_branch_length.alpha.p),"");
- //parameters.SetConstraint (models.codon.MG_REV.set_branch_length.alpha, models.codon.MG_REV.set_branch_length.alpha.p,"");
+
/** the branch length expression is going to be in terms of ** template ** parameters, but constraints will be in
terms of instantiated parameters, so the expression to solve for needs to be temporarily bound to the global variable **/
@@ -218,7 +216,11 @@ function models.codon.MG_REV.set_branch_length(model, value, parameter) {
parameters.SetConstraint ( models.codon.MG_REV.set_branch_length.alpha.p, models.codon.MG_REV.set_branch_length.alpha, "");
parameters.SetConstraint ( models.codon.MG_REV.set_branch_length.beta, models.codon.MG_REV.set_branch_length.beta.p, "");
+
+
ExecuteCommands("FindRoot (models.codon.MG_REV.set_branch_length.lp,(" + model[terms.model.branch_length_string] + ")-" + 3*value + "," + models.codon.MG_REV.set_branch_length.alpha + ",0,10000)");
+
+
Eval("`models.codon.MG_REV.set_branch_length.alpha.p` =" + models.codon.MG_REV.set_branch_length.lp);
parameters.RemoveConstraint ( models.codon.MG_REV.set_branch_length.beta);
parameters.RemoveConstraint ( models.codon.MG_REV.set_branch_length.alpha.p);
=====================================
res/TemplateBatchFiles/libv3/models/codon/MG_REV_PROPERTIES.bf
=====================================
@@ -379,7 +379,6 @@ lfunction models.codon.MG_REV_PROP.ModelDescription(type, code, properties) {
mg_base[utility.getGlobalValue("terms.model.residue_name_map")] = parameters.ValidateIDs (utility.Keys (mg_base[utility.getGlobalValue("terms.model.residue_properties")]));
mg_base[utility.getGlobalValue("terms.model.post_definition")] = "models.codon.MG_REV_PROP.post_definition";
mg_base[utility.getGlobalValue("terms.model.set_branch_length")] = "models.codon.MG_REV_PROP.set_branch_length";
-
return mg_base;
}
@@ -388,6 +387,7 @@ lfunction models.codon.MG_REV_PROP.ModelDescription(type, code, properties) {
* @param {String or AssociativeList} properties
* @return {AssociativeList}
* Convert a set of amino-acid properties into the expected format
+ * This will also recenter the properties (mean 0, variance of 1)
*/
@@ -428,7 +428,25 @@ lfunction models.codon.MG_REV_PROP._munge_properties (properties) {
}
}
}
-
+
+
+ for (key, value; in; properties) {
+ sum = 0;
+ sum2 = 0;
+
+ for (aa, prop; in; value) {
+ sum += prop;
+ sum2 += prop*prop;
+ }
+
+ sum = sum / 20;
+ norm = Sqrt (sum2 / 20 - sum*sum);
+
+ for (aa, prop; in; value) {
+ value [aa] = (prop-sum) / norm;
+ }
+ }
+
assert (utility.Array1D(properties) > 0, "At least one valid property set must be defined");
return properties;
}
@@ -480,7 +498,6 @@ lfunction models.codon.MG_REV_PROP._GenerateRate_generic (fromChar, toChar, name
nuc_rate = parameters.AppendMultiplicativeTerm (nuc_rate, nuc_p);
}
-
rate_entry = nuc_rate;
if (_tt[fromChar] != _tt[toChar]) {
@@ -514,6 +531,7 @@ lfunction models.codon.MG_REV_PROP._GenerateRate_generic (fromChar, toChar, name
rate_entry += "*Exp(-(" + Join("+",aa_rate) + "))";
(_GenerateRate.p[model_type])[alpha_term] = alpha;
rate_entry = "Min(10000," + rate_entry + "*" + beta + ")";
+
}
} else {
if (model_type == utility.getGlobalValue("terms.local")) {
@@ -544,7 +562,7 @@ lfunction models.codon.MG_REV_PROP.post_definition (model) {
}
lfunction models.codon.MG_REV_PROP.set_branch_length(model, value, parameter) {
-
+
if (utility.Has (model, ^"terms.model.MG_REV_PROP.mean_prop", "Number")) {
properties = model.GetLocalParameters_RegExp(model, terms.propertyImportance (""));
for (tag, id; in; properties) {
=====================================
res/TemplateBatchFiles/libv3/models/codon/MSS.bf
=====================================
@@ -29,7 +29,7 @@ lfunction model.codon.MSS.prompt_and_define (type, code) {
{"SynREV", "Each set of codons mapping to the same amino-acid class have a separate substitution rate (Valine == neutral)"}
{"SynREV2", "Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (no rate scaling))"}
{"SynREV2g", "Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (Valine == neutral). All between-class synonymous substitutions share a rate."}
- {"SynREVCodon", "Each set of codons mapping to the same amino-acid class have a separate substitution rate (fully estimated)"}
+ {"SynREVCodon", "Each codon pair that is exchangeable gets its own substitution rate (fully estimated)"}
{"Random", "Random partition (specify how many classes; largest class = neutral)"}
{"File", "Load a TSV partition from file (prompted for neutral class)"}
},
@@ -68,11 +68,13 @@ lfunction model.codon.MSS.prompt_and_define (type, code) {
}
if (partitioning_option == "SynREV" || partitioning_option == "SynREVCodon" ) {
- bins = {};
- mapping = {};
+ bins = {};
+ mapping = {};
+ mapping_codon = {};
tt = gc [^"terms.translation_table"];
for (codon; in; gc [^"terms.sense_codons"]) {
mapping[codon] = tt[codon];
+ mapping_codon [codon] = codon;
bins[mapping[codon]] += 1;
}
if (partitioning_option == "SynREV") {
@@ -81,7 +83,7 @@ lfunction model.codon.MSS.prompt_and_define (type, code) {
);
}
return models.codon.MSS.ModelDescription(type, code,
- {^"terms.model.MSS.codon_classes" : mapping, ^"terms.model.MSS.neutral" : ""}
+ {^"terms.model.MSS.codon_classes" : mapping_codon, ^"terms.model.MSS.neutral" : "", ^"terms.model.MSS.normalize" : FALSE}
);
}
@@ -176,6 +178,7 @@ lfunction model.codon.MSS.prompt_and_define (type, code) {
//----------------------------------------------------------------------------------------------------------------
lfunction models.codon.MSS.ModelDescription(type, code, codon_classes) {
+
m = Call ("models.codon.MG_REV.ModelDescription", type, code);
if (^"fitter.frequency_type" == "F3x4") {
@@ -268,6 +271,14 @@ lfunction models.codon.MSS._GenerateRate (fromChar, toChar, namespace, model_typ
class_from = (model[^"terms.model.MSS.codon_classes"])[fromChar];
class_to = (model[^"terms.model.MSS.codon_classes"])[toChar];
+
+
+ if ((Abs (class_from) && Abs (class_to)) == FALSE) {
+ class_from = (model[^"terms.model.MSS.codon_classes"])[fromChar+toChar];
+ class_to = class_from;
+ }
+
+ assert (Abs (class_from) && Abs (class_to), "The neutral class for `fromChar` to `toChar` is not specified in the model definition");
if (class_from == class_to) {
if (class_from == nr) {
@@ -300,21 +311,26 @@ lfunction models.codon.MSS._GenerateRate (fromChar, toChar, namespace, model_typ
codon_rate = parameters.ApplyNameSpace(between_rate, namespace);
}
(_GenerateRate.p[model_type])[^"terms.model.MSS.between"] = codon_rate;
-
+
} else {
- if (model_type == utility.getGlobalValue("terms.local")) {
- codon_rate = alpha + "_" + class_from + "_" + class_to;
+ if (class_from + class_to == nr) {
+ //console.log ("NEUTRAL");
+ codon_rate = 1;
} else {
- codon_rate = parameters.ApplyNameSpace(alpha + "_" + class_from + "_" + class_to, namespace);
+ if (model_type == utility.getGlobalValue("terms.local")) {
+ codon_rate = alpha + "_" + class_from + "_" + class_to;
+ } else {
+ codon_rate = parameters.ApplyNameSpace(alpha + "_" + class_from + "_" + class_to, namespace);
+ }
+ (_GenerateRate.p[model_type])[alpha_term + ^"terms.model.MSS.syn_rate_between" + class_from + " and " + class_to] = codon_rate;
}
- (_GenerateRate.p[model_type])[alpha_term + ^"terms.model.MSS.syn_rate_between" + class_from + " and " + class_to] = codon_rate;
}
rate_entry += "*" + codon_rate;
}
}
_GenerateRate.p[utility.getGlobalValue("terms.model.rate_entry")] = rate_entry;
- }
+ }
return _GenerateRate.p;
}
=====================================
res/TemplateBatchFiles/libv3/models/model_functions.bf
=====================================
@@ -402,11 +402,15 @@ function models.generic.SetBranchLength (model, value, parameter) {
if (Abs((model[terms.parameters])[terms.local]) >= 1) {
+
if (Type (model [terms.model.branch_length_string]) == "String") {
models.generic.SetBranchLength.expression = model [terms.model.branch_length_string];
if (Abs((model[terms.parameters])[terms.local]) > 1) {
+
models.generic.SetBranchLength.bl = Call (model [terms.model.time], model[terms.model.type]);
+
+
models.generic.SetBranchLength.bl.p = parameter + "." + models.generic.SetBranchLength.bl;
if (parameters.IsIndependent (models.generic.SetBranchLength.bl.p) == FALSE) {
models.generic.SetBranchLength.bl = utility.First (utility.UniqueValues ((model[terms.parameters])[terms.local]), "_name_",
@@ -418,14 +422,27 @@ function models.generic.SetBranchLength (model, value, parameter) {
}
models.generic.SetBranchLength.bl.p = parameter + "." + models.generic.SetBranchLength.bl;
}
-
+
+
models.generic.SetBranchLength.substitution = {};
- utility.ForEach (utility.UniqueValues ((model[terms.parameters])[terms.local]), "_name_",
- 'if (_name_ != models.generic.SetBranchLength.bl) {
- models.generic.SetBranchLength.substitution [_name_] = Eval (parameter + "." + _name_);
- }');
-
+
+
+ for (_name_; in; utility.UniqueValues ((model[terms.parameters])[terms.local])) {
+
+ if (_name_ != models.generic.SetBranchLength.bl) {
+ if (parameters.IsIndependent (parameter + "." + _name_)) {
+ models.generic.SetBranchLength.substitution [_name_] = Eval (parameter + "." + _name_);
+ } else {
+ models.generic.SetBranchLength.substitution [_name_] = parameter + "." + _name_;
+ }
+ } else {
+ models.generic.SetBranchLength.substitution [_name_] = parameter + "." + _name_;
+ }
+ }
+
+ models.generic.SetBranchLength.bl = parameter + "." + models.generic.SetBranchLength.bl;
models.generic.SetBranchLength.expression = Simplify (models.generic.SetBranchLength.expression, models.generic.SetBranchLength.substitution);
+
} else {
models.generic.SetBranchLength.bl = (Columns ((model[terms.parameters])[terms.local]))[0];
models.generic.SetBranchLength.bl.p = parameter + "." + models.generic.SetBranchLength.bl;
@@ -440,7 +457,7 @@ function models.generic.SetBranchLength (model, value, parameter) {
messages.log ("models.generic.SetBranchLength: " + parameter + "." + models.generic.SetBranchLength.bl + ":=(" + value[terms.model.branch_length_scaler] + ")*" + models.generic.SetBranchLength.t);
} else {
- Eval (parameter + "." + models.generic.SetBranchLength.bl + ":=(" + value[terms.model.branch_length_scaler] + ")*" + value[terms.branch_length]);
+ Eval (parameter + "." + models.generic.SetBranchLength.bl + ":=(" + value[terms.model.branch_length_scaler] + ")*" + value[terms.branch_length]);
messages.log ("models.generic.SetBranchLength: " + parameter + "." + models.generic.SetBranchLength.bl + ":=(" + value[terms.model.branch_length_scaler] + ")*" + models.generic.SetBranchLength.t);
}
=====================================
res/TemplateBatchFiles/libv3/models/parameters.bf
=====================================
@@ -634,26 +634,50 @@ function parameters.helper.copy_definitions(target, source) {
}
/**
- * @name pparameters.SetStickBreakingDistribution
+ * @name pparameters.SetStickBreakingDistributionPrefix
* @param {AssociativeList} parameters
* @param {Matrix} values
+ * #param {String|} prefix to use for variable names
* @returns nothing
*/
-lfunction parameters.SetStickBreakingDistribution (parameters, values) {
+lfunction parameters.SetStickBreakingDistributionPrefix (parameters, values, prefix) {
rate_count = Rows (values);
left_over = 1;
-
- for (i = 0; i < rate_count; i += 1) {
- parameters.SetValue ((parameters["rates"])[i], values[i][0]);
- if (i < rate_count - 1) {
- break_here = values[i][1] / left_over;
- parameters.SetValue ((parameters["weights"])[i], break_here);
- left_over = left_over * (1-break_here);
- }
+
+ if (utility.Has (parameters, ^"terms.parameters.synonymous_rate", "String")) {
+ alpha = parameters [^"terms.parameters.synonymous_rate"];
+ for (i = 0; i < rate_count; i += 1) {
+ parameters.SetConstraint (prefix + (parameters["rates"])[i], parameters.AppendMultiplicativeTerm (prefix + alpha, values[i][0]), "");
+ if (i < rate_count - 1) {
+ break_here = values[i][1] / left_over;
+ parameters.SetValue (prefix + (parameters["weights"])[i], break_here);
+ left_over = left_over * (1-break_here);
+ }
+ }
+ } else {
+ for (i = 0; i < rate_count; i += 1) {
+ parameters.SetValue (prefix + (parameters["rates"])[i], values[i][0]);
+ if (i < rate_count - 1) {
+ break_here = values[i][1] / left_over;
+ parameters.SetValue (prefix + (parameters["weights"])[i], break_here);
+ left_over = left_over * (1-break_here);
+ }
+ }
}
}
+/**
+ * @name pparameters.SetStickBreakingDistribution
+ * @param {AssociativeList} parameters
+ * @param {Matrix} values
+ * @returns nothing
+ */
+
+lfunction parameters.SetStickBreakingDistribution (parameters, values) {
+ parameters.SetStickBreakingDistributionPrefix (parameters, values, "");
+}
+
/**
* @name pparameters.GetStickBreakingDistribution
* @param {AssociativeList} parameters
=====================================
res/TemplateBatchFiles/libv3/tasks/ancestral.bf
=====================================
@@ -539,7 +539,6 @@ lfunction ancestral.ComputeSubstitutionBySite (ancestral_data, site, branch_filt
own_state = (ancestral_data["MATRIX"])[self][site];
parent_state = (ancestral_data["MATRIX"])[parent][site];
-
if ((own_state != parent_state) && (own_state != -1) && (parent_state != -1)) {
own_state = (ancestral_data["MAPPING"])[own_state];
parent_state = (ancestral_data["MAPPING"])[parent_state];
@@ -557,6 +556,57 @@ lfunction ancestral.ComputeSubstitutionBySite (ancestral_data, site, branch_filt
}
+/*******************************************
+ **
+ * @name ancestral.ComputeCompressedSubstitutionsBySite
+ * computes the minimal representation of character states needed to recover the substitution
+ * history
+
+ * @param {Dictionary} ancestral_data - the dictionary returned by ancestral.build
+ * @param {Number} site - the 0-based index of a site
+
+
+ * @returns
+ {
+ "root" : "state",
+ "branch1" : "state",
+ "branch2" : "state"
+ ....
+ }
+
+ character state at the root and at every branch that is different from
+ its parent
+
+ */
+
+/*******************************************/
+
+
+lfunction ancestral.ComputeCompressedSubstitutionsBySite (ancestral_data, site) {
+
+ result = {};
+ branches = (ancestral_data["DIMENSIONS"])["BRANCHES"];
+
+ for (b = 1; b <= branches; b += 1) {
+ self = b;
+ parent = ((ancestral_data["TREE_AVL"])[b])["Parent"];
+ own_state = (ancestral_data["MATRIX"])[self-1][site];
+ if (parent > 0) {
+ parent_state = (ancestral_data["MATRIX"])[parent-1][site];
+
+ if ((own_state != parent_state)) {
+ result [(((ancestral_data["TREE_AVL"])[b]))["Name"]] = (ancestral_data["MAPPING"])[own_state];
+ }
+ } else {
+ result ["root"] = (ancestral_data["MAPPING"])[own_state];
+ }
+ }
+
+
+ return result;
+
+}
+
/*******************************************
**
* @name ancestral.ComputeSiteComposition
=====================================
src/core/fstring.cpp
=====================================
@@ -575,14 +575,29 @@ HBLObjectRef _FString::SubstituteAndSimplify(HBLObjectRef arguments, HBLObjectRe
if (variable_reference) {
HBLObjectRef replacement = argument_substitution_map->GetByKey (*variable_reference->GetName());
if (replacement) {
+ if (replacement->ObjectClass () == STRING) {
+ long sub_idx = LocateVarByName (((_FString*)replacement->Compute())->get_str());
+ if (sub_idx >= 0L) {
+ current_term->SetAVariable(sub_idx);
+ continue;
+ }
+ }
current_term->SetAVariable(-1);
current_term->SetNumber ((HBLObjectRef)replacement->makeDynamic());
}
}
}
+ evaluator.SimplifyConstants();
+ _Polynomial* is_poly = (_Polynomial*)evaluator.ConstructPolynomial();
+ if (is_poly) {
+ //StringToConsole("Secondary simplify\n");
+ _Formula pf (is_poly);
+ evaluator.Duplicate(&pf);
+ }
+ } else {
+ evaluator.SimplifyConstants();
}
- evaluator.SimplifyConstants();
return _returnStringOrUseCache(((_String*)evaluator.toStr(kFormulaStringConversionNormal)), cache);
}
}
=====================================
src/core/global_things.cpp
=====================================
@@ -122,7 +122,7 @@ namespace hy_global {
kErrorStringDatasetRefIndexError ("Dataset index reference out of range"),
kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"),
kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"),
- kHyPhyVersion = _String ("2.5.36"),
+ kHyPhyVersion = _String ("2.5.38"),
kNoneToken = "None",
kNullToken = "null",
=====================================
src/core/include/function_templates.h
=====================================
@@ -156,7 +156,7 @@ void ArrayForEach(ARG_TYPE *array, unsigned long dimension,
template <typename ARG_TYPE>
void InitializeArray(ARG_TYPE *array, unsigned long dimension,
ARG_TYPE &&value) {
- #pragma clang loop unroll_count(8)
+ //#pragma clang loop unroll_count(8)
#pragma GCC unroll 4
for (unsigned long i = 0UL; i < dimension; i++) {
array[i] = value;
=====================================
src/core/include/matrix.h
=====================================
@@ -342,7 +342,8 @@ public:
// otherwise returns the largest element
hyFloat AbsValue (void) const;
-
+ hyFloat L11Norm (void) const;
+
template <typename CALLBACK> HBLObjectRef ApplyScalarOperation (CALLBACK && functor, HBLObjectRef cache) const;
// return the matrix of logs of every matrix element
=====================================
src/core/matrix.cpp
=====================================
@@ -3302,6 +3302,20 @@ hyFloat _Matrix::AbsValue (void) const{
return 0.;
}
+//_____________________________________________________________________________________________
+hyFloat _Matrix::L11Norm (void) const{
+ if (is_numeric()) {
+ hyFloat norm = 0.;
+
+ this->ForEach ([&] (hyFloat&& value, unsigned long, long) -> void {norm += fabs(value);},
+ [&] (unsigned long index) -> hyFloat {return theData[index];});
+
+ return norm;
+ }
+
+ return 0.;
+}
+
//_____________________________________________________________________________________________
HBLObjectRef _Matrix::Abs (HBLObjectRef cache)
{
@@ -3410,7 +3424,7 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract
#pragma GCC unroll 4
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
#pragma GCC ivdep
#pragma ivdep
for (long idx = 0; idx < upto; idx+=8) {
@@ -3454,7 +3468,7 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract
#pragma GCC unroll 4
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
#pragma GCC ivdep
#pragma ivdep
for (long idx = 0; idx < upto; idx+=8) {
@@ -3998,7 +4012,7 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
#pragma GCC unroll 4
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
for (long k = 0; k < vDim; k+=2) {
float64x2_t D4, B4;
DO_GROUP_OP1 (D4, B4, k);
@@ -4380,7 +4394,7 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
#pragma GCC unroll 4
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
for (long k = 0; k < dimm4; k+=2) {
float64x2_t D4, B4;
DO_GROUP_OP1 (D4, B4, k);
@@ -5624,6 +5638,7 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
*stash2 = 0;
// = new hyFloat[hDim*(1+vDim)];
+
if (!is_polynomial()) {
stash = (hyFloat*)alloca(sizeof (hyFloat) * hDim*(1+vDim));
if (theIndex) {
@@ -5663,6 +5678,7 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
} else {
power2 = 0;
}
+ //fprintf (stderr, "MAX %18.12g SCALE %d\n", max, power2);
} else {
@@ -5730,6 +5746,8 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
i=2;
+ //hyFloat const errorBound = truncPrecision * 10.;
+
if (is_dense()) { // avoid matrix allocation
_Matrix temp ;
@@ -5752,12 +5770,14 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
temp *= 1.0/i;
(*result) += temp;
i ++;
-/*#ifndef _OPENMP
+/*
+#ifndef _OPENMP
taylor_terms_count++;
#else
#pragma omp atomic
taylor_terms_count++;
-#endif*/
+#endif
+*/
} while (temp.IsMaxElement(tMax*truncPrecision*i));
@@ -5770,19 +5790,35 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
} else {
_Matrix temp (*this);
+
+ /*hyFloat matrixNorm = L11Norm();
+
+ hyFloat errorEstimate1 = matrixNorm * matrixNorm / 2.,
+ errorEstimate ;
+ */
_Matrix tempS (hDim, vDim, false, temp.storageType);
do {
temp.MultbyS (*this,theIndex!=nil, &tempS, stash);
temp *= 1.0/i;
(*result) += temp;
i ++;
- /*#ifndef _OPENMP
+/*
+ #ifndef _OPENMP
taylor_terms_count++;
#else
#pragma omp atomic
taylor_terms_count++;
- #endif*/
+ #endif
+*/
+ /*errorEstimate1 *= matrixNorm / (i+1.);
+ errorEstimate = errorEstimate1 * (1./ ( 1. - matrixNorm/(i + 1.)));
+ if (errorEstimate > 0. && errorEstimate < errorBound) {
+ break;
+ }*/
+
} while (temp.IsMaxElement(tMax*truncPrecision*i));
+ //printf ("%ld %g %20.16g %ld\n",i,matrixNorm, errorEstimate,temp.IsMaxElement(tMax*truncPrecision*i));
+
}
// use Pade (4,4) here
@@ -5822,7 +5858,7 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
#pragma GCC unroll 4
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
for (long c = from; c < compressedIndex[r]; c++, i++) {
theIndex[i] = compressedIndex[c+hDim] * vDim + r;
}
@@ -5884,6 +5920,18 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
if (check_transition) {
+ /*printf ("SCALE %lg : \n", scale_to);
+
+ for (unsigned long r = 0L; r < hDim; r ++) {
+ hyFloat sum = 0.;
+ //printf ("%ld %18.16lg %18.16lg\n", r, (*result)(r,r), (*result)(r,r) - 1.);
+ for (unsigned long c = 0L; c < vDim; c++) {
+ sum += (*result)(r,c);
+ }
+ if (sum != 1.)
+ printf ("%ld %18.16g\n", r, sum);
+ }*/
+
bool pass = true;
if (result->is_dense()) {
for (unsigned long r = 0L; r < result->lDim; r += result->vDim) {
@@ -5906,22 +5954,22 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
return this->Exponentiate(scale_to * 100, true);
}
- /*printf ("SCALE %lg : \n", scale_to);
-
- for (unsigned long r = 0L; r < hDim; r ++) {
- hyFloat sum = 0.;
- printf ("%ld %18.16lg %18.16lg\n", r, (*result)(r,r), (*result)(r,r) - 1.);
- for (unsigned long c = 0L; c < vDim; c++) {
- sum += (*this)(r,c);
- }
- printf ("%ld %g\n", r, sum);
- }
- ObjectToConsole(this);
- ObjectToConsole(result);*/
+ //ObjectToConsole(this);
+ //ObjectToConsole(result);
throw _String ("Failed to compute a valid transition matrix; this is usually caused by ill-conditioned rate matrices (e.g. very large rate values)");
+ } else { // set diag to 1 - rest of row
+ if (result->is_dense()) {
+ for (unsigned long r = 0L; r < result->hDim; r ++) {
+ hyFloat sum = 0.;
+ for (unsigned long c = result->hDim * r; c < result->hDim * r + result->hDim; c ++) {
+ sum += result->theData[c];
+ }
+ result->theData [r*result->vDim + r] += 1. - sum;
+ }
+ }
}
}
@@ -6988,7 +7036,7 @@ hyFloat _Matrix::Sqr (hyFloat* _hprestrict_ stash) {
#pragma GCC unroll 4
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
#pragma GCC ivdep
#pragma ivdep
for (long k = 0; k < loopBound; k+=4) {
=====================================
src/core/tree_evaluator.cpp
=====================================
@@ -127,7 +127,7 @@ inline double _sse_sum_2 (__m128d const & x) {
template<long D> inline void __ll_handle_matrix_transpose (hyFloat const * __restrict transitionMatrix, hyFloat * __restrict tMatrixT) {
long i = 0L;
for (long r = 0L; r < D; r++) {
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long c = 0L; c < D; c++, i++) {
tMatrixT[c*D+r] = transitionMatrix[i];
@@ -154,7 +154,7 @@ template<long D> inline bool __ll_handle_conditional_array_initialization ( long
}
}*/
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long k = 0L; k < D; k++) {
parentConditionals[k] *= tMatrix[siteState+D*k];
@@ -172,7 +172,7 @@ template<long D> inline bool __ll_handle_conditional_array_initialization ( long
} else {
if (tcc) {
if (__builtin_expect((tcc->list_data[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom,0)) {
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long k = 0L; k < D; k++) {
childVector[k] = lastUpdatedSite[k];
@@ -200,7 +200,7 @@ inline bool __ll_handle_conditional_array_initialization_generic ( long * __rest
}
if (__builtin_expect(siteState >= 0L,1)) {
// a single character state; sweep down the appropriate column
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long k = 0L; k < D; k++) {
parentConditionals[k] *= tMatrix[siteState+D*k];
@@ -212,7 +212,7 @@ inline bool __ll_handle_conditional_array_initialization_generic ( long * __rest
} else {
if (tcc) {
if (__builtin_expect((tcc->list_data[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom,0)) {
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long k = 0L; k < D; k++) {
childVector[k] = lastUpdatedSite[k];
@@ -542,7 +542,7 @@ template<long D> inline void __ll_product_sum_loop (hyFloat const* _hprestrict_
#pragma GCC unroll 8
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
for (long c = 0; c < D; c++)
accumulator += tMatrix[c] * childVector[c];
@@ -558,7 +558,7 @@ inline void __ll_product_sum_loop_generic (hyFloat const* _hprestrict_ tMatrix,
#pragma GCC unroll 8
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
for (long c = 0; c < D; c++)
accumulator += tMatrix[c] * childVector[c];
@@ -595,7 +595,7 @@ template<long D, bool ADJUST> inline void __ll_loop_handle_scaling (hyFloat& sum
fprintf (stderr, "UP %ld (%ld) %lg\n", didScale, parentCode, scaler);
}*/
if (didScale) {
- #pragma unroll(4)
+ // #pragma unroll(4)
#pragma GCC unroll 4
for (long c = 0; c < D; c++) {
parentConditionals [c] *= scaler;
@@ -626,7 +626,7 @@ template<long D, bool ADJUST> inline void __ll_loop_handle_scaling (hyFloat& sum
}*/
if (didScale) {
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long c = 0; c < D; c++) {
parentConditionals [c] *= scaler;
@@ -657,7 +657,7 @@ template<bool ADJUST> inline void __ll_loop_handle_scaling_generic (hyFloat& sum
hyFloat scaler = _computeBoostScaler(scalingAdjustments [parentCode*siteCount + siteID] * _lfScalerUpwards, sum, didScale);
if (didScale) {
- #pragma unroll(8)
+ //#pragma unroll(8)
#pragma GCC unroll 8
for (long c = 0; c < D; c++) {
parentConditionals [c] *= scaler;
@@ -679,7 +679,7 @@ template<bool ADJUST> inline void __ll_loop_handle_scaling_generic (hyFloat& sum
hyFloat scaler = _computeReductionScaler (scalingAdjustments [parentCode*siteCount + siteID] * _lfScalingFactorThreshold, sum, didScale);
if (didScale) {
- #pragma unroll(8)
+ //#pragma unroll(8)
#pragma GCC unroll 8
for (long c = 0; c < D; c++) {
parentConditionals [c] *= scaler;
@@ -707,7 +707,7 @@ template<long D> inline void __ll_loop_handle_leaf_case (hyFloat* _hprestrict_ p
} else {
for (long k = siteFrom; k < siteTo; k++, pp += D) {
hyFloat lsf = localScalingFactor[k];
-#pragma unroll(4)
+//#pragma unroll(4)
#pragma GCC unroll 4
for (long s = 0; s < D; s++) {
pp[s] = lsf;
@@ -1585,7 +1585,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
accumulator = rootConditionals[rootIndex + rootState] * theProbs[rootState];
rootIndex += alphabetDimension;
} else {
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long p = 0; p < alphabetDimension; p++,rootIndex++) {
accumulator += rootConditionals[rootIndex] * theProbs[p];
@@ -1644,7 +1644,7 @@ template<long D> inline bool __lcache_loop_preface (bool isLeaf, long* __restric
long siteState = lNodeFlags[nodeCode*siteCount + siteOrdering.list_data[siteID]] ;
if (siteState >= 0L) {
unsigned long target_index = siteState;
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long k = 0L; k < D; k++, target_index+=D) {
parentConditionals[k] *= tMatrix[target_index];
@@ -1659,7 +1659,7 @@ template<long D> inline bool __lcache_loop_preface (bool isLeaf, long* __restric
if ((tcc->list_data[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom)
// the value of this conditional vector needs to be copied from a previously stored site
// subtree duplication
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long k = 0UL; k < D; k++) {
childVector[k] = lastUpdatedSite[k];
@@ -1688,7 +1688,7 @@ inline bool __lcache_loop_preface_generic (bool isLeaf, long* __restrict lNodeFl
long siteState = lNodeFlags[nodeCode*siteCount + siteOrdering.list_data[siteID]] ;
if (siteState >= 0L) {
unsigned long target_index = siteState;
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long k = 0L; k < D; k++, target_index+=D) {
parentConditionals[k] *= tMatrix[target_index];
@@ -1704,7 +1704,7 @@ inline bool __lcache_loop_preface_generic (bool isLeaf, long* __restrict lNodeFl
if ((tcc->list_data[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom)
// the value of this conditional vector needs to be copied from a previously stored site
// subtree duplication
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (long k = 0UL; k < D; k++) {
childVector[k] = lastUpdatedSite[k];
@@ -1966,7 +1966,7 @@ void _TheTree::ComputeBranchCache (
unsigned long k3 = 0UL;
for (unsigned long k = siteFrom; k < siteTo; k++) {
hyFloat scaler = localScalingFactor[k];
- #pragma unroll(4)
+ //#pragma unroll(4)
#pragma GCC unroll 4
for (unsigned long k2 = 0UL; k2 < alphabetDimension; k2++, k3++) {
parentConditionals [k3] = scaler;
@@ -2474,7 +2474,7 @@ void _TheTree::ComputeBranchCache (
#pragma GCC unroll 8
#pragma clang loop vectorize(enable)
#pragma clang loop interleave(enable)
- #pragma clang loop unroll(enable)
+ //#pragma clang loop unroll(enable)
for (long k = 0; k < alphabetDimension; k++) {
sum += parentConditionals[k];
}
=====================================
src/mains/unix.cpp
=====================================
@@ -981,6 +981,8 @@ int main (int argc, char* argv[]) {
ex.ClearExecutionList();
+ //printf ("\nTAYLOR SERIES TERMS %ld\n", taylor_terms_count);
+
GlobalShutdown ();
View it on GitLab: https://salsa.debian.org/med-team/hyphy/-/commit/2147996cdad34ba5032b2231731c0bd18d606fb7
--
View it on GitLab: https://salsa.debian.org/med-team/hyphy/-/commit/2147996cdad34ba5032b2231731c0bd18d606fb7
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220502/0f3794a0/attachment-0001.htm>
More information about the debian-med-commit
mailing list