[med-svn] [Git][med-team/hyphy][upstream] New upstream version 2.5.21+dfsg
Nilesh Patra
gitlab at salsa.debian.org
Sat Oct 31 14:03:52 GMT 2020
Nilesh Patra pushed to branch upstream at Debian Med / hyphy
Commits:
17ba9006 by Nilesh Patra at 2020-10-31T19:16:20+05:30
New upstream version 2.5.21+dfsg
- - - - -
27 changed files:
- CMakeLists.txt
- res/TemplateBatchFiles/ConvertDataFile.bf
- res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
- res/TemplateBatchFiles/SelectionAnalyses/FEL.bf
- + res/TemplateBatchFiles/SelectionAnalyses/FitMultiModel.bf
- res/TemplateBatchFiles/SelectionAnalyses/MEME.bf
- - res/TemplateBatchFiles/SelectionAnalyses/RELAX-SRV.bf
- res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
- res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
- res/TemplateBatchFiles/libv3/all-terms.bf
- res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf
- res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf
- res/TemplateBatchFiles/libv3/models/model_functions.bf
- res/TemplateBatchFiles/libv3/models/parameters.bf
- res/TemplateBatchFiles/libv3/models/rate_variation.bf
- res/TemplateBatchFiles/libv3/tasks/estimators.bf
- res/TemplateBatchFiles/libv3/tasks/trees.bf
- src/core/dataset_filter.cpp
- src/core/global_things.cpp
- src/core/include/variable.h
- src/core/likefunc.cpp
- src/core/matrix.cpp
- src/core/topology.cpp
- src/core/tree_evaluator.cpp
- src/core/variable.cpp
- src/core/variablecontainer.cpp
- + tests/hbltests/libv3/FMM.wbf
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -588,6 +588,7 @@ add_test (BGM HYPHYMP tests/hbltests/libv3/BGM.wbf)
add_test (CONTRAST-FEL HYPHYMP tests/hbltests/libv3/CFEL.wbf)
add_test (GARD HYPHYMP tests/hbltests/libv3/GARD.wbf)
add_test (FADE HYPHYMP tests/hbltests/libv3/FADE.wbf)
-add_test (NAME ABSREL COMMAND HYPHYMP tests/hbltests/libv3/ABSREL.wbf)
+add_test (ABSREL HYPHYMP tests/hbltests/libv3/ABSREL.wbf)
+add_test (FMM HYPHYMP tests/hbltests/libv3/FMM.wbf)
#add_test (NAME ABSREL-MH COMMAND HYPHYMP tests/hbltests/libv3/ABSREL-MH.wbf)
=====================================
res/TemplateBatchFiles/ConvertDataFile.bf
=====================================
@@ -32,7 +32,8 @@ ChoiceList (DATA_FILE_PRINT_FORMAT,"Output Format",1,SKIP_NONE,
/* 8 */ "CSV", "Comma separated characters",
/* 9 */ "FASTA sequential","FASTA Sequential Format.",
/* 10 */ "FASTA interleaved","FASTA Interleaved Format.",
- /* 11 */ "PAML compatible", "PAML Compatible PHYLIP-like format");
+ /* 11 */ "PAML compatible", "PAML Compatible PHYLIP-like format",
+ /* 12 */ "STOCKHOLM", "STOCKHOLM format");
if (DATA_FILE_PRINT_FORMAT<0)
{
=====================================
res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
=====================================
@@ -1,4 +1,4 @@
-RequireVersion ("2.5.12");
+RequireVersion ("2.5.21");
LoadFunctionLibrary("libv3/all-terms.bf"); // must be loaded before CF3x4
@@ -214,9 +214,7 @@ busted.model_generator = "models.codon.BS_REL.ModelDescription";
if (busted.do_srv) {
-
if (busted.do_bs_srv) {
- busted.model_generator = "busted.model.with.GDD";
busted.model_generator = "models.codon.BS_REL_SRV.ModelDescription";
busted.rate_class_arguments = {{busted.synonymous_rate_classes__,busted.rate_classes__}};
} else {
@@ -262,15 +260,20 @@ busted.background.bsrel_model = model.generic.DefineMixtureModel(busted.model_g
models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, terms.nucleotideRate("[ACGT]","[ACGT]"));
-/*if (busted.do_srv) {
+if (busted.do_srv) {
if (busted.do_bs_srv) {
- models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, "Mean scaler variable for");
+ for (description,var_id; in; (busted.background.bsrel_model[terms.parameters])[terms.global]) {
+ if (regexp.Find (description, terms.parameters.synonymous_rate + " for category")) {
+ var_id_2 = utility.GetByKey ((busted.test.bsrel_model[terms.parameters])[terms.global], description, "String");
+ if (None != var_id_2) {
+ parameters.SetConstraint (var_id, var_id_2, terms.global);
+ }
+ }
+ }
+
models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), "SRV [0-9]+"));
- } else {
- models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, "GDD rate category");
- models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, utility.getGlobalValue("terms.mixture.mixture_aux_weight") + " for GDD category ");
- }
-}*/
+ }
+}
busted.distribution = models.codon.BS_REL.ExtractMixtureDistribution(busted.test.bsrel_model);
@@ -408,8 +411,8 @@ busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.tree
terms.search_grid : busted.initial_grid,
terms.search_restarts : busted.N.initial_guesses
});
-
-
+
+
busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, {
"retain-lf-object": TRUE,
terms.run_options.optimization_settings :
@@ -426,11 +429,6 @@ io.SpoolLFToPath(busted.full_model[terms.likelihood_function], io.PromptUserForF
io.ReportProgressMessageMD("BUSTED", "main", "* " + selection.io.report_fit (busted.full_model, 9, busted.codon_data_info[terms.data.sample_size]));
-if (busted.do_srv_hmm) {
- busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda);
- io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3));
-
-}
io.ReportProgressMessageMD("BUSTED", "main", "* For *test* branches, the following rate distribution for branch-site combinations was inferred");
@@ -448,9 +446,7 @@ busted.distribution_for_json = {busted.FG : utility.Map (utility.Range (busted.r
terms.json.proportion : busted.inferred_test_distribution [_index_][1]}")
};
-if (busted.do_srv_hmm) {
- busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda;
-}
+
if (busted.has_background) {
io.ReportProgressMessageMD("BUSTED", "main", "* For *background* branches, the following rate distribution for branch-site combinations was inferred");
@@ -464,6 +460,19 @@ if (busted.has_background) {
if (busted.do_srv) {
+ if (busted.do_srv_hmm) {
+ busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda);
+ busted.hmm_lambda.CI = parameters.GetProfileCI(((busted.full_model[terms.global])[terms.rate_variation.hmm_lambda])[terms.id],
+ busted.full_model[terms.likelihood_function], 0.95);
+ io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3));
+
+ io.ReportProgressMessageMD ("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda,8,4) +
+ " (95% profile CI " + Format ((busted.hmm_lambda.CI )[terms.lower_bound],8,4) + "-" + Format ((busted.hmm_lambda.CI )[terms.upper_bound],8,4) + ")");
+
+ busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda.CI;
+ }
+
+
if (busted.do_bs_srv) {
busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0
} else {
@@ -560,10 +569,16 @@ if (!busted.run_test) {
if (busted.do_srv) {
if (busted.do_bs_srv) {
- busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0
+ busted.srv_info = parameters.GetStickBreakingDistribution ( busted.srv_rate_reporting) % 0;
} else {
busted.srv_info = Transpose((rate_variation.extract_category_information(busted.test.bsrel_model))["VALUEINDEXORDER"][0])%0;
}
+ if (busted.do_srv_hmm) {
+ busted.hmm_lambda = selection.io.extract_global_MLE (busted.full_model, terms.rate_variation.hmm_lambda);
+ io.ReportProgressMessageMD("BUSTED", "main", "* HMM switching rate = " + Format (busted.hmm_lambda, 8, 3));
+ busted.distribution_for_json [terms.rate_variation.hmm_lambda] = busted.hmm_lambda;
+ }
+
io.ReportProgressMessageMD("BUSTED", "main", "* The following rate distribution for site-to-site **synonymous** rate variation was inferred");
selection.io.report_distribution (busted.srv_info);
=====================================
res/TemplateBatchFiles/SelectionAnalyses/FEL.bf
=====================================
@@ -162,7 +162,10 @@ io.ReportProgressMessageMD ("fel", "codon-refit", "Improving branch lengths, nuc
fel.final_partitioned_mg_results = estimators.FitMGREV (fel.filter_names, fel.trees, fel.codon_data_info [terms.code], {
terms.run_options.model_type: terms.local,
terms.run_options.partitioned_omega: fel.selected_branches,
- terms.run_options.retain_lf_object: TRUE
+ terms.run_options.retain_lf_object: TRUE,
+ terms.run_options.optimization_settings: {
+ "OPTIMIZATION_METHOD" : "coordinate-wise"
+ }
}, fel.partitioned_mg_results);
=====================================
res/TemplateBatchFiles/SelectionAnalyses/FitMultiModel.bf
=====================================
@@ -0,0 +1,565 @@
+RequireVersion ("2.4.0");
+
+
+LoadFunctionLibrary("libv3/all-terms.bf"); // must be loaded before CF3x4
+LoadFunctionLibrary("libv3/UtilityFunctions.bf");
+LoadFunctionLibrary("libv3/IOFunctions.bf");
+LoadFunctionLibrary("libv3/tasks/estimators.bf");
+LoadFunctionLibrary("libv3/tasks/alignments.bf");
+LoadFunctionLibrary("libv3/tasks/ancestral.bf");
+LoadFunctionLibrary("libv3/models/codon.bf");
+LoadFunctionLibrary("libv3/tasks/trees.bf");
+LoadFunctionLibrary("libv3/tasks/genetic_code.bf");
+LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf");
+LoadFunctionLibrary("SelectionAnalyses/modules/selection_lib.ibf");
+LoadFunctionLibrary("libv3/models/codon/MG_REV.bf");
+LoadFunctionLibrary("libv3/models/codon/MG_REV_MH.bf");
+LoadFunctionLibrary("libv3/models/codon/MG_REV_TRIP.bf");
+LoadFunctionLibrary("libv3/convenience/math.bf");
+LoadFunctionLibrary("libv3/models/rate_variation.bf");
+LoadFunctionLibrary("libv3/models/codon/BS_REL.bf");
+
+
+utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE);
+//utility.SetEnvVariable ("VERBOSITY_LEVEL", 10);
+fitter.rate_classes = 3;
+
+KeywordArgument ("code", "Which genetic code should be used", "Universal");
+ /**
+ keyword, description (for inline documentation and help messages), default value
+ */
+KeywordArgument ("alignment", "An in-frame codon alignment in one of the formats supported by HyPhy");
+ /**
+ keyword, description (for inline documentation and help messages), no default value,
+ meaning that it will be required
+ */
+
+KeywordArgument ("tree", "A phylogenetic tree (optionally annotated with {})", null, "Please select a tree file for the data:");
+ /** the use of null as the default argument means that the default expectation is for the
+ argument to be missing, i.e. the tree is expected to be in the file
+ the fourth, optional argument, can match this keyword with the dialog prompt / choice list title,
+ meaning that it can only be consumed when this dialog prompt / choice list is invoked
+ This allows handling some branching logic conditionals
+ */
+
+KeywordArgument ("rates", "The number omega rate classes to include in the model [1-10, default 3, 1 to turn-off rate variation]", 3);
+
+KeywordArgument ("triple-islands", "Use a separate rate parameter for synonymous triple-hit substitutions", "No");
+
+
+fitter.analysis_description = {terms.io.info : "Examine whether or not a codon alignment is better fit by models which permit multiple instantaneous substitutions. v0.2 adds a separate rate for codon-island triple-hit rates",
+ terms.io.version : "1.0",
+ terms.io.authors : "Sergei L Kosakovsky Pond, Sadie Wisotsky and Alexander Lucaci",
+ terms.io.contact : "spond at temple.edu",
+ terms.io.reference: "Submitted; preprint at hyphy.org/resources/fmm.pdf",
+ terms.io.requirements : "in-frame codon alignment and a phylogenetic tree"
+ };
+
+io.DisplayAnalysisBanner (fitter.analysis_description);
+
+
+namespace fitter.terms {
+ MG94 = "Standard MG94";
+ MG94x2 = "MG94 with double instantaneous substitutions";
+ MG94x3 = "MG94 with double and triple instantaneous substitutions";
+ MG94x3xS = "MG94 with double and triple instantaneous substitutions [only synonymous islands]";
+ json.site_logl = "Site Log Likelihood";
+ json.evidence_ratios = "Evidence Ratios";
+ json.site_reports = "Site substitutions";
+}
+
+fitter.json = { terms.json.analysis: fitter.analysis_description,
+ terms.json.input: {},
+ terms.json.fits : {},
+ terms.json.timers : {},
+ fitter.terms.json.site_logl : {},
+ fitter.terms.json.evidence_ratios : {},
+ fitter.terms.json.site_reports : {}
+ };
+
+fitter.display_orders = {terms.original_name: -1,
+ terms.json.nucleotide_gtr: 0,
+ fitter.terms.MG94: 1,
+ fitter.terms.MG94x2: 2,
+ fitter.terms.MG94x3: 3
+ };
+
+
+
+selection.io.startTimer (fitter.json [terms.json.timers], "Overall", 0);
+
+
+namespace fitter {
+ LoadFunctionLibrary ("SelectionAnalyses/modules/shared-load-file.bf");
+ load_file ({utility.getGlobalValue("terms.prefix"): "fitter", utility.getGlobalValue("terms.settings") : {utility.getGlobalValue("terms.settings.branch_selector") : "selection.io.SelectAllBranches"}});
+}
+
+fitter.rate_classes = io.PromptUser ("The number of omega rate classes to include in the model", 3, 1, 10, TRUE);
+
+fitter.do_islands = io.SelectAnOption ({"Yes" : "Use a separate rate parameter for synonymous triple-hit substitutions (e.g. serine islands)",
+ "No" : "All triple hits have the same rate multiplier"},
+ "Synonymous triple-hit substitutions use a separate rate"
+ ) == "Yes";
+
+
+namespace fitter {
+ doGTR ("fitter");
+}
+
+KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'FITTER.json')", fitter.codon_data_info [terms.json.json]);
+fitter.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to");
+
+KeywordArgument ("save-fit", "Write model fit files (HyPhy NEXUS) to this file path with extensions .MODEL_NAME.bf; default is NOT to save, or 'dev/null'", "/dev/null");
+fitter.save_model_path = io.PromptUserForFilePath ("Save model fit files to");
+
+
+estimators.fixSubsetOfEstimates(fitter.gtr_results, fitter.gtr_results[terms.global]);
+
+namespace fitter {
+ scaler_prefix = "fitter.scaler";
+ doPartitionedMG ("fitter", FALSE);
+}
+
+//Export(lf_serialized, ^(fitter.partioned_mg_results));
+//fprintf(PROMPT_FOR_FILE,CLEAR_FILE, lf_serialized, "\n");
+
+//*************** GENERIC FITTER HANDLER *****
+
+function fitter.run_model_fit (model_name, model_generator, initial_values) {
+ io.ReportProgressMessageMD ("fitter", model_name, "Fitting `model_name`");
+ selection.io.startTimer (fitter.json [terms.json.timers], model_name, fitter.display_order [model_name]);
+
+ fitter.results = estimators.FitCodonModel (fitter.filter_names, fitter.trees, model_generator, fitter.codon_data_info [utility.getGlobalValue("terms.code")],
+ {
+ utility.getGlobalValue ("terms.run_options.model_type"): utility.getGlobalValue("terms.global"),
+ utility.getGlobalValue ("terms.run_options.retain_lf_object"): TRUE,
+ utility.getGlobalValue ("terms.run_options.retain_model_object"): TRUE
+ }, initial_values);
+
+
+ if (fitter.save_model_path != "/dev/null") {
+ //^(fitter.results[terms.likelihood_function])
+ io.SpoolLF(fitter.results[terms.likelihood_function], fitter.save_model_path, model_name);
+ }
+
+ fitter.models = fitter.results [terms.model];
+
+ if (^"fitter.rate_classes" > 1) {
+ fitter.cat_info = rate_variation.extract_category_information (fitter.models[fitter.models ["INDEXORDER"][0]]);
+ fitter.cat_info = Transpose (fitter.cat_info[fitter.cat_info["INDEXORDER"][0]])%0;
+ } else {
+ fitter.cat_info = {{1,1}};
+ }
+
+
+ ConstructCategoryMatrix (fitter.run_model_fit.sl, ^(fitter.results[terms.likelihood_function]), SITE_LOG_LIKELIHOODS);
+ (fitter.json [fitter.terms.json.site_logl])[model_name] = fitter.run_model_fit.sl;
+
+ io.ReportProgressMessageMD("fitter", model_name, "* " + selection.io.report_fit (fitter.results, 0, fitter.codon_data_info[terms.data.sample_size]));
+ fitter.global_dnds = selection.io.extract_global_MLE_re (fitter.results, "^(" + terms.parameters.omega_ratio + "|" + terms.parameters.multiple_hit_rate + "|" + terms.parameters.triple_hit_rate + ")");
+
+
+ utility.ForEach (fitter.global_dnds, "_value_", 'io.ReportProgressMessageMD ("fitter", model_name, "* " + _value_[terms.description] + " = " + Format (_value_[terms.fit.MLE],8,4));');
+
+ if (^"fitter.rate_classes" > 1) {
+ io.ReportProgressMessageMD("fitter", model_name, "* The following relative rate distribution (mean 1) for site-to-site **non-synonymous** rate variation was inferred");
+ selection.io.report_distribution (fitter.cat_info);
+ }
+
+ fitter.cat_info = {
+ terms.rate_variation.distribution : fitter.cat_info,
+ terms.parameters: utility.Map (fitter.results[terms.global], "_value_", '_value_ [terms.fit.MLE]')
+ };
+
+ selection.io.json_store_lf (fitter.json,
+ model_name,
+ fitter.results[terms.fit.log_likelihood],
+ fitter.results[terms.parameters],
+ fitter.sample_size,
+ fitter.cat_info,
+ fitter.display_orders[model_name]);
+
+
+ utility.ForEachPair (fitter.filter_specification, "_key_", "_value_",
+ 'selection.io.json_store_branch_attribute(fitter.json, model_name, terms.branch_length, fitter.display_orders[model_name],
+ _key_,
+ selection.io.extract_branch_info((fitter.results[terms.branch_length])[_key_], "selection.io.branch.length"));');
+
+ selection.io.stopTimer (fitter.json [terms.json.timers], model_name);
+ return fitter.results;
+}
+
+//*************** END GENERIC FITTER HANDLER *****
+
+//****** ADDING RATE VARIATION ****
+
+function fitter.modifier_omega (q_ij, from, to, namespace, cat_name) {
+ if (Abs (q_ij[terms.model.rate_entry])) {
+ if (utility.Has (q_ij[terms.global], terms.parameters.omega_ratio, "String")) {
+ q_ij[terms.model.rate_entry] = "(" + q_ij[terms.model.rate_entry] + ")*" + cat_name;
+ //console.log (q_ij);
+ }
+ }
+ return q_ij;
+}
+
+
+lfunction MG_REV.model.with.GDD (type, code) {
+ def = models.codon.MG_REV.ModelDescription (type, code);
+ if (^"fitter.rate_classes" > 1) {
+ def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("fitter.rate_classes")});
+ (def [utility.getGlobalValue("terms.model.rate_variation")])[utility.getGlobalValue("terms.rate_variation.rate_modifier")] = "fitter.modifier_omega";
+ }
+ return def;
+ }
+
+lfunction MG_REV_MH.model.with.GDD (type, code) {
+ def = models.codon.MG_REV_MH.ModelDescription (type, code);
+ if (^"fitter.rate_classes" > 1) {
+ def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("fitter.rate_classes")});
+ (def [utility.getGlobalValue("terms.model.rate_variation")])[utility.getGlobalValue("terms.rate_variation.rate_modifier")] = "fitter.modifier_omega";
+ }
+ return def;
+ }
+
+lfunction MG_REV_TRIP.model.with.GDD (type, code) {
+ def = models.codon.MG_REV_TRIP.ModelDescription (type, code);
+ if (^"fitter.rate_classes" > 1) {
+ def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory ({utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("fitter.rate_classes")});
+ (def [utility.getGlobalValue("terms.model.rate_variation")])[utility.getGlobalValue("terms.rate_variation.rate_modifier")] = "fitter.modifier_omega";
+ }
+ return def;
+ }
+
+//*************** END RATE VARIATION *****
+
+
+fitter.one_hit_results = fitter.run_model_fit (fitter.terms.MG94, "MG_REV.model.with.GDD", fitter.partitioned_mg_results);
+
+utility.Extend (fitter.one_hit_results[terms.global],
+ {
+ terms.parameters.multiple_hit_rate : { utility.getGlobalValue ("terms.fit.MLE") : 0.05, terms.fix : FALSE}
+
+ });
+
+fitter.two_hit_results = fitter.run_model_fit (fitter.terms.MG94x2, "MG_REV_MH.model.with.GDD", fitter.one_hit_results);
+
+utility.Extend (fitter.two_hit_results[terms.global],
+ {
+ terms.parameters.triple_hit_rate : { utility.getGlobalValue ("terms.fit.MLE") : 0.05, terms.fix : FALSE},
+ terms.parameters.triple_hit_rate_syn : { utility.getGlobalValue ("terms.fit.MLE") : 0.05, terms.fix : FALSE}
+
+ });
+
+
+lfunction fitter.MG_REV_TRIP.model.with.GDD (type, code) {
+ model = MG_REV_TRIP.model.with.GDD (type, code);
+ model[utility.getGlobalValue("terms.model.post_definition")] = "fitter.handle_triple_islands";
+ return model;
+}
+
+lfunction fitter.MG_REV_TRIP.model.with.GDD.islands (type, code) {
+ model = MG_REV_TRIP.model.with.GDD (type, code);
+ model[utility.getGlobalValue("terms.model.post_definition")] = "fitter.handle_only_triple_islands";
+ return model;
+}
+
+lfunction fitter.handle_triple_islands (model) {
+ if (utility.getGlobalValue("fitter.do_islands") == FALSE) {
+ parameters.SetConstraint (model.generic.GetGlobalParameter (model, utility.getGlobalValue("terms.parameters.triple_hit_rate_syn")),
+ model.generic.GetGlobalParameter (model, utility.getGlobalValue("terms.parameters.triple_hit_rate")), "");
+ }
+ return models.generic.post.definition (model);
+}
+
+lfunction fitter.handle_only_triple_islands (model) {
+ parameters.SetConstraint (model.generic.GetGlobalParameter (model, utility.getGlobalValue("terms.parameters.triple_hit_rate")), "0", "");
+ return models.generic.post.definition (model);
+}
+
+fitter.three_hit_results = fitter.run_model_fit (fitter.terms.MG94x3, "fitter.MG_REV_TRIP.model.with.GDD", fitter.one_hit_results);
+
+if (fitter.do_islands) {
+ fitter.three_hit_island_results = fitter.run_model_fit (fitter.terms.MG94x3xS, "fitter.MG_REV_TRIP.model.with.GDD.islands", fitter.three_hit_results);
+}
+//
+
+selection.io.stopTimer (fitter.json [terms.json.timers], "Overall");
+
+// PERFORM HYPOTHESIS TESTING AND POPULATE TABLE REPORT
+
+fitter.LRT = {
+ "Double-hit vs single-hit" : math.DoLRT (fitter.one_hit_results[terms.fit.log_likelihood], fitter.two_hit_results[terms.fit.log_likelihood], 1),
+ "Triple-hit vs single-hit" : math.DoLRT (fitter.one_hit_results[terms.fit.log_likelihood], fitter.three_hit_results[terms.fit.log_likelihood], 2 + fitter.do_islands),
+ "Triple-hit vs double-hit" : math.DoLRT (fitter.two_hit_results[terms.fit.log_likelihood], fitter.three_hit_results[terms.fit.log_likelihood], 1 + fitter.do_islands)
+};
+
+if (fitter.do_islands) {
+ fitter.LRT ["Triple-hit-island vs double-hit"] = math.DoLRT (fitter.two_hit_results[terms.fit.log_likelihood], fitter.three_hit_island_results[terms.fit.log_likelihood],1);
+ fitter.LRT ["Triple-hit vs Triple-hit-island"] = math.DoLRT (fitter.three_hit_island_results[terms.fit.log_likelihood], fitter.three_hit_results[terms.fit.log_likelihood],1);
+}
+
+fitter.json [terms.json.test_results] = fitter.LRT;
+
+fitter.table_output_options = {
+ utility.getGlobalValue("terms.table_options.header"): TRUE,
+ utility.getGlobalValue("terms.table_options.minimum_column_width"): 10,
+ utility.getGlobalValue("terms.table_options.align"): "center",
+ utility.getGlobalValue("terms.table_options.column_widths"): {
+ "0": 38,
+ "1": 12,
+ "2": 12,
+ "3": 12,
+ "4": 36,
+ "5": 12,
+ "6": 36
+ }
+ };
+
+fitter.report = {
+ {
+ "Model", "Log-L", "omega", " 2-hit rate", "p-value", "3-hit rate", "p-value"
+ }
+};
+
+io.ReportProgressMessageMD ("fitter", "testing", "Summary of rate estimates and significance testing");
+
+
+fprintf(stdout, io.FormatTableRow(fitter.report , fitter.table_output_options));
+fitter.table_output_options[utility.getGlobalValue("terms.table_options.header")] = FALSE;
+
+// single rate model
+fprintf(stdout, io.FormatTableRow(
+ {
+ {
+ fitter.terms.MG94,
+ Format (fitter.one_hit_results[terms.fit.log_likelihood], 8, 2),
+ Format (((fitter.one_hit_results[terms.global])[terms.parameters.omega_ratio])[terms.fit.MLE],8,4),
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A"
+ }
+ },
+ fitter.table_output_options)
+);
+
+// double-hit rate model
+fprintf(stdout, io.FormatTableRow(
+ {
+ {
+ fitter.terms.MG94 + " + 2 hits",
+ Format (fitter.two_hit_results[terms.fit.log_likelihood], 8, 2),
+ Format (((fitter.two_hit_results[terms.global])[terms.parameters.omega_ratio])[terms.fit.MLE],8,4),
+ Format (((fitter.two_hit_results[terms.global])[terms.parameters.multiple_hit_rate])[terms.fit.MLE],8,4) ,
+ Format ((fitter.LRT["Double-hit vs single-hit"])[terms.p_value], 8, 4) + " (2-hit rate = 0)",
+ "N/A",
+ "N/A"
+ }
+ },
+ fitter.table_output_options)
+);
+
+if (fitter.do_islands) {
+
+
+
+ fprintf(stdout, io.FormatTableRow(
+ {
+ {
+ fitter.terms.MG94 + " + 3 hits (islands)",
+ Format (fitter.three_hit_island_results[terms.fit.log_likelihood], 8, 2),
+ Format (((fitter.three_hit_island_results[terms.global])[terms.parameters.omega_ratio])[terms.fit.MLE],8,4),
+ Format (((fitter.three_hit_island_results[terms.global])[terms.parameters.triple_hit_rate_syn])[terms.fit.MLE],8,4) ,
+ Format ((fitter.LRT["Triple-hit-island vs double-hit"])[terms.p_value], 8, 4) + " (3-hit island vs 2-hit)",
+ Format (((fitter.three_hit_results[terms.global])[terms.parameters.triple_hit_rate])[terms.fit.MLE],8,4),
+ Format ((fitter.LRT["Triple-hit vs Triple-hit-island"])[terms.p_value], 8, 4) + " (3-hit = 0)"
+ }
+ },
+ fitter.table_output_options)
+ );
+}
+
+// triple-hit rate model
+fprintf(stdout, io.FormatTableRow(
+ {
+ {
+ fitter.terms.MG94 + " + 2 or 3 hits",
+ Format (fitter.three_hit_results[terms.fit.log_likelihood], 8, 2),
+ Format (((fitter.three_hit_results[terms.global])[terms.parameters.omega_ratio])[terms.fit.MLE],8,4),
+ Format (((fitter.three_hit_results[terms.global])[terms.parameters.multiple_hit_rate])[terms.fit.MLE],8,4),
+ Format ((fitter.LRT["Triple-hit vs single-hit"])[terms.p_value], 8, 4) + " (2&3-hit rates = 0)",
+ Format (((fitter.three_hit_results[terms.global])[terms.parameters.triple_hit_rate])[terms.fit.MLE],8,4),
+ Format ((fitter.LRT["Triple-hit vs double-hit"])[terms.p_value], 8, 4) + " (3-hit rate(s) = 0)"
+ }
+ },
+ fitter.table_output_options)
+);
+
+
+
+(fitter.json [fitter.terms.json.evidence_ratios])["Two-hit"] =
+ fitter.EvidenceRatios ((fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x2],
+ (fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94]);
+
+(fitter.json [fitter.terms.json.evidence_ratios])["Three-hit"] =
+ fitter.EvidenceRatios ((fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x3],
+ (fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x2]);
+
+if (fitter.do_islands) {
+ (fitter.json [fitter.terms.json.evidence_ratios])["Three-hit islands vs 2-hit"] =
+ fitter.EvidenceRatios ((fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x3xS],
+ (fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x2]);
+
+ (fitter.json [fitter.terms.json.evidence_ratios])["Three-hit vs three-hit islands"] =
+ fitter.EvidenceRatios ((fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x3],
+ (fitter.json[fitter.terms.json.site_logl])[fitter.terms.MG94x3xS]);
+
+}
+
+
+fitter.callout = {};
+
+utility.ForEachPair ((fitter.json [fitter.terms.json.evidence_ratios])["Two-hit"], "_index_", "_value_",
+'
+ if (Log (_value_) > 2) {
+ fitter.callout [_index_[1]] = 1;
+ }
+');
+
+utility.ForEachPair ((fitter.json [fitter.terms.json.evidence_ratios])["Three-hit"], "_index_", "_value_",
+'
+ if (Log (_value_) > 2) {
+ fitter.callout [_index_[1]] = 1;
+ }
+');
+
+if (fitter.do_islands) {
+ utility.ForEachPair ((fitter.json [fitter.terms.json.evidence_ratios])["Three-hit islands vs 2-hit"], "_index_", "_value_",
+ '
+ if (Log (_value_) > 2) {
+ fitter.callout [_index_[1]] = 1;
+ }
+ ');
+ utility.ForEachPair ((fitter.json [fitter.terms.json.evidence_ratios])["Three-hit vs three-hit islands"], "_index_", "_value_",
+ '
+ if (Log (_value_) > 2) {
+ fitter.callout [_index_[1]] = 1;
+ }
+ ');
+}
+
+if (utility.Array1D (fitter.callout)) {
+ // reconstruct ancestors here
+
+ fitter.site_reports = {};
+ fitter.ancestral_cache = ancestral.build (fitter.three_hit_results[terms.likelihood_function], 0, None);
+
+ io.ReportProgressMessageMD ("fitter", "sites", "" + utility.Array1D (fitter.callout) + " individual " + io.SingularOrPlural (utility.Array1D (fitter.callout) , "site", "sites") + " which showed sufficiently strong preference for multiple-hit models");
+ fitter.table_output_options = {
+ utility.getGlobalValue("terms.table_options.header"): TRUE,
+ utility.getGlobalValue("terms.table_options.minimum_column_width"): 10,
+ utility.getGlobalValue("terms.table_options.align"): "center",
+ utility.getGlobalValue("terms.table_options.column_widths"): {
+ "0": 10,
+ "1": 25,
+ "2": 25,
+ "3": 60
+ }
+ };
+
+
+ if (fitter.do_islands) {
+ (fitter.table_output_options[utility.getGlobalValue("terms.table_options.column_widths")]) [3] = 25;
+ (fitter.table_output_options[utility.getGlobalValue("terms.table_options.column_widths")]) [4] = 25;
+ (fitter.table_output_options[utility.getGlobalValue("terms.table_options.column_widths")]) [5] = 60;
+ fitter.report = {
+ {
+ "Site", "ER (2 vs 1)", "ER (3 vs 2)", "ER (3-island vs 2)", "ER (3-island vs 3)", "Substitutions"
+ }
+ };
+ } else {
+ fitter.report = {
+ {
+ "Site", "ER (2 vs 1)", "ER (3 vs 2)", "Substitutions"
+ }
+ };
+ }
+ fprintf(stdout, "\n", io.FormatTableRow(fitter.report , fitter.table_output_options));
+
+ fitter.table_output_options[utility.getGlobalValue("terms.table_options.header")] = FALSE;
+ utility.ForEachPair (fitter.callout, "_site_", "_value_", "
+
+ fitter.site_reports [_site_] = (ancestral.ComputeSubstitutionBySite (fitter.ancestral_cache, +_site_, None))[terms.substitutions];
+
+ if (fitter.do_islands) {
+ fprintf(stdout, io.FormatTableRow(
+ {
+ {
+ '' + (+_site_ + 1),
+ Format (((fitter.json [fitter.terms.json.evidence_ratios])['Two-hit'])[+_site_], 10, 4),
+ Format (((fitter.json [fitter.terms.json.evidence_ratios])['Three-hit'])[+_site_], 10, 4),
+ Format (((fitter.json [fitter.terms.json.evidence_ratios])['Three-hit islands vs 2-hit'])[+_site_], 10, 4),
+ Format (((fitter.json [fitter.terms.json.evidence_ratios])['Three-hit vs three-hit islands'])[+_site_], 10, 4),
+ fitter.SubstitutionHistory (fitter.site_reports [_site_])
+ }
+ }
+ , fitter.table_output_options));
+
+ } else {
+ fprintf(stdout, io.FormatTableRow(
+ {
+ {
+ '' + (+_site_ + 1),
+ Format (((fitter.json [fitter.terms.json.evidence_ratios])['Two-hit'])[+_site_], 10, 4),
+ Format (((fitter.json [fitter.terms.json.evidence_ratios])['Three-hit'])[+_site_], 10, 4),
+ fitter.SubstitutionHistory (fitter.site_reports [_site_])
+ }
+ }
+ , fitter.table_output_options));
+ }
+ ");
+
+ fitter.json [fitter.terms.json.site_reports] = fitter.site_reports;
+
+
+} else {
+ io.ReportProgressMessageMD ("fitter", "sites", "No individual sites showed sufficiently strong preference for multiple-hit models");
+
+}
+
+io.ReportProgressMessageMD ("fitter", "writing", "Writing detailed analysis report to \`" + fitter.codon_data_info [terms.json.json] + "\'");
+
+io.SpoolJSON (fitter.json, fitter.codon_data_info [terms.json.json]);
+return fitter.json;
+
+//------------------------------------------------------------------------------
+lfunction fitter.EvidenceRatios (ha, h0) {
+ return ha["Exp(_MATRIX_ELEMENT_VALUE_-h0[_MATRIX_ELEMENT_COLUMN_])"];
+}
+
+
+//------------------------------------------------------------------------------
+
+lfunction fitter.SubstitutionHistory (subs) {
+ result = "";
+ result * 128;
+ keys = utility.sortStrings (utility.Keys (subs));
+
+ for (i = 0; i < Abs (subs); i+=1) {
+ source = keys[i];
+ targets = subs[source];
+ if (i > 0) {
+ result * ", ";
+ }
+ result * (source + "->");
+ keys2 = utility.sortStrings (utility.Keys (targets));
+ for (k = 0; k < Abs (targets); k+=1) {
+ result * (keys2[k] + "(" + Abs(targets[keys2[k]]) + ")");
+ }
+ }
+
+ result * 0;
+ return result;
+
+}
=====================================
res/TemplateBatchFiles/SelectionAnalyses/MEME.bf
=====================================
@@ -163,7 +163,10 @@ meme.final_partitioned_mg_results_bl = estimators.FitMGREV (meme.filter_names, m
terms.run_options.model_type: terms.local,
terms.run_options.partitioned_omega: meme.selected_branches,
terms.run_options.retain_lf_object: FALSE,
- terms.run_options.optimization_settings: {"OPTIMIZATION_PRECISION" : Max(meme.partitioned_mg_results[terms.fit.log_likelihood] * 0.00001, 0.1)}
+ terms.run_options.optimization_settings: {
+ "OPTIMIZATION_METHOD" : "coordinate-wise",
+ "OPTIMIZATION_PRECISION" : Max(meme.partitioned_mg_results[terms.fit.log_likelihood] * 0.00001, 0.1)
+ }
}, meme.partitioned_mg_results);
@@ -177,7 +180,10 @@ io.ReportProgressMessageMD ("MEME", "codon-refit", "Improving branch lengths, nu
meme.final_partitioned_mg_results = estimators.FitMGREV (meme.filter_names, meme.trees, meme.codon_data_info [terms.code], {
terms.run_options.model_type: terms.local,
terms.run_options.partitioned_omega: meme.selected_branches,
- terms.run_options.retain_lf_object: TRUE
+ terms.run_options.retain_lf_object: TRUE,
+ terms.run_options.optimization_settings: {
+ "OPTIMIZATION_METHOD" : "coordinate-wise"
+ }
}, meme.partitioned_mg_results);
@@ -526,11 +532,8 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa
//io.SpoolLF (lf_bsrel, "/tmp/meme.debug", "MEME");
-
- Optimize (results, ^lf_bsrel, {
- "OPTIMIZATION_METHOD" : "nedler-mead",
- "OPTIMIZATION_START_GRID" :
- {
+
+ initial_guess_grid = {
"0" : {
"meme.site_beta_plus": ^"meme.site_beta_plus",
"meme.site_omega_minus": ^"meme.site_omega_minus",
@@ -565,15 +568,54 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa
"meme.site_beta_plus": ^"meme.site_beta_plus",
"meme.site_omega_minus": 0,
"meme.site_mixture_weight": 0.01
+ },
+ "7" : {
+ "meme.site_beta_plus": ^"meme.site_beta_plus",
+ "meme.site_omega_minus": ^"meme.site_omega_minus",
+ "meme.site_mixture_weight": 1.0
}
- }
+ };
+
+ //before_opt = {"alpha" : ^"meme.site_alpha", "other" : initial_guess_grid};
+
+
+
+ ^"meme.site_alpha" = ^"meme.site_alpha";
+ // SLKP 20201028 : without this, there are occasional initialization issues with
+ // the likelihood function below
+
+ Optimize (results, ^lf_bsrel, {
+ "OPTIMIZATION_METHOD" : "nedler-mead",
+ "OPTIMIZATION_START_GRID" : initial_guess_grid
});
+ /*after_opt = {
+ "alpha" : Eval("meme.site_alpha"),
+ "meme.site_beta_plus": Eval("meme.site_beta_plus"),
+ "meme.site_omega_minus": Eval("meme.site_omega_minus"),
+ "meme.site_mixture_weight": Eval("meme.site_mixture_weight")
+ };*/
+
alternative = estimators.ExtractMLEs (lf_bsrel, model_mapping);
alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
+ /*init_grid_best = ^"LF_INITIAL_GRID_MAXIMUM";
+ if (Abs((results[1][0]-init_grid_best)/results[1][0]) > 0.05) {
+ console.log (before_opt);
+ console.log (^"LF_INITIAL_GRID_MAXIMUM_VALUE");
+ console.log (after_opt);
+ console.log ("" + results[1][0] + " vs " + init_grid_best);
+
+ //fprintf (stdout, ^lf_bsrel, "\n");
+ //utility.SetEnvVariable ("VERBOSITY_LEVEL",10);
+
+ }
+
+
+ console.log ("--------");*/
+
ancestral_info = ancestral.build (lf_bsrel,0,FALSE);
=====================================
res/TemplateBatchFiles/SelectionAnalyses/RELAX-SRV.bf deleted
=====================================
@@ -1,888 +0,0 @@
-RequireVersion ("2.31");
-
-console.log("ERROR: THIS BATCHFILE WILL NOT RUN PROPERLY WITH UPDATED TERMS BASE. QUITTING.");
-exit();
-
-
-RELAX.debug.reload = FALSE;
-
-LoadFunctionLibrary("GrabBag");
-LoadFunctionLibrary("CF3x4");
-LoadFunctionLibrary("TreeTools");
-
-
-// namespace 'utility' for convenience functions
-LoadFunctionLibrary("libv3/UtilityFunctions.bf");
-
-// namespace 'io' for interactive/datamonkey i/o functions
-LoadFunctionLibrary("libv3/IOFunctions.bf");
-
-LoadFunctionLibrary ("libv3/models/codon/MG_REV.bf");
-
-// namespace 'estimators' for various estimator related functions
-LoadFunctionLibrary("libv3/tasks/estimators.bf");
-
-// namespace 'alignments' for various alignments related functions
-LoadFunctionLibrary("libv3/tasks/alignments.bf");
-
-// namespace 'tree' for various tree related functions
-LoadFunctionLibrary("libv3/tasks/trees.bf");
-
-// namespace 'estimators' for various estimator related functions
-LoadFunctionLibrary("SelectionAnalyses/modules/io_functions.ibf");
-
-
-LoadFunctionLibrary("BranchSiteTemplate");
-/*------------------------------------------------------------------------------
- BranchSiteTemplate Defines
-
- BuildCodonFrequencies (obsF);
- PopulateModelMatrix (ModelMatrixName&, EFV, synrateP, globalP, nonsynRateP);
-
-------------------------------------------------------------------------------*/
-
-RELAX.settings = {"GTR" : 1,
- "LocalMG" : 1,
- "Estimate GTR" : 1};
-
-RELAX.timers = {6,1};
-
-
-relax.taskTimerStart (0);
-
-RELAX.json = {"fits" : {},
- "timers" : {},
- "relaxation-test" : None
- };
-
-RELAX.test = "RELAX.test";
-RELAX.reference = "RELAX.reference";
-RELAX.unclassified = "RELAX.unclassified";
-RELAX.relaxation_parameter = "RELAX.K";
-
-term.RELAX.k = "relaxation coefficient";
-
-/*------------------------------------------------------------------------------*/
-
-
-io.DisplayAnalysisBanner ({"info" : "RELAX (a random effects test of selection relaxation)
- uses a random effects branch-site model framework
- to test whether a set of 'Test' branches evolves under relaxed
- selection relative to a set of 'Reference' branches (R), as measured
- by the relaxation parameter (K).",
- "version" : "1.0",
- "reference" : "RELAX: Detecting Relaxed Selection in a Phylogenetic Framework (2015). Mol Biol Evol 32 (3): 820-832",
- "authors" : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group",
- "contact" : "spond at temple.edu",
- "requirements" : "in-frame codon alignment and a phylogenetic tree, with at least two groups of branches defined using the {} notation (one group can be defined as all unlabeled branches)"
- } );
-
-relax.codon_data_info = alignments.PromptForGeneticCodeAndAlignment ("RELAX.codon_data", "RELAX.codon_filter");
-relax.sample_size = relax.codon_data_info["sites"] * relax.codon_data_info["sequences"];
-
-relax.name_mapping = relax.codon_data_info[utility.getGlobalValue("terms.json.name_mapping")];
- /**
- will contain "mapped" -> "original" associations with sequence names; or null if no mapping was necessary
- */
-
-if (None == relax.name_mapping) { /** create a 1-1 mapping if nothing was done */
- relax.name_mapping = {};
- utility.ForEach (alignments.GetSequenceNames ("RELAX.codon_data"), "_value_", "`&relax.name_mapping`[_value_] = _value_");
-}
-
-relax.codon_data_info["json"] = relax.codon_data_info["file"] + ".RELAX.json";
-io.ReportProgressMessage ("RELAX", "Loaded an MSA with " + relax.codon_data_info["sequences"] + " sequences and " + relax.codon_data_info["sites"] + " codons from '" + relax.codon_data_info["file"] + "'");
-
-relax.codon_lists = models.codon.MapCode (relax.codon_data_info["code"]);
-_Genetic_Code = relax.codon_data_info["code"];
- /*
-
- hack to make PopulateModelMatrix work
-
- */
-relax.codon_frequencies = frequencies._aux.CF3x4(frequencies._aux.empirical.collect_data ("RELAX.codon_filter",3,1,1),
-models.DNA.alphabet, relax.codon_lists["sense"], relax.codon_lists["stop"]);
-
-
-relax.partitions_and_trees = trees.LoadAnnotatedTreeTopology.match_partitions (relax.codon_data_info[utility.getGlobalValue("terms.json.partitions")], relax.name_mapping);
-io.CheckAssertion("utility.Array1D (relax.partitions_and_trees) == 1", "RELAX only works on a single partition dataset");
-
-relax.filter_specification = alignments.DefineFiltersForPartitions (relax.partitions_and_trees, "RELAX.codon_data" , "RELAX.codon_filter.", relax.codon_data_info);
-
-relax.trees = utility.Map (relax.partitions_and_trees, "_partition_", '_partition_["tree"]');
-relax.filter_names = utility.Map (relax.filter_specification, "_partition_", '_partition_["name"]');
-
-relax.tree = relax.trees[0];
-
-utility.SetEnvVariable ("VERBOSITY_LEVEL", 1);
-
-relax.group_count = utility.Array1D(relax.tree["model_list"]);
-
-assert (relax.group_count >= 2, "This analysis expects a partitioned tree with at least two groups defined (one can be the default group without a model)");
-
-RELAX.groups = {};
-
-utility.ForEach (relax.tree ["model_list"], "_model_", "
- if (_model_ == '') {
- RELAX.groups [RELAX.unclassified] = 'Unclassified branches';
- } else {
- RELAX.groups [_model_] = _model_ + ' branches';
- }
-");
-
-RELAX.reference_set = io.SelectAnOption (RELAX.groups, "Choose the reference set of branches");
-RELAX.reference_branches = utility.Filter (relax.tree["model_map"], "_value_", "_value_ == RELAX.reference_set");
-
-io.ReportProgressMessage ("RELAX", "Selected " + Abs (RELAX.reference_branches) + " branches as the test set: " + Join (",", utility.Keys (RELAX.reference_branches)));
-
-RELAX.has_unclassified = RELAX.groups / RELAX.unclassified;
-RELAX.branch_to_partiton = {};
-utility.ForEachPair (relax.tree ["model_map"], "_key_", "_value_", "if (Abs (_value_)) { RELAX.branch_to_partiton[_key_&&1] = _value_; } else { RELAX.branch_to_partiton[_key_&&1] = RELAX.unclassified;}");
-
-RELAX.json ["partition"] = relax.selected_branches;
-RELAX.json ["tree"] = relax.tree ["string"];
-
-RELAX.runModel = io.SelectAnOption ({
- {"All", "[Default] Fit descriptive models AND run the relax test (4 models)"}
- {"Minimal", "Run only the RELAX test (2 models)"}
- }, "Analysis type");
-
-relax.taskTimerStart (1);
-
-RELAX.srvType = io.SelectAnOption ({
- {"Site-only", "[Default] Synonymous rates vary from site to site only (all branches are scaled uniformly)"}
- {"Branch-site", "Synonymous rates vary from site to site and branch to branch"}
- }, "Analysis type");
-
-
-
-if (RELAX.settings["GTR"]) {
- io.ReportProgressMessage ("RELAX", "Obtaining branch lengths under the GTR model");
- relax.gtr_results = estimators.FitGTR ("RELAX.codon_filter", relax.tree, None);
- io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.gtr_results["LogL"]);
- estimators.fixSubsetOfEstimates (relax.gtr_results, relax.gtr_results["global"]);
-} else {
- relax.gtr_results = None;
-}
-
-/*if (RELAX.settings["LocalMG"] && RELAX.runModel == "All") {
- io.ReportProgressMessage ("RELAX", "Obtaining omega and branch length estimates under the local MG94xGTR model");
- relax.local_mg_results = estimators.FitMGREV (relax.filter_names, relax.trees, relax.codon_data_info ["code"], {"model-type" : terms.local}, relax.gtr_results);
- io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.local_mg_results["LogL"]);
- estimators.fixSubsetOfEstimates (relax.local_mg_results, relax.local_mg_results["global"]);
-} else {
- relax.local_mg_results = relax.gtr_results;
-}*/
-
-parameters.DeclareGlobal ("relax.codon_branch_scaler", None);
-
-utility.SetEnvVariable ("VERBOSITY_LEVEL", 1);
-
-
-if (!RELAX.debug.reload) {
- io.ReportProgressMessageMD ("RELAX", "mg-rev", "Obtaining omega and branch length estimates under the partitioned MG94xGTR model");
- relax.mg_results = estimators.FitMGREV (relax.filter_names, relax.trees, relax.codon_data_info ["code"],
- {"model-type" : terms.local, "partitioned-omega" : {"0" : RELAX.branch_to_partiton}, "proportional-branch-length-scaler": {"0" : "relax.codon_branch_scaler"}},
- relax.local_mg_results);
- relax.taskTimerStop (1);
-
- io.ReportProgressMessageMD("RELAX", "mg-rev", "* Log(L) = " + Format(relax.mg_results["LogL"],8,2));
- relax.global_dnds = selection.io.extract_global_MLE_re (relax.mg_results , "^" + terms.omega_ratio);
-
- relax.mg_results_rate = {};
-
- utility.ForEach (relax.global_dnds, "_value_",
- '
- io.ReportProgressMessageMD ("fel", "mg-rev", "* " + _value_["description"] + " = " + Format (_value_["MLE"],8,4));
- '
- );
-
-
- relax.json_store_lf (RELAX.json, "Partitioned MG94xREV",
- relax.mg_results["LogL"], relax.mg_results["parameters"] + 5,
- RELAX.timers[1],
- relax._aux.extract_branch_info ((relax.mg_results["branch lengths"])[0], "relax.branch.length"),
- relax._aux.extract_branch_info ((relax.mg_results["branch lengths"])[0], "relax.branch.omega"),
- None,
- None,
- "ω"
- );
- relax.json_spool (RELAX.json, relax.codon_data_info["json"]);
-
- relax.taskTimerStart (2);
-}
-
-RELAX.model_mapping = {};
-RELAX.model_assignment = {};
-RELAX.model_specification = {};
-RELAX.unclassified_model_id = None;
-
-
-for (g = 0; g < relax.group_count; g += 1) {
- relax.group_name = (relax.tree ["model_list"])[g];
- relax.branch_set = utility.Filter (relax.tree ["model_map"], "_value_", "_value_ == relax.group_name");
-
- if (Abs ( relax.group_name ) == 0) {
- relax.group_name = RELAX.unclassified;
- }
-
-
- relax.omega_estimate = Eval(utility.Values (selection.io.extract_global_MLE_re (relax.mg_results , relax.group_name + "\*$"))[0]);
-
- RELAX.model = relax.io.define_a_bsrel_model ("RELAX.`g`",
- relax.codon_frequencies,
- relax.omega_estimate["MLE"],
- 1,
- RELAX.srvType != "Site-only");
-
-
- RELAX.model_assignment [RELAX.model["id"]] = RELAX.model;
- RELAX.model_specification [relax.group_name] = relax.branch_set;
- RELAX.model_mapping [relax.group_name] = RELAX.model["id"];
-
- if ((relax.tree ["model_list"])[g] == RELAX.reference_set) {
- RELAX.reference_model = RELAX.model["id"];
- }
-
- if (relax.group_name == RELAX.unclassified) {
- RELAX.unclassified_model_id = RELAX.model["id"];
- }
-}
-
-utility.ForEachPair ( RELAX.model_assignment, "_key_", "_value_", '
- if (_key_ != RELAX.reference_model) {
- parameters.ConstrainSets ((RELAX.model_assignment[RELAX.reference_model]) ["omegas"], (RELAX.model_assignment[_key_]) ["omegas"]);
- parameters.ConstrainSets ((RELAX.model_assignment[RELAX.reference_model]) ["f"], (RELAX.model_assignment[_key_]) ["f"]);
- }
- ');
-
-
-
-model.ApplyModelToTree ("RELAX.tree", relax.tree, RELAX.model_mapping, RELAX.model_specification);
-
-ASSUME_REVERSIBLE_MODELS = 1;
-LikelihoodFunction relax.LF = (RELAX.codon_filter, RELAX.tree);
-
-global RELAX.branch_scaler = 4;
-RELAX.proportional_constraint = "RELAX.branch_scaler";
-
-if (RELAX.settings["Estimate GTR"] != TRUE) {
- estimators.fixSubsetOfEstimates (relax.mg_results, relax.mg_results["global"]);
-}
-
-estimators.ApplyExistingEstimates ("relax.LF", RELAX.model_assignment, relax.mg_results, None);
-
-utility.SetEnvVariable ("USE_LAST_RESULTS", 1);
-
-if (RELAX.runModel == "All") {
-
- io.ReportProgressMessageMD ("RELAX", "GDM", "## Two-stage fit of the general descriptive model (separate relaxation parameter for each branch)");
-
- OPTIMIZATION_PRECISION = 0.1;
-
- Optimize (relax.MLE.general_descriptive, relax.LF);
-
- OPTIMIZATION_PRECISION = 0.001;
- parameters.UnconstrainParameterSet ("relax.LF", {{terms.lf.local.constrained}});
-
- Optimize (relax.MLE.general_descriptive, relax.LF);
- io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.MLE.general_descriptive[1][0]);
-
- relax.general_descriptive = estimators.ExtractMLEs ("relax.LF", RELAX.model_specification);
- relax.add_scores (relax.general_descriptive, relax.MLE.general_descriptive);
-
- relax.taskTimerStop (2);
-
- relax.json_store_lf (RELAX.json, "General Descriptive",
- relax.general_descriptive["LogL"], relax.general_descriptive["parameters"],
- RELAX.timers[2],
- relax._aux.extract_branch_info ((relax.general_descriptive["branch lengths"])[0], "relax.branch.length"),
- relax._aux.extract_branch_info ((relax.general_descriptive["branch lengths"])[0], "relax.branch.local_k"),
- {"All" : relax.getRateDistribution (RELAX.reference.model, 1)},
- None,
- "k"
- );
- relax.json_spool (RELAX.json, relax.codon_data_info["json"]);
-
-} else {
- parameters.UnconstrainParameterSet ("relax.LF", {{terms.lf.local.constrained}});
-}
-
-if (RELAX.has_unclassified) {
- parameters.RemoveConstraint ((RELAX.model_assignment[RELAX.unclassified_model_id]) ["omegas"]);
- parameters.RemoveConstraint ((RELAX.model_assignment[RELAX.unclassified_model_id]) ["f"]);
-}
-
-relax.taskTimerStart (3);
-io.ReportProgressMessage ("RELAX", "Fitting the RELAX null model");
-
-RELAX.null = relax.define.null ("RELAX.tree", RELAX.model_assignment[RELAX.reference_model], RELAX.model_specification);
-
-
-if (!RELAX.debug.reload) {
-
- utility.SetEnvVariable ("VERBOSITY_LEVEL", 1);
-
- Optimize (relax.MLE.null, relax.LF);
- io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.MLE.null[1][0]);
- relax.null = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment);
-
- LIKELIHOOD_FUNCTION_OUTPUT = 7;
- fprintf (relax.codon_data_info["file"] + ".null.fit", CLEAR_FILE, relax.LF);
- LIKELIHOOD_FUNCTION_OUTPUT = 2;
-} else {
- ExecuteAFile (relax.codon_data_info["file"] + ".null.fit");
- relax.null = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment);
- relax.null["LogL"] = estimators.ComputeLF ("relax.LF");
-}
-
-relax.add_scores (relax.null, relax.MLE.null);
-
-relax.taskTimerStop (3);
-
-relax.omega_distributions = {};
-relax.omega_distributions["Joint"] = relax.getRateDistribution (RELAX.model_assignment[RELAX.reference_model], 1);
-relax.omega_distributions["SRV"] = relax.getRateDistributionSRV (RELAX.model_assignment[RELAX.reference_model]);
-
-if (RELAX.has_unclassified) {
- relax.omega_distributions ["Unclassified"] = relax.getRateDistribution (RELAX.model_assignment[RELAX.unclassified_model_id], 1);
-}
-
-
-
-if (!RELAX.debug.reload) {
- relax.json_store_lf (RELAX.json, "Null",
- relax.null["LogL"], relax.null["parameters"],
- RELAX.timers[3],
- relax._aux.extract_branch_info ((relax.null["branch lengths"])[0], "relax.branch.length"),
- relax._aux.extract_branch_info ((relax.null["branch lengths"])[0], "relax.branch.local_k"),
- relax.omega_distributions,
- 1,
- "k"
- );
-
- relax.json_spool (RELAX.json, relax.codon_data_info["json"]);
-}
-
-io.ReportProgressMessage ("RELAX", "Fitting the RELAX alternative model");
-
-relax.taskTimerStart (4);
-
-utility.ForEach (estimators.GetGlobalMLE_RegExp (relax.null, "^Relaxation parameter"), "_parameter_",
-'
- parameters.RemoveConstraint (_parameter_["ID"]);
-');
-
-
-if (!RELAX.debug.reload) {
-
- Optimize (relax.MLE.alt, relax.LF);
- relax.alt = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment);
-
- io.ReportProgressMessageMD ("RELAX", "AltModel", "Log(L) = " + relax.MLE.alt[1][0]);
-} else {
- ExecuteAFile (relax.codon_data_info["file"] + ".alternative.fit");
- relax.alt = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment);
- relax.alt["LogL"] = estimators.ComputeLF ("relax.LF");
-}
-
-utility.ForEachPair (estimators.GetGlobalMLE_RegExp (relax.alt, "^Relaxation parameter"), "_name_", "_parameter_",
-'
- io.ReportProgressMessageMD ("RELAX", "AltModel", "* " + _name_ + " = " + _parameter_ [terms.MLE]);
-
-');
-
-
-if (!RELAX.debug.reload) {
- LIKELIHOOD_FUNCTION_OUTPUT = 7;
- fprintf (relax.codon_data_info["file"] + ".alternative.fit", CLEAR_FILE, relax.LF);
- LIKELIHOOD_FUNCTION_OUTPUT = 2;
-}
-
-relax.add_scores (relax.alt, relax.MLE.alt);
-
-RELAX.json ["relaxation-test"] = relax.runLRT (relax.alt["LogL"], relax.null["LogL"], relax.group_count-2);
-
-io.ReportProgressMessageMD ("RELAX", "AltModel", "Likelihood ratio test for differences among branch classes. Null = "
- + relax.null["LogL"] + ", Alt = " + relax.alt["LogL"] + ", p = " + (RELAX.json ["relaxation-test"])["p"]);
-
-relax.taskTimerStop (4);
-
-relax.omega_distributions = {};
-relax.omega_distributions["Joint"] = relax.getRateDistribution (RELAX.model_assignment[RELAX.reference_model], 1);
-relax.omega_distributions["SRV"] = relax.getRateDistributionSRV (RELAX.model_assignment[RELAX.reference_model]);
-
-if (RELAX.has_unclassified) {
- relax.omega_distributions ["Unclassified"] = relax.getRateDistribution (RELAX.model_assignment[RELAX.unclassified_model_id], 1);
-}
-
-
-relax.json_store_lf (RELAX.json, "Alternative",
- relax.alt["LogL"], relax.alt["parameters"],
- RELAX.timers[4],
- relax._aux.extract_branch_info ((relax.alt["branch lengths"])[0], "relax.branch.length"),
- relax._aux.extract_branch_info ((relax.alt["branch lengths"])[0], "relax.branch.local_k"),
- relax.omega_distributions,
- Eval (RELAX.relaxation_parameter),
- "k"
- );
-
-
-if (RELAX.runModel == "All") {
-
- relax.taskTimerStart (5);
-
- parameters.RemoveConstraint (RELAX.test.model ["omegas"]);
- parameters.RemoveConstraint (RELAX.test.model ["f"]);
- parameters.SetConstraint (RELAX.relaxation_parameter, Eval (RELAX.relaxation_parameter), "");
-
-
- io.ReportProgressMessage ("RELAX", "Fitting the RELAX partitioned exploratory model");
- Optimize (relax.MLE.part.expl, relax.LF);
- io.ReportProgressMessage ("RELAX", "Log(L) = " + relax.MLE.part.expl [1][0]);
-
- LIKELIHOOD_FUNCTION_OUTPUT = 7;
- fprintf (relax.codon_data_info["file"] + ".partitioned_descriptive.fit", CLEAR_FILE, relax.LF);
- LIKELIHOOD_FUNCTION_OUTPUT = 2;
-
- relax.part.expl = estimators.ExtractMLEs ("relax.LF", RELAX.model_assignment);
-
- relax.omega_distributions["Test"] = relax.getRateDistribution (RELAX.test.model,Eval (RELAX.relaxation_parameter));
- relax.omega_distributions["Reference"] = relax.getRateDistribution (RELAX.reference.model, 1);
-
- if (RELAX.has_unclassified) {
- relax.omega_distributions ["Unclassified"] = relax.getRateDistribution (RELAX.unclassified.model, 1);
- }
-
- relax.add_scores (relax.part.expl, relax.MLE.part.expl);
-
- relax.taskTimerStop (5);
- relax.json_store_lf (RELAX.json, "Partitioned Exploratory",
- relax.part.expl["LogL"], relax.part.expl["parameters"],
- RELAX.timers[5],
- relax._aux.extract_branch_info ((relax.part.expl["branch lengths"])[0], "relax.branch.length"),
- None,
- relax.omega_distributions,
- None,
- ""
- );
-}
-
-
-relax.taskTimerStop (0);
-
-
-(RELAX.json ["timers"])["Overall"] = RELAX.timers[0];
-(RELAX.json ["timers"])["Preliminaries"] = RELAX.timers[1];
-(RELAX.json ["timers"])["General Descriptive"] = RELAX.timers[2];
-(RELAX.json ["timers"])["Null"] = RELAX.timers[3];
-(RELAX.json ["timers"])["Alternative"] = RELAX.timers[4];
-(RELAX.json ["timers"])["Partitioned Descriptive"] = RELAX.timers[5];
-
-relax.json_spool (RELAX.json, relax.codon_data_info["json"]);
-
-return RELAX.json;
-
-//------------------------------------------------------------------------------
-// HELPER FUNCTIONS FROM THIS POINT ON
-//------------------------------------------------------------------------------
-
-function relax.branch.length (branch_info) {
- return branch_info["MLE"];
-}
-
-function relax.branch.omega (branch_info) {
- return parameters.NormalizeRatio ((branch_info[terms.nonsynonymous_rate])["MLE"], (branch_info[terms.synonymous_rate])["MLE"]);
-}
-
-function relax.branch.local_k (branch_info) {
- return (branch_info[term.RELAX.k])["MLE"];
-}
-
-function relax._aux.extract_branch_info.callback (key, value) {
- relax._aux.extract_branch_info_result [key] = utility.CallFunction (callback, {"0" : "value"});
-}
-
-function relax._aux.extract_branch_info (branch_spec, callback) {
- relax._aux.extract_branch_info_result = {};
- branch_spec ["relax._aux.extract_branch_info.callback"][""];
- return relax._aux.extract_branch_info_result;
-}
-
-
-//------------------------------------------------------------------------------
-function relax.getRateDistribution (model_description, K) {
- relax.getRateInformation.rate_classes = Abs (model_description["omegas"]);
- relax.getRateInformation.omega_info = {relax.getRateInformation.rate_classes,2};
-
- for (relax.getRateInformation.k = 0; relax.getRateInformation.k < relax.getRateInformation.rate_classes; relax.getRateInformation.k += 1) {
- relax.getRateInformation.omega_info[relax.getRateInformation.k][0] = Eval ((model_description["omegas"])[relax.getRateInformation.k])^K;
- relax.getRateInformation.omega_info[relax.getRateInformation.k][1] = Eval ((model_description["weights"])[relax.getRateInformation.k]);
- }
- return relax.getRateInformation.omega_info % 0;
-}
-
-//------------------------------------------------------------------------------
-function relax.getRateDistributionSRV (model_description) {
- relax.getRateInformation.rate_classes = Abs (model_description["srv_rates"]);
- relax.getRateInformation.omega_info = {relax.getRateInformation.rate_classes,2};
-
- for (relax.getRateInformation.k = 0; relax.getRateInformation.k < relax.getRateInformation.rate_classes; relax.getRateInformation.k += 1) {
- relax.getRateInformation.omega_info[relax.getRateInformation.k][0] = Eval ((model_description["srv_rates"])[relax.getRateInformation.k]);
- relax.getRateInformation.omega_info[relax.getRateInformation.k][1] = Eval ((model_description["srv_weights"])[relax.getRateInformation.k]);
- }
- return relax.getRateInformation.omega_info % 0;
-}
-
-//------------------------------------------------------------------------------
-function relax.define.null._aux (key, value) {
- //fprintf (stdout, "`tree_id`.`key`.`relax.define.null.local` := `relax.define.null.global`\n");
- ExecuteCommands ("`tree_id`.`key`.`relax.define.null.local` := `relax.define.null.global`");
-}
-
-//------------------------------------------------------------------------------
-function relax.define.null (tree_id, general_model, partition) {
-
- parameters.RemoveConstraint ((general_model["omegas"])[2]);
-
- relax.define.null.local = ((general_model["parameters"])["local"])[term.RELAX.k];
-
- relax.define.null.index = 0;
-
- utility.ForEachPair (partition, "_part_name_","_branch_list_", '
- relax.define.null.index += 1;
- if (_part_name_ == RELAX.reference_set || _part_name_ == RELAX.unclassified) {
- relax.define.null.global = "1";
- _branch_list_["relax.define.null._aux"][""];
- } else {
- relax.define.null.global = RELAX.relaxation_parameter + "_" + relax.define.null.index;
- (((RELAX.model_assignment[ RELAX.model_mapping[_part_name_]])["parameters"])["global"])["Relaxation parameter for *" + _part_name_ + "*"] = relax.define.null.global;
- parameters.SetConstraint (relax.define.null.global, 1, terms.global);
- _branch_list_["relax.define.null._aux"][""];
- }
- ');
-
- return general_model;
-}
-
-
-
-//------------------------------------------------------------------------------
-function relax.add_scores (desc, mles) {
- if (Type (mles) == "Matrix") {
- desc ["LogL"] = mles[1][0];
- desc ["parameters"] = mles[1][1] + 9 + 5 * (RELAX.settings["Estimate GTR"] != 1);
- }
-}
-
-//------------------------------------------------------------------------------
-function relax.runLRT (ha, h0, df) {
- return {"LR" : 2*(ha-h0),
- "p" : 1-CChi2 (2*(ha-h0),df)};
-}
-
-
-//------------------------------------------------------------------------------
-function relax._aux.define_bsrel_model (id,Q,weights,freqs) {
- rate_count = Abs (Q);
- components = {};
- length = "";
- Eval ("`id`_eqf = freqs");
-
- for (k = 0; k < rate_count; k+=1) {
- components[k] = "Exp(" + Q[k] + ")*" + weights[k];
- ExecuteCommands ("Model busted._aux.define_bsrel_model_bl = (" + Q[k] + ",`id`_eqf,0)");
- GetString (blExp, busted._aux.define_bsrel_model_bl, -1);
- if (k) {
- length += "+";
- }
- length += "(`blExp`)*" + weights[k];
- }
-
- ExecuteCommands ("Model `id` =(\"" + Join("+",components) + "\",`id`_eqf,EXPLICIT_FORM_MATRIX_EXPONENTIAL);");
-
- return length;
-}
-
-
-
-//------------------------------------------------------------------------------
-
-function relax.io.evaluator (key, value) {
- fprintf (stdout, key, "->", Eval (value), "\n");
-}
-
-//------------------------------------------------------------------------------
-function relax._aux.define_srv_category (rates, weights, cat_name) {
-
- rate_count = Abs (weights);
- expected_value = "(" + weights[0] + ")*(" + rates[0] + ")";
-
- cat_freqs = "`cat_name`.weights";
- cat_rates = "`cat_name`.rates";
- cat_norm = "`cat_name`.normalizer";
- norm_weights = {};
- ExecuteCommands ("`cat_freqs` = {rate_count,1}");
- ExecuteCommands ("`cat_rates` = {rate_count,1}");
- ExecuteCommands ("`cat_freqs`[0] := " + weights[0]);
-
- for (k = 1; k < rate_count; k+=1) {
- expected_value += "+(" + weights[k] + ")*(" + rates[k] + ")";
- ExecuteCommands ("`cat_freqs`[k] := " + weights[k]);
- }
-
- ExecuteCommands ("global `cat_norm` := `expected_value`");
-
- for (k = 0; k < rate_count; k+=1) {
- ExecuteCommands ("`cat_rates`[k] := (" + rates[k] + ")/`cat_norm`");
- norm_weights + ("(" + rates[k] + ")/`cat_norm`");
- }
- //category c = ("+resp+", categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25);\n\n"
- ExecuteCommands ("category `cat_name` = (rate_count, `cat_freqs`, MEAN, , `cat_rates`, 0,1e25)");
-
- return norm_weights;
-}
-
-//------------------------------------------------------------------------------
-function relax.io.define_a_bsrel_model (id, frequencies, mean_omega, do_local, srv_branch_site) {
-
- model_parameters = {"parameters" : {"global" : {}, "local" : {}}, "omegas" : {}, "weights" : {}, "f" : {}, "Q" : {}, "length" : ""};
-
- model_parameters["omegas"] = parameters.GenerateSequentialNames ("`id`.omega", 3, "_");
- model_parameters["f"] = parameters.GenerateSequentialNames ("`id`.aux_freq", 2, "_");
-
- parameters.DeclareGlobal (model_parameters["f"], None);
- parameters.SetRange (model_parameters["f"], terms.range01);
-
- parameters.DeclareGlobal (model_parameters["omegas"], None);
-
- model_parameters["weights"] = parameters.helper.stick_breaking (model_parameters["f"], {{0.7,0.25,0.05}});
-
- relax.init_omegas = {{0.05,0.25,4}};
- relax.init_omegas = relax.init_omegas * (1/ parameters.Mean (relax.init_omegas, model_parameters["weights"], Abs (model_parameters["omegas"])));
-
- parameters.SetRange ((model_parameters["omegas"])[0], terms.range_almost_01);
- parameters.SetValue ((model_parameters["omegas"])[0], relax.init_omegas[0]);
-
- parameters.SetRange ((model_parameters["omegas"])[1], terms.range_almost_01);
- parameters.SetValue ((model_parameters["omegas"])[1], relax.init_omegas[1]);
-
- parameters.SetRange ((model_parameters["omegas"])[2], terms.range_gte1);
- parameters.SetValue ((model_parameters["omegas"])[2], relax.init_omegas[2]);
-
- model_parameters["srv_rates"] = parameters.GenerateSequentialNames ("relax.srv_rate", 3, "_");
- model_parameters["srv_f"] = parameters.GenerateSequentialNames ("relax.aux_freq_srv", 2, "_");
-
- parameters.DeclareGlobal (model_parameters["srv_f"], None);
- parameters.SetRange (model_parameters["srv_f"], terms.range01);
-
- parameters.DeclareGlobal (model_parameters["srv_rates"], None);
- model_parameters["srv_weights"] = parameters.helper.stick_breaking (model_parameters["srv_f"], {{0.7,0.25,0.05}});
-
- relax.init_srv = {{0.05,1,4}};
-
- parameters.SetRange ((model_parameters["srv_rates"])[0], terms.range01);
- parameters.SetValue ((model_parameters["srv_rates"])[0], relax.init_omegas[0]);
-
- parameters.SetConstraint ((model_parameters["srv_rates"])[1], 1,"");
-
- parameters.SetRange ((model_parameters["srv_rates"])[2], terms.range_gte1);
- parameters.SetValue ((model_parameters["srv_rates"])[2], relax.init_omegas[2]);
-
- relax._srv_cat_name = "relax.srv.cat";
- relax.scaled_weights = relax._aux.define_srv_category (model_parameters["srv_rates"], model_parameters["srv_weights"], relax._srv_cat_name);
-
-
-
- if (do_local) {
- parameters.SetConstraint ((model_parameters["omegas"])[2], " 1/" + ((model_parameters["omegas"])[0]) + "/" + ((model_parameters["omegas"])[1]) , "");
- relax.io.define_a_bsrel_model_r = {"LB" : 1e-4, "UB" : 1};
- parameters.SetRange ((model_parameters["omegas"])[1], relax.io.define_a_bsrel_model_r);
- }
-
-
- local_k :< 50;
-
- relax.nuc = {4,3};
- for (k = 0; k < 4; k+=1) {
- for (k2 = 0; k2 < 3; k2 += 1) {
- relax.nuc [k][k2] = ((frequencies["bases"])[models.DNA.alphabet[k]])[k2];
- }
- }
-
- model_parameters["id"] = "`id`_model";
-
- if (srv_branch_site) {
- composite_weights = {};
- rc_counter = 1;
- for (k = 1; k <= 3; k +=1) {
- ((model_parameters["parameters"])["global"])[relax.define_omega_term (k)] = (model_parameters["omegas"])[k-1];
- ((model_parameters["parameters"])["global"])[relax.define_srv_term (k)] = (model_parameters["srv_rates"])[k-1];
- if (k < 3) {
- ((model_parameters["parameters"])["global"])[relax.define_weight_term (k)] = (model_parameters["f"])[k-1];
- ((model_parameters["parameters"])["global"])[relax.define_srv_weight_term (k)] = (model_parameters["srv_f"])[k-1];
- }
-
- for (srv = 1; srv <= 3; srv += 1) {
- model_parameters["Q"] + ("Q_`id`_" + rc_counter);
- if (do_local) {
- PopulateModelMatrix ((model_parameters["Q"])[rc_counter-1], relax.nuc, "t*" + relax.scaled_weights [srv-1] , "Min (1000," + (model_parameters["omegas"])[k-1] +"^local_k)", "");
- } else {
- PopulateModelMatrix ((model_parameters["Q"])[rc_counter-1], relax.nuc, "t*" + relax.scaled_weights [srv-1] , (model_parameters["omegas"])[k-1], "");
- }
- rc_counter += 1;
- composite_weights + ("(" + (model_parameters["weights"])[k-1] + ")*(" + (model_parameters["srv_weights"])[srv-1] + ")");
- }
- }
- model_parameters["length-expression"] = relax._aux.define_bsrel_model ("`id`_model", model_parameters["Q"], composite_weights, frequencies["codons"]);
- } else {
- for (k = 1; k <= 3; k +=1) {
- ((model_parameters["parameters"])["global"])[relax.define_omega_term (k)] = (model_parameters["omegas"])[k-1];
- ((model_parameters["parameters"])["global"])[relax.define_srv_term (k)] = (model_parameters["srv_rates"])[k-1];
- if (k < 3) {
- ((model_parameters["parameters"])["global"])[relax.define_weight_term (k)] = (model_parameters["f"])[k-1];
- ((model_parameters["parameters"])["global"])[relax.define_srv_weight_term (k)] = (model_parameters["srv_f"])[k-1];
- }
-
- model_parameters["Q"] + ("Q_`id`_" + k);
- if (do_local) {
- PopulateModelMatrix ((model_parameters["Q"])[k-1], relax.nuc, "t*`relax._srv_cat_name`", "Min (1000," + (model_parameters["omegas"])[k-1] +"^local_k)", "");
- } else {
- PopulateModelMatrix ((model_parameters["Q"])[k-1], relax.nuc, "t*`relax._srv_cat_name`", (model_parameters["omegas"])[k-1], "");
- }
- }
- model_parameters["length-expression"] = relax._aux.define_bsrel_model ("`id`_model", model_parameters["Q"], model_parameters["weights"], frequencies["codons"]);
- }
-
-
- ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("A","C")] = "AC";
- ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("A","T")] = "AT";
- ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("C","G")] = "CG";
- ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("C","T")] = "CT";
- ((model_parameters["parameters"])["global"])[terms.nucleotideRate ("G","T")] = "GT";
-
- model_parameters["set-branch-length"] = "relax.aux.copy_branch_length";
-
- model_parameters["length parameter"] = "t";
- ((model_parameters["parameters"])[terms.local])[terms.timeParameter ()] = "t";
- if (do_local) {
- ((model_parameters["parameters"])[terms.local])[term.RELAX.k] = "local_k";
- }
- model_parameters["get-branch-length"] = "relax.aux.retrieve_branch_length";
-
-
- return model_parameters;
-}
-
-//------------------------------------------------------------------------------
-
-
-function relax.aux.retrieve_branch_length (model, tree, node) {
- relax.aux.retrieve_branch_length.locals = Columns ((model_parameters["parameters"])[terms.local]);
- for (relax.aux.retrieve_branch_length.i = 0; relax.aux.retrieve_branch_length.i < Columns (relax.aux.retrieve_branch_length.locals); relax.aux.retrieve_branch_length.i += 1) {
- Eval (relax.aux.retrieve_branch_length.locals[relax.aux.retrieve_branch_length.i] + " = `tree`.`node`." + relax.aux.retrieve_branch_length.locals[relax.aux.retrieve_branch_length.i]);
- }
- return Eval (model["length-expression"]);
-}
-
-//------------------------------------------------------------------------------
-
-function relax.aux.copy_branch_length (model, value, parameter) {
-
- relax.aux.copy_branch_length.t = ((model["parameters"])["local"])[terms.timeParameter ()];
- relax.aux.copy_branch_length.k = ((model["parameters"])["local"])[term.RELAX.k];
-
- if (Abs (RELAX.proportional_constraint)) {
- Eval ("`parameter`.`relax.aux.copy_branch_length.t` := `RELAX.proportional_constraint` * " + value);
- } else {
- Eval ("`parameter`.`relax.aux.copy_branch_length.t` = " + value);
- }
-
- if (Type (relax.aux.copy_branch_length.k) == "String") {
- Eval ("`parameter`.`relax.aux.copy_branch_length.k` = 1");
- }
-}
-
-function relax._aux.io.countBranchSets (key, value) {
- available_models[value] += 1;
- return None;
-}
-
-function relax._aux.io.mapBranchSets (key, value) {
- /*if (Abs (value) == 0) {
- value = RELAX.unclassified;
- }*/
- (relax.tree ["model_map"])[key] = branch_set[value];
- (return_set[branch_set[value]])[key] = 1;
- return None;
-}
-
-function relax.handle.unlabeled (label) {
- if (label == "Unlabeled branches") {
- return "";
- } else {
- return label;
- }
-}
-
-
-
-//------------------------------------------------------------------------------------------------------------------------
-
-function relax.taskTimerStart (index) {
- RELAX.timers[index] = Time(1);
-}
-
-function relax.taskTimerStop (index) {
- RELAX.timers[index] = Time(1) - RELAX.timers[index];
-}
-
-//------------------------------------------------------------------------------------------------------------------------
-
-function relax.getIC (logl,params,samples) {
- return -2*logl + 2*samples/(samples-params-1)*params;
-}
-
-//------------------------------------------------------------------------------------------------------------------------
-
-lfunction relax.define_omega_term (cat) {
- return "Omega for category " + cat;
-}
-
-lfunction relax.define_weight_term (cat) {
- return "Omega frequency parameter " + cat;
-}
-
-lfunction relax.define_srv_term (cat) {
- return "Synonymous rate variation for category " + cat;
-}
-
-lfunction relax.define_srv_weight_term (cat) {
- return "Synonymous frequency parameter " + cat;
-}
-
-//------------------------------------------------------------------------------------------------------------------------
-
-function relax.json_spool (json, file) {
- USE_JSON_FOR_MATRIX = 1;
- fprintf (file, CLEAR_FILE, json);
- USE_JSON_FOR_MATRIX = 0;
-
-}
-
-//------------------------------------------------------------------------------------------------------------------------
-
-function relax.json_store_lf (json, name, ll, df, time, branch_length, branch_annotation, omega_distribution, K, annotation_tag) {
-
- (json["fits"])[name] = {"log-likelihood" : ll,
- "parameters" : df,
- "AIC-c" : relax.getIC (ll, df, relax.sample_size),
- "runtime" : time,
- "branch-lengths" : branch_length,
- "branch-annotations" : branch_annotation,
- "rate-distributions" : omega_distribution,
- "K" : K,
- "annotation-tag" : annotation_tag,
- "display-order" : Abs (json["fits"])};
-
- }
=====================================
res/TemplateBatchFiles/SelectionAnalyses/RELAX.bf
=====================================
@@ -1,5 +1,4 @@
-
-RequireVersion("2.3.3");
+RequireVersion ("2.5.21");
LoadFunctionLibrary("libv3/all-terms.bf"); // must be loaded before CF3x4
@@ -25,14 +24,14 @@ LoadFunctionLibrary("modules/selection_lib.ibf");
LoadFunctionLibrary("libv3/models/codon/BS_REL.bf");
LoadFunctionLibrary("libv3/convenience/math.bf");
LoadFunctionLibrary("libv3/tasks/mpi.bf");
+LoadFunctionLibrary("libv3/models/rate_variation.bf");
utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE);
utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE);
utility.SetEnvVariable ("USE_MEMORY_SAVING_DATA_STRUCTURES", 1e8);
-utility.SetEnvVariable ("LF_SMOOTHING_SCALER",0.05);
-
+u
/*------------------------------------------------------------------------------*/
@@ -41,7 +40,8 @@ relax.analysis_description = {
Version 2.1 adds a check for stability in K estimates to try to mitigate convergence problems.
Version 3 provides support for >2 branch sets.
Version 3.1 adds LHC + Nedler-Mead initial fit phase and keyword support.
- Version 3.1.1 adds some bug fixes for better convergence.",
+ Version 3.1.1 adds some bug fixes for better convergence.
+ Version 4.0 adds support for synonymous rate variation.",
terms.io.version : "3.1.1",
terms.io.reference : "RELAX: Detecting Relaxed Selection in a Phylogenetic Framework (2015). Mol Biol Evol 32 (3): 820-832",
terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group",
@@ -58,10 +58,16 @@ relax.json = { terms.json.analysis: relax.analysis_description,
relax.relaxation_parameter = "relax.K";
relax.rate_classes = 3;
+relax.synonymous_rate_classes = 3;
+relax.SRV = "Synonymous site-to-site rates";
+relax.json.srv_posteriors = "Synonymous site-posteriors";
+relax.json.srv_viterbi = "Viterbi synonymous rate path";
+
relax.initial_ranges = {};
relax.initial_grid.N = 500;
+
relax.MG94_name = terms.json.mg94xrev_sep_rates;
relax.general_descriptive_name = "General descriptive";
relax.alternative_name = "RELAX alternative";
@@ -135,6 +141,7 @@ KeywordArgument ("reference", "Branches to use as the reference set", null, "Ch
+
io.DisplayAnalysisBanner ( relax.analysis_description );
selection.io.startTimer (relax.json [terms.json.timers], "Overall", 0);
@@ -148,6 +155,11 @@ namespace relax {
KeywordArgument ("output", "Write the resulting JSON to this file (default is to save to the same path as the alignment file + 'RELAX.json')", relax.codon_data_info [terms.json.json]);
relax.codon_data_info [terms.json.json] = io.PromptUserForFilePath ("Save the resulting JSON file to");
+KeywordArgument ("grid-size", "The number of points in the initial distributional guess for likelihood fitting", 250);
+relax.initial_grid.N = io.PromptUser ("The number of points in the initial distributional guess for likelihood fitting", 250, 1, 10000, TRUE);
+
+KeywordArgument ("starting-points", "The number of initial random guesses to seed rate values optimization", 1);
+relax.N.initial_guesses = io.PromptUser ("The number of initial random guesses to 'seed' rate values optimization", 1, 1, relax.initial_grid.N$10, TRUE);
io.ReportProgressMessageMD('RELAX', 'selector', 'Branch sets for RELAX analysis');
@@ -211,6 +223,43 @@ relax.model_set = io.SelectAnOption ({
{"All", "[Default] Fit descriptive models AND run the relax test (4 models)"}
{"Minimal", "Run only the RELAX test (2 models)"}
}, "RELAX analysis type");
+
+KeywordArgument ("srv", "Include synonymous rate variation", "No");
+
+relax.do_srv = io.SelectAnOption (
+ {
+ "Yes" : "Allow synonymous substitution rates to vary from site to site (but not from branch to branch)",
+ "Branch-site" : "Allow synonymous substitution rates to vary using general branch site models",
+ "HMM" : "Allow synonymous substitution rates to vary from site to site (but not from branch to branch), and use a hidden markov model for spatial correlation on synonymous rates",
+ "No" : "Synonymous substitution rates are constant across sites. This is the 'classic' behavior, i.e. the original published test"
+ },
+ "Synonymous rate variation"
+);
+
+if (relax.do_srv == "Branch-site") {
+ relax.do_bs_srv = TRUE;
+ relax.do_srv = TRUE;
+ (relax.json[terms.json.analysis])[terms.settings] = "Branch-site synonymous rate variation";
+} else {
+ if (relax.do_srv == "Yes") {
+ relax.do_bs_srv = FALSE;
+ relax.do_srv = TRUE;
+ relax.do_srv_hmm = FALSE;
+ } else {
+ if (relax.do_srv == "HMM") {
+ relax.do_srv = TRUE;
+ relax.do_bs_srv = FALSE;
+ relax.do_srv_hmm = TRUE;
+ } else {
+ relax.do_srv = FALSE;
+ }
+ }
+}
+
+if (relax.do_srv) {
+ KeywordArgument ("syn-rates", "The number alpha rate classes to include in the model [1-10, default 3]", relax.synonymous_rate_classes);
+ relax.synonymous_rate_classes = io.PromptUser ("The number omega rate classes to include in the model", relax.synonymous_rate_classes, 1, 10, TRUE);
+}
selection.io.startTimer (relax.json [terms.json.timers], "Preliminary model fitting", 1);
@@ -245,8 +294,6 @@ utility.ForEach (relax.global_dnds, "_value_", '
');
-
-
//Store MG94 to JSON
selection.io.json_store_lf_withEFV (relax.json,
relax.MG94_name,
@@ -264,10 +311,41 @@ utility.ForEachPair (relax.filter_specification, "_key_", "_value_",
+relax.model_generator = "models.codon.BS_REL.ModelDescription";
+
+if (relax.do_srv) {
+ if (relax.do_bs_srv) {
+ relax.model_generator = "relax.model.with.GDD";
+ relax.model_generator = "models.codon.BS_REL_SRV.ModelDescription";
+ relax.rate_class_arguments = {{relax.synonymous_rate_classes__,relax.rate_classes__}};
+ } else {
+
+ lfunction relax.model.with.GDD (type, code, rates) {
+ def = models.codon.BS_REL.ModelDescription (type, code, rates);
+ options = {utility.getGlobalValue("terms.rate_variation.bins") : utility.getGlobalValue("relax.synonymous_rate_classes"),
+ utility.getGlobalValue("terms._namespace") : "relax._shared_srv"};
+
+ if (utility.getGlobalValue("relax.do_srv_hmm")) {
+ options [utility.getGlobalValue ("terms.rate_variation.HMM")] = "equal";
+ }
+ def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.GDD.factory (options);
+ return def;
+ }
+
+ relax.model_generator = "relax.model.with.GDD";
+ relax.rate_class_arguments = relax.rate_classes;
+ }
+} else {
+ relax.rate_class_arguments = relax.rate_classes;
+}
selection.io.stopTimer (relax.json [terms.json.timers], "Preliminary model fitting");
parameters.DeclareGlobalWithRanges (relax.relaxation_parameter, 1, 0, 50);
+PARAMETER_GROUPING = {};
+PARAMETER_GROUPING + relax.distribution["rates"];
+PARAMETER_GROUPING + relax.distribution["weights"];
+
if (relax.model_set == "All") { // run all the models
relax.ge_guess = None;
@@ -307,9 +385,6 @@ if (relax.model_set == "All") { // run all the models
io.ReportProgressMessageMD ("RELAX", "gd", "Fitting the general descriptive (separate k per branch) model");
selection.io.startTimer (relax.json [terms.json.timers], "General descriptive model fitting", 2);
- PARAMETER_GROUPING = {};
- PARAMETER_GROUPING + relax.distribution["rates"];
- PARAMETER_GROUPING + relax.distribution["weights"];
@@ -329,6 +404,7 @@ if (relax.model_set == "All") { // run all the models
parameters.DeclareGlobalWithRanges ("relax.bl.scaler", 1, 0, 1000);
+
relax.grid_search.results = estimators.FitLF (relax.filter_names, relax.trees,{ "0" : {"DEFAULT" : "relax.ge"}},
relax.final_partitioned_mg_results,
@@ -346,7 +422,8 @@ if (relax.model_set == "All") { // run all the models
"OPTIMIZATION_PRECISION" : relax.nm.precision
} ,
- terms.search_grid : relax.initial_grid
+ terms.search_grid : relax.initial_grid,
+ terms.search_restarts : relax.N.initial_guesses
}
);
@@ -406,7 +483,6 @@ if (relax.model_set == "All") { // run all the models
relax.ge_guess[relax.i][0] = relax.inferred_ge_distribution[relax.i + relax.shift][0];
relax.ge_guess[relax.i][1] = relax.inferred_ge_distribution[relax.i + relax.shift][1];
}
- //console.log (relax.ge_guess);
continue;
}
}
@@ -476,6 +552,7 @@ if (relax.numbers_of_tested_groups == 2) {
relax.model_to_relax_parameter ["relax.test"] = relax.relaxation_parameter;
relax.model_namespaces = {{"relax.reference","relax.test"}};
relax.model_to_group_name = {"relax.reference" : relax.reference_branches_name, "relax.test" : relax.test_branches_name};
+ relax.model_for_srv = "relax.test";
} else {
@@ -489,24 +566,24 @@ if (relax.numbers_of_tested_groups == 2) {
relax.model_namespaces[_value_] = relax.k;
");
relax.reference_model_namespace = relax.model_namespaces[0];
- //console.log (relax.model_namespaces);
- //console.log (relax.reference_model_namespace);
+ relax.model_for_srv = relax.model_namespaces[0];
}
+
+
//console.log (relax.model_namespaces);
//explicit loop to avoid re-entrance errors
relax.relax_parameter_terms = {};
-
-relax.bound_weights = {};
+relax.bound_weights = {};
for (relax.k = 0; relax.k < relax.numbers_of_tested_groups; relax.k += 1) {
relax.model_nmsp = relax.model_namespaces[relax.k ];
- relax.model_object_map[relax.model_nmsp] = model.generic.DefineMixtureModel("models.codon.BS_REL.ModelDescription",
+ relax.model_object_map[relax.model_nmsp] = model.generic.DefineMixtureModel(relax.model_generator,
relax.model_nmsp, {
"0": parameters.Quote(terms.global),
"1": relax.codon_data_info[terms.code],
- "2": parameters.Quote (relax.rate_classes) // the number of rate classes
+ "2": parameters.Quote (relax.rate_class_arguments) // the number of rate classes
},
relax.filter_names,
None);
@@ -525,13 +602,35 @@ for (relax.k = 0; relax.k < relax.numbers_of_tested_groups; relax.k += 1) {
} else {
model.generic.AddGlobal (relax.model_object_map[relax.model_nmsp], relax.model_to_relax_parameter [relax.model_nmsp], terms.relax.k);
}
- relax.bound_weights * models.BindGlobalParameters ({"0" : relax.model_object_map[relax.reference_model_namespace], "1" : relax.model_object_map[relax.model_nmsp]}, terms.mixture.mixture_aux_weight + ".+");
+ relax.bound_weights * models.BindGlobalParameters ({"0" : relax.model_object_map[relax.reference_model_namespace], "1" : relax.model_object_map[relax.model_nmsp]}, terms.mixture.mixture_aux_weight + " for category");
models.BindGlobalParameters ({"0" : relax.model_object_map[relax.reference_model_namespace], "1" : relax.model_object_map[relax.model_nmsp]}, terms.nucleotideRate("[ACGT]","[ACGT]"));
for (relax.i = 1; relax.i <= relax.rate_classes; relax.i += 1) {
parameters.SetConstraint (model.generic.GetGlobalParameter (relax.model_object_map[relax.model_nmsp] , terms.AddCategory (terms.parameters.omega_ratio,relax.i)),
model.generic.GetGlobalParameter (relax.model_object_map[relax.reference_model_namespace] , terms.AddCategory (terms.parameters.omega_ratio,relax.i)) + "^" + relax.model_to_relax_parameter [relax.model_nmsp],
terms.global);
}
+ if (relax.do_bs_srv) {
+ for (description,var_id; in; ((relax.model_object_map[relax.reference_model_namespace])[terms.parameters])[terms.global]) {
+
+ if (None != regexp.Find (description, terms.parameters.synonymous_rate + " for category")) {
+ var_id_2 = utility.GetByKey (((relax.model_object_map[relax.model_nmsp])[terms.parameters])[terms.global], description, "String");
+ if (None != var_id_2) {
+ parameters.SetConstraint (var_id, var_id_2, terms.global);
+ }
+ }
+
+ if (None != regexp.Find (description, terms.mixture.mixture_aux_weight + " for category SRV ")) {
+ var_id_2 = utility.GetByKey (((relax.model_object_map[relax.model_nmsp])[terms.parameters])[terms.global], description, "String");
+ if (None != var_id_2) {
+ if (parameters.IsIndependent (var_id_2)) {
+ parameters.SetConstraint (var_id, var_id_2, terms.global);
+ }
+ }
+ }
+ }
+
+
+ }
}
}
@@ -541,6 +640,43 @@ relax.model_map = utility.Map (relax.model_to_group_name, "_groupid_", 'utility.
relax.do_lhc = FALSE;
+if (relax.do_srv) {
+
+ if (relax.do_bs_srv) {
+ relax.srv_rate_regex = "Mean scaler variable for";
+ relax.srv_weight_regex = terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), "SRV [0-9]+");
+
+ relax.srv_rate_reporting = regexp.PartitionByRegularExpressions(utility.Keys (((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global]),
+ {"0" : "^" + utility.getGlobalValue('terms.parameters.synonymous_rate'),
+ "1" : relax.srv_weight_regex});
+
+
+ relax.srv_rate_reporting = {
+ 'rates' : utility.UniqueValues (utility.Map ( relax.srv_rate_reporting ["^" + utility.getGlobalValue('terms.parameters.synonymous_rate') ] , "_value_", '(((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global])[_value_]')),
+ 'weights' : utility.UniqueValues (utility.Map (relax.srv_rate_reporting [relax.srv_weight_regex ] , "_value_", '(((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global])[_value_]'))
+ };
+
+
+
+ } else {
+ relax.srv_rate_regex = "GDD rate category [0-9]+";
+ relax.srv_weight_regex = "Mixture auxiliary weight for GDD category [0-9]+";
+ }
+ relax.srv_distribution = regexp.PartitionByRegularExpressions(utility.Keys (((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global]), {"0" : relax.srv_rate_regex, "1" : relax.srv_weight_regex});
+
+
+ relax.srv_distribution = {
+ 'rates' : utility.UniqueValues (utility.Map (relax.srv_distribution [relax.srv_rate_regex ] , "_value_", '(((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global])[_value_]')),
+ 'weights' : utility.UniqueValues (utility.Map (relax.srv_distribution [relax.srv_weight_regex ] , "_value_", '(((relax.model_object_map[relax.model_for_srv])[terms.parameters])[terms.global])[_value_]'))
+ };
+
+
+ PARAMETER_GROUPING + relax.srv_distribution["rates"];
+ PARAMETER_GROUPING + relax.srv_distribution["weights"];
+
+ relax.init_grid_setup (relax.srv_distribution);
+
+}
if (relax.model_set != "All") {
/*
@@ -555,11 +691,11 @@ if (relax.model_set != "All") {
}
if (relax.has_unclassified) {
- relax.unclassified.bsrel_model = model.generic.DefineMixtureModel("models.codon.BS_REL.ModelDescription",
+ relax.unclassified.bsrel_model = model.generic.DefineMixtureModel(relax.model_generator,
"relax.unclassified", {
"0": parameters.Quote(terms.global),
"1": relax.codon_data_info[terms.code],
- "2": parameters.Quote (relax.rate_classes) // the number of rate classes
+ "2": parameters.Quote (relax.rate_class_arguments) // the number of rate classes
},
relax.filter_names,
None);
@@ -580,51 +716,115 @@ if (relax.has_unclassified) {
models.BindGlobalParameters ({"0" : relax.model_object_map[relax.reference_model_namespace], "1" : relax.unclassified.bsrel_model}, terms.nucleotideRate("[ACGT]","[ACGT]"));
}
+
+
//------------------------------------
function relax.report_multi_class_rates (model_fit, distributions) {
- io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **" + relax.model_to_group_name[relax.reference_model_namespace] + "** (reference) branches");
- relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.model_object_map[relax.reference_model_namespace])) % 0;
- selection.io.report_dnds (relax.inferred_distribution_ref);
+ io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **" + relax.model_to_group_name[relax.reference_model_namespace] + "** (reference) branches");
+ relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.model_object_map[relax.reference_model_namespace])) % 0;
+ selection.io.report_dnds (relax.inferred_distribution_ref);
- relax.distribution_for_json = {};
- relax.distribution_for_json [relax.model_to_group_name[relax.reference_model_namespace]] = utility.Map (utility.Range (relax.rate_classes, 0, 1),
- "_index_",
- "{terms.json.omega_ratio : relax.inferred_distribution_ref [_index_][0],
- terms.json.proportion : relax.inferred_distribution_ref [_index_][1]}");
+ relax.distribution_for_json = {};
+ relax.distribution_for_json [relax.model_to_group_name[relax.reference_model_namespace]] = utility.Map (utility.Range (relax.rate_classes, 0, 1),
+ "_index_",
+ "{terms.json.omega_ratio : relax.inferred_distribution_ref [_index_][0],
+ terms.json.proportion : relax.inferred_distribution_ref [_index_][1]}");
- if (None != model_fit || distributions) {
-
- if (None != model_fit) {
- relax.fitted.K = {};
- relax.fitted.K [relax.model_to_group_name[relax.reference_model_namespace]] = 1;
- }
-
+ if (None != model_fit || distributions) {
- for (relax.k = 1; relax.k < relax.numbers_of_tested_groups; relax.k += 1) {
- relax.model_nmsp = relax.model_namespaces[relax.k ];
- relax.branch_set = relax.model_to_group_name[relax.model_nmsp];
- if (None != model_fit) {
- if (relax.k > 1) {
- relax.fitted.K_group = estimators.GetGlobalMLE (model_fit,relax.relax_parameter_terms[relax.k]);
- } else {
- relax.fitted.K_group = estimators.GetGlobalMLE (model_fit,terms.relax.k);
- }
- relax.fitted.K [relax.model_to_group_name[relax.model_nmsp]] = relax.fitted.K_group ;
-
- io.ReportProgressMessageMD("RELAX", "alt", "* Relaxation/intensification parameter (K) for branch set `relax.branch_set` = " + Format(relax.fitted.K_group ,8,2));
- }
- io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **`relax.branch_set`** branches");
- relax.inferred_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.model_object_map[relax.model_nmsp])) % 0;
- selection.io.report_dnds (relax.inferred_distribution);
- relax.distribution_for_json [relax.model_to_group_name[relax.model_nmsp]] = utility.Map (utility.Range (relax.rate_classes, 0, 1),
- "_index_",
- "{terms.json.omega_ratio : relax.inferred_distribution [_index_][0],
- terms.json.proportion : relax.inferred_distribution [_index_][1]}");
- }
- }
+ if (None != model_fit) {
+ relax.fitted.K = {};
+ relax.fitted.K [relax.model_to_group_name[relax.reference_model_namespace]] = 1;
+ }
+
+
+ for (relax.k = 1; relax.k < relax.numbers_of_tested_groups; relax.k += 1) {
+ relax.model_nmsp = relax.model_namespaces[relax.k ];
+ relax.branch_set = relax.model_to_group_name[relax.model_nmsp];
+ if (None != model_fit) {
+ if (relax.k > 1) {
+ relax.fitted.K_group = estimators.GetGlobalMLE (model_fit,relax.relax_parameter_terms[relax.k]);
+ } else {
+ relax.fitted.K_group = estimators.GetGlobalMLE (model_fit,terms.relax.k);
+ }
+ relax.fitted.K [relax.model_to_group_name[relax.model_nmsp]] = relax.fitted.K_group ;
+
+ io.ReportProgressMessageMD("RELAX", "alt", "* Relaxation/intensification parameter (K) for branch set `relax.branch_set` = " + Format(relax.fitted.K_group ,8,2));
+ }
+ io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **`relax.branch_set`** branches");
+ relax.inferred_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.model_object_map[relax.model_nmsp])) % 0;
+ selection.io.report_dnds (relax.inferred_distribution);
+ relax.distribution_for_json [relax.model_to_group_name[relax.model_nmsp]] = utility.Map (utility.Range (relax.rate_classes, 0, 1),
+ "_index_",
+ "{terms.json.omega_ratio : relax.inferred_distribution [_index_][0],
+ terms.json.proportion : relax.inferred_distribution [_index_][1]}");
+ }
+
+
+
+ }
+}
+
+//------------------------------------
+
+function relax._report_srv (relax_model_fit, is_null) {
+
+ if (relax.do_srv_hmm) {
+ relax.hmm_lambda = selection.io.extract_global_MLE (relax_model_fit, terms.rate_variation.hmm_lambda);
+ if (is_null) {
+ io.ReportProgressMessageMD("relax", "main", "* HMM switching rate = " + Format (relax.hmm_lambda, 8, 3));
+ relax.distribution_for_json [terms.rate_variation.hmm_lambda] = relax.hmm_lambda;
+ } else {
+ relax.hmm_lambda.CI = parameters.GetProfileCI(((relax_model_fit[terms.global])[terms.rate_variation.hmm_lambda])[terms.id],
+ relax_model_fit[terms.likelihood_function], 0.95);
+
+ io.ReportProgressMessageMD ("relax", "main", "* HMM switching rate = " + Format (relax.hmm_lambda,8,4) +
+ " (95% profile CI " + Format ((relax.hmm_lambda.CI )[terms.lower_bound],8,4) + "-" + Format ((relax.hmm_lambda.CI )[terms.upper_bound],8,4) + ")");
+
+ relax.distribution_for_json [terms.rate_variation.hmm_lambda] = relax.hmm_lambda.CI;
+
+ }
+
+ }
+
+ if (relax.do_srv) {
+ if (relax.do_bs_srv) {
+ relax.srv_info = parameters.GetStickBreakingDistribution ( relax.srv_rate_reporting) % 0;
+ } else {
+ relax.srv_info = Transpose((rate_variation.extract_category_information((relax.model_object_map[relax.model_for_srv])))["VALUEINDEXORDER"][0])%0;
+ }
+ io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution for site-to-site **synonymous** rate variation was inferred");
+ selection.io.report_distribution (relax.srv_info);
+
+ relax.distribution_for_json [relax.SRV] = (utility.Map (utility.Range (relax.synonymous_rate_classes, 0, 1),
+ "_index_",
+ "{terms.json.rate :relax.srv_info [_index_][0],
+ terms.json.proportion : relax.srv_info [_index_][1]}"));
+
+ if (is_null == FALSE) {
+ ConstructCategoryMatrix (relax.cmx, ^(relax_model_fit[terms.likelihood_function]));
+ ConstructCategoryMatrix (relax.cmx_weights, ^(relax_model_fit[terms.likelihood_function]), WEIGHTS);
+ relax.cmx_weighted = (relax.cmx_weights[-1][0]) $ relax.cmx; // taking the 1st column fixes a bug with multiple partitions
+ relax.column_weights = {1, Rows (relax.cmx_weights)}["1"] * relax.cmx_weighted;
+ relax.column_weights = relax.column_weights["1/_MATRIX_ELEMENT_VALUE_"];
+ (relax.json [relax.json.srv_posteriors]) = relax.cmx_weighted $ relax.column_weights;
+ }
+
+ if (relax.do_srv_hmm && is_null == FALSE ) {
+ ConstructCategoryMatrix (relax.cmx_viterbi, ^(relax_model_fit[terms.likelihood_function]), SHORT);
+ (relax.json [relax.json.srv_viterbi]) = relax.cmx_viterbi;
+ io.ReportProgressMessageMD("relax", "main", "* The following switch points for synonymous rates were inferred");
+ selection.io.report_viterbi_path (relax.cmx_viterbi);
+
+ }
+ }
+
+ if (relax.do_srv_hmm) {
+ relax.distribution_for_json [terms.rate_variation.hmm_lambda] = relax.hmm_lambda;
+ }
}
//------------------------------------
@@ -636,6 +836,7 @@ function relax.FitMainTestPair () {
relax.nm.precision = -0.00025*relax.final_partitioned_mg_results[terms.fit.log_likelihood];
parameters.DeclareGlobalWithRanges ("relax.bl.scaler", 1, 0, 1000);
+
relax.general_descriptive.fit = estimators.FitLF (relax.filter_names, relax.trees,{ "0" : relax.model_map},
relax.general_descriptive.fit,
relax.model_object_map,
@@ -651,16 +852,15 @@ function relax.FitMainTestPair () {
"OPTIMIZATION_PRECISION" : relax.nm.precision
} ,
- terms.search_grid : relax.initial_grid
+ terms.search_grid : relax.initial_grid,
+ terms.search_restarts : relax.N.initial_guesses
}
);
}
-
-
+
relax.alternative_model.fit = estimators.FitLF (relax.filter_names, relax.trees, { "0" : relax.model_map}, relax.general_descriptive.fit, relax.model_object_map, {terms.run_options.retain_lf_object: TRUE});
- //io.SpoolLF(relax.alternative_model.fit["LF"], "/tmp/relax", "alt");
io.ReportProgressMessageMD("RELAX", "alt", "* " + selection.io.report_fit (relax.alternative_model.fit, 9, relax.codon_data_info[terms.data.sample_size]));
@@ -753,7 +953,11 @@ function relax.FitMainTestPair () {
relax.report_multi_class_rates (relax.alternative_model.fit, TRUE);
}
-
+ relax._report_srv (relax.alternative_model.fit, FALSE);
+
+ KeywordArgument ("save-fit", "Save RELAX alternative model fit to this file (default is not to save)", "/dev/null");
+ io.SpoolLFToPath(relax.alternative_model.fit[terms.likelihood_function], io.PromptUserForFilePath ("Save RELAX model fit to this file ['/dev/null' to skip]"));
+
selection.io.json_store_lf (relax.json,
relax.alternative_name,
relax.alternative_model.fit[terms.fit.log_likelihood],
@@ -774,6 +978,8 @@ function relax.FitMainTestPair () {
selection.io.startTimer (relax.json [terms.json.timers], "RELAX null model fitting", 4);
io.ReportProgressMessageMD ("RELAX", "null", "Fitting the null (K := 1) model");
+
+
for (relax.k = 1; relax.k < relax.numbers_of_tested_groups; relax.k += 1) {
relax.model_nmsp = relax.model_namespaces[relax.k ];
@@ -785,7 +991,6 @@ function relax.FitMainTestPair () {
}
-
relax.null_model.fit = estimators.FitExistingLF (relax.alternative_model.fit[terms.likelihood_function], relax.model_object_map);
io.ReportProgressMessageMD ("RELAX", "null", "* " + selection.io.report_fit (relax.null_model.fit, 9, relax.codon_data_info[terms.data.sample_size]));
relax.LRT = math.DoLRT (relax.null_model.fit[terms.fit.log_likelihood], relax.alternative_model.fit[terms.fit.log_likelihood], relax.numbers_of_tested_groups-1);
@@ -804,6 +1009,8 @@ function relax.FitMainTestPair () {
} else {
relax.report_multi_class_rates (None, FALSE);
}
+
+ relax._report_srv (relax.null_model.fit, TRUE);
selection.io.json_store_lf (relax.json,
relax.null_name ,
=====================================
res/TemplateBatchFiles/SelectionAnalyses/modules/shared-load-file.bf
=====================================
@@ -297,7 +297,6 @@ function doGTR (prefix) {
trees,
gtr_results);
-
KeywordArgument ("kill-zero-lengths", "Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise)", "Yes");
kill0 = io.SelectAnOption (
@@ -390,7 +389,7 @@ function doGTR (prefix) {
/**
- * @name doPartitionMG
+ * @name doPartitionedMG
* Can only be used after including shared-load-file
* @return
*/
@@ -414,6 +413,8 @@ function doPartitionedMG (prefix, keep_lf) {
utility.getGlobalValue("terms.run_options.partitioned_omega"): selected_branches,
utility.getGlobalValue("terms.run_options.retain_lf_object"): keep_lf
}, gtr_results);
+
+
io.ReportProgressMessageMD("`prefix`", "codon-fit", "* " + selection.io.report_fit (partitioned_mg_results, 0, (^"`prefix`.codon_data_info")[utility.getGlobalValue ("terms.data.sample_size")]));
=====================================
res/TemplateBatchFiles/libv3/all-terms.bf
=====================================
@@ -375,7 +375,7 @@ namespace terms{
constrain_branch_length = "constrain-branch-length";
branch_length_string = "branch-length-string";
branch_length_string_conditional = "branch-length-string-conditional";
- branch_length_string_expr = = "branch-length-string-expression";
+ branch_length_string_expr = "branch-length-string-expression";
branch_length_scaler = "branch length scaler";
post_definition = "post-definition";
length = "length";
=====================================
res/TemplateBatchFiles/libv3/models/codon/BS_REL.bf
=====================================
@@ -164,6 +164,11 @@ lfunction models.codon.BS_REL.ExtractMixtureDistribution (bs_rel) {
lfunction models.codon.BS_REL.ExtractMixtureDistributionFromFit (bs_rel, fit) {
count = bs_rel [utility.getGlobalValue ("terms.model.components")];
+
+ if (Type (count) == "Matrix") {
+ count = count[1];
+ }
+
rates = {count, 1};
weights = {count-1, 1};
@@ -177,6 +182,34 @@ lfunction models.codon.BS_REL.ExtractMixtureDistributionFromFit (bs_rel, fit) {
return {utility.getGlobalValue ("terms.parameters.rates") : rates, utility.getGlobalValue ("terms.parameters.weights") : weights };
}
+/**
+ * @name models.codon.BS_REL.ExtractSynMixtureDistributionFromFit
+ * @param {Dict} bs_rel
+ * @returns {Dict} mixture distribution parameters
+ */
+
+lfunction models.codon.BS_REL.ExtractSynMixtureDistributionFromFit (bs_rel, fit) {
+ count = bs_rel [utility.getGlobalValue ("terms.model.components")];
+
+ if (Type (count) == "Matrix") {
+ count = count[0];
+ } else {
+ assert (0, "models.codon.BS_REL.ExtractSynMixtureDistributionFromFit called for a model without a bivariate component" );
+ }
+
+ rates = {count, 1};
+ weights = {count-1, 1};
+
+ for (i = 1; i <= count; i+=1) {
+ rates [i-1] = estimators.GetGlobalMLE (fit, terms.AddCategory (utility.getGlobalValue ("terms.parameters.synonymous_rate"), i));
+ if (i < count ) {
+ weights [i-1] = estimators.GetGlobalMLE (fit, terms.AddCategory (utility.getGlobalValue ("terms.mixture.mixture_aux_weight"), "SRV " + i));
+ }
+ }
+
+ return {utility.getGlobalValue ("terms.parameters.rates") : rates, utility.getGlobalValue ("terms.parameters.weights") : weights };
+}
+
/**
* @name models.codon.BS_REL.BS_REL._DefineQ
* @param {Dict} mg_rev
@@ -359,6 +392,7 @@ function models.codon.BS_REL.post_definition(model) {
*/
function models.codon.BS_REL.get_branch_length(model, tree, node) {
+
parameters.SetLocalModelParameters (model, tree, node);
parameters.SetCategoryVariables (model);
bl = utility.GetEnvVariable ("BRANCH_LENGTH_STENCIL");
@@ -369,7 +403,7 @@ function models.codon.BS_REL.get_branch_length(model, tree, node) {
bl = Eval ((model [utility.getGlobalValue("terms.model.branch_length_string_conditional")])[bl]);
} else {
if (model / terms.model.branch_length_string_expr) {
- bl = Eval (model [terms.model.branch_length_string_exp]);
+ bl = Eval (model [terms.model.branch_length_string_expr]);
//console.log (bl);
//console.log (Eval (model [utility.getGlobalValue("terms.model.branch_length_string")]));
} else {
=====================================
res/TemplateBatchFiles/libv3/models/codon/MG_REV.bf
=====================================
@@ -119,6 +119,7 @@ function models.codon.MG_REV._DefineQ(mg_rev, namespace) {
*/
function models.codon.MG_REV.set_branch_length(model, value, parameter) {
+
if (model[terms.model.type] == terms.global) {
// boost numeric branch length values by a factor of 3
@@ -162,17 +163,18 @@ function models.codon.MG_REV.set_branch_length(model, value, parameter) {
}
//fprintf (stdout, models.codon.MG_REV.set_branch_length.alpha.p, "\n");
+
+
parameters.SetConstraint(models.codon.MG_REV.set_branch_length.beta.p, "(" + 3 * value[terms.branch_length] + " - " + models.codon.MG_REV.set_branch_length.alpha.p + "*(" + model[terms.parameters.synonymous_rate] + "))/(" + model[terms.parameters.nonsynonymous_rate] + ")", "");
return 1;
} else {
- assert(0, "TBA in models.codon.MG_REV.set_branch_length");
+ assert(0, "TBD in models.codon.MG_REV.set_branch_length");
}
} else {
models.codon.MG_REV.set_branch_length.lp = 0;
-
-
+
if (parameters.IsIndependent(models.codon.MG_REV.set_branch_length.alpha.p)) {
Eval(models.codon.MG_REV.set_branch_length.alpha.p + ":=(" + value[terms.model.branch_length_scaler] + ")*" + value[terms.branch_length]);
models.codon.MG_REV.set_branch_length.lp += 1;
=====================================
res/TemplateBatchFiles/libv3/models/model_functions.bf
=====================================
@@ -558,6 +558,7 @@ lfunction models.BindGlobalParameters (models, filter) {
candidate_set = utility.UniqueValues(utility.Filter (utility.Keys (reference_set), "_key_",
"regexp.Find (_key_,`&filter`)"
));
+
constraints_set = {};
@@ -569,7 +570,7 @@ lfunction models.BindGlobalParameters (models, filter) {
if (parameters.IsIndependent (`¶meter_set`[_p_])) {
parameters.SetConstraint (`¶meter_set`[_p_], `&reference_set`[_p_], '');
`&constraints_set` [`¶meter_set`[_p_]] = `&reference_set`[_p_];
- }
+ }
}"
);
}
=====================================
res/TemplateBatchFiles/libv3/models/parameters.bf
=====================================
@@ -202,10 +202,12 @@ function parameters.SetLocalValue(tree, branch, id, value) {
function parameters.SetValues(set) {
if (Type (set) == "AssociativeList") {
- utility.ForEachPair (set, "_key_", "_value_",
- '
- parameters.SetValue (_value_[terms.id], _value_[terms.fit.MLE]);
- ');
+ for (_key_, _value_; in; set) {
+ if (parameters.IsIndependent (_value_[terms.id])) {
+ parameters.SetValue (_value_[terms.id], _value_[terms.fit.MLE]);
+ }
+ }
+
}
}
@@ -231,10 +233,8 @@ lfunction parameters.ConstrainMeanOfSet (set, weights, mean, namespace) {
return;
}
}
-
-
- scaler_variables = {};
+ scaler_variables = {};
utility.ForEach (unscaled, "_name_", 'parameters.DeclareGlobal (_name_, null)');
global_scaler = namespace + ".scaler_variable";
parameters.SetConstraint (global_scaler, Join ("+", constraint), "global");
@@ -492,18 +492,21 @@ function parameters.SetRange(id, ranges) {
}
+
/**
* Check if parameter is independent
* @name parameters.IsIndependent
* @param parameter - id of parameter to check
* @returns {Bool} TRUE if independent, FALSE otherwise
*/
-lfunction parameters.IsIndependent(parameter) {
-
- //console.log(parameter);
+function parameters.IsIndependent(parameter) {
- GetString(info, ^ parameter, -1);
-
+ SetParameter(HBL_EXECUTION_ERROR_HANDLING,1,0);
+ GetString(info, ^parameter, -1);
+ SetParameter(HBL_EXECUTION_ERROR_HANDLING,0,0);
+ if (Abs (LAST_HBL_EXECUTION_ERROR)) { // variable does not exist
+ return TRUE;
+ }
if (Type(info) == "AssociativeList") {
return (utility.CheckKey(info, "Local", "Matrix") && utility.CheckKey(info, "Global", "Matrix")) == FALSE;
}
@@ -639,7 +642,6 @@ lfunction parameters.SetStickBreakingDistribution (parameters, values) {
left_over = 1;
for (i = 0; i < rate_count; i += 1) {
-
parameters.SetValue ((parameters["rates"])[i], values[i][0]);
if (i < rate_count - 1) {
break_here = values[i][1] / left_over;
=====================================
res/TemplateBatchFiles/libv3/models/rate_variation.bf
=====================================
@@ -38,7 +38,7 @@ function rate_variation.modifier_everything (q_ij, from, to, namespace, cat_name
lfunction rate_variation_define_HMM (categories, namespace, globals, definition) {
lambda = parameters.ApplyNameSpace ("hmm_lambda", namespace);
- parameters.DeclareGlobalWithRanges (lambda, .1/categories, 0, 1/(categories-1));
+ parameters.DeclareGlobalWithRanges (lambda, .1/categories, 1e-6, 1/(categories));
globals[utility.getGlobalValue("terms.rate_variation.hmm_lambda")] = lambda;
hmm_T = parameters.ApplyNameSpace ("hmm_transition_matrix", namespace);
=====================================
res/TemplateBatchFiles/libv3/tasks/estimators.bf
=====================================
@@ -123,6 +123,7 @@ lfunction estimators.GetGlobalMLE_RegExp(results, re) {
* @returns nothing
*/
function estimators.copyGlobals2(key2, value2) {
+
if (Type((estimators.ExtractMLEs.results[terms.global])[key2]) == "AssociativeList") {
key2 = "[`key`] `key2`";
// this parameter has already been defined, need to prefix with model name
@@ -163,26 +164,38 @@ function estimators.CopyFrequencies(model_name, model_decription) {
function estimators.SetGlobals2(key2, value) {
+
if (Type(estimators.ApplyExistingEstimates.set_globals[key2]) == "AssociativeList") {
key3 = "[`key`] `key2`";
} else {
key3 = key2;
}
+
+
+
+
estimators.ApplyExistingEstimates.set_globals[key3] = {
terms.id: key3,
terms.fit.MLE: value
};
- __init_value = (initial_values[terms.global])[key2];
+ __init_value = (initial_values[terms.global])[key3];
+
+ if (Type(__init_value) != "AssociativeList") {
+ __init_value = (initial_values[terms.global])[key2];
+ }
+
+
if (Type(__init_value) == "AssociativeList") {
if (__init_value[terms.fix]) {
estimators.ApplyExistingEstimates.df_correction += parameters.IsIndependent(value);
ExecuteCommands("`value` := " + __init_value[terms.fit.MLE]);
+ //_did_set [value] = 1;
} else {
if (parameters.IsIndependent (value)) {
- //fprintf (stdout, "Setting `value` to " + __init_value[terms.fit.MLE] + "\n", parameters.IsIndependent (value), "\n");
ExecuteCommands("`value` = " + __init_value[terms.fit.MLE]);
+ //_did_set [value] = 1;
} else {
messages.log (value + " was already constrained in estimators.SetGlobals2");
}
@@ -197,7 +210,14 @@ function estimators.SetGlobals2(key2, value) {
* @returns nothing
*/
function estimators.SetGlobals(key, value) {
+ /*_did_set = {};
+ for (i,v; in; ((value[terms.parameters])[terms.global])) {
+ _did_set [v] = 0;
+ }*/
+
((value[terms.parameters])[terms.global])["estimators.SetGlobals2"][""];
+
+ //console.log (_did_set);
}
/**
@@ -437,6 +457,7 @@ lfunction estimators.TraverseLocalParameters (likelihood_function_id, model_desc
* @returns number of constrained parameters;
*/
function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions, initial_values, _application_type, keep_track_of_proportional_scalers) {
+
estimators.ApplyExistingEstimatesToTree.constraint_count = 0;
@@ -467,13 +488,15 @@ function estimators.ApplyExistingEstimatesToTree (_tree_name, model_descriptions
}
}
}
-
+
estimators.ApplyExistingEstimatesToTree.constraint_count += estimators.applyBranchLength(_tree_name, _branch_name, model_descriptions[estimators.ApplyExistingEstimatesToTree.map[_branch_name]], _set_branch_length_to);
} else {
if (Type(_existing_estimate) != "Unknown") {
warning.log ("Incorrect type for the initial values object of for branch '" + _branch_name + "' : " + _existing_estimate);
}
}
+ } else {
+ //warning.log ("No initial branch length object of for branch '" + _branch_name);
}
}
@@ -532,7 +555,7 @@ function estimators.ApplyExistingEstimates(likelihood_function_id, model_descrip
if (Type (branch_length_conditions) == "AssociativeList") {
if (Abs(branch_length_conditions) > estimators.ApplyExistingEstimates.i) {
_application_type = branch_length_conditions[estimators.ApplyExistingEstimates.i];
- }
+ }
}
estimators.ApplyExistingEstimates.df_correction += estimators.ApplyExistingEstimatesToTree ((estimators.ApplyExistingEstimates.lfInfo[terms.fit.trees])[estimators.ApplyExistingEstimates.i],
@@ -710,15 +733,15 @@ lfunction estimators.FitLF(data_filter, tree, model_map, initial_values, model_o
can_do_restarts = null;
- /*
+
- Export (lfe, likelihoodFunction);
- console.log (lfe);
- GetString (lfe, likelihoodFunction, -1);
- console.log (lfe);
- fprintf ("/Users/sergei/Desktop/busted.txt", CLEAR_FILE, lfe);
- utility.ToggleEnvVariable("VERBOSITY_LEVEL", 1);
- */
+ //Export (lfe, likelihoodFunction);
+ //console.log (lfe);
+ //GetString (lfe, likelihoodFunction, -1);
+ //console.log (lfe);
+ //fprintf ("/Users/sergei/Desktop/busted.txt", CLEAR_FILE, lfe);
+ //utility.ToggleEnvVariable("VERBOSITY_LEVEL", 10);
+
@@ -890,7 +913,6 @@ lfunction estimators.FitSingleModel_Ext (data_filter, tree, model_template, init
Optimize (mles, likelihoodFunction);
}
-
if (Type(initial_values) == "AssociativeList") {
utility.ToggleEnvVariable("USE_LAST_RESULTS", None);
}
@@ -1094,17 +1116,14 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op
component_tree = lf_components[2 * i + 1];
ClearConstraints( * component_tree);
branch_map = (option[utility.getGlobalValue("terms.run_options.partitioned_omega")])[i];
-
-
+
component_branches = BranchName( * component_tree, -1);
for (j = 0; j < Columns(component_branches) - 1; j += 1) {
/**
-1 in the upper bound because we don't want to count the root node
*/
- node_name = (component_branches[j]);
-
-
+ node_name = (component_branches[j]);
ExecuteCommands(apply_constraint);
}
}
@@ -1113,6 +1132,7 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op
LikelihoodFunction likelihoodFunction = (lf_components);
GetString (params, likelihoodFunction,-1);
+
utility.ToggleEnvVariable ("PARAMETER_GROUPING", {"0" : params["Global Independent"]});
if (utility.Has (option,utility.getGlobalValue("terms.run_options.apply_user_constraints"),"String")) {
@@ -1130,6 +1150,7 @@ lfunction estimators.FitCodonModel(codon_data, tree, generator, genetic_code, op
parameters.SetRange (_value_,terms.range_clamp_locals);
');*/
+ //io.SpoolLF ("likelihoodFunction", "/tmp/hyphy-spool", "cfit");
//Export (lfe, likelihoodFunction);
//console.log (lfe);
=====================================
res/TemplateBatchFiles/libv3/tasks/trees.bf
=====================================
@@ -85,7 +85,11 @@ LoadFunctionLibrary("distances.bf");
lfunction trees.GetTreeString._sanitize(string) {
if (utility.GetEnvVariable("_DO_TREE_REBALANCE_")) {
+ console.log ("BEFORE REBALANCE\n");
+ console.log (string);
string = RerootTree(string, 0);
+ console.log ("AFTER REBALANCE\n");
+ console.log (string);
}
if (utility.GetEnvVariable("_KEEP_I_LABELS_")) {
=====================================
src/core/dataset_filter.cpp
=====================================
@@ -1919,7 +1919,8 @@ void _DataSetFilter::internalToStr (FILE * file ,_StringBuffer * string_buffe
kFormatCharacterList = 8,
kFormatFASTASequential = 9,
kFormatFASTAInterleaved = 10,
- kFormatPAML = 11
+ kFormatPAML = 11,
+ kFormatSTOCKHOLM = 12
} datafile_format = kFormatMEGASequential;
auto trim_to_10 = [] (const _String& seq_name) -> _String const {
@@ -2242,6 +2243,19 @@ void _DataSetFilter::internalToStr (FILE * file ,_StringBuffer * string_buffe
}
break;
}
+
+ case kFormatSTOCKHOLM: {
+ write_here << "# STOCKHOLM 1.0" << kStringFileWrapperNewLine;
+ for (unsigned long i = 0UL; i< sequence_count; i++) {
+ write_here << GetSequenceName(i) << " ";
+ for (unsigned long j = 1UL; j<site_count; j++) {
+ write_here << (*theData)(theOriginalOrder(j),theNodeMap(i),1);
+ }
+ write_here << kStringFileWrapperNewLine;
+ }
+ write_here << "//" << kStringFileWrapperNewLine;
+ break;
+ }
default: { // hash-mark sequential
char seqDelimiter = (outputFormat==kFormatFASTASequential)?'>':'#';
=====================================
src/core/global_things.cpp
=====================================
@@ -120,7 +120,7 @@ namespace hy_global {
kErrorStringDatasetRefIndexError ("Dataset index reference out of range"),
kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"),
kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"),
- kHyPhyVersion = _String ("2.5.20"),
+ kHyPhyVersion = _String ("2.5.21"),
kNoneToken = "None",
kNullToken = "null",
=====================================
src/core/include/variable.h
=====================================
@@ -90,7 +90,11 @@ public:
return varValue; // get the value of the variable
}
void SetFormula (_Formula&); // set the variable to a new formula
-
+
+ void ClearValue (void) {
+ if (varValue) { delete (varValue); varValue = nil;}
+ }
+
const _Formula * get_constraint (void) const {
return varFormula;
}
@@ -139,7 +143,8 @@ public:
virtual bool CheckFForDependence (_AVLList const&, bool = false);
virtual bool HasBeenInitialized (void) const {return !(varFlags & HY_VARIABLE_NOTSET);}
virtual void MarkModified (void) {varFlags = varFlags | HY_VARIABLE_CHANGED;}
-
+ virtual void ClearModified (void) {if (varFlags & HY_VARIABLE_CHANGED) varFlags -= HY_VARIABLE_CHANGED;}
+
_String const ContextFreeName (void) const;
_StringBuffer& ContextFreeName (_StringBuffer&) const;
_String const ParentObjectName (void) const;
=====================================
src/core/likefunc.cpp
=====================================
@@ -3897,8 +3897,10 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
kMethodCoordinate ("coordinate-wise"),
kMethodNedlerMead ("nedler-mead"),
kMethodHybrid ("hybrid"),
- kMethodGradientDescent ("gradient-descent");
-
+ kMethodGradientDescent ("gradient-descent"),
+ kInitialGridMaximum ("LF_INITIAL_GRID_MAXIMUM"),
+ kInitialGridMaximumValue ("LF_INITIAL_GRID_MAXIMUM_VALUE");
+
// optimization setting to produce a detailed log of optimization runs
auto get_optimization_setting = [options] (const _String& arg, const hyFloat def_value) -> hyFloat {
@@ -4137,7 +4139,14 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
GetInitialValues();
}
-
+ if (keepStartingPoint) {
+ indexInd.Each ([this] (long v, unsigned long i) -> void {
+ _Variable *iv = GetIthIndependentVar (i);
+ if (iv->HasBeenInitialized()) {
+ iv->ClearModified();
+ }
+ });
+ }
if (!keepStartingPoint) {
hyFloat global_starting_point = get_optimization_setting (kGlobalStartingPoint, 0.1);
@@ -4277,6 +4286,12 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
//_String const * grid_point = key_value.get_key();
hyFloat this_point = set_and_compute((_AssociativeList*)key_value.get_object());
+ if (verbosity_level > 100) {
+ char buffer[512];
+ snprintf (buffer, 512, "[GRID INITIAL VALUE CALCULATION] At point %s, LF = %15.12g\n", key_value.get_key()->get_str(), this_point);
+ BufferToConsole(buffer);
+ }
+
//printf ("%s %g\n", grid_point->get_str(), this_point);
if (this_point > max_value) {
@@ -4286,7 +4301,14 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
}
if (best_values) {
- set_and_compute (best_values);
+ maxSoFar = set_and_compute (best_values);
+ hy_env::EnvVariableSet(kInitialGridMaximum, new _Constant (maxSoFar), false);
+ hy_env::EnvVariableSet(kInitialGridMaximumValue, best_values, true);
+ if (verbosity_level > 100) {
+ char buffer[512];
+ snprintf (buffer, 512, "[GRID INITIAL VALUE CALCULATION: BEST VALUE] LF = %15.12g\n", maxSoFar);
+ BufferToConsole(buffer);
+ }
}
}
@@ -6193,10 +6215,10 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi
/*if (currentValue < 0.) {
printf ("Negative value stashed %15.12lg\n", currentValue);
}
- hyFloat check_vv = GetIthIndependent(index);
- if (verbosity_level > 100) {
- printf ("_LikelihoodFunction::ComputeGradient %d\t%s\t%20.18g\t%e\t%e\t%e\t%e\t%15.12g\t \n", index, GetIthIndependentName(index)->get_str(), funcValue, testStep, currentValue, check_vv, check_vv-currentValue, DerivativeCorrection (index, currentValue));
- }*/
+ hyFloat check_vv = GetIthIndependent(index);*/
+ if (verbosity_level > 150) {
+ printf ("_LikelihoodFunction::ComputeGradient %ld\t%s\t%20.18g\t%e\t%e\t%e\t%e\t%15.12g\n", index, GetIthIndependentName(index)->get_str(), funcValue, testStep, currentValue, check_vv, check_vv-currentValue, DerivativeCorrection (index, currentValue));
+ }
SetIthIndependent(index,currentValue);
} else {
gradient[index]= 0.;
@@ -6331,6 +6353,8 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
initial_value = maxSoFar,
currentPrecision = localOnly?step_precision:.01;
+ //printf ("\n\n_LikelihoodFunction::ConjugateGradientDescent ==> %d (%lg)\n", usedCachedResults, maxSoFar);
+
if (check_value != -INFINITY) {
if (!CheckEqual(check_value, maxSoFar)) {
_String errorStr = _String("Internal error in _LikelihoodFunction::ConjugateGradientDescent. The function evaluated at current parameter values [") & maxSoFar & "] does not match the last recorded LF maximum [" & check_value & "]";
@@ -7114,6 +7138,7 @@ hyFloat _LikelihoodFunction::replaceAPoint (_Matrix&m, long row, _Matrix&p,
}
+
//_______________________________________________________________________________________
hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned long iterations, unsigned long max_evaluations) {
@@ -7155,7 +7180,13 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l
auto set_point_and_compute = [this,&lf_evaluations] (_Matrix& v) -> hyFloat {
lf_evaluations++;
SetAllIndependent (&v);
- return -Compute();
+ hyFloat ll = -Compute();
+ /*
+ if (fabs (ll - 22.3935843505) < 0.001 ) {
+ _TerminateAndDump(_String ("Checkpoint ") & likeFuncEvalCallCount);
+ }
+ */
+ return ll;
};
auto resort_values = [&zero] (_Matrix& v) -> void {
@@ -7163,12 +7194,34 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l
v = *sorted;
DeleteObject (sorted);
};
+
+ auto echo_values = [=] (_Matrix &v, hyFloat lf)-> void {
+ char buffer [512];
+ snprintf(buffer, 512, "\n\t%15.12g (LF)", lf);
+ BufferToConsole(buffer);
+ for (long i = 0; i < N; i++) {
+ snprintf(buffer, 512, "\n\t%15.12g (%s)", GetIthIndependent(i,false), GetIthIndependentName(i)->get_str());
+ BufferToConsole(buffer);
+ }
+ };
+
+ auto echo_simplex_values = [=] (_Matrix &function_values)-> void {
+ char buffer [512];
+ for (long i = 0; i <= N; i++) {
+ snprintf(buffer, 512, "\n\tSimple point %ld => %15.12g (map to point %ld)", i, function_values (i,0), (long)function_values(i,1));
+ BufferToConsole(buffer);
+ }
+ };
GetAllIndependent(simplex[0]);
// current FEASIBLE point
function_values.Store (0,0, set_point_and_compute (simplex[0]));
+ if (verbosity_level > 100) {
+ BufferToConsole ("\n[SIMPLEX SETUP]");
+ echo_values(simplex[0], function_values(0,0));
+ }
for (long i = 0L; i < N; i++) {
simplex[i+1] = simplex[0];
@@ -7177,10 +7230,11 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l
lb = GetIthIndependentBound(i, true),
ub = GetIthIndependentBound(i, false);
-#ifdef NEDLER_MEAD_DEBUG
- printf ("\nCoordinate %ld, value %g, range [%g, %g]\n", i, ith_coordinate, lb, ub);
-#endif
-
+ if (verbosity_level > 100) {
+ char buffer[512];
+ snprintf (buffer, 512, "\n\tVariable %s, coordinate %ld, value %g, range [%g, %g]", GetIthIndependentName(i)->get_str(), i, ith_coordinate, lb, ub);
+ BufferToConsole(buffer);
+ }
if (CheckEqual(ith_coordinate, lb)) {
simplex[i+1][i] += MIN(DEFAULT_STEP_OFF_BOUND, (ub-lb)*0.5);
}
@@ -7194,19 +7248,20 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l
}
}
-#ifdef NEDLER_MEAD_DEBUG
- ObjectToConsole(&simplex[i+1]);
-#endif
- //SetAllIndependent(&simplex[i+1]);
function_values.Store(i+1,0, set_point_and_compute (simplex[i+1]));
function_values.Store(i+1,1, i+1);
+ if (verbosity_level > 100) {
+ echo_values(simplex[i+1], function_values(i+1,0));
+ }
+
}
resort_values (function_values);
-#ifdef NEDLER_MEAD_DEBUG
- printf ("\n**********\nSimplex iteration in\n");
- ObjectToConsole(&function_values);
-#endif
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nInitial simplex");
+ echo_simplex_values (function_values);
+ }
+
for (long it_count = 0L; it_count <= iterations && lf_evaluations <= max_evaluations; it_count ++) {
/** compute the centroid of all EXCEPT the WORST point **/
@@ -7232,46 +7287,55 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l
new_point += t;
hyFloat trial_value = set_point_and_compute (new_point);
-#ifdef NEDLER_MEAD_DEBUG
- printf ("\tTrial point value %g\n", trial_value);
-#endif
+ if (verbosity_level > 100) {
+ char buffer[512];
+ snprintf (buffer, 512, "\n>Simplex iteration %ld", (it_count+1));
+ BufferToConsole(buffer);
+ echo_values(new_point, trial_value);
+ }
+
bool do_shrink = false;
if (trial_value >= function_values(0,0) && trial_value < function_values (N-1,0)) {
// accept the reflection
-#ifdef NEDLER_MEAD_DEBUG
- printf ("### ACCEPT REFLECTION replace %d\n", (long)function_values(N,1));
-#endif
+ if (verbosity_level > 100) {
+ char buffer[512];
+ snprintf (buffer, 512, "\nACCEPT REFLECTION replace point %ld\n", (long)function_values(N,1));
+ BufferToConsole (buffer);
+ }
replace_point (new_point, trial_value, (long)function_values(N,1), N);
} else {
if (trial_value < function_values(0,0)) { // try expansion
// x[e] = ¯x +β(x[r] − ¯x)
-#ifdef NEDLER_MEAD_DEBUG
- printf ("--- Trying expansion\n");
-#endif
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nTrying expansion");
+ }
_Matrix expansion (centroid),
t (new_point);
t -= centroid;
t *= simplex_beta;
expansion += t;
hyFloat expansion_value = set_point_and_compute (expansion);
+ if (verbosity_level > 100) {
+ echo_values(expansion, expansion_value);
+ }
if (expansion_value < trial_value) {
-#ifdef NEDLER_MEAD_DEBUG
- printf ("### ACCEPT EXPANSION\n");
-#endif
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nACCEPT EXPANSION");
+ }
replace_point (expansion, expansion_value, (long)function_values(N,1), N);
} else {
-#ifdef NEDLER_MEAD_DEBUG
- printf ("### REJECT EXPANSION, ACCEPT REFLECTION\n");
-#endif
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nREJECT EXPANSION, ACCEPT REFLECTION");
+ }
replace_point (new_point, trial_value, (long)function_values(N,1), N);
}
} else {
//if (trial_value >= function_values (N,0)) {
long worst_index = function_values (N,1);
-#ifdef NEDLER_MEAD_DEBUG
- printf ("--- Trying CONTRACTION\n");
-#endif
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nTRYING CONTRACTION");
+ }
// x[ic] = x ̄− γ (x ̄−x[n+1])
// x[oc] = ¯x ̄+γ alpha (x ̄−x[n+1])
_Matrix cv (centroid),
@@ -7281,65 +7345,67 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l
t *= simplex_gamma;
bool inside = false;
if (trial_value >= function_values (N,0)) { // INSIDE
-#ifdef NEDLER_MEAD_DEBUG
- printf ("--- INSIDE contraction\n");
-#endif
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nINSIDE contraction");
+ }
inside = true;
cv -= t;
} else {
-#ifdef NEDLER_MEAD_DEBUG
- printf ("--- OUTSIDE contraction\n");
-#endif
- t *= simplex_alpha;
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nOUTSIDE contraction");
+ }
+ t *= simplex_alpha;
cv += t;
}
hyFloat contraction_value = set_point_and_compute (cv);
-
-#ifdef NEDLER_MEAD_DEBUG
- printf ("--Contraction value = %g (relected = %g, worst = %g)\n", contraction_value, trial_value, function_values (N,0));
-#endif
+ if (verbosity_level > 100) {
+ echo_values(cv, contraction_value);
+ char buffer[512];
+ snprintf (buffer, 512, "\n\tContraction value = %g (relected = %g, worst = %g)", contraction_value, trial_value, function_values (N,0));
+ }
if ((inside && contraction_value < function_values (N,0)) || (!inside && contraction_value < trial_value)) {
-#ifdef NEDLER_MEAD_DEBUG
- printf ("### ACCEPT contraction\n");
-#endif
+
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nACCEPT contraction");
+ }
replace_point (cv, contraction_value, worst_index, N);
} else {
-#ifdef NEDLER_MEAD_DEBUG
- printf ("### REJECT contraction\n");
-#endif
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nREJECT contraction");
+ }
do_shrink = true;
}
}
}
if (do_shrink) {
long best_idx = function_values(0,1);
-#ifdef NEDLER_MEAD_DEBUG
- printf ("### PEFRORM SHRINKAGE\n");
- BufferToConsole("\nBest point\n");
- ObjectToConsole(&simplex[best_idx]);
-#endif
+ if (verbosity_level > 100) {
+ BufferToConsole ("\n\tPerform Shrinkage");
+ echo_values(simplex[best_idx], function_values(0,0));
+ }
+
//x[i] = x[1] + δ(x[i] − x[1]).
for (long i = 1L; i <= N; i++) {
long idx = function_values(i,1);
_Matrix t (simplex[idx]);
t -= simplex[best_idx];
t *= simplex_delta;
-#ifdef NEDLER_MEAD_DEBUG
- BufferToConsole("\nOld point\n");
- ObjectToConsole(&simplex[idx]);
-#endif
simplex[idx] = simplex[best_idx];
simplex[idx] += t;
-#ifdef NEDLER_MEAD_DEBUG
- BufferToConsole("\nMoved point\n");
- ObjectToConsole(&simplex[idx]);
-#endif
+
function_values.Store (i, 0, set_point_and_compute(simplex[idx]));
+ if (verbosity_level > 100) {
+ echo_values(simplex[idx], function_values(i,0));
+ }
}
}
resort_values (function_values);
-
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nCurrent simplex");
+ echo_simplex_values (function_values);
+ NLToConsole();
+ }
if (verbosity_level==1) {
UpdateOptimizationStatus (-function_values (0,0),progress_tracker.PushValue (-function_values (0,0)),1,true,progressFileString);
} else {
@@ -7370,11 +7436,31 @@ hyFloat _LikelihoodFunction::SimplexMethod (hyFloat& gPrecision, unsigned l
#endif
}
-
+ /*
+ BufferToConsole("\n$$$$$\nFunction values\n");
+ ObjectToConsole(&function_values);
+ BufferToConsole("\nBest point\n");
+ ObjectToConsole(&simplex[(long)function_values(0,1)]);
+ */
+
long best_idx = function_values(0,1);
+ /*for (long i = 0; i < simplex[best_idx].GetHDim(); i++) {
+ fprintf (stdout, "%d %s %g\n", i, GetIthIndependentName(i)->get_str(), simplex[best_idx][i]);
+ }*/
+
N_inv = -set_point_and_compute (simplex[best_idx]);
+ if (verbosity_level > 100) {
+ BufferToConsole ("\nFinal simplex point");
+ echo_values (simplex[best_idx], N_inv);
+ NLToConsole();
+ }
+
+ if (fabs(N_inv + function_values(0,0)) > .1) {
+ _TerminateAndDump(_String("Internal error in _SimplexMethod; final point log-likelihood does not match the best recorded log-L: ") & N_inv & " vs " & -function_values(0,0));
+ }
+
delete [] simplex;
return N_inv;
}
@@ -7932,22 +8018,47 @@ void _LikelihoodFunction::UpdateIndependent (long index, bool purgeResults, _
if (f!=-1) {
theList->Delete(f);
(*secondList)<<index;
-
+
+
_SimpleList newVars;
- {
- _AVLList al (&newVars);
- LocateVar(index)->ScanForVariables(al,true);
- al.ReorderList();
- }
+ _AVLList al (&newVars);
+ LocateVar(index)->ScanForVariables(al,true);
+ al.ReorderList();
+
+
+ /* SLKP 20201027 TODO
+ This involves linear searches of potentially large arrays; especially if there are many variables in `newVars`
+ */
+ newVars.Each([theList] (long idx, unsigned long ) -> void {
+ if (LocateVar(idx)->IsIndependent() && theList->Find(idx)==-1) {
+ (*theList) << idx;
+ }
+ });
- for (f=0; f<newVars.countitems(); f++) {
+ /*for (f=0; f<newVars.countitems(); f++) {
_Variable* cv = LocateVar(newVars.list_data[f]);
if (cv->IsIndependent()&& theList->Find(newVars.list_data[f])==-1) {
(*theList) << newVars.list_data[f];
}
- }
+ }*/
+
+
if (theList!=whichList) {
+ /* SLKP 20201026
+ if the variable is being set to a constant value, and some other variables depend on it,
+ this could create the situation where they don't get properly refreshed
+ */
+ if (purgeResults) {
+ for (long i = 0; i < indexDep.lLength; i++) {
+ _Variable* ith_dep = GetIthDependentVar(i);
+ if (ith_dep->CheckFForDependence(index)) {
+ ith_dep->ClearValue ();
+ ith_dep->Compute();
+ }
+ }
+ }
+
for (f = 0; f < indVarsByPartition.lLength; f++) {
UpdateIndependent (index, false, (_SimpleList*)indVarsByPartition(f), (_SimpleList*)depVarsByPartition(f));
}
@@ -8335,7 +8446,7 @@ hyFloat _LikelihoodFunction::ComputeBlock (long index, hyFloat* siteRes, long c
long snID = -1;
if (nodeID == 1) {
- if (matrices->lLength == 2) {
+ if (matrices->lLength > 1) {
branches->Clear(false);
matrices->Clear(false);
@@ -8385,8 +8496,9 @@ hyFloat _LikelihoodFunction::ComputeBlock (long index, hyFloat* siteRes, long c
branches->Populate (t->GetINodeCount()+t->GetLeafCount()-1,0,1);
}
+
#ifdef _UBER_VERBOSE_LF_DEBUG
- fprintf (stderr, "%d matrices, %d branches marked for rate class %d\n", matrices->lLength, branches->lLength, catID);
+ fprintf (stderr, "\n%d matrices, %d branches marked for rate class %d, doCachedComp = %d\n", matrices->lLength, branches->lLength, catID, doCachedComp);
if (matrices->lLength == 0) {
fprintf (stderr, "Hmm\n");
}
@@ -8591,6 +8703,7 @@ hyFloat _LikelihoodFunction::ComputeBlock (long index, hyFloat* siteRes, long c
printf ("AFTER CONSTRUCT CACHE overallScalingFactors = %ld\n", overallScalingFactors[0]);
}*/
+
if (sum > -INFINITY) {
hyFloat checksum = t->ComputeLLWithBranchCache (*sl,
doCachedComp,
=====================================
src/core/matrix.cpp
=====================================
@@ -2236,7 +2236,7 @@ bool _Matrix::IncreaseStorage (void) {
} else {
theData = (hyFloat*) MemReallocate(theData, lDim*sizeof(void*));
for (long i = lDim-allocationBlock; i < lDim; i++) {
- theData [i] = ZEROPOINTER;
+ theData [i] = ZEROOBJECT;
}
}
} else {
@@ -4815,18 +4815,18 @@ void _Matrix::RowAndColumnMax (hyFloat& r, hyFloat &c, hyFloat * cache) {
if (theIndex) {
if (compressedIndex) {
long from = 0L;
- for (long r = 0; r < hDim; r++) {
- for (long c = from; c < compressedIndex[r]; c++) {
- hyFloat temp = theData[c];
+ for (long row = 0; row < hDim; row++) {
+ for (long col = from; col < compressedIndex[row]; col++) {
+ hyFloat temp = theData[col];
if (temp > 0.0) {
- rowMax[r] += temp;
- colMax[compressedIndex[c+hDim]] += temp;
+ rowMax[row] += temp;
+ colMax[compressedIndex[col+hDim]] += temp;
} else {
- rowMax[r] -= temp;
- colMax[compressedIndex[c+hDim]] -= temp;
+ rowMax[row] -= temp;
+ colMax[compressedIndex[col+hDim]] -= temp;
}
}
- from = compressedIndex[r];
+ from = compressedIndex[row];
}
} else {
for (long i = 0; i<lDim; i++) {
@@ -5451,7 +5451,7 @@ _Matrix* _Matrix::Exponentiate (hyFloat scale_to, bool check_transition, _Mat
tempS.vDim = vDim;
tempS.lDim = lDim;
tempS.theData = stash;
- // zero out the stash 20200929 : is this necessary?
+ // zero out the stash TODO: 20200929 : is this necessary?
memset (stash, 0, sizeof (hyFloat)*lDim);
do {
temp.MultbyS (*this,false, &tempS, nil);
@@ -6901,12 +6901,34 @@ hyFloat _Matrix::Sqr (hyFloat* _hprestrict_ stash) {
}
}
- //memcpy (theData, stash, lDim * sizeof (hyFloat));
+
+ long lDimmod4 = (lDim >> 2) << 2;
+ hyFloat diffs[4] = {0.0,0.0,0.0,0.0};
+
+ for (long s = 0; s < lDimmod4; s+=4) {
+ hyFloat d1 = fabs (theData[s ] - stash[s ]);
+ hyFloat d2 = fabs (theData[s+1] - stash[s+1]);
+ hyFloat d3 = fabs (theData[s+2] - stash[s+2]);
+ hyFloat d4 = fabs (theData[s+3] - stash[s+3]);
+ if (d1 > diffs[0]) diffs[0] = d1;
+ if (d2 > diffs[1]) diffs[1] = d2;
+ if (d3 > diffs[2]) diffs[2] = d3;
+ if (d4 > diffs[3]) diffs[3] = d4;
+ }
+
+ for (long s = lDimmod4; s < lDim; s++) {
+ hyFloat d1 = fabs (theData[s] - stash[s]);
+ if (d1 > diffs[0]) diffs[0] = d1;
+ }
+
+ diff = MAX (MAX (diffs[0], diffs[1]), MAX (diffs[2], diffs[3]));
- for (long s = 0; s < lDim; s++) {
+ memcpy (theData, stash, lDim * sizeof (hyFloat));
+
+ /*for (long s = 0; s < lDim; s++) {
StoreIfGreater(diff, fabs (theData[s] - stash[s]));
theData[s] = stash[s];
- }
+ }*/
}
return diff;
}
=====================================
src/core/topology.cpp
=====================================
@@ -2697,6 +2697,7 @@ void _TreeTopology::RerootTreeInternalTraverser (node<long>* iterator, long orig
create a new root with >=2 children nodes - this node,
and one more containing all other children (>=2 of them)
*/
+
long count = 0L,
root_children_count = theRoot->get_num_nodes();
@@ -2726,7 +2727,7 @@ void _TreeTopology::RerootTreeInternalTraverser (node<long>* iterator, long orig
if (root_children_count > 2) {
res<<')';
}
-
+
PasteBranchLength (stash_originator,res, branch_length_mode, variable_ref);
}
}
@@ -2743,6 +2744,8 @@ void _TreeTopology::PasteBranchLength (node<long>* node, _StringBuffe
}
};
+ //printf ("_TreeTopology::PasteBranchLength %d %g %s\n", mode, GetNodeName(node).get_str(), GetBranchLength(node));
+
switch (mode) {
case kTopologyBranchLengthExpectedSubs: {
result << ':' << _String(GetBranchLength (node)*factor);
@@ -2805,7 +2808,7 @@ HBLObjectRef _TreeTopology::RerootTree (HBLObjectRef new_root, HBLObjectRef cach
(*res)<<'('; // opening tree (
RerootTreeInternalTraverser (reroot_at, reroot_at->get_child_num(),false,*res,settings,kTopologyBranchLengthUserLengths,-1,true);
(*res)<<',';
- SubTreeString (reroot_at, *res,settings,kTopologyBranchLengthUserLengths);
+ SubTreeString (reroot_at, *res,settings,false, kTopologyBranchLengthUserLengths);
(*res)<<')';
}
}
=====================================
src/core/tree_evaluator.cpp
=====================================
@@ -482,10 +482,16 @@ inline void __ll_product_sum_loop_generic (hyFloat const* _hprestrict_ tMatrix,
}
template<long D, bool ADJUST> inline void __ll_loop_handle_scaling (hyFloat& sum, hyFloat* _hprestrict_ parentConditionals, hyFloat* _hprestrict_ scalingAdjustments, long& didScale, long parentCode, long siteCount, long siteID, long& localScalerChange, long siteFrequency) {
+
+ /*if (sum == 0.) {
+ fprintf (stderr, "THE SUM IS EXACTLY ZERO parent code %ld\n", parentCode);
+ }*/
+
if (__builtin_expect(sum < _lfScalingFactorThreshold && sum > 0.0,0)) {
hyFloat scaler = _computeBoostScaler(scalingAdjustments [parentCode*siteCount + siteID] * _lfScalerUpwards, sum, didScale);
+ //fprintf (stderr, "UP %ld (%ld) %lg\n", didScale, parentCode, scaler);
/*if (likeFuncEvalCallCount == 15098 && siteID == 91) {
fprintf (stderr, "UP %ld (%ld) %lg\n", didScale, parentCode, scaler);
@@ -495,9 +501,9 @@ template<long D, bool ADJUST> inline void __ll_loop_handle_scaling (hyFloat& sum
#pragma GCC unroll 4
for (long c = 0; c < D; c++) {
parentConditionals [c] *= scaler;
- /*if (likeFuncEvalCallCount == 15098 && siteID == 91) {
- fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]);
- }*/
+ //if (likeFuncEvalCallCount == 15098 && siteID == 91) {
+ //fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]);
+ // }
}
if (siteFrequency == 1L) {
@@ -523,9 +529,9 @@ template<long D, bool ADJUST> inline void __ll_loop_handle_scaling (hyFloat& sum
#pragma GCC unroll 4
for (long c = 0; c < D; c++) {
parentConditionals [c] *= scaler;
- /*if (likeFuncEvalCallCount == 15098 && siteID == 91) {
- fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]);
- }*/
+ //if (likeFuncEvalCallCount == 15098 && siteID == 91) {
+ // fprintf (stderr, "%ld=>%g\n", c, parentConditionals [c]);
+ // }
}
if (siteFrequency == 1L) {
@@ -1062,7 +1068,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
#else
__ll_product_sum_loop<61L> (tMatrix, childVector, parentConditionals, sum);
#endif
-
+
__ll_loop_handle_scaling<61L, true> (sum, parentConditionals, scalingAdjustments, didScale, parentCode, siteCount, siteID, localScalerChange, theFilter->theFrequencies.get (siteOrdering.list_data[siteID]));
childVector += 61;
=====================================
src/core/variable.cpp
=====================================
@@ -707,6 +707,7 @@ void _Variable::SetFormula (_Formula& theF) {
for (AVLListXIteratorKeyValue variable_record : AVLListXIterator (&variableNames)) {
_Variable * theV = LocateVar(variable_record.get_value());
//printf ("%s\n", theV->GetName()->get_str());
+ // SLKP 20201028 TODO: this is slow for large trees, because the entire variable space is being traversed.
if (theV->IsContainer()) {
_VariableContainer* theVC = (_VariableContainer*)theV;
if (theVC->SetDependance(theIndex) == -2) {
=====================================
src/core/variablecontainer.cpp
=====================================
@@ -456,6 +456,12 @@ bool _VariableContainer::NeedToExponentiate (bool ignoreCats) const {
auto has_changed = [=] (long var_index, long ref_index, unsigned long) -> bool {
if (ref_index >= 0L) {
return LocateVar (var_index) -> HasChanged (ignoreCats);
+ /*bool haz = LocateVar (var_index) -> HasChanged (ignoreCats);
+ if (haz) {
+ _Variable *lv = LocateVar (var_index);
+ fprintf (stderr, "==> %s HAZ changed in the context of %s (%d, %x, %d)\n", lv->GetName()->get_str(), GetName()->get_str(), lv->varFlags, LocateVar (var_index)->varValue, lv->varValue->IsVariable());
+ }
+ return haz;*/
}
return false;
};
=====================================
tests/hbltests/libv3/FMM.wbf
=====================================
@@ -0,0 +1,40 @@
+GetString (version, HYPHY_VERSION, 0);
+
+/*PRODUCE_OPTIMIZATION_LOG = 1;*/
+
+if (+version >= 2.4) {
+ LoadFunctionLibrary ("SelectionAnalyses/FitMultiModel.bf", {"--code" : "Universal", "--alignment" : PATH_TO_CURRENT_BF + "data/CD2.nex"});
+} else {
+ LoadFunctionLibrary ("SelectionAnalyses/FitMultiModel.bf", {"0" : "Universal", "1" : PATH_TO_CURRENT_BF + "data/CD2.nex", "2" : "GROUP1", "3" : "Yes", "4" : "0.1"});
+
+}
+LoadFunctionLibrary ("shared.bf");
+LoadFunctionLibrary ("libv3/IOFunctions.bf");
+
+/*fprintf ("logger.hbl", CLEAR_FILE, ^((fitter.results[terms.likelihood_function])+".trace"));*/
+
+
+assert (check_value (
+ ((fitter.json["fits"])["Standard MG94"])["Log Likelihood"], -3405.53, 0.001), "Incorrect log-likelihood for the 1H model");
+
+assert (check_value (
+ ((fitter.json["fits"])["MG94 with double instantaneous substitutions"])["Log Likelihood"], -3403.0265, 0.01), "Incorrect log-likelihood for the 2H model");
+
+assert (check_value (
+ ((fitter.json["fits"])["MG94 with double and triple instantaneous substitutions"])["Log Likelihood"], -3403.026, 0.01), "Incorrect log-likelihood for the 2H model");
+
+assert (check_value (
+ (((((fitter.json["fits"])["MG94 with double and triple instantaneous substitutions"])["Rate Distributions"])["parameters"])["rate at which 2 nucleotides are changed instantly within a single codon"]), 0.157, 0.05), "Incorrect 2H rate estimate for the 3H model");
+
+assert (check_value (
+ (((fitter.json["test results"])["Double-hit vs single-hit"])["p-value"]),0.0251, 0.01), "p-value for the 2H:1H test is incorrect");
+
+assert (check_value (
+ (+(fitter.json["Evidence Ratios"])["Three-hit"]), 187.000, 0.05), "Incorrect sum of ER for 3H");
+
+
+
+
+
+
+
View it on GitLab: https://salsa.debian.org/med-team/hyphy/-/commit/17ba90066520e268fc9d4c1a6e53f089abd07f2d
--
View it on GitLab: https://salsa.debian.org/med-team/hyphy/-/commit/17ba90066520e268fc9d4c1a6e53f089abd07f2d
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201031/28a54b33/attachment-0001.html>
More information about the debian-med-commit
mailing list