[med-svn] [Git][med-team/hyphy][upstream] New upstream version 2.5.24+dfsg
Nilesh Patra
gitlab at salsa.debian.org
Mon Dec 21 08:12:01 GMT 2020
Nilesh Patra pushed to branch upstream at Debian Med / hyphy
Commits:
e5c8fd32 by Nilesh Patra at 2020-12-21T13:35:05+05:30
New upstream version 2.5.24+dfsg
- - - - -
15 changed files:
- CMakeLists.txt
- res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
- res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf
- res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf
- src/core/global_things.cpp
- src/core/include/defines.h
- src/core/include/likefunc.h
- src/core/likefunc.cpp
- src/core/matrix.cpp
- src/core/tree_evaluator.cpp
- src/lib/setup.py
- tests/hbltests/Results/HIVSweden.out
- tests/hbltests/Results/HIVSweden.out_MODEL_-1.nex
- tests/hbltests/Results/HIVSweden.out_MODEL_0.nex
- tests/hbltests/Results/HIVSweden.out_MODEL_1.nex
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -1,8 +1,6 @@
cmake_minimum_required(VERSION 3.12)
project(HyPhy)
cmake_policy(VERSION 3.0.0)
-#cmake_policy(SET CMP0026 OLD)
-#cmake_policy(SET CMP0004 OLD)
enable_testing()
@@ -14,6 +12,7 @@ set(CMAKE_CONFIGURATION_TYPES Release)
option(NOAVX OFF)
option(NOSSE3 OFF)
+option(NONEON OFF)
#-------------------------------------------------------------------------------
# SSE MACROS
@@ -44,6 +43,34 @@ macro(PCL_CHECK_FOR_SSE3)
HAVE_SSE3_EXTENSIONS)
endmacro(PCL_CHECK_FOR_SSE3)
+#-------------------------------------------------------------------------------
+# NEON MACROS
+#-------------------------------------------------------------------------------
+
+macro(PCL_CHECK_FOR_NEON)
+ include(CheckCXXSourceRuns)
+ set(CMAKE_REQUIRED_FLAGS)
+
+ #if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG)
+ set(CMAKE_REQUIRED_FLAGS "-msse3")
+ #endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG)
+
+ check_cxx_source_runs("
+ #include <stdio.h>
+ #include <arm_neon.h>
+ int main() {
+ double array[4] = {4.,2.,3.,4};
+ float64x2_t a1 = vld1q_f64 (array),
+ a2 = vld1q_f64 (array+2);
+ a1 = vmulq_f64 (a1,a2);
+ vst1q_lane_f64 (array, a1, 0);
+ printf (\"%g\", array[0]);
+ return 0;
+ }"
+ HAVE_NEON_EXTENSIONS)
+endmacro(PCL_CHECK_FOR_NEON)
+
+
#-------------------------------------------------------------------------------
# SSE MACROS
#-------------------------------------------------------------------------------
@@ -219,6 +246,13 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
endif(${HAVE_SSE3_EXTENSIONS})
endif (${HAVE_AVX_EXTENSIONS})
endif(NOAVX)
+
+ if(NOT NONEON)
+ PCL_CHECK_FOR_NEON()
+ if(${HAVE_NEON_EXTENSIONS})
+ add_definitions (-D_SLKP_USE_ARM_NEON)
+ endif(${HAVE_NEON_EXTENSIONS})
+ endif(NOT NONEON)
set_property(
SOURCE ${SRC_CORE} ${SRC_NEW} ${SRC_UTILS} ${SRC_UNIXMAIN}
@@ -503,11 +537,13 @@ set_target_properties(
HYPHYDEBUG
PROPERTIES
if(${OPENMP_FOUND})
- COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_DEBUG_WARNING_FLAGS} ${OpenMP_CXX_FLAGS} ${MPI_COMPILE_FLAGS} -g -fprofile-arcs -ftest-coverage -fsanitize=address -fsanitize=leak -O0 "
- LINK_FLAGS "${DEFAULT_COMPILE_FLAGS} ${OpenMP_CXX_FLAGS} ${MPI_LINK_FLAGS} -g -fprofile-arcs -ftest-coverage -fsanitize=address -O0 "
+ #COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_DEBUG_WARNING_FLAGS} ${OpenMP_CXX_FLAGS} ${MPI_COMPILE_FLAGS} -g -fsanitize=address -fsanitize=leak -O0 "
+ COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_DEBUG_WARNING_FLAGS} ${OpenMP_CXX_FLAGS} ${MPI_COMPILE_FLAGS} -g -fsanitize=address -O0 "
+ LINK_FLAGS "${DEFAULT_COMPILE_FLAGS} ${OpenMP_CXX_FLAGS} ${MPI_LINK_FLAGS} -g -fsanitize=address -O0 "
else(${OPENMP_FOUND})
- COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_DEBUG_WARNING_FLAGS} -g -fprofile-arcs -ftest-coverage -fsanitize=address -fsanitize=leak -O0 "
- LINK_FLAGS "${DEFAULT_COMPILE_FLAGS} -g -fprofile-arcs -ftest-coverage -fsanitize=address -O0 "
+ #COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_DEBUG_WARNING_FLAGS} -g -fsanitize=address -fsanitize=leak -O0 "
+ COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} ${DEFAULT_DEBUG_WARNING_FLAGS} -g -fsanitize=address -O0 "
+ LINK_FLAGS "${DEFAULT_COMPILE_FLAGS} -g -fsanitize=address -O0 "
endif(${OPENMP_FOUND})
)
=====================================
res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
=====================================
@@ -429,34 +429,42 @@ if (busted.do_srv_hmm) {
//parameters.SetConstraint (((busted.test.bsrel_model[terms.parameters])[terms.global])[terms.rate_variation.hmm_lambda],"1e-6", "");
}
-
-parameters.DeclareGlobalWithRanges ("busted.bl.scaler", 1, 0, 1000);
-busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map, {
- "retain-lf-object": TRUE,
- terms.run_options.proportional_branch_length_scaler :
- {"0" : "busted.bl.scaler"},
+debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT");
+
+if (Type (debug.checkpoint) != "String") {
+ parameters.DeclareGlobalWithRanges ("busted.bl.scaler", 1, 0, 1000);
+ busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map, {
+ "retain-lf-object": TRUE,
+ terms.run_options.proportional_branch_length_scaler :
+ {"0" : "busted.bl.scaler"},
- terms.run_options.optimization_settings :
- {
- "OPTIMIZATION_METHOD" : "nedler-mead",
- "MAXIMUM_OPTIMIZATION_ITERATIONS" : 500,
- "OPTIMIZATION_PRECISION" : busted.nm.precision
- } ,
+ terms.run_options.optimization_settings :
+ {
+ "OPTIMIZATION_METHOD" : "nedler-mead",
+ "MAXIMUM_OPTIMIZATION_ITERATIONS" : 500,
+ "OPTIMIZATION_PRECISION" : busted.nm.precision
+ } ,
- terms.search_grid : busted.initial_grid,
- terms.search_restarts : busted.N.initial_guesses
-});
+ terms.search_grid : busted.initial_grid,
+ terms.search_restarts : busted.N.initial_guesses
+ });
-busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, {
- "retain-lf-object": TRUE,
- terms.run_options.optimization_settings :
- {
- "OPTIMIZATION_METHOD" : "hybrid"
- }
+ busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, {
+ "retain-lf-object": TRUE,
+ terms.run_options.optimization_settings :
+ {
+ "OPTIMIZATION_METHOD" : "hybrid",
+ //"OPTIMIZATION_PRECISION" : 1.
+ }
- });
-
+ });
+} else {
+ ExecuteAFile (debug.checkpoint);
+ GetString (lf, LikelihoodFunction, 0);
+ busted.full_model = estimators.ExtractMLEs (lf, busted.model_object_map);
+ busted.full_model[terms.likelihood_function] = lf;
+}
KeywordArgument ("save-fit", "Save BUSTED model fit to this file (default is not to save)", "/dev/null");
@@ -464,6 +472,7 @@ io.SpoolLFToPath(busted.full_model[terms.likelihood_function], io.PromptUserForF
io.ReportProgressMessageMD("BUSTED", "main", "* " + selection.io.report_fit (busted.full_model, 9, busted.codon_data_info[terms.data.sample_size]));
+//VERBOSITY_LEVEL = 101;
io.ReportProgressMessageMD("BUSTED", "main", "* For *test* branches, the following rate distribution for branch-site combinations was inferred");
=====================================
res/TemplateBatchFiles/SelectionAnalyses/PRIME.bf
=====================================
@@ -21,6 +21,9 @@ LoadFunctionLibrary("modules/io_functions.ibf");
LoadFunctionLibrary("modules/selection_lib.ibf");
+
+
+
debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT");
/*------------------------------------------------------------------------------
Display analysis information
@@ -103,7 +106,11 @@ This table is meant for HTML rendering in the results web-app; can use HTML char
is 'pop-over' explanation of terms. This is ONLY saved to the JSON file. For Markdown screen output see
the next set of variables.
*/
-prime.table_screen_output = {{"Codon", "Partition", "alpha", "beta", "Property Description", "Importance", "p-value"}};
+
+prime.site.composition.string = "";
+
+
+prime.table_screen_output = {{"Codon", "Partition", "alpha", "beta", "Property Description", "Importance", "p-value","Most common codon substitutions at this site"}};
prime.table_output_options = { terms.table_options.header : TRUE,
terms.table_options.minimum_column_width: 12,
terms.table_options.align : "center",
@@ -112,9 +119,10 @@ prime.table_output_options = { terms.table_options.header : TRUE,
"1" : 12,
"2" : 12,
"3" : 12,
- "4" : 50,
+ "4" : 40,
"5" : 12,
- "6" : 12}
+ "6" : 12,
+ "7" : 70}
};
@@ -129,6 +137,7 @@ prime.property_set = io.SelectAnOption (
{
"Atchley":"Use the five properties derived from a factor analysis of 500 amino-acid properties [Table 2 in PNAS (2005) 102(18) 6395-6400 doi: 10.1073/pnas.0408677102]",
"LCAP":"Use the five properties defined in the Conant and Stadler LCAP model [Mol Biol Evol (2009) 26 (5): 1155-1161. doi: 10.1093/molbev/msp031]",
+ "Random" : "Random properties (for null hypothesis testing)",
"Custom":"Load the set of properties from a file"
},
"The set of properties to use in the model");
@@ -155,6 +164,7 @@ for (_partition_, _selection_; in; prime.selected_branches) {
}
prime.pairwise_counts = genetic_code.ComputePairwiseDifferencesAndExpectedSites(prime.codon_data_info[terms.code], None);
+prime.codon_table = genetic_code.DefineCodonToAAMapping (prime.codon_data_info[terms.code]);
selection.io.startTimer (prime.json [terms.json.timers], "Model fitting",1);
@@ -296,6 +306,9 @@ prime.lambda_range = {
terms.upper_bound: "15"
};
+
+
+
prime.table_headers = { 6 + 3*prime.properties.N, 2};
prime.table_headers[0][0] = "alpha;"; prime.table_headers[0][1] = "Synonymous substitution rate at a site";
@@ -375,7 +388,8 @@ prime.report.significant_site = {{"" + (1+((prime.filter_specification[prime.rep
Format(prime.report.row[1],7,3), // beta
prime.property_report_name, // property name
prime.rate_to_screen ( prime.property_report_name,,prime.report_rate), // property importance
- Format(prime.report.row[prime.print_index],7,3) // property p-value
+ Format(prime.report.row[prime.print_index],7,3), // property p-value
+ Join ("|", prime.site.composition.string[prime.property_report_name])
}};
@@ -659,6 +673,9 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
}
+ ancestral_info = ancestral.build (lf_prop,0,FALSE);
+ branch_substitution_information = (ancestral.ComputeSubstitutionBySite (ancestral_info,0,None))[^"terms.substitutions"];
+ DeleteObject (ancestral_info);
alternative = estimators.ExtractMLEsOptions (lf_prop, model_mapping, {});
alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
@@ -691,6 +708,7 @@ lfunction prime.handle_a_site (lf_fel, lf_prop, filter_data, partition_index, pa
utility.getGlobalValue("terms.alternative") : alternative,
utility.getGlobalValue("terms.Null"): Null,
utility.getGlobalValue("terms.model.residue_properties") : constrained_models,
+ utility.getGlobalValue("terms.branch_selection_attributes") : branch_substitution_information,
utility.getGlobalValue("terms.prime_imputed_states") : character_map
};
}
@@ -738,6 +756,71 @@ function prime.report.echo (prime.report.site, prime.report.partition, prime.rep
lfunction prime.store_results (node, result, arguments) {
+ sub_profile = result[utility.getGlobalValue("terms.branch_selection_attributes")];
+
+
+
+ if (None != sub_profile) {
+
+ total_sub_count = 0;
+
+
+
+ sub_by_count = {"Overall" : {}};
+
+ for (p,v; in; ^"prime.properties") {
+ sub_by_count[p] = {};
+ }
+
+ for (i, v; in; sub_profile) {
+ for (j, b; in; v) {
+ c = Abs (b);
+ total_sub_count += c;
+ ai = (^"prime.codon_table")[i];
+ aj = (^"prime.codon_table")[j];
+ if (Abs (ai) == 0) {ai = "?";}
+ if (Abs (aj) == 0) {aj = "?";}
+
+
+ for (p, counts; in; sub_by_count) {
+ if ((counts/c)==0) {
+ counts [c] = {};
+ }
+
+ delta = "";
+ if (p != "Overall") {
+ delta = "("+Format (Abs (((^"prime.properties")[p])[ai]-((^"prime.properties")[p])[aj]),0,2) + ")";
+ }
+
+ if (ai != aj) {
+ counts[c] + (((^"prime.codon_table")[i] + ">" + (^"prime.codon_table")[j]) + delta);
+ } else {
+ counts[c] + ai;
+ }
+ }
+ }
+ }
+
+ sub_profile = {};
+ for (p,count; in; sub_by_count) {
+
+ sorted_subs = {Abs (count), 1};
+ j = 0;
+ for (i, v; in; count) {
+ sorted_subs[j] = -(+i);
+ j += 1;
+ }
+ sub_profile1 = {};
+ for (i; in; sorted_subs % 0) {
+ sub_profile1 + ("[" + (-i) + "]" + Join (",",count [-i]));
+ }
+
+ sub_profile [p] = sub_profile1;
+ }
+ ^"prime.site.composition.string" = sub_profile;
+ }
+
+
partition_index = arguments [3];
pattern_info = arguments [4];
=====================================
res/TemplateBatchFiles/SelectionAnalyses/contrast-fel.bf
=====================================
@@ -666,12 +666,34 @@ function fel.apply_proportional_site_constraint (tree_name, node_name, alpha_par
//----------------------------------------------------------------------------------------
lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, model_mapping, branch_sets, parameter_names, permutation) {
- utility.ForEach (utility.Keys (branch_sets), "_node_",
+
+ lengths_by_class = {};
+
+ for (_node_; in; utility.Keys (branch_sets)) {
+ _node_class_ = branch_sets[_node_];
+ _beta_scaler = parameter_names[_node_class_];
+ lengths_by_class[_node_class_] += fel.apply_proportional_site_constraint ("fel.site_tree", _node_, fel.alpha, fel.beta, fel.alpha.scaler, _beta_scaler,
+ (( (^"fel.final_partitioned_mg_results")[^"terms.branch_length"])[partition_index])[_node_]);
+ }
+
+ /*utility.ForEach (utility.Keys (branch_sets), "_node_",
'_node_class_ = (`&branch_sets`)[_node_];
_beta_scaler = (`¶meter_names`)[_node_class_];
fel.apply_proportional_site_constraint ("fel.site_tree", _node_, fel.alpha, fel.beta, fel.alpha.scaler, _beta_scaler,
(( fel.final_partitioned_mg_results[terms.branch_length])[partition_index])[_node_]);
- ');
+ ');*/
+
+ ignorable = {};
+ for (fel.t; in; utility.Values (branch_sets)) {
+ if ( lengths_by_class[fel.t] == 0) {
+ ignorable [parameter_names[fel.t]] = 1;
+ } else {
+ ignorable [parameter_names[fel.t]] = 0;
+ }
+ }
+
+ //console.log (lengths_by_class);
+ //console.log (ignorable);
GetString (lfInfo, ^lf,-1);
ExecuteCommands (filter_data);
@@ -765,13 +787,15 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
^ref_parameter = sum /denominator;
+
+
if (testable == 1) {
parameters.SetConstraint ((^"fel.scaler_parameter_names")[^"terms.tree_attributes.background"],ref_parameter, "");
} else {
for (_gname_; in; ^"fel.branches.testable") {
_pname_ = (^"fel.scaler_parameter_names")[_gname_];
- if (_pname_ != ref_parameter && (^"fel.ignorable")[_pname_] == 0) {
+ if (_pname_ != ref_parameter && (ignorable)[_pname_] == 0) {
parameters.SetConstraint (_pname_,ref_parameter, "");
}
}
@@ -788,8 +812,9 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
v1n = (^"fel.branches.testable")[v];
v2n = (^"fel.branches.testable")[v2];
estimators.RestoreLFStateFromSnapshot (lf_id, snapshot);
+
- if ((^"fel.ignorable")[(^"fel.scaler_parameter_names")[v1n]] || (^"fel.ignorable")[(^"fel.scaler_parameter_names")[v2n]]) {
+ if ((ignorable)[(^"fel.scaler_parameter_names")[v1n]] || (ignorable)[(^"fel.scaler_parameter_names")[v2n]]) {
//console.log (v1n + "|" + v2n + " is ignorable");
(pairwise[v1n + "|" + v2n]) = alternative;
} else {
@@ -811,6 +836,9 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
p_values = {};
p_values ["overall"] = lrt[^'terms.p_value'];
test_keys = {};
+
+ //estimators.RestoreLFStateFromSnapshot (lf_id, snapshot);
+
if (^'fel.report.test_count' > 1) {
@@ -826,7 +854,6 @@ lfunction fel.handle_a_site (lf, filter_data, partition_index, pattern_info, mod
}
}
-
p_values = math.HolmBonferroniCorrection (p_values);
if (permutation == FALSE && (Min (p_values,0))["value"] <= ^'fel.p_value') {
=====================================
src/core/global_things.cpp
=====================================
@@ -121,7 +121,7 @@ namespace hy_global {
kErrorStringDatasetRefIndexError ("Dataset index reference out of range"),
kErrorStringMatrixExportError ("Export matrix called with a non-polynomial matrix argument"),
kErrorStringNullOperand ("Attempting to operate on an undefined value; this is probably the result of an earlier 'soft' error condition"),
- kHyPhyVersion = _String ("2.5.23"),
+ kHyPhyVersion = _String ("2.5.24"),
kNoneToken = "None",
kNullToken = "null",
=====================================
src/core/include/defines.h
=====================================
@@ -336,9 +336,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//TODO 20170413: not sure where to put the conditional includes below
+#ifdef _SLKP_USE_ARM_NEON
+ #include <arm_neon.h>
+#endif
#ifdef _SLKP_USE_SSE_INTRINSICS
-#include <pmmintrin.h>
+ //#include "sse2neon.h"
+ //#else
+ #include <pmmintrin.h>
+ //#endif
#endif
#ifdef _SLKP_USE_AVX_INTRINSICS
=====================================
src/core/include/likefunc.h
=====================================
@@ -614,7 +614,8 @@ protected:
hyFloat smoothingTerm,
smoothingReduction,
- smoothingPenalty;
+ smoothingPenalty,
+ errorTolerance;
#ifdef _SLKP_LFENGINE_REWRITE_
/*
=====================================
src/core/likefunc.cpp
=====================================
@@ -559,8 +559,7 @@ _LikelihoodFunction::_LikelihoodFunction (void) {
//_______________________________________________________________________________________
-void _LikelihoodFunction::Init (void)
-{
+void _LikelihoodFunction::Init (void) {
siteResults = nil;
bySiteResults = nil;
hasBeenSetUp = 0;
@@ -571,6 +570,7 @@ void _LikelihoodFunction::Init (void)
evalsSinceLastSetup = 0;
siteArrayPopulated = false;
smoothingTerm = 0.;
+ errorTolerance = 1.;
smoothingPenalty = 0.;
conditionalInternalNodeLikelihoodCaches = nil;
@@ -3638,7 +3638,8 @@ void _LikelihoodFunction::SetupLFCaches (void) {
#endif
evalsSinceLastSetup = 0L;
-
+ unsigned long maxFilterSize = 1;
+
for (unsigned long i=0UL; i<theTrees.lLength; i++) {
_TheTree * cT = GetIthTree(i);
_DataSetFilter const *theFilter = GetIthFilter(i);
@@ -3663,6 +3664,7 @@ void _LikelihoodFunction::SetupLFCaches (void) {
long ambig_resolution_count = 1L;
+ maxFilterSize = MAX (maxFilterSize, theFilter->GetSiteCountInUnits());
if (leafCount > 1UL) {
conditionalInternalNodeLikelihoodCaches[i] = (hyFloat*)MemAllocate (sizeof(hyFloat)*patternCount*stateSpaceDim*iNodeCount*cT->categoryCount, false, 64);
branchCaches[i] = (hyFloat*)MemAllocate (sizeof(hyFloat)*2*patternCount*stateSpaceDim*cT->categoryCount, false, 64);
@@ -3726,7 +3728,7 @@ void _LikelihoodFunction::SetupLFCaches (void) {
}
free (columnBlock); free (translationCache);
conditionalTerminalNodeLikelihoodCaches < ambigs;
-
+ errorTolerance = MAX (1.,round (log (1.+maxFilterSize)/log (10)));
#ifdef MDSOCL
OCLEval[i].init(patternCount, theFilter->GetDimension(), conditionalInternalNodeLikelihoodCaches[i]);
#endif
@@ -4110,8 +4112,12 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
BenchmarkThreads (this);
#ifdef __HYPHYMPI__
}
+
+
#endif
+ //ObjectToConsole(variables_changed_during_last_compute);
+ //NLToConsole();
bool skipCG = ! CheckEqual (get_optimization_setting (kSkipConjugateGradient, 0.0), 0.0),
keepStartingPoint = ! CheckEqual (get_optimization_setting (kUseLastResults, 0.0), 0.0),
@@ -4128,14 +4134,23 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
}
}
-
+ for (unsigned long i=0UL; i<indexInd.lLength; i++) {
+ if (GetIthIndependentVar(i)->varFlags & HY_VARIABLE_CHANGED) {
+ variables_changed_during_last_compute->InsertNumber (GetIthIndependentVar(i)->get_index());
+ //if (variables_changed_during_last_compute->InsertNumber (GetIthIndependentVar(i)->get_index()) >= 0)
+ // printf ("Insert [before] %s\n",GetIthIndependentName(i)->get_str());
+ }
+ }
+
_SimpleList untag;
if (keepStartingPoint) {
indexInd.Each ([this, &untag] (long v, unsigned long i) -> void {
_Variable *iv = GetIthIndependentVar (i);
+ //printf ("%s = %g, global %d, initialized %d, changed %d\n",GetIthIndependentName(i)->get_str(), GetIthIndependent(i), iv->IsGlobal(), iv->HasBeenInitialized(), iv->IsModified());
if (!iv->IsGlobal() && iv->HasBeenInitialized()) {
if (!iv->IsModified()) {
iv->MarkModified();
+ //printf ("TAG [before] %s\n",GetIthIndependentName(i)->get_str());
untag << i;
}
}
@@ -4148,12 +4163,11 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
if (keepStartingPoint) {
untag.Each ([this] (long v, unsigned long i) -> void {
- _Variable *iv = GetIthIndependentVar (i);
+ _Variable *iv = GetIthIndependentVar (v);
+ //printf ("UNTAG [before] %s\n",GetIthIndependentName(v)->get_str());
iv->ClearModified();
});
- }
-
- if (!keepStartingPoint) {
+ } else {
hyFloat global_starting_point = get_optimization_setting (kGlobalStartingPoint, 0.1);
if (CheckEqual (get_optimization_setting (kRandomStartingPerturbations, 0.0), 0.0)) {
@@ -4182,6 +4196,8 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
for (unsigned long i=0UL; i<indexInd.lLength; i++) {
if (GetIthIndependentVar(i)->varFlags & HY_VARIABLE_CHANGED) {
variables_changed_during_last_compute->InsertNumber (GetIthIndependentVar(i)->get_index());
+ //if (variables_changed_during_last_compute->InsertNumber (GetIthIndependentVar(i)->get_index()) >= 0)
+ // printf ("Insert [after] %s\n",GetIthIndependentName(i)->get_str());
}
}
@@ -5629,7 +5645,7 @@ long _LikelihoodFunction::Bracket (long index, hyFloat& left, hyFloat& middle
(or involves a paremeter that has very little effect on the LF), recomputation could be within numerical error
**/
- if (rightValue - middleValue > 1e-9 || leftValue - middleValue > 1e-9) {
+ if (rightValue - middleValue > 1e-12*errorTolerance || leftValue - middleValue > 1e-12*errorTolerance) {
char buf[256], buf2[512];
snprintf (buf, 256, " \n\tERROR: [_LikelihoodFunction::Bracket (index %ld) recomputed the value to midpoint: L(%g) = %g [@%g -> %g:@%g -> %g]]", index, middle, middleValue, left, leftValue,right, rightValue);
snprintf (buf2, 512, "\n\t[_LikelihoodFunction::Bracket (index %ld) BRACKET %s: %20.16g <= %20.16g >= %20.16g. steps, L=%g, R=%g, values %15.12g : %15.12g - %15.12g]", index, successful ? "SUCCESSFUL" : "FAILED", left,middle,right, leftStep, rightStep, leftValue - middleValue, middleValue, rightValue - middleValue);
@@ -6500,7 +6516,7 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
}
SetAllIndependent (&bestVal);
- if (maxSoFar < initial_value && CheckEqual(maxSoFar, initial_value, kMachineEpsilon * 100.) == false) {
+ if (maxSoFar < initial_value && CheckEqual(maxSoFar, initial_value, kMachineEpsilon * errorTolerance) == false) {
HandleApplicationError (_String("Internal optimization error in _LikelihoodFunction::ConjugateGradientDescent. Worsened likelihood score from ") & initial_value & " to " & maxSoFar);
}
@@ -6874,7 +6890,7 @@ void _LikelihoodFunction::GradientDescent (hyFloat& gPrecision, _Matrix& best
bestVal = current_best_vector;
}
- if (maxSoFar < initialValue && !CheckEqual (maxSoFar, initialValue, 100. * kMachineEpsilon)) {
+ if (maxSoFar < initialValue && !CheckEqual (maxSoFar, initialValue, kMachineEpsilon*errorTolerance)) {
_TerminateAndDump(_String (" _LikelihoodFunction::GradientLocateTheBump: in the Brent loop iteration ") & long(outcome) & ". " & _String (maxSoFar, "%18.16g") & " / " & _String (initialValue,"%18.16g") & ".\n Optimization direction: \n" & _String ((_String*)gradient.toStr()) );
return 0.;
@@ -7081,7 +7097,7 @@ void _LikelihoodFunction::LocateTheBump (long index,hyFloat gPrecision, hyFlo
snprintf (buf, 256, "\n\t[_LikelihoodFunction::LocateTheBump (index %ld) RESETTING THE VALUE (worse log likelihood obtained; current value %20.16g, best value %20.16g) ]\n\n", index, GetIthIndependent(index), bestVal);
BufferToConsole (buf);
}
- if (CheckEqual(GetIthIndependent(index), bestVal) && fabs (middleValue-maxSoFar) > 1e-9) {
+ if (CheckEqual(GetIthIndependent(index), bestVal) && fabs (middleValue-maxSoFar) > 1e-12*errorTolerance) {
char buf[256];
snprintf (buf, 256, " \n\tERROR: [_LikelihoodFunction::LocateTheBump (index %ld) current value %20.16g (parameter = %20.16g), best value %20.16g (parameter = %20.16g)); delta = %20.16g ]\n\n", index, middleValue, GetIthIndependent(index), maxSoFar, bestVal, maxSoFar - middleValue);
_TerminateAndDump (_String (buf) & "\n" & "\nParameter name " & *GetIthIndependentName(index));
=====================================
src/core/matrix.cpp
=====================================
@@ -1895,7 +1895,7 @@ bool _Matrix::AmISparseFast (_Matrix& whereTo) {
long * non_zero_index = (long*)alloca (threshold*sizeof(long));
-#ifdef _SLKP_USE_AVX_INTRINSICS
+#if defined _SLKP_USE_AVX_INTRINSICS
__m256d zeros = _mm256_setzero_pd();
long lDimMOD4 = lDim >> 2 << 2;
for (long i = 0; i < lDimMOD4; i+=4) {
@@ -1917,6 +1917,27 @@ bool _Matrix::AmISparseFast (_Matrix& whereTo) {
}
}
}
+#elif _SLKP_USE_ARM_NEON
+ float64x2_t zeros = vdupq_n_f64 (0.);
+ long lDimMOD2 = lDim >> 1 << 1;
+ for (long i = 0; i < lDimMOD2; i+=2) {
+
+ uint64x2_t res = vceqq_f64 (vld1q_f64 (theData+i), zeros);
+ if (vaddvq_u64 (res)) { // something is different
+ if (vgetq_lane_u64 (res,0)) { non_zero_index[k++] = i; if (k == threshold) break; };
+ if (vgetq_lane_u64 (res,1)) { non_zero_index[k++] = i+1; if (k == threshold) break; };
+ }
+ }
+
+ if (k < threshold)
+ for (long i = lDimMOD2; i < lDim; i++) {
+ if (theData[i] != 0.0) {
+ non_zero_index[k++] = i;
+ if (k == threshold) {
+ return false;
+ }
+ }
+ }
#else
for (long i = 0; i < lDim; i++) {
if (theData[i] != 0.0) {
@@ -3357,8 +3378,8 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract
if (subtract) {
#ifdef _SLKP_USE_AVX_INTRINSICS
- #define CELL_OP1(x,y) __m256d y = _mm256_sub_pd (_mm256_loadu_pd (stData+x), _mm256_loadu_pd (argData+x))
-#define CELL_OP2(x,y) _mm256_storeu_pd (stData+x,y)
+ #define CELL_OP1(x,y) __m256d y = _mm256_sub_pd (_mm256_loadu_pd (stData+x), _mm256_loadu_pd (argData+x))
+ #define CELL_OP2(x,y) _mm256_storeu_pd (stData+x,y)
#pragma GCC unroll 4
#pragma clang loop vectorize(enable)
@@ -3376,6 +3397,26 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract
CELL_OP2 (idx+8,r3);
CELL_OP2 (idx+12,r4);
}
+#elif defined _SLKP_USE_ARM_NEON
+ #define CELL_OP1(x,y) float64x2_t y = vsubq_f64 (vld1q_f64 (stData+x), vld1q_f64 (argData+x))
+ #define CELL_OP2(x,y) vst1q_f64 (stData+x,y)
+ #pragma GCC unroll 4
+ #pragma clang loop vectorize(enable)
+ #pragma clang loop interleave(enable)
+ #pragma clang loop unroll(enable)
+ #pragma GCC ivdep
+ #pragma ivdep
+ for (long idx = 0; idx < upto; idx+=8) {
+ CELL_OP1 (idx,r1);
+ CELL_OP1 (idx+2,r2);
+ CELL_OP1 (idx+4,r3);
+ CELL_OP1 (idx+6,r4);
+ CELL_OP2 (idx,r1);
+ CELL_OP2 (idx+2,r2);
+ CELL_OP2 (idx+4,r3);
+ CELL_OP2 (idx+6,r4);
+ }
+
#else
for (long idx = 0; idx < upto; idx+=4) {
stData[idx]-=argData[idx];
@@ -3400,7 +3441,27 @@ void _Matrix::AddMatrix (_Matrix& storage, _Matrix& secondArg, bool subtract
CELL_OP (idx+12);
}
- #else
+#elif defined _SLKP_USE_ARM_NEON
+ #define CELL_OP1(x,y) float64x2_t y = vaddq_f64 (vld1q_f64 (stData+x), vld1q_f64 (argData+x))
+ #define CELL_OP2(x,y) vst1q_f64 (stData+x,y)
+ #pragma GCC unroll 4
+ #pragma clang loop vectorize(enable)
+ #pragma clang loop interleave(enable)
+ #pragma clang loop unroll(enable)
+ #pragma GCC ivdep
+ #pragma ivdep
+ for (long idx = 0; idx < upto; idx+=8) {
+ CELL_OP1 (idx,r1);
+ CELL_OP1 (idx+2,r2);
+ CELL_OP1 (idx+4,r3);
+ CELL_OP1 (idx+6,r4);
+ CELL_OP2 (idx,r1);
+ CELL_OP2 (idx+2,r2);
+ CELL_OP2 (idx+4,r3);
+ CELL_OP2 (idx+6,r4);
+ }
+
+#else
for (long idx = 0; idx < upto; idx+=4) {
stData[idx]+=argData[idx];
stData[idx+1]+=argData[idx+1];
@@ -3630,6 +3691,21 @@ void _Matrix::Multiply (_Matrix& storage, hyFloat c)
for (; k < lDim; k++) {
destination[k] = source[k]*c;
}
+ #elif defined _SLKP_USE_ARM_NEON
+ #define CELL_OP(k) vst1q_f64 (destination + k, vmulq_f64(value_op, vld1q_f64 (source+k)))
+ long lDimM8 = lDim >> 3 << 3,
+ k = 0;
+
+ float64x2_t value_op = vdupq_n_f64 (c);
+ for (k = 0L; k < lDimM8; k+=8) {
+ CELL_OP (k);
+ CELL_OP (k+2);
+ CELL_OP (k+4);
+ CELL_OP (k+6);
+ }
+ for (; k < lDim; k++) {
+ destination[k] = source[k]*c;
+ }
#else
for (long k = 0L; k < lDim; k++) {
destination[k] = source[k]*c;
@@ -3730,11 +3806,14 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
#elif _SLKP_USE_SSE_INTRINSICS
#define DO_GROUP_OP1(X,Y,k) X = _mm_loadu_pd(dest + row_offset + k); Y = _mm_loadu_pd(secondArg.theData + col_offset + k); X = _mm_add_pd (X, _mm_mul_pd(A4, Y));
#define DO_GROUP_OP2(X,k) _mm_storeu_pd (dest + row_offset + k,X);
+
+#elif _SLKP_USE_ARM_NEON
+ #define DO_GROUP_OP1(X,Y,k) X = vld1q_f64 (dest + row_offset + k); Y = vld1q_f64 (secondArg.theData + col_offset + k); X = vfmaq_f64 (X, A4, Y);
+ #define DO_GROUP_OP2(X,k) vst1q_f64 (dest + row_offset + k,X);
#endif
#ifndef _SLKP_SSE_VECTORIZATION_
-
if (dimm4 == vDim) {
#if defined _SLKP_USE_AVX_INTRINSICS
@@ -3818,7 +3897,7 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
}
return;
#elif _SLKP_USE_SSE_INTRINSICS
- memset (dest, 0, lDim * sizeof (hyFloat));
+ memset (dest, 0, lDim * sizeof (hyFloat));
long ti = 0L,
row_offset = 0L;
@@ -3869,6 +3948,58 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
}
}
return;
+ #elif _SLKP_USE_ARM_NEON
+ memset (dest, 0, lDim * sizeof (hyFloat));
+ long ti = 0L,
+ row_offset = 0L;
+
+ if (vDim == 20UL) {
+ for (long r = 0; r < 20; r++, row_offset += 20) {
+ long col_offset = 0L;
+ for (long c = 0; c < 20L; c++, ti++, col_offset += 20L) {
+ float64x2_t A4 = vdupq_n_f64(theData[ti]);
+ //for (long k = 0; k < 20L; k+=4) {
+ float64x2_t D4, B4, D4_1, B4_1, D4_2, B4_2, D4_3, B4_3, D4_4, B4_4;
+ DO_GROUP_OP1 (D4, B4, 0);
+ DO_GROUP_OP1 (D4_1, B4_1, 2);
+ DO_GROUP_OP1 (D4_2, B4_2, 4);
+ DO_GROUP_OP1 (D4_3, B4_3, 6);
+ DO_GROUP_OP1 (D4_4, B4_4, 8);
+ DO_GROUP_OP2 (D4, 0);
+ DO_GROUP_OP2 (D4_1, 2);
+ DO_GROUP_OP2 (D4_2, 4);
+ DO_GROUP_OP2 (D4_3, 6);
+ DO_GROUP_OP2 (D4_4, 8);
+ DO_GROUP_OP1 (D4, B4, 10);
+ DO_GROUP_OP1 (D4_1, B4_1, 12);
+ DO_GROUP_OP1 (D4_2, B4_2, 14);
+ DO_GROUP_OP1 (D4_3, B4_3, 16);
+ DO_GROUP_OP1 (D4_4, B4_4, 18);
+ DO_GROUP_OP2 (D4, 10);
+ DO_GROUP_OP2 (D4_1, 12);
+ DO_GROUP_OP2 (D4_2, 14);
+ DO_GROUP_OP2 (D4_3, 16);
+ DO_GROUP_OP2 (D4_4, 18);
+ //}
+ }
+ }
+ } else
+ for (long r = 0; r < vDim; r++, row_offset += vDim) {
+ long col_offset = 0L;
+ for (long c = 0; c < vDim; c++, ti++, col_offset += vDim) {
+ float64x2_t A4 = vdupq_n_f64(theData[ti]);
+ #pragma GCC unroll 4
+ #pragma clang loop vectorize(enable)
+ #pragma clang loop interleave(enable)
+ #pragma clang loop unroll(enable)
+ for (long k = 0; k < vDim; k+=2) {
+ float64x2_t D4, B4;
+ DO_GROUP_OP1 (D4, B4, k);
+ DO_GROUP_OP2 (D4, k);
+ }
+ }
+ }
+ return;
#endif
memset (dest, 0, lDim * sizeof (hyFloat));
@@ -3970,7 +4101,6 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
if (dimm4 == 60UL) { // codons
-
for (long r = 0; r < hDim; r++, row_offset += vDim) {
long col_offset = 0L;
for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
@@ -4147,7 +4277,116 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
}
}
}
- #else
+ #elif _SLKP_USE_ARM_NEON
+ long ti = 0L,
+ row_offset = 0L;
+
+ if (dimm4 == 60UL) { // codons
+
+
+ for (long r = 0; r < hDim; r++, row_offset += vDim) {
+ long col_offset = 0L;
+ for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
+ float64x2_t A4 = vdupq_n_f64(theData[ti]);
+
+ float64x2_t D4_1, D4_2, D4_3, D4_4;
+ float64x2_t B4_1, B4_2, B4_3, B4_4;
+
+ DO_GROUP_OP1 (D4_1,B4_1,0);
+ DO_GROUP_OP1 (D4_2,B4_2,2);
+ DO_GROUP_OP1 (D4_3,B4_3,4);
+ DO_GROUP_OP1 (D4_4,B4_4,6);
+ DO_GROUP_OP2 (D4_1,0);
+ DO_GROUP_OP2 (D4_2,2);
+ DO_GROUP_OP2 (D4_3,4);
+ DO_GROUP_OP2 (D4_4,6);
+
+ DO_GROUP_OP1 (D4_1,B4_1,8);
+ DO_GROUP_OP1 (D4_2,B4_2,10);
+ DO_GROUP_OP1 (D4_3,B4_3,12);
+ DO_GROUP_OP1 (D4_4,B4_4,14);
+ DO_GROUP_OP2 (D4_1,8);
+ DO_GROUP_OP2 (D4_2,10);
+ DO_GROUP_OP2 (D4_3,12);
+ DO_GROUP_OP2 (D4_4,14);
+
+ DO_GROUP_OP1 (D4_1,B4_1,16);
+ DO_GROUP_OP1 (D4_2,B4_2,18);
+ DO_GROUP_OP1 (D4_3,B4_3,20);
+ DO_GROUP_OP1 (D4_4,B4_4,22);
+ DO_GROUP_OP2 (D4_1,16);
+ DO_GROUP_OP2 (D4_2,18);
+ DO_GROUP_OP2 (D4_3,20);
+ DO_GROUP_OP2 (D4_4,22);
+
+ DO_GROUP_OP1 (D4_1,B4_1,24);
+ DO_GROUP_OP1 (D4_2,B4_2,26);
+ DO_GROUP_OP1 (D4_3,B4_3,28);
+ DO_GROUP_OP1 (D4_4,B4_4,30);
+ DO_GROUP_OP2 (D4_1,24);
+ DO_GROUP_OP2 (D4_2,26);
+ DO_GROUP_OP2 (D4_3,28);
+ DO_GROUP_OP2 (D4_4,30);
+
+ DO_GROUP_OP1 (D4_1,B4_1,32);
+ DO_GROUP_OP1 (D4_2,B4_2,34);
+ DO_GROUP_OP1 (D4_3,B4_3,36);
+ DO_GROUP_OP1 (D4_4,B4_4,38);
+ DO_GROUP_OP2 (D4_1,32);
+ DO_GROUP_OP2 (D4_2,34);
+ DO_GROUP_OP2 (D4_3,36);
+ DO_GROUP_OP2 (D4_4,38);
+
+ DO_GROUP_OP1 (D4_1,B4_1,40);
+ DO_GROUP_OP1 (D4_2,B4_2,42);
+ DO_GROUP_OP1 (D4_3,B4_3,44);
+ DO_GROUP_OP1 (D4_4,B4_4,46);
+ DO_GROUP_OP2 (D4_1,40);
+ DO_GROUP_OP2 (D4_2,42);
+ DO_GROUP_OP2 (D4_3,44);
+ DO_GROUP_OP2 (D4_4,46);
+
+ DO_GROUP_OP1 (D4_1,B4_1,48);
+ DO_GROUP_OP1 (D4_2,B4_2,50);
+ DO_GROUP_OP1 (D4_3,B4_3,52);
+ DO_GROUP_OP1 (D4_4,B4_4,54);
+ DO_GROUP_OP2 (D4_1,48);
+ DO_GROUP_OP2 (D4_2,50);
+ DO_GROUP_OP2 (D4_3,52);
+ DO_GROUP_OP2 (D4_4,54);
+
+ DO_GROUP_OP1 (D4_1,B4_1,56);
+ DO_GROUP_OP1 (D4_2,B4_2,58);
+ DO_GROUP_OP2 (D4_1,56);
+ DO_GROUP_OP2 (D4_2,58);
+
+ for (long k = dimm4; k < vDim; k++) {
+ dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
+ }
+ }
+ }
+ } else { // something else
+ for (long r = 0; r < hDim; r++, row_offset += vDim) {
+ long col_offset = 0L;
+ for (long c = 0; c < hDim; c++, ti++, col_offset += vDim) {
+ float64x2_t A4 = vdupq_n_f64(theData[ti]);
+ #pragma GCC unroll 4
+ #pragma clang loop vectorize(enable)
+ #pragma clang loop interleave(enable)
+ #pragma clang loop unroll(enable)
+ for (long k = 0; k < dimm4; k+=2) {
+ float64x2_t D4, B4;
+ DO_GROUP_OP1 (D4, B4, k);
+ DO_GROUP_OP2 (D4, k);
+ }
+
+ for (long k = dimm4; k < vDim; k++) {
+ dest[row_offset + k] += theData[ti] * secondArg.theData[col_offset + k];
+ }
+ }
+ }
+ }
+ #else
const unsigned long
column_shift2 = secondArg.vDim << 1,
column_shift3 = (secondArg.vDim << 1) + secondArg.vDim,
@@ -4185,7 +4424,7 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
#endif
}
-#else
+ #else
secondArg.Transpose();
for (long i=0; i<hDim; i++, row += vDim) {
for (long j=0; j<hDim; j++) {
@@ -4340,7 +4579,19 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
CELL_OP(16);CELL_OP(20);CELL_OP(24);CELL_OP(28);
CELL_OP(32);CELL_OP(36);CELL_OP(40);CELL_OP(44);
CELL_OP(48);CELL_OP(52);CELL_OP(56);
+ #elif _SLKP_USE_ARM_NEON
+ float64x2_t value_op = vdupq_n_f64 (value);
+ #define CELL_OP(x) vst1q_f64 (res+x, vfmaq_f64 (vld1q_f64(res+x), value_op, vld1q_f64 (secArg+x)))
+
+ CELL_OP(0);CELL_OP(2);CELL_OP(4);CELL_OP(6);
+ CELL_OP(8);CELL_OP(10);CELL_OP(12);CELL_OP(14);
+ CELL_OP(16);CELL_OP(18);CELL_OP(20);CELL_OP(22);
+ CELL_OP(24);CELL_OP(26);CELL_OP(28);
+ CELL_OP(30);CELL_OP(32);CELL_OP(34);CELL_OP(36);CELL_OP(38);
+ CELL_OP(40);CELL_OP(42);CELL_OP(44);CELL_OP(46);
+ CELL_OP(48);CELL_OP(50);CELL_OP(52);CELL_OP(54);
+ CELL_OP(56);CELL_OP(58);
#else
for (unsigned long i = 0UL; i < 60UL; i+=4UL) {
res[i] += value * secArg[i];
@@ -4359,6 +4610,7 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
} else {
+
for (unsigned long k=0UL; k<lDim; k++) { // loop over entries in the sparse matrix
long m = theIndex[k];
if (m >= 0L) {
@@ -4380,13 +4632,26 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
CELL_OP(16);CELL_OP(20);CELL_OP(24);CELL_OP(28);
CELL_OP(32);CELL_OP(36);CELL_OP(40);CELL_OP(44);
CELL_OP(48);CELL_OP(52);CELL_OP(56);
+ #elif _SLKP_USE_ARM_NEON
+ float64x2_t value_op = vdupq_n_f64 (value);
+ #define CELL_OP(x) vst1q_f64 (res+x, vfmaq_f64 (vld1q_f64(res+x), value_op, vld1q_f64 (secArg+x)))
+
+
+ CELL_OP(0);CELL_OP(2);CELL_OP(4);CELL_OP(6);
+ CELL_OP(8);CELL_OP(10);CELL_OP(12);CELL_OP(14);
+ CELL_OP(16);CELL_OP(18);CELL_OP(20);CELL_OP(22);
+ CELL_OP(24);CELL_OP(26);CELL_OP(28);
+ CELL_OP(30);CELL_OP(32);CELL_OP(34);CELL_OP(36);CELL_OP(38);
+ CELL_OP(40);CELL_OP(42);CELL_OP(44);CELL_OP(46);
+ CELL_OP(48);CELL_OP(50);CELL_OP(52);CELL_OP(54);
+ CELL_OP(56);CELL_OP(58);
#else
- for (unsigned long i = 0UL; i < 60UL; i+=4UL) {
- res[i] += value * secArg[i];
- res[i+1] += value * secArg[i+1];
- res[i+2] += value * secArg[i+2];
- res[i+3] += value * secArg[i+3];
- }
+ for (unsigned long i = 0UL; i < 60UL; i+=4UL) {
+ res[i] += value * secArg[i];
+ res[i+1] += value * secArg[i+1];
+ res[i+2] += value * secArg[i+2];
+ res[i+3] += value * secArg[i+3];
+ }
#endif
res[60] += value * secArg[60];
}
@@ -4410,13 +4675,19 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
#ifdef _SLKP_USE_AVX_INTRINSICS
__m256d value_op = _mm256_set1_pd (value);
#endif
+ #ifdef _SLKP_USE_ARM_NEON
+ float64x2_t value_op = vdupq_n_f64 (value);
+ #endif
for (unsigned long i = 0UL; i < loopBound; i+=4) {
#ifdef _SLKP_USE_AVX_INTRINSICS
- #ifdef _SLKP_USE_FMA3_INTRINSICS
- _mm256_storeu_pd (res+i, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+i),_mm256_loadu_pd(res+i)));
- #else
- _mm256_storeu_pd (res+i, _mm256_add_pd (_mm256_loadu_pd(res+i), _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+i))));
- #endif
+ #ifdef _SLKP_USE_FMA3_INTRINSICS
+ _mm256_storeu_pd (res+i, _mm256_fmadd_pd (value_op, _mm256_loadu_pd (secArg+i),_mm256_loadu_pd(res+i)));
+ #else
+ _mm256_storeu_pd (res+i, _mm256_add_pd (_mm256_loadu_pd(res+i), _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+i))));
+ #endif
+ #elif defined _SLKP_USE_ARM_NEON
+ vst1q_f64 (res+i, vfmaq_f64 (vld1q_f64(res+i), value_op, vld1q_f64 (secArg+i)));
+ vst1q_f64 (res+i+2, vfmaq_f64 (vld1q_f64(res+i+2), value_op, vld1q_f64 (secArg+i+2)));
#else
res[i] += value * secArg[i];
res[i+1] += value * secArg[i+1];
@@ -4446,6 +4717,9 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
#ifdef _SLKP_USE_AVX_INTRINSICS
__m256d value_op = _mm256_set1_pd (value);
#endif
+ #ifdef _SLKP_USE_ARM_NEON
+ float64x2_t value_op = vdupq_n_f64 (value);
+ #endif
for (unsigned long i = 0UL; i < loopBound; i+=4) {
#ifdef _SLKP_USE_AVX_INTRINSICS
@@ -4454,12 +4728,16 @@ void _Matrix::Multiply (_Matrix& storage, _Matrix const& secondArg) const
#else
_mm256_storeu_pd (res+i, _mm256_add_pd (_mm256_loadu_pd(res+i), _mm256_mul_pd(value_op, _mm256_loadu_pd (secArg+i))));
#endif
+ #elif defined _SLKP_USE_ARM_NEON
+ vst1q_f64 (res+i, vfmaq_f64 (vld1q_f64(res+i), value_op, vld1q_f64 (secArg+i)));
+ vst1q_f64 (res+i+2, vfmaq_f64 (vld1q_f64(res+i+2), value_op, vld1q_f64 (secArg+i+2)));
#else
res[i] += value * secArg[i];
res[i+1] += value * secArg[i+1];
res[i+2] += value * secArg[i+2];
res[i+3] += value * secArg[i+3];
#endif
+
}
for (unsigned long i = loopBound; i < vDim; i++) {
res[i] += value * secArg[i];
@@ -6547,9 +6825,8 @@ hyFloat _Matrix::Sqr (hyFloat* _hprestrict_ stash) {
Swap(temp);
return DBL_EPSILON * 1.e4;
} else {
- if (hDim==4)
+ if (hDim==4) {
// special case for nucleotides
- {
for (unsigned long i=0UL, k = 0UL; i<16; i+=4) {
for (unsigned long j=0UL; j<4UL; j++, k++) {
hyFloat p1 = theData[i] * theData [j];
@@ -6564,6 +6841,123 @@ hyFloat _Matrix::Sqr (hyFloat* _hprestrict_ stash) {
//vDim - vDim % 4;
// loop interchange rocks!
+#ifdef _SLKP_USE_ARM_NEON
+ #define DO_GROUP_OP0(X,Y,k) Y = vld1q_f64(theData + col_offset + k); X = vmulq_f64(A4, Y);
+ #define DO_GROUP_OP1(X,Y,k) X = vld1q_f64(dest + row_offset + k); Y = vld1q_f64(theData + col_offset + k); X = vfmaq_f64 (X,A4,Y);
+ #define DO_GROUP_OP2(X,k) vst1q_f64 (dest + row_offset + k,X);
+
+ if (true) {
+ hyFloat * _hprestrict_ dest = stash;
+
+ long ti = 0L,
+ row_offset = 0L;
+
+ if (loopBound == 60UL) {
+ for (long r = 0; r < hDim; r++, row_offset += vDim) {
+ long col_offset = 0L;
+ { // row 1
+ float64x2_t A4 = vdupq_n_f64 (theData[ti]);
+ float64x2_t X1, X2, X3, X4,
+ Y1, Y2, Y3, Y4;
+
+ DO_GROUP_OP0 (X1, Y1, 0); DO_GROUP_OP0 (X2, Y2, 2); DO_GROUP_OP0 (X3, Y3, 4); DO_GROUP_OP0 (X4, Y4, 6);
+ DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
+ DO_GROUP_OP0 (X1, Y1, 8); DO_GROUP_OP0 (X2, Y2, 10); DO_GROUP_OP0 (X3, Y3, 12); DO_GROUP_OP0 (X4, Y4, 14);
+ DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
+ DO_GROUP_OP0 (X1, Y1, 16); DO_GROUP_OP0 (X2, Y2, 18); DO_GROUP_OP0 (X3, Y3, 20); DO_GROUP_OP0 (X4, Y4, 22);
+ DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
+ DO_GROUP_OP0 (X1, Y1, 24); DO_GROUP_OP0 (X2, Y2, 26); DO_GROUP_OP0 (X3, Y3, 28); DO_GROUP_OP0 (X4, Y4, 30);
+ DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
+ DO_GROUP_OP0 (X1, Y1, 32); DO_GROUP_OP0 (X2, Y2, 34); DO_GROUP_OP0 (X3, Y3, 36); DO_GROUP_OP0 (X4, Y4, 38);
+ DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
+ DO_GROUP_OP0 (X1, Y1, 40); DO_GROUP_OP0 (X2, Y2, 42); DO_GROUP_OP0 (X3, Y3, 44); DO_GROUP_OP0 (X4, Y4, 46);
+ DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
+ DO_GROUP_OP0 (X1, Y1, 48); DO_GROUP_OP0 (X2, Y2, 50); DO_GROUP_OP0 (X3, Y3, 52); DO_GROUP_OP0 (X4, Y4, 54);
+ DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
+ DO_GROUP_OP0 (X1, Y1, 56); DO_GROUP_OP0 (X2, Y2, 58);
+ DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
+
+
+ for (long k = 60; k < vDim; k++) {
+ dest[row_offset + k] = theData[ti] * theData[k];
+ }
+ col_offset = vDim;
+ ti++;
+ }
+
+ for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
+ float64x2_t A4 = vdupq_n_f64 (theData[ti]);
+ float64x2_t X1, X2, X3, X4,
+ Y1, Y2, Y3, Y4;
+
+ DO_GROUP_OP1 (X1, Y1, 0); DO_GROUP_OP1 (X2, Y2, 2); DO_GROUP_OP1 (X3, Y3, 4); DO_GROUP_OP1 (X4, Y4, 6);
+ DO_GROUP_OP2 (X1,0); DO_GROUP_OP2 (X2,2); DO_GROUP_OP2 (X3,4); DO_GROUP_OP2 (X4,6);
+ DO_GROUP_OP1 (X1, Y1, 8); DO_GROUP_OP1 (X2, Y2, 10); DO_GROUP_OP1 (X3, Y3, 12); DO_GROUP_OP1 (X4, Y4, 14);
+ DO_GROUP_OP2 (X1,8); DO_GROUP_OP2 (X2,10); DO_GROUP_OP2 (X3,12); DO_GROUP_OP2 (X4,14);
+ DO_GROUP_OP1 (X1, Y1, 16); DO_GROUP_OP1 (X2, Y2, 18); DO_GROUP_OP1 (X3, Y3, 20); DO_GROUP_OP1 (X4, Y4, 22);
+ DO_GROUP_OP2 (X1,16); DO_GROUP_OP2 (X2,18); DO_GROUP_OP2 (X3,20); DO_GROUP_OP2 (X4,22);
+ DO_GROUP_OP1 (X1, Y1, 24); DO_GROUP_OP1 (X2, Y2, 26); DO_GROUP_OP1 (X3, Y3, 28); DO_GROUP_OP1 (X4, Y4, 30);
+ DO_GROUP_OP2 (X1,24); DO_GROUP_OP2 (X2,26); DO_GROUP_OP2 (X3,28); DO_GROUP_OP2 (X4,30);
+ DO_GROUP_OP1 (X1, Y1, 32); DO_GROUP_OP1 (X2, Y2, 34); DO_GROUP_OP1 (X3, Y3, 36); DO_GROUP_OP1 (X4, Y4, 38);
+ DO_GROUP_OP2 (X1,32); DO_GROUP_OP2 (X2,34); DO_GROUP_OP2 (X3,36); DO_GROUP_OP2 (X4,38);
+ DO_GROUP_OP1 (X1, Y1, 40); DO_GROUP_OP1 (X2, Y2, 42); DO_GROUP_OP1 (X3, Y3, 44); DO_GROUP_OP1 (X4, Y4, 46);
+ DO_GROUP_OP2 (X1,40); DO_GROUP_OP2 (X2,42); DO_GROUP_OP2 (X3,44); DO_GROUP_OP2 (X4,46);
+ DO_GROUP_OP1 (X1, Y1, 48); DO_GROUP_OP1 (X2, Y2, 50); DO_GROUP_OP1 (X3, Y3, 52); DO_GROUP_OP1 (X4, Y4, 54);
+ DO_GROUP_OP2 (X1,48); DO_GROUP_OP2 (X2,50); DO_GROUP_OP2 (X3,52); DO_GROUP_OP2 (X4,54);
+ DO_GROUP_OP1 (X1, Y1, 56); DO_GROUP_OP1 (X2, Y2, 58);
+ DO_GROUP_OP2 (X1,56); DO_GROUP_OP2 (X2,58);
+
+ for (long k = 60; k < vDim; k++) {
+ dest[row_offset + k] += theData[ti] * theData[col_offset + k];
+ }
+ }
+ }
+ } else {
+ for (long r = 0; r < hDim; r++, row_offset += vDim) {
+ long col_offset = 0L;
+ { // row 1
+ float64x2_t A4 = vdupq_n_f64 (theData[ti]);
+ #pragma GCC unroll 4
+ #pragma clang loop vectorize(enable)
+ #pragma clang loop interleave(enable)
+ #pragma clang loop unroll(enable)
+ #pragma GCC ivdep
+ #pragma ivdep
+ for (long k = 0; k < loopBound; k+=4) {
+ float64x2_t D4, B4, X1, X2;
+ DO_GROUP_OP0 (D4, B4, k);
+ DO_GROUP_OP0 (X1, X2, k+2);
+ DO_GROUP_OP2 (D4,k);
+ DO_GROUP_OP2 (X1,k+2);
+ }
+
+ for (long k = loopBound; k < vDim; k++) {
+ dest[row_offset + k] = theData[ti] * theData[k];
+ }
+ col_offset = vDim;
+ ti++;
+ }
+
+ for (long c = 1; c < hDim; c++, ti++, col_offset += vDim) {
+ float64x2_t A4 = vdupq_n_f64 (theData[ti]);
+ for (long k = 0; k < loopBound; k+=4) {
+ float64x2_t D4, B4, X1, X2;
+ DO_GROUP_OP1 (D4, B4, k);
+ DO_GROUP_OP1 (X1, X2, k+2);
+ DO_GROUP_OP2 (D4,k);
+ DO_GROUP_OP2 (X1,k+2);
+ }
+
+ for (long k = loopBound; k < vDim; k++) {
+ dest[row_offset + k] += theData[ti] * theData[col_offset + k];
+ }
+ }
+ }
+ }
+ } else {
+
+#endif
+
#ifdef _SLKP_USE_AVX_INTRINSICS
#define DO_GROUP_OP0(X,Y,k) Y = _mm256_loadu_pd(theData + col_offset + k); X = _mm256_mul_pd(A4, Y);
#ifdef _SLKP_USE_FMA3_INTRINSICS
@@ -6906,7 +7300,9 @@ hyFloat _Matrix::Sqr (hyFloat* _hprestrict_ stash) {
#endif
}
}
-
+#ifdef _SLKP_USE_ARM_NEON
+ }
+#endif
long lDimmod4 = (lDim >> 2) << 2;
hyFloat diffs[4] = {0.0,0.0,0.0,0.0};
=====================================
src/core/tree_evaluator.cpp
=====================================
@@ -122,6 +122,11 @@ inline double _sse_sum_2 (__m128d const & x) {
}
#endif
+#ifdef _SLKP_USE_ARM_NEON
+inline double _neon_sum_2 (float64x2_t const & x) {
+ return vgetq_lane_f64 (x,0) + vgetq_lane_f64 (x,1);
+}
+#endif
template<long D> inline void __ll_handle_matrix_transpose (hyFloat const * __restrict transitionMatrix, hyFloat * __restrict tMatrixT) {
long i = 0L;
@@ -268,6 +273,40 @@ template<long D, long S> inline void __ll_handle_block10_product_sum_linear (hyF
}
#endif
+#ifdef _SLKP_USE_ARM_NEON
+template<long D, long S> inline void __ll_handle_block10_product_sum_linear (hyFloat const* _hprestrict_ tT, hyFloat * _hprestrict_ childVector, float64x2_t * lastTotal) {
+
+ float64x2_t T [5] = {
+ vld1q_f64 (tT+S),
+ vld1q_f64 (tT+S+2),
+ vld1q_f64 (tT+S+4),
+ vld1q_f64 (tT+S+6),
+ vld1q_f64 (tT+S+8)
+ };
+
+ float64x2_t C [5] = {
+ vld1q_f64 (childVector+S),
+ vld1q_f64 (childVector+S+2),
+ vld1q_f64 (childVector+S+4),
+ vld1q_f64 (childVector+S+6),
+ vld1q_f64 (childVector+S+8)
+ };
+
+ if (S) {
+ T[0] = vfmaq_f64 (vmulq_f64 (T[1], C[1]), T[0], C[0]); //0+1
+ T[2] = vfmaq_f64 (vmulq_f64 (T[3], C[3]), T[2], C[2]); //2+3
+ T[3] = vfmaq_f64 (T[0], T[4], C[4]); //0+1+4
+ T[0] = vaddq_f64 (T[3], T[2]); // 0+1+2+3+4
+ *lastTotal = vaddq_f64 (*lastTotal, T[0]);
+ } else {
+ T[0] = vfmaq_f64 (vmulq_f64 (T[1], C[1]), T[0], C[0]); //0+1
+ T[2] = vfmaq_f64 (vmulq_f64 (T[3], C[3]), T[2], C[2]); //2+3
+ T[3] = vfmaq_f64 (T[0], T[4], C[4]); //0+1+4
+ *lastTotal = vaddq_f64 (T[3], T[2]); // 0+1+2+3+4
+ }
+}
+#endif
+
#ifdef _SLKP_USE_AVX_INTRINSICS
template<long D, long S> inline void __ll_handle_block20_product_sum_linear (hyFloat const* _hprestrict_ tT, hyFloat * _hprestrict_ childVector, __m256d * lastTotal) {
@@ -374,9 +413,9 @@ template<long D, long S> inline __m128d __ll_handle_block10_product_sum (hyFloat
_mm_storeu_pd(parentConditionals+S+8, P[4]);
P[0] =_mm_add_pd (P[0],P[1]);
- P[1] =_mm_add_pd (P[2],P[3]);
+ P[2] =_mm_add_pd (P[2],P[3]);
P[0] =_mm_add_pd (P[0],P[4]);
- P[2] =_mm_add_pd (P[0],P[1]);
+ P[2] =_mm_add_pd (P[0],P[2]);
if (grandTotal) {
*grandTotal = _mm_add_pd (P[2],*grandTotal);
return *grandTotal;
@@ -386,6 +425,57 @@ template<long D, long S> inline __m128d __ll_handle_block10_product_sum (hyFloat
}
#endif
+#ifdef _SLKP_USE_ARM_NEON
+template<long D, long S> inline float64x2_t __ll_handle_block10_product_sum (hyFloat const* _hprestrict_ transposedMatrix, hyFloat * _hprestrict_ childVector, hyFloat * _hprestrict_ parentConditionals, float64x2_t * grandTotal) {
+
+ float64x2_t P [5] = {
+ vdupq_n_f64(0.),
+ vdupq_n_f64(0.),
+ vdupq_n_f64(0.),
+ vdupq_n_f64(0.),
+ vdupq_n_f64(0.)
+ };
+
+ hyFloat const * __restrict tT = transposedMatrix;
+
+ for (long col_idx = 0; col_idx < D; col_idx++, tT+=D) {
+ if (childVector[col_idx] > 0.) {
+ float64x2_t C = vdupq_n_f64(childVector[col_idx]);
+
+ P[0] = vfmaq_f64 (P[0], vld1q_f64 (tT+S), C);
+ P[1] = vfmaq_f64 (P[1], vld1q_f64 (tT+S+2), C);
+ P[2] = vfmaq_f64 (P[2], vld1q_f64 (tT+S+4), C);
+ P[3] = vfmaq_f64 (P[3], vld1q_f64 (tT+S+6), C);
+ P[4] = vfmaq_f64 (P[4], vld1q_f64 (tT+S+8), C);
+ }
+ }
+
+
+ P[0] = vmulq_f64(vld1q_f64(parentConditionals+S), P[0]);
+ P[1] = vmulq_f64(vld1q_f64(parentConditionals+S+2), P[1]);
+ P[2] = vmulq_f64(vld1q_f64(parentConditionals+S+4), P[2]);
+ P[3] = vmulq_f64(vld1q_f64(parentConditionals+S+6), P[3]);
+ P[4] = vmulq_f64(vld1q_f64(parentConditionals+S+8), P[4]);
+
+ vst1q_f64(parentConditionals+S, P[0]);
+ vst1q_f64(parentConditionals+S+2, P[1]);
+ vst1q_f64(parentConditionals+S+4, P[2]);
+ vst1q_f64(parentConditionals+S+6, P[3]);
+ vst1q_f64(parentConditionals+S+8, P[4]);
+
+ P[0] = vaddq_f64 (P[0],P[1]);
+ P[2] = vaddq_f64 (P[2],P[3]);
+ P[0] = vaddq_f64 (P[0],P[4]);
+ P[2] = vaddq_f64 (P[0],P[2]);
+ if (grandTotal) {
+ *grandTotal = vaddq_f64 (P[2],*grandTotal);
+ return *grandTotal;
+ }
+ return P[2];
+
+}
+#endif
+
#ifdef _SLKP_USE_AVX_INTRINSICS
template<long D, long S> inline __m256d __ll_handle_block20_product_sum (hyFloat const* _hprestrict_ transposedMatrix, hyFloat * _hprestrict_ childVector, hyFloat * _hprestrict_ parentConditionals, __m256d * grandTotal) {
__m256d P [5] = {
@@ -728,11 +818,12 @@ inline void _handle4x4_pruning_case (double const* childVector, double const* tM
#elif defined _SLKP_USE_AVX_INTRINSICS
- __m256d c3 = _mm256_set1_pd(childVector[3]),
- c0 = _mm256_set1_pd(childVector[0]),
- c1 = _mm256_set1_pd(childVector[1]),
- c2 = _mm256_set1_pd(childVector[2]),
- t0,t1,t2,t3;
+ __m256d
+ c0 = _mm256_set1_pd(childVector[0]),
+ c1 = _mm256_set1_pd(childVector[1]),
+ c2 = _mm256_set1_pd(childVector[2]),
+ c3 = _mm256_set1_pd(childVector[3]),
+ t0,t1,t2,t3;
if (transposed_mx) {
t0 = ((__m256d*)transposed_mx)[0];
@@ -761,9 +852,38 @@ inline void _handle4x4_pruning_case (double const* childVector, double const* tM
#endif
+#elif defined _SLKP_USE_ARM_NEON
+ float64x2_t c0 = vdupq_n_f64(childVector[0]),
+ c1 = vdupq_n_f64(childVector[1]),
+ c2 = vdupq_n_f64(childVector[2]),
+ c3 = vdupq_n_f64(childVector[3]);
+
+ float64x2x2_t t0, t1, t2, t3;
+
+ if (transposed_mx) {
+ t0 = ((float64x2x2_t*)transposed_mx)[0];
+ t1 = ((float64x2x2_t*)transposed_mx)[1];
+ t2 = ((float64x2x2_t*)transposed_mx)[2];
+ t3 = ((float64x2x2_t*)transposed_mx)[3];
+ } else {
+ t0 = (float64x2x2_t){tMatrix[0],tMatrix[4],tMatrix[8],tMatrix[12]};
+ t1 = (float64x2x2_t){tMatrix[1],tMatrix[5],tMatrix[9],tMatrix[13]};
+ t2 = (float64x2x2_t){tMatrix[2],tMatrix[6],tMatrix[10],tMatrix[14]};
+ t3 = (float64x2x2_t){tMatrix[3],tMatrix[7],tMatrix[11],tMatrix[15]};
+ }
+ t0.val[0] = vfmaq_f64 (vmulq_f64 (c1,t1.val[0]),c0,t0.val[0]);
+ t0.val[1] = vfmaq_f64 (vmulq_f64 (c1,t1.val[1]),c0,t0.val[1]);
+ t2.val[0] = vfmaq_f64 (vmulq_f64 (c3,t3.val[0]),c2,t2.val[0]);
+ t2.val[1] = vfmaq_f64 (vmulq_f64 (c3,t3.val[1]),c2,t2.val[1]);
+ vst1q_f64 (parentConditionals,
+ vmulq_f64 (vld1q_f64 (parentConditionals), vaddq_f64 (t0.val[0], t2.val[0])));
+ vst1q_f64 (parentConditionals+2, vmulq_f64 (vld1q_f64 (parentConditionals+2), vaddq_f64 (t0.val[1], t2.val[1])));
+
#else
+
+
// 12 multiplications, 16 additions, 3 subtractions
/*hyFloat t1 = childVector[0] - childVector[3],
@@ -836,7 +956,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
siteTo = siteCount;
}
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
hyFloat * tMatrixT = nil;
switch (alphabetDimension) {
case 20:
@@ -940,6 +1060,16 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
(__m256d) {transitionMatrix[3],transitionMatrix[7],transitionMatrix[11],transitionMatrix[15]}
};
#endif
+
+ #ifdef _SLKP_USE_ARM_NEON
+ float64x2x2_t tmatrix_transpose [4] = {
+ (float64x2x2_t) {transitionMatrix[0],transitionMatrix[4],transitionMatrix[8],transitionMatrix[12]},
+ (float64x2x2_t) {transitionMatrix[1],transitionMatrix[5],transitionMatrix[9],transitionMatrix[13]},
+ (float64x2x2_t) {transitionMatrix[2],transitionMatrix[6],transitionMatrix[10],transitionMatrix[14]},
+ (float64x2x2_t) {transitionMatrix[3],transitionMatrix[7],transitionMatrix[11],transitionMatrix[15]}
+ };
+ #endif
+
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 4UL) {
__ll_loop_preamble
if (__ll_handle_conditional_array_initialization<4> (
@@ -949,7 +1079,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
- #ifdef _SLKP_USE_AVX_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS or defined _SLKP_USE_ARM_NEON
_handle4x4_pruning_case (childVector, tMatrix, parentConditionals, tmatrix_transpose);
#else
_handle4x4_pruning_case (childVector, tMatrix, parentConditionals, nil);
@@ -968,7 +1098,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
// START AMINO-ACID CASE
else if (alphabetDimension == 20UL) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<20> (transitionMatrix, tMatrixT);
#endif
@@ -985,6 +1115,11 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
grandTotal = __ll_handle_block10_product_sum<20,0> (tMatrixT, childVector, parentConditionals, nil);
grandTotal = __ll_handle_block10_product_sum<20,10> (tMatrixT, childVector, parentConditionals, &grandTotal),grandTotal;
sum = _sse_sum_2 (grandTotal);
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t grandTotal
+ = __ll_handle_block10_product_sum<20,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<20,10> (tMatrixT, childVector, parentConditionals, &grandTotal),grandTotal;
+ sum = _neon_sum_2 (grandTotal);
#else
__ll_product_sum_loop<20L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -999,7 +1134,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
// END AMINO-ACID CASE
// START UNIVERSAL CODE
else if (alphabetDimension == 60UL) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<60> (transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 60L) {
@@ -1025,6 +1160,15 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
grandTotal = __ll_handle_block10_product_sum<60,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
grandTotal = __ll_handle_block10_product_sum<60,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
sum += _sse_sum_2(grandTotal);
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t grandTotal
+ = __ll_handle_block10_product_sum<60,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<60,10> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<60,20> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<60,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<60,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<60,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ sum += _neon_sum_2(grandTotal);
#else
__ll_product_sum_loop<60L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -1036,7 +1180,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
}
}
else if (alphabetDimension == 61UL) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<61> (transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 61L) {
@@ -1122,6 +1266,26 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
hyFloat s60 = _sse_sum_2(lastTotal) + childVector[60] * tT[60];
parentConditionals[60] *= s60;
sum += _sse_sum_2(grandTotal) + s60;
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t grandTotal;
+ grandTotal = __ll_handle_block10_product_sum<61,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<61,10> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<61,20> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<61,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<61,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<61,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
+
+ hyFloat const * __restrict tT = tMatrix + 61*60;
+ float64x2_t lastTotal;
+ __ll_handle_block10_product_sum_linear<61,0> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,10> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,20> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,30> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,40> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,50> (tT, childVector, &lastTotal);
+ hyFloat s60 = _neon_sum_2(lastTotal) + childVector[60] * tT[60];
+ parentConditionals[60] *= s60;
+ sum += _neon_sum_2(grandTotal) + s60;
#else
__ll_product_sum_loop<61L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -1138,7 +1302,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
__ll_loop_epilogue
}
} else if (alphabetDimension == 62UL) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<62> (transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 62L) {
@@ -1206,6 +1370,40 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
parentConditionals[61] *= s61;
sum += _sse_sum_2(grandTotal) + s60 + s61;
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t grandTotal
+ = __ll_handle_block10_product_sum<62,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<62,10> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<62,20> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<62,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<62,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<62,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
+
+ hyFloat const * __restrict tT = tMatrix + 62*60;
+
+ float64x2_t lastTotal;
+ __ll_handle_block10_product_sum_linear<62,0> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,10> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,20> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,30> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,40> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,50> (tT, childVector, &lastTotal);
+ hyFloat s60 = _neon_sum_2(lastTotal) + childVector[60] * tT[60] + childVector[61] * tT[61];
+ parentConditionals[60] *= s60;
+
+ tT += 62;
+ float64x2_t lastTotal2;
+ __ll_handle_block10_product_sum_linear<62,0> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,10> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,20> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,30> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,40> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,50> (tT, childVector, &lastTotal2);
+
+ hyFloat s61 = _neon_sum_2(lastTotal2) + childVector[60] * tT[60] + childVector[61] * tT[61];
+ parentConditionals[61] *= s61;
+
+ sum += _neon_sum_2(grandTotal) + s60 + s61;
#else
__ll_product_sum_loop<62L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -1216,7 +1414,7 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
__ll_loop_epilogue
}
} else if (alphabetDimension == 63UL) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<63> (transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 63L) {
@@ -1305,6 +1503,51 @@ hyFloat _TheTree::ComputeTreeBlockByBranch ( _SimpleList
parentConditionals[62] *= s62;
sum += _sse_sum_2(grandTotal) + s60 + s61 + s62;
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t grandTotal
+ = __ll_handle_block10_product_sum<63,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<63,10> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<63,20> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<63,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<63,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<63,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
+
+ hyFloat const * __restrict tT = tMatrix + 63*60;
+ float64x2_t lastTotal;
+ __ll_handle_block10_product_sum_linear<63,0> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,10> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,20> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,30> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,40> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,50> (tT, childVector, &lastTotal);
+ hyFloat s60 = _neon_sum_2(lastTotal) + childVector[60] * tT[60] + childVector[61] * tT[61] + childVector[62] * tT[62];
+ parentConditionals[60] *= s60;
+
+ tT += 63;
+ float64x2_t lastTotal2;
+ __ll_handle_block10_product_sum_linear<63,0> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,10> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,20> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,30> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,40> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,50> (tT, childVector, &lastTotal2);
+
+ hyFloat s61 = _neon_sum_2(lastTotal2) + childVector[60] * tT[60] + childVector[61] * tT[61] + childVector[62] * tT[62];;
+ parentConditionals[61] *= s61;
+
+ tT += 63;
+ float64x2_t lastTotal3;
+ __ll_handle_block10_product_sum_linear<63,0> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,10> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,20> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,30> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,40> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,50> (tT, childVector, &lastTotal3);
+
+ hyFloat s62 = _neon_sum_2(lastTotal3) + childVector[60] * tT[60] + childVector[61] * tT[61] + childVector[62] * tT[62];;
+ parentConditionals[62] *= s62;
+
+ sum += _neon_sum_2(grandTotal) + s60 + s61 + s62;
#else
__ll_product_sum_loop<63L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -1568,7 +1811,7 @@ void _TheTree::ComputeBranchCache (
myParent = flatParents.list_data[myParent+flatLeaves.lLength];
} while (myParent >= 0);
-#if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+#if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
hyFloat * tMatrixT = nil;
switch (alphabetDimension) {
case 20:
@@ -1755,11 +1998,20 @@ void _TheTree::ComputeBranchCache (
if (alphabetDimension == 4L) {
#ifdef _SLKP_USE_AVX_INTRINSICS
- __m256d tmatrix_transpose [4] = {
- (__m256d) {transitionMatrix[0],transitionMatrix[4],transitionMatrix[8],transitionMatrix[12]},
- (__m256d) {transitionMatrix[1],transitionMatrix[5],transitionMatrix[9],transitionMatrix[13]},
- (__m256d) {transitionMatrix[2],transitionMatrix[6],transitionMatrix[10],transitionMatrix[14]},
- (__m256d) {transitionMatrix[3],transitionMatrix[7],transitionMatrix[11],transitionMatrix[15]}
+ __m256d tmatrix_transpose [4] = {
+ (__m256d) {transitionMatrix[0],transitionMatrix[4],transitionMatrix[8],transitionMatrix[12]},
+ (__m256d) {transitionMatrix[1],transitionMatrix[5],transitionMatrix[9],transitionMatrix[13]},
+ (__m256d) {transitionMatrix[2],transitionMatrix[6],transitionMatrix[10],transitionMatrix[14]},
+ (__m256d) {transitionMatrix[3],transitionMatrix[7],transitionMatrix[11],transitionMatrix[15]}
+ };
+ #endif
+
+ #ifdef _SLKP_USE_ARM_NEON
+ float64x2x2_t tmatrix_transpose [4] = {
+ (float64x2x2_t) {transitionMatrix[0],transitionMatrix[4],transitionMatrix[8],transitionMatrix[12]},
+ (float64x2x2_t) {transitionMatrix[1],transitionMatrix[5],transitionMatrix[9],transitionMatrix[13]},
+ (float64x2x2_t) {transitionMatrix[2],transitionMatrix[6],transitionMatrix[10],transitionMatrix[14]},
+ (float64x2x2_t) {transitionMatrix[3],transitionMatrix[7],transitionMatrix[11],transitionMatrix[15]}
};
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 4L) {
@@ -1773,7 +2025,7 @@ void _TheTree::ComputeBranchCache (
continue;
}
long didScale = 0;
- #ifdef _SLKP_USE_AVX_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_ARM_NEON
_handle4x4_pruning_case (childVector, tMatrix, parentConditionals, tmatrix_transpose);
#else
_handle4x4_pruning_case (childVector, tMatrix, parentConditionals, nil);
@@ -1791,7 +2043,7 @@ void _TheTree::ComputeBranchCache (
__handle_site_corrections(didScale, siteID);
}
} else if (alphabetDimension == 20L) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<20>(transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 20L) {
@@ -1809,6 +2061,11 @@ void _TheTree::ComputeBranchCache (
__m128d s128;
s128 = __ll_handle_block10_product_sum<20,0> (tMatrixT, childVector, parentConditionals, nil);
s128 = __ll_handle_block10_product_sum<20,10> (tMatrixT, childVector, parentConditionals, &s128);
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t grandTotal = vdupq_n_f64(0.);
+ grandTotal = __ll_handle_block10_product_sum<20,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<20,10> (tMatrixT, childVector, parentConditionals, &grandTotal),grandTotal;
+ sum = _neon_sum_2 (grandTotal);
#else
__ll_product_sum_loop<20L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -1824,7 +2081,7 @@ void _TheTree::ComputeBranchCache (
__handle_site_corrections(didScale, siteID);
}
} else if (alphabetDimension == 60L) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<60L>(transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 60L) {
@@ -1849,6 +2106,14 @@ void _TheTree::ComputeBranchCache (
grandTotal = __ll_handle_block10_product_sum<60,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
grandTotal = __ll_handle_block10_product_sum<60,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
grandTotal = __ll_handle_block10_product_sum<60,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t
+ grandTotal = __ll_handle_block10_product_sum<60,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<60,10> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<60,20> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<60,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<60,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<60,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
#else
__ll_product_sum_loop<60L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -1864,7 +2129,7 @@ void _TheTree::ComputeBranchCache (
__handle_site_corrections(didScale, siteID);
}
} else if (alphabetDimension == 61L) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<61L>(transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 61L) {
@@ -1909,6 +2174,25 @@ void _TheTree::ComputeBranchCache (
__ll_handle_block10_product_sum_linear<61,50> (tT, childVector, &lastTotal);
hyFloat s60 = _sse_sum_2(lastTotal) + childVector[60] * tT[60];
parentConditionals[60] *= s60;
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t grandTotal;
+ grandTotal = __ll_handle_block10_product_sum<61,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<61,10> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<61,20> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<61,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<61,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<61,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
+
+ hyFloat const * __restrict tT = tMatrix + 61*60;
+ float64x2_t lastTotal;
+ __ll_handle_block10_product_sum_linear<61,0> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,10> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,20> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,30> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,40> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<61,50> (tT, childVector, &lastTotal);
+ hyFloat s60 = _neon_sum_2(lastTotal) + childVector[60] * tT[60];
+ parentConditionals[60] *= s60;
#else
__ll_product_sum_loop<61L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -1925,7 +2209,7 @@ void _TheTree::ComputeBranchCache (
__handle_site_corrections(didScale, siteID);
}
} else if (alphabetDimension == 62L) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<62L>(transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 62L) {
@@ -1990,6 +2274,36 @@ void _TheTree::ComputeBranchCache (
__ll_handle_block10_product_sum_linear<62,50> (tT, childVector, &lastTotal2);
hyFloat s61 = _sse_sum_2(lastTotal2) + childVector[60] * tT[60] + childVector[61] * tT[61];
parentConditionals[61] *= s61;
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t grandTotal
+ = __ll_handle_block10_product_sum<62,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<62,10> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<62,20> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<62,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<62,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<62,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
+
+ hyFloat const * __restrict tT = tMatrix + 62*60;
+ float64x2_t lastTotal;
+ __ll_handle_block10_product_sum_linear<62,0> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,10> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,20> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,30> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,40> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<62,50> (tT, childVector, &lastTotal);
+ hyFloat s60 = _neon_sum_2(lastTotal) + childVector[60] * tT[60] + childVector[61] * tT[61];
+ parentConditionals[60] *= s60;
+
+ tT += 62;
+ float64x2_t lastTotal2;
+ __ll_handle_block10_product_sum_linear<62,0> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,10> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,20> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,30> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,40> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<62,50> (tT, childVector, &lastTotal2);
+ hyFloat s61 = _neon_sum_2(lastTotal2) + childVector[60] * tT[60] + childVector[61] * tT[61];
+ parentConditionals[61] *= s61;
#else
__ll_product_sum_loop<62L> (tMatrix, childVector, parentConditionals, sum);
#endif
@@ -2005,7 +2319,7 @@ void _TheTree::ComputeBranchCache (
__handle_site_corrections(didScale, siteID);
}
} else if (alphabetDimension == 63L) {
- #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS
+ #if defined _SLKP_USE_AVX_INTRINSICS || defined _SLKP_USE_SSE_INTRINSICS || defined _SLKP_USE_ARM_NEON
__ll_handle_matrix_transpose<63L>(transitionMatrix, tMatrixT);
#endif
for (long siteID = siteFrom; siteID < siteTo; siteID++, parentConditionals += 63L) {
@@ -2089,6 +2403,48 @@ void _TheTree::ComputeBranchCache (
__ll_handle_block10_product_sum_linear<63,50> (tT, childVector, &lastTotal3);
hyFloat s62 = _sse_sum_2(lastTotal3) + childVector[60] * tT[60] + childVector[61] * tT[61] + childVector[62] * tT[62];
parentConditionals[62] *= s62;
+
+ #elif defined _SLKP_USE_ARM_NEON
+ float64x2_t
+ grandTotal = __ll_handle_block10_product_sum<63,0> (tMatrixT, childVector, parentConditionals, nil);
+ grandTotal = __ll_handle_block10_product_sum<63,10> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<63,20> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<63,30> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<63,40> (tMatrixT, childVector, parentConditionals, &grandTotal);
+ grandTotal = __ll_handle_block10_product_sum<63,50> (tMatrixT, childVector, parentConditionals, &grandTotal);
+
+ hyFloat const * __restrict tT = tMatrix + 63*60;
+ float64x2_t lastTotal;
+ __ll_handle_block10_product_sum_linear<63,0> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,10> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,20> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,30> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,40> (tT, childVector, &lastTotal);
+ __ll_handle_block10_product_sum_linear<63,50> (tT, childVector, &lastTotal);
+ hyFloat s60 = _neon_sum_2(lastTotal) + childVector[60] * tT[60] + childVector[61] * tT[61] + childVector[62] * tT[62];
+ parentConditionals[60] *= s60;
+
+ tT += 63;
+ float64x2_t lastTotal2;
+ __ll_handle_block10_product_sum_linear<63,0> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,10> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,20> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,30> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,40> (tT, childVector, &lastTotal2);
+ __ll_handle_block10_product_sum_linear<63,50> (tT, childVector, &lastTotal2);
+ hyFloat s61 = _neon_sum_2(lastTotal2) + childVector[60] * tT[60] + childVector[61] * tT[61] + childVector[62] * tT[62];
+ parentConditionals[61] *= s61;
+
+ tT += 63;
+ float64x2_t lastTotal3;
+ __ll_handle_block10_product_sum_linear<63,0> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,10> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,20> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,30> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,40> (tT, childVector, &lastTotal3);
+ __ll_handle_block10_product_sum_linear<63,50> (tT, childVector, &lastTotal3);
+ hyFloat s62 = _neon_sum_2(lastTotal3) + childVector[60] * tT[60] + childVector[61] * tT[61] + childVector[62] * tT[62];
+ parentConditionals[62] *= s62;
#else
__ll_product_sum_loop<63L> (tMatrix, childVector, parentConditionals, sum);
#endif
=====================================
src/lib/setup.py
=====================================
@@ -5,7 +5,6 @@ from os import path
from glob import glob
import sys
-from platform import architecture, mac_ver
#incdir = get_python_inc(plat_specific=1)
#print incdir
@@ -48,8 +47,10 @@ includePaths += [linkPath, contribPath, sqlitePath]
define_macros = [('__HYPHY_64__', None)] if '64' in architecture()[0] else []
# openmp on Mac OS X Lion is broken
-major, minor = mac_ver()[0].split('.')
-openmp = ['-fopenmp'] if int(major) < 10 or (int(major) == 10 and int(minor) < 7) else []
+#major, minor = mac_ver()[0].split('.')
+#openmp = ['-fopenmp'] if int(major) < 10 or (int(major) == 10 and int(minor) < 7) else []
+
+openmp = []
setup(
name = 'HyPhy',
@@ -59,7 +60,7 @@ setup(
author_email = 'spond at temple.edu',
url = 'http://www.hyphy.org/',
packages = ['HyPhy'],
- package_dir = {'HyPhy': 'LibraryModules/Python/HyPhy'},
+ package_dir = {'HyPhy': './'},
# data_files = resFiles,
# py_modules = ['HyPhy'],
ext_modules = [Extension('_HyPhy',
=====================================
tests/hbltests/Results/HIVSweden.out
=====================================
@@ -2,28 +2,28 @@
*** RUNNING SINGLE RATE MODEL ***
#################################
->Done in 5 seconds
+>Done in 3 seconds
--1137.68842968708
+-1137.68870195817
-----------------------------------
-dN/dS = 0.9051395450601901
+dN/dS = 0.9071719918589423
*** RUNNING MODEL 1 (Neutral) ***
######################################
->Done in 3 seconds
+>Done in 2 seconds
--1114.64197979838
+-1114.64190562717
------------------------------------------------
-dN/dS = 0.5549192972885679 (sample variance = 0.2120268320358276)
+dN/dS = 0.5548244466259989 (sample variance = 0.2120544773207587)
-Rate[1]= 0.07854090 (weight=0.4830173)
-Rate[2]= 1.00000000 (weight=0.5169827)
+Rate[1]= 0.07848545 (weight=0.4830912)
+Rate[2]= 1.00000000 (weight=0.5169088)
------------------------------------------------
@@ -37,215 +37,215 @@ Import the following part into a data processing program
for further analysis
-Rate/Site Rate=0.07854089951899505 Rate=1
-1 0.0004154841702208897 0.9995845158297791
-2 0.9494800946553694 0.05051990534463052
-3 0.8527138076557068 0.1472861923442931
-4 0.9772953209605599 0.02270467903944003
-5 0.7089270358876215 0.2910729641123785
-6 0.9706034286518881 0.02939657134811178
-7 0.8411759549187992 0.1588240450812008
-8 0.644850909128687 0.3551490908713131
-9 0.009023903715709264 0.9909760962842907
-10 0.2515932185849544 0.7484067814150457
-11 0.9362971760831602 0.0637028239168398
-12 0.8039911310919982 0.1960088689080018
-13 0.9592990339188422 0.04070096608115781
-14 0.6831216537039989 0.3168783462960011
-15 0.9686853075822512 0.03131469241774875
-16 0.9686853075822512 0.03131469241774875
-17 0.9494800946553694 0.05051990534463052
-18 0.09627709694116433 0.9037229030588357
-19 0.8046571650681456 0.1953428349318545
-20 0.4254688557117581 0.5745311442882419
-21 0.001162113715308391 0.9988378862846916
-22 0.001180617896019509 0.9988193821039805
-23 0.9494800946553694 0.05051990534463052
-24 7.060663427640608e-05 0.9999293933657236
-25 0.8600353102504529 0.1399646897495471
-26 2.239754017472222e-05 0.9999776024598254
-27 0.7157862269098134 0.2842137730901866
-28 1.823968038733901e-08 0.9999999817603197
-29 0.9785832556654707 0.02141674433452926
-30 0.582384830553312 0.417615169446688
-31 0.002379358355164586 0.9976206416448354
-32 0.8411759549187992 0.1588240450812008
-33 0.9362971760831602 0.0637028239168398
-34 0.6979822928927298 0.3020177071072702
-35 0.778392401117516 0.2216075988824839
-36 0.01421053968210576 0.9857894603178943
-37 0.06336691422415654 0.9366330857758434
-38 0.6912831003120626 0.3087168996879373
-39 0.001919852660566921 0.998080147339433
-40 0.003960890204721402 0.9960391097952785
-41 0.5294498773319029 0.470550122668097
-42 0.8383792474897703 0.1616207525102296
-43 0.5921771900482152 0.4078228099517847
-44 0.208194488060306 0.791805511939694
-45 0.4812547391790605 0.5187452608209394
-46 0.01854854305069108 0.9814514569493089
-47 0.7638290038081602 0.2361709961918397
-48 0.1402805111076657 0.8597194888923344
-49 0.07649019370504373 0.9235098062949563
-50 0.5565553788637815 0.4434446211362185
-51 4.401412699981744e-06 0.9999955985873
-52 0.9686853075822512 0.03131469241774875
-53 0.9337995342113474 0.06620046578865253
-54 0.05713111117495385 0.9428688888250462
-55 0.6733962368122044 0.3266037631877956
-56 0.9785832556654707 0.02141674433452926
-57 0.03638999158091847 0.9636100084190816
-58 0.9366001361131814 0.06339986388681852
-59 0.02363153610583355 0.9763684638941664
-60 0.7157862269098134 0.2842137730901866
-61 0.2740628614189302 0.7259371385810699
-62 0.1720628516827533 0.8279371483172466
-63 0.06696027896282203 0.933039721037178
-64 0.1590255451412893 0.8409744548587106
-65 0.1371557240403739 0.8628442759596261
-66 2.83460436728884e-08 0.9999999716539563
-67 0.5390460493463879 0.4609539506536121
-68 0.0006628127730370201 0.999337187226963
-69 2.873441346176776e-05 0.9999712655865381
-70 0.3234733611365553 0.6765266388634448
-71 0.2249283193971887 0.7750716806028114
-72 0.161927560424059 0.8380724395759409
-73 0.1394764462745381 0.8605235537254619
-74 0.9494800946553694 0.05051990534463052
-75 0.02458202885075857 0.9754179711492413
-76 6.411295676480708e-05 0.9999358870432352
-77 0.9715441430178714 0.02845585698212867
-78 0.7862785460776877 0.2137214539223124
-79 0.7328737186780433 0.2671262813219566
-80 0.9706034286518881 0.02939657134811178
-81 0.8611706364190057 0.1388293635809944
-82 0.7668224471831266 0.2331775528168733
-83 5.672677285259407e-05 0.9999432732271474
-84 0.006741759929513022 0.993258240070487
-85 0.96836526712463 0.03163473287536989
-86 0.9686853075822512 0.03131469241774875
-87 1.531106965534881e-05 0.9999846889303446
-88 0.7668224471831266 0.2331775528168733
-89 0.9316117506366601 0.06838824936333981
-90 0.0386175672192274 0.9613824327807726
-91 0.5872082036930426 0.4127917963069574
+Rate/Site Rate=0.07848544796897376 Rate=1
+1 0.0004147478183825302 0.9995852521816174
+2 0.949615551955995 0.05038444804400498
+3 0.8529359368370378 0.1470640631629622
+4 0.9773716102717996 0.02262838972820037
+5 0.7091414936372149 0.290858506362785
+6 0.9706969154415642 0.02930308455843569
+7 0.841382629288338 0.158617370711662
+8 0.6450287771422856 0.3549712228577144
+9 0.009017591385717552 0.9909824086142826
+10 0.2517327512397574 0.7482672487602428
+11 0.9364355050188269 0.06356449498117313
+12 0.8042392149007731 0.1957607850992268
+13 0.9594019831567395 0.04059801684326048
+14 0.6833362390928011 0.316663760907199
+15 0.9687756816361081 0.03122431836389203
+16 0.9687756816361081 0.03122431836389203
+17 0.949615551955995 0.05038444804400498
+18 0.09626486184129626 0.9037351381587037
+19 0.8048657022558181 0.1951342977441818
+20 0.4258332890773215 0.5741667109226785
+21 0.001162544759598141 0.9988374552404018
+22 0.00117934284219372 0.9988206571578063
+23 0.949615551955995 0.05038444804400498
+24 7.05692249317919e-05 0.9999294307750682
+25 0.8602498450519684 0.1397501549480315
+26 2.235787126908494e-05 0.9999776421287309
+27 0.7160075322697251 0.2839924677302748
+28 1.81694382789184e-08 0.9999999818305617
+29 0.978655781308275 0.02134421869172495
+30 0.5825330972325473 0.4174669027674526
+31 0.00237797957064055 0.9976220204293594
+32 0.841382629288338 0.158617370711662
+33 0.9364355050188269 0.06356449498117313
+34 0.6984640898990011 0.3015359101009989
+35 0.7788646013536071 0.2211353986463928
+36 0.01422156141350081 0.9857784385864992
+37 0.06342743744187564 0.9365725625581244
+38 0.6917494525406751 0.3082505474593248
+39 0.00191842922725556 0.9980815707727445
+40 0.003962940766435124 0.9960370592335649
+41 0.5299471303854615 0.4700528696145386
+42 0.8385993998396862 0.1614006001603138
+43 0.5923948163573549 0.407605183642645
+44 0.2085284079556125 0.7914715920443876
+45 0.4816557620682597 0.5183442379317403
+46 0.01854486833821928 0.9814551316617807
+47 0.7640484766915798 0.2359515233084201
+48 0.1404682371700924 0.8595317628299075
+49 0.07657491682608442 0.9234250831739156
+50 0.5570427216616054 0.4429572783383945
+51 4.395262579126055e-06 0.9999956047374209
+52 0.9687756816361081 0.03122431836389203
+53 0.9339655403783996 0.06603445962160043
+54 0.057176351410101 0.942823648589899
+55 0.6738749864466691 0.3261250135533308
+56 0.978655781308275 0.02134421869172495
+57 0.03639242086085162 0.9636075791391484
+58 0.9367572417729919 0.06324275822700812
+59 0.02363022019514807 0.9763697798048518
+60 0.7160075322697251 0.2839924677302748
+61 0.2742154507693895 0.7257845492306105
+62 0.1721183776736239 0.8278816223263761
+63 0.06701858054585133 0.9329814194541487
+64 0.1592881364653803 0.8407118635346197
+65 0.1373340643424358 0.8626659356575642
+66 2.827881136893144e-08 0.9999999717211886
+67 0.5391574881592042 0.4608425118407958
+68 0.0006622339418346806 0.9993377660581653
+69 2.866242595935145e-05 0.9999713375740407
+70 0.3237224918056043 0.6762775081943957
+71 0.2250277291358384 0.7749722708641616
+72 0.1621092944461913 0.8378907055538086
+73 0.1394617511496932 0.8605382488503069
+74 0.949615551955995 0.05038444804400498
+75 0.02458626333717552 0.9754137366628245
+76 6.407045470315398e-05 0.9999359295452969
+77 0.9716263288159281 0.02837367118407193
+78 0.7864931352062936 0.2135068647937064
+79 0.7333948721239267 0.2666051278760733
+80 0.9706969154415642 0.02930308455843569
+81 0.8613585063476906 0.1386414936523094
+82 0.7670412142745399 0.2329587857254601
+83 5.666114976590877e-05 0.9999433388502341
+84 0.006727990372494228 0.9932720096275057
+85 0.9684585560287761 0.03154144397122403
+86 0.9687756816361081 0.03122431836389203
+87 1.52384846143212e-05 0.9999847615153856
+88 0.7670412142745399 0.2329587857254601
+89 0.9317501104144779 0.06824988958552211
+90 0.03862425798799864 0.9613757420120014
+91 0.587359366036023 0.4126406339639771
*** RUNNING MODEL 2 (Selection) ***
######################################
->Done in 8 seconds
+>Done in 6 seconds
--1106.44545333916
+-1106.44543364521
------------------------------------------------
-dN/dS = 1.129068005488161 (sample variance = 1.588067143267372)
+dN/dS = 1.126641026596627 (sample variance = 1.581382036629081)
-Rate[1]= 0.05854806 (weight=0.3747723)
-Rate[2]= 1.00000000 (weight=0.4427396)
-Rate[3]= 3.64070986 (weight=0.1824881)
+Rate[1]= 0.05768870 (weight=0.3739141)
+Rate[2]= 1.00000000 (weight=0.4447790)
+Rate[3]= 3.64184458 (weight=0.1813069)
------------------------------------------------
Sites with dN/dS>1 (Posterior cutoff = 0.9)
-26 (0.9071235888236596)
-28 (0.999208561635355)
-66 (0.9984823131311625)
-87 (0.985639180825136)
+26 (0.905748932682225)
+28 (0.9991894014403121)
+66 (0.9984584808494847)
+87 (0.9854566318670548)
------------------------------------------------
Sites with dN/dS<=1 (Posterior cutoff = 0.9)
-1 (0.5052373923971539)
-2 (0.0004593492021371499)
-3 (0.005917205452138728)
-4 (9.663607149571083e-05)
-5 (0.03172842746069128)
-6 (0.0001511795330813065)
-7 (0.006943635909993886)
-8 (0.05673891477006113)
-9 (0.7226692280451891)
-10 (0.09651028335914213)
-11 (0.0009207100403048468)
-12 (0.01064028334210353)
-13 (0.0004934182636844576)
-14 (0.03991745163515635)
-15 (0.0002225580972989534)
-16 (0.0002225580972989534)
-17 (0.0004593492021371499)
-18 (0.4330406113257519)
-19 (0.0116121595707726)
-20 (0.0336812685288552)
-21 (0.2473118054259542)
-22 (0.7984465921549734)
-23 (0.0004593492021371499)
-24 (0.5842280859182822)
-25 (0.005172980464989402)
-27 (0.02940476216359821)
-29 (7.952947195176931e-05)
-30 (0.09146670281887008)
-31 (0.5699030448214001)
-32 (0.006943635909993886)
-33 (0.0009207100403048468)
-34 (0.004515364181301996)
-35 (0.002050750658840959)
-36 (0.1052215418403245)
-37 (0.08602792425933428)
-38 (0.005398479347200226)
-39 (0.6440044083471241)
-40 (0.4558311806110331)
-41 (0.01185264798035508)
-42 (0.006857989522987185)
-43 (0.08367248274274289)
-44 (0.01183660486503293)
-45 (0.01750623812172373)
-46 (0.4294127609528174)
-47 (0.0183780006215666)
-48 (0.02534884977862542)
-49 (0.07112745644118525)
-50 (0.01063437293207849)
-51 (0.8861014582086777)
-52 (0.0002225580972989534)
-53 (0.0007683604393760527)
-54 (0.09835228649247904)
-55 (0.006339545564899368)
-56 (7.952947195176931e-05)
-57 (0.2050735349427733)
-58 (0.000727531168723411)
-59 (0.3314435529804894)
-60 (0.02940476216359821)
-61 (0.08200111674080049)
-62 (0.193666026549605)
-63 (0.0772513986346591)
-64 (0.02148421691961612)
-65 (0.02745286636506417)
-67 (0.1261823647518457)
-68 (0.605555060135807)
-69 (0.8333573458992974)
-70 (0.05534546347214257)
-71 (0.1227852244756305)
-72 (0.02110507208988902)
-73 (0.2776010249671293)
-74 (0.0004593492021371499)
-75 (0.3073445506286841)
-76 (0.6759845714674814)
-77 (0.0002305219770671781)
-78 (0.01499353952539858)
-79 (0.003225300542333516)
-80 (0.0001511795330813065)
-81 (0.005173815704594973)
-82 (0.01777066638393202)
-83 (0.8142466349701873)
-84 (0.2783391381741632)
-85 (0.0001964834081581603)
-86 (0.0002225580972989534)
-88 (0.01777066638393202)
-89 (0.001173394928906147)
-90 (0.1864176575163458)
-91 (0.08828981163248605)
+1 (0.5005649106541394)
+2 (0.0004506733110064033)
+3 (0.005834191751320087)
+4 (9.457380175062416e-05)
+5 (0.03138510059998675)
+6 (0.0001480393169770257)
+7 (0.006847428124619753)
+8 (0.05619655457626769)
+9 (0.7202616116659697)
+10 (0.09532197568044432)
+11 (0.0009046734380393297)
+12 (0.01050223976511856)
+13 (0.0004847586715730755)
+14 (0.03950684521589812)
+15 (0.0002182155996077391)
+16 (0.0002182155996077391)
+17 (0.0004506733110064033)
+18 (0.4303827103641656)
+19 (0.01146247222682962)
+20 (0.03318736449456186)
+21 (0.2436354993793777)
+22 (0.7962699486165546)
+23 (0.0004506733110064033)
+24 (0.57941118084374)
+25 (0.005099728165153801)
+27 (0.02908192622556446)
+29 (7.778197017495751e-05)
+30 (0.09069853965707597)
+31 (0.5660401862868557)
+32 (0.006847428124619753)
+33 (0.0009046734380393297)
+34 (0.004442814164845603)
+35 (0.002016405422878198)
+36 (0.1033806809795843)
+37 (0.0845869735850874)
+38 (0.005315569117438836)
+39 (0.6404963641339237)
+40 (0.4513223091835529)
+41 (0.01166252323108743)
+42 (0.006762560825262604)
+43 (0.08294489462806978)
+44 (0.01159145309975683)
+45 (0.01723403953949377)
+46 (0.4258799056869767)
+47 (0.01815822194877413)
+48 (0.0248595357138091)
+49 (0.06985038806310395)
+50 (0.01046813156931801)
+51 (0.8842009233039249)
+52 (0.0002182155996077391)
+53 (0.0007544903027667682)
+54 (0.09676453858925446)
+55 (0.006243972369370006)
+56 (7.778197017495751e-05)
+57 (0.2024175281590031)
+58 (0.0007143331590382112)
+59 (0.328082302559163)
+60 (0.02908192622556446)
+61 (0.08094857058070434)
+62 (0.191684602210924)
+63 (0.07594221099860735)
+64 (0.02106638033100854)
+65 (0.02692637587837913)
+67 (0.1252291578690442)
+68 (0.6013314754839405)
+69 (0.8310257634017505)
+70 (0.05459581602399919)
+71 (0.1213339421503428)
+72 (0.02070417777354739)
+73 (0.2752230672821743)
+74 (0.0004506733110064033)
+75 (0.3040808672019115)
+76 (0.6716789194025321)
+77 (0.0002260604432214562)
+78 (0.01480917575963592)
+79 (0.003175560377811062)
+80 (0.0001480393169770257)
+81 (0.005098851302542715)
+82 (0.01755686700678069)
+83 (0.8117048671851982)
+84 (0.2741932110594019)
+85 (0.0001925217117709219)
+86 (0.0002182155996077391)
+88 (0.01755686700678069)
+89 (0.001153571766383773)
+90 (0.1839063827910584)
+91 (0.0875405164290031)
------------------------------------------------
@@ -256,95 +256,95 @@ Import the following part into a data processing program
for further analysis
-Rate/Site Rate=0.05854805862606752 Rate=1 Rate=3.640709855148721
-1 2.123821587431194e-05 0.4947413693869718 0.5052373923971539
-2 0.8454805150711112 0.1540601357267516 0.0004593492021371499
-3 0.7171925566791651 0.2768902378686959 0.005917205452138728
-4 0.9023145803022071 0.09758878362629728 9.663607149571083e-05
-5 0.5829266831612094 0.3853448893780992 0.03172842746069128
-6 0.8874735538558555 0.112375266611063 0.0001511795330813065
-7 0.701299413788691 0.2917569503013152 0.006943635909993886
-8 0.5258486059883327 0.4174124792416061 0.05673891477006113
-9 0.00105308734360597 0.276277684611205 0.7226692280451891
-10 0.1004481920361048 0.803041524604753 0.09651028335914213
-11 0.8194140603392319 0.1796652296204631 0.0009207100403048468
-12 0.6717772510719154 0.3175824655859811 0.01064028334210353
-13 0.8646165745664404 0.1348900071698753 0.0004934182636844576
-14 0.5624375756998519 0.3976449726649919 0.03991745163515635
-15 0.8822191864215847 0.1175582554811163 0.0002225580972989534
-16 0.8822191864215847 0.1175582554811163 0.0002225580972989534
-17 0.8454805150711112 0.1540601357267516 0.0004593492021371499
-18 0.032983308732931 0.533976079941317 0.4330406113257519
-19 0.6645919677056602 0.3237958727235673 0.0116121595707726
-20 0.1597162578236267 0.806602473647518 0.0336812685288552
-21 6.497561809983018e-05 0.752623218955946 0.2473118054259542
-22 6.147883799728457e-05 0.2014919290070293 0.7984465921549734
-23 0.8454805150711112 0.1540601357267516 0.0004593492021371499
-24 1.810178742010458e-06 0.4157701039029758 0.5842280859182822
-25 0.7259101630303384 0.2689168565046722 0.005172980464989402
-26 1.883724016460931e-07 0.09287622280393867 0.9071235888236596
-27 0.5896752507993037 0.3809199870370982 0.02940476216359821
-28 7.593295246048614e-14 0.0007914383645691474 0.999208561635355
-29 0.9055057132759483 0.09441475725209994 7.952947195176931e-05
-30 0.4715021983865639 0.437031098794566 0.09146670281887008
-31 0.0001979673775266633 0.4298989878010734 0.5699030448214001
-32 0.701299413788691 0.2917569503013152 0.006943635909993886
-33 0.8194140603392319 0.1796652296204631 0.0009207100403048468
-34 0.2960737907520586 0.6994108450666394 0.004515364181301996
-35 0.3559604560803941 0.641988793260765 0.002050750658840959
-36 0.00130942674258278 0.8934690314170927 0.1052215418403245
-37 0.01180978530471919 0.9021622904359465 0.08602792425933428
-38 0.2941041942604504 0.7004973263923493 0.005398479347200226
-39 0.000144144499790302 0.3558514471530856 0.6440044083471241
-40 0.0002529699651473707 0.5439158494238194 0.4558311806110331
-41 0.2120674596489594 0.7760798923706854 0.01185264798035508
-42 0.7007026627516525 0.2924393477253602 0.006857989522987185
-43 0.4841294396612101 0.432198077596047 0.08367248274274289
-44 0.0301228872990572 0.9580405078359099 0.01183660486503293
-45 0.1893248839788894 0.7931688778993868 0.01750623812172373
-46 0.003359794088751816 0.5672274449584306 0.4294127609528174
-47 0.63009891120623 0.3515230881722035 0.0183780006215666
-48 0.02206748041476434 0.9525836698066102 0.02534884977862542
-49 0.01356591438428317 0.9153066291745316 0.07112745644118525
-50 0.2240745276745924 0.7652910993933291 0.01063437293207849
-51 2.492645830161824e-08 0.1138985168648641 0.8861014582086777
-52 0.8822191864215847 0.1175582554811163 0.0002225580972989534
-53 0.8211660693180184 0.1780655702426055 0.0007683604393760527
-54 0.01100295212811388 0.8906447613794071 0.09835228649247904
-55 0.2833340811759865 0.7103263732591142 0.006339545564899368
-56 0.9055057132759483 0.09441475725209994 7.952947195176931e-05
-57 0.007045975624482142 0.7878804894327445 0.2050735349427733
-58 0.8240613622898397 0.1752111065414368 0.000727531168723411
-59 0.004581163540158042 0.6639752834793526 0.3314435529804894
-60 0.5896752507993037 0.3809199870370982 0.02940476216359821
-61 0.107806169745878 0.8101927135133216 0.08200111674080049
-62 0.06861334373069095 0.7377206297197041 0.193666026549605
-63 0.01242704838986371 0.9103215529754771 0.0772513986346591
-64 0.02390636440935269 0.9546094186710312 0.02148421691961612
-65 0.02151934533944069 0.9510277882954953 0.02745286636506417
-66 2.671166083534726e-12 0.001517686866166226 0.9984823131311625
-67 0.4295450384222473 0.4442725968259071 0.1261823647518457
-68 3.079998533959148e-05 0.3944141398788535 0.605555060135807
-69 4.243743084525438e-07 0.1666422297263941 0.8333573458992974
-70 0.1288881665397853 0.8157663699880722 0.05534546347214257
-71 0.08913564852994389 0.7880791269944257 0.1227852244756305
-72 0.02499602234463322 0.9538989055654777 0.02110507208988902
-73 0.05300541149090041 0.6693935635419703 0.2776010249671293
-74 0.8454805150711112 0.1540601357267516 0.0004593492021371499
-75 0.004981355134669857 0.687674094236646 0.3073445506286841
-76 1.275841252079727e-06 0.3240141526912666 0.6759845714674814
-77 0.8883601983297144 0.1114092796932183 0.0002305219770671781
-78 0.6485688357833093 0.3364376246912923 0.01499353952539858
-79 0.3321969903648257 0.6645777090928409 0.003225300542333516
-80 0.8874735538558555 0.112375266611063 0.0001511795330813065
-81 0.7193517468744175 0.2754744374209876 0.005173815704594973
-82 0.6319797015005985 0.3502496321154693 0.01777066638393202
-83 9.451297855017863e-07 0.1857524199000273 0.8142466349701873
-84 9.483309866743794e-05 0.7215660287271692 0.2783391381741632
-85 0.8807566499603399 0.119046866631502 0.0001964834081581603
-86 0.8822191864215847 0.1175582554811163 0.0002225580972989534
-87 1.512672292179054e-08 0.01436080404814105 0.985639180825136
-88 0.6319797015005985 0.3502496321154693 0.01777066638393202
-89 0.8117475469052963 0.1870790581657977 0.001173394928906147
-90 0.007483970311633406 0.8060983721720208 0.1864176575163458
-91 0.4758571278249646 0.4358530605425494 0.08828981163248605
+Rate/Site Rate=0.05768869986246565 Rate=1 Rate=3.641844577696067
+1 2.014182872779183e-05 0.4994149475171328 0.5005649106541394
+2 0.8452194549572273 0.1543298717317663 0.0004506733110064033
+3 0.7164617500753068 0.2777040581733731 0.005834191751320087
+4 0.9022505078903756 0.09765491830787378 9.457380175062416e-05
+5 0.5818848042477683 0.3867300951522449 0.03138510059998675
+6 0.8873636945850181 0.112488266098005 0.0001480393169770257
+7 0.7005076792958981 0.2926448925794821 0.006847428124619753
+8 0.5247883329162988 0.4190151125074336 0.05619655457626769
+9 0.001025124165060435 0.2787132641689699 0.7202616116659697
+10 0.0988666787453516 0.8058113455742041 0.09532197568044432
+11 0.8190555318482564 0.1800397947137043 0.0009046734380393297
+12 0.6708961696125029 0.3186015906223785 0.01050223976511856
+13 0.864433000690832 0.1350822406375951 0.0004847586715730755
+14 0.5613826888688731 0.399110465915229 0.03950684521589812
+15 0.8820922333215474 0.117689551078845 0.0002182155996077391
+16 0.8820922333215474 0.117689551078845 0.0002182155996077391
+17 0.8452194549572273 0.1543298717317663 0.0004506733110064033
+18 0.0324851386885853 0.537132150947249 0.4303827103641656
+19 0.6636954012888419 0.3248421264843285 0.01146247222682962
+20 0.1574210003780902 0.8093916351273479 0.03318736449456186
+21 6.147404749371294e-05 0.7563030265731285 0.2436354993793777
+22 5.912803676264833e-05 0.2036709233466827 0.7962699486165546
+23 0.8452194549572273 0.1543298717317663 0.0004506733110064033
+24 1.697882291310795e-06 0.4205871212739687 0.57941118084374
+25 0.7252007199820658 0.2696995518527805 0.005099728165153801
+26 1.76903184809418e-07 0.09425089041459019 0.905748932682225
+27 0.5886393930203893 0.3822786807540463 0.02908192622556446
+28 7.257830102405714e-14 0.000810598559615294 0.9991894014403121
+29 0.905453981684754 0.09446823634507101 7.778197017495751e-05
+30 0.4704804321322921 0.438821028210632 0.09069853965707597
+31 0.000190267409636111 0.4337695463035081 0.5660401862868557
+32 0.7005076792958981 0.2926448925794821 0.006847428124619753
+33 0.8190555318482564 0.1800397947137043 0.0009046734380393297
+34 0.2927441515249551 0.7028130343101993 0.004442814164845603
+35 0.3524412830671291 0.6455423115099926 0.002016405422878198
+36 0.001253859115858844 0.895365459904557 0.1033806809795843
+37 0.01145549195170351 0.9039575344632091 0.0845869735850874
+38 0.2907971110003181 0.7038873198822431 0.005315569117438836
+39 0.0001386157668289592 0.3593650200992473 0.6404963641339237
+40 0.0002432878779577585 0.5484344029384893 0.4513223091835529
+41 0.2092473563395368 0.7790901204293758 0.01166252323108743
+42 0.6999063654856726 0.2933310736890647 0.006762560825262604
+43 0.4831185953460878 0.4339365100258424 0.08294489462806978
+44 0.0292576800605162 0.9591508668397269 0.01159145309975683
+45 0.1867006668711101 0.796065293589396 0.01723403953949377
+46 0.003266315231745858 0.5708537790812774 0.4258799056869767
+47 0.6291274207544222 0.3527143572968038 0.01815822194877413
+48 0.02141713714322223 0.9537233271429686 0.0248595357138091
+49 0.01317171754156796 0.9169778943953281 0.06985038806310395
+50 0.2211497254419244 0.7683821429887575 0.01046813156931801
+51 2.314690514493323e-08 0.11579905354917 0.8842009233039249
+52 0.8820922333215474 0.117689551078845 0.0002182155996077391
+53 0.8208101360851635 0.1784353736120698 0.0007544903027667682
+54 0.01067202582008938 0.8925634355906562 0.09676453858925446
+55 0.2800657198062314 0.7136903078243987 0.006243972369370006
+56 0.905453981684754 0.09446823634507101 7.778197017495751e-05
+57 0.006838110356898206 0.7907443614840988 0.2024175281590031
+58 0.8237175911135334 0.1755680757274284 0.0007143331590382112
+59 0.004450345634491975 0.6674673518063451 0.328082302559163
+60 0.5886393930203893 0.3822786807540463 0.02908192622556446
+61 0.1061213208343808 0.812930108584915 0.08094857058070434
+62 0.06753143245596548 0.7407839653331104 0.191684602210924
+63 0.01205277332437679 0.9120050156770158 0.07594221099860735
+64 0.02320619483438383 0.9557274248346076 0.02106638033100854
+65 0.02088692244142688 0.952186701680194 0.02692637587837913
+66 2.448383399924293e-12 0.001541519148066872 0.9984584808494847
+67 0.4285837636617972 0.4461870784691586 0.1252291578690442
+68 2.946025360697879e-05 0.3986390642624525 0.6013314754839405
+69 3.98203585590263e-07 0.1689738383946639 0.8310257634017505
+70 0.1269233311499915 0.8184808528260092 0.05459581602399919
+71 0.08772963484839656 0.7909364230012607 0.1213339421503428
+72 0.02426486820361876 0.9550309540228338 0.02070417777354739
+73 0.05217899891051785 0.6725979338073078 0.2752230672821743
+74 0.8452194549572273 0.1543298717317663 0.0004506733110064033
+75 0.004838241899341649 0.6910808908987468 0.3040808672019115
+76 1.198920235239551e-06 0.3283198816772326 0.6716789194025321
+77 0.8882667916649903 0.1115071478917882 0.0002260604432214562
+78 0.6476389301739991 0.337551894066365 0.01480917575963592
+79 0.3286907698848671 0.668133669737322 0.003175560377811062
+80 0.8873636945850181 0.112488266098005 0.0001480393169770257
+81 0.7186192824913594 0.2762818662060978 0.005098851302542715
+82 0.63100855429723 0.3514345786959893 0.01755686700678069
+83 8.934097890872577e-07 0.1882942394050127 0.8117048671851982
+84 9.275585147221103e-05 0.725714033089126 0.2741932110594019
+85 0.8806249498392179 0.1191825284490111 0.0001925217117709219
+86 0.8820922333215474 0.117689551078845 0.0002182155996077391
+87 1.429077450481847e-08 0.0145433538421707 0.9854566318670548
+88 0.63100855429723 0.3514345786959893 0.01755686700678069
+89 0.8113577821477324 0.1874886460858839 0.001153571766383773
+90 0.007262246700933957 0.8088313705080076 0.1839063827910584
+91 0.4748305935897082 0.4376288899812887 0.0875405164290031
=====================================
tests/hbltests/Results/HIVSweden.out_MODEL_-1.nex
=====================================
@@ -37,8 +37,8 @@ END;
BEGIN HYPHY;
-global c=0.9051395450601901;
-global kappa=0.4052422178470169;
+global c=0.9071719918589423;
+global kappa=0.4054779971121202;
modelMatrix={61,61};
modelMatrix[0][1]:=kappa*c*t;
modelMatrix[0][2]:=t;
@@ -636,33 +636,35 @@ TRY_NUMERIC_SEQUENCE_MATCH=0;
ACCEPT_ROOTED_TREES=0;
UseModel (theModel);
-Tree givenTree=(((U68496,U68497)Node3,(U68501,(U68502,(U68503,((((U68504,U68506)Node15,U68505)Node14,U68507)Node13,U68508)Node12)Node10)Node8)Node6)Node2,U68498,(U68499,U68500)Node6_1);
+Tree givenTree=(U68496,U68497,((U68498,(U68499,U68500)Node7)Node5,(U68501,(U68502,(U68503,((((U68504,U68506)Node19,U68505)Node18,U68507)Node17,U68508)Node16)Node14)Node12)Node10)Node4);
+
+givenTree.U68496.t=0.1930890973257582;
+givenTree.U68497.t=0.6424612223788787;
+givenTree.U68498.t=0.409121209278724;
+givenTree.U68499.t=0.2307021519626118;
+givenTree.U68500.t=0.8109768825230198;
+givenTree.Node7.t=0.6770911492487591;
+givenTree.Node5.t=0.192056403882526;
+givenTree.U68501.t=1.068413022713992;
+givenTree.U68502.t=1.553405763007259;
+givenTree.U68503.t=1.569000404650932;
+givenTree.U68504.t=0.1542179615342612;
+givenTree.U68506.t=0.5708195775387649;
+givenTree.Node19.t=0.4514474490820354;
+givenTree.U68505.t=0.2461875505020575;
+givenTree.Node18.t=0.2979766094476649;
+givenTree.U68507.t=0.4958919610910434;
+givenTree.Node17.t=0.2823906966108597;
+givenTree.U68508.t=1.611576611031497;
+givenTree.Node16.t=0.4108260863152219;
+givenTree.Node14.t=0.5198983543648987;
+givenTree.Node12.t=0.2732130787806588;
+givenTree.Node10.t=0.3908328965048091;
+givenTree.Node4.t=1.241058558313147;SetParameter (DEFER_CONSTRAINT_APPLICATION, 1, 0);
+SetParameter (DEFER_CONSTRAINT_APPLICATION, 0, 0);
-givenTree.U68496.t=0.1932757790297635;
-givenTree.U68497.t=0.6442091360697517;
-givenTree.Node3.t=1.244091350196699;
-givenTree.U68501.t=1.072314145612458;
-givenTree.U68502.t=1.555368999549274;
-givenTree.U68503.t=1.573795636757062;
-givenTree.U68504.t=0.1545017778755618;
-givenTree.U68506.t=0.5724438957384254;
-givenTree.Node15.t=0.452076415208067;
-givenTree.U68505.t=0.2464768693858635;
-givenTree.Node14.t=0.2982771493845721;
-givenTree.U68507.t=0.4974360955659346;
-givenTree.Node13.t=0.2831885574728508;
-givenTree.U68508.t=1.613919861191977;
-givenTree.Node12.t=0.41156092487671;
-givenTree.Node10.t=0.5209728334125625;
-givenTree.Node8.t=0.2739993974710216;
-givenTree.Node6.t=0.3910850502827137;
-givenTree.Node2.t=0.1922077082389846;
-givenTree.U68498.t=0.4102499738247144;
-givenTree.U68499.t=0.2311741894617957;
-givenTree.U68500.t=0.8119659118420136;
-givenTree.Node6_1.t=0.678816471820221;
DataSet ds = ReadDataFile(USE_NEXUS_FILE_DATA);
-DataSetFilter filteredData = CreateFilter(ds,3,"0-272","0,1,5-8,10,9,11,12,2-4","TAA,TAG,TGA");
+DataSetFilter filteredData = CreateFilter(ds,3,"0-272","0-8,10,9,11,12","TAA,TAG,TGA");
ASSUME_REVERSIBLE_MODELS=0;
USE_LAST_RESULTS=1;
LikelihoodFunction lf = (filteredData,givenTree);
=====================================
tests/hbltests/Results/HIVSweden.out_MODEL_0.nex
=====================================
@@ -37,11 +37,11 @@ END;
BEGIN HYPHY;
-global W=0.07854089951899505;
+global W=0.07848544796897376;
W:<1;
-global P=0.483017317295036;
+global P=0.4830911811352628;
P:<1;
-global kappa=0.3863915314909612;
+global kappa=0.3862572108595822;
c.weights={1,2};
c.weights[0][0]:=P;
@@ -652,33 +652,35 @@ TRY_NUMERIC_SEQUENCE_MATCH=0;
ACCEPT_ROOTED_TREES=0;
UseModel (theModel);
-Tree givenTree=(((U68496,U68497)Node3,(U68501,(U68502,(U68503,((((U68504,U68506)Node15,U68505)Node14,U68507)Node13,U68508)Node12)Node10)Node8)Node6)Node2,U68498,(U68499,U68500)Node6_1);
+Tree givenTree=(U68496,U68497,((U68498,(U68499,U68500)Node7)Node5,(U68501,(U68502,(U68503,((((U68504,U68506)Node19,U68505)Node18,U68507)Node17,U68508)Node16)Node14)Node12)Node10)Node4);
+
+givenTree.U68496.t=0.3017206887153597;
+givenTree.U68497.t=1.059711785366983;
+givenTree.U68498.t=0.6389397418529784;
+givenTree.U68499.t=0.3596540096621793;
+givenTree.U68500.t=1.321906771217612;
+givenTree.Node7.t=1.136554744000134;
+givenTree.Node5.t=0.3043513200848355;
+givenTree.U68501.t=1.769569624859784;
+givenTree.U68502.t=2.675115127943899;
+givenTree.U68503.t=2.706643713132597;
+givenTree.U68504.t=0.2431327474261901;
+givenTree.U68506.t=0.9044122018972757;
+givenTree.Node19.t=0.7162910919155517;
+givenTree.U68505.t=0.3956882184714828;
+givenTree.Node18.t=0.4996707175132051;
+givenTree.U68507.t=0.774075369790494;
+givenTree.Node17.t=0.2764202496369115;
+givenTree.U68508.t=2.852778762374758;
+givenTree.Node16.t=0.7482651562390982;
+givenTree.Node14.t=0.8229119584835572;
+givenTree.Node12.t=0.3096780975715163;
+givenTree.Node10.t=0.6408915003259278;
+givenTree.Node4.t=2.084190034363844;SetParameter (DEFER_CONSTRAINT_APPLICATION, 1, 0);
+SetParameter (DEFER_CONSTRAINT_APPLICATION, 0, 0);
-givenTree.U68496.t=0.3014658876783538;
-givenTree.U68497.t=1.058937965054783;
-givenTree.Node3.t=2.08174984846752;
-givenTree.U68501.t=1.764333870286974;
-givenTree.U68502.t=2.673570567299918;
-givenTree.U68503.t=2.699658069558426;
-givenTree.U68504.t=0.2431541588198822;
-givenTree.U68506.t=0.9038084521967604;
-givenTree.Node15.t=0.7153498168088975;
-givenTree.U68505.t=0.3956539001874164;
-givenTree.Node14.t=0.4993402949067045;
-givenTree.U68507.t=0.7737346315034068;
-givenTree.Node13.t=0.277145717534158;
-givenTree.U68508.t=2.851718049074558;
-givenTree.Node12.t=0.7478267266145866;
-givenTree.Node10.t=0.8232048503168657;
-givenTree.Node8.t=0.309357412100375;
-givenTree.Node6.t=0.6402997546627597;
-givenTree.Node2.t=0.3042887273137687;
-givenTree.U68498.t=0.6387909451494354;
-givenTree.U68499.t=0.3594052835330087;
-givenTree.U68500.t=1.319979691408115;
-givenTree.Node6_1.t=1.13804766964687;
DataSet ds = ReadDataFile(USE_NEXUS_FILE_DATA);
-DataSetFilter filteredData = CreateFilter(ds,3,"0-272","0,1,5-8,10,9,11,12,2-4","TAA,TAG,TGA");
+DataSetFilter filteredData = CreateFilter(ds,3,"0-272","0-8,10,9,11,12","TAA,TAG,TGA");
ASSUME_REVERSIBLE_MODELS=0;
USE_LAST_RESULTS=1;
LikelihoodFunction lf = (filteredData,givenTree);
=====================================
tests/hbltests/Results/HIVSweden.out_MODEL_1.nex
=====================================
@@ -37,15 +37,15 @@ END;
BEGIN HYPHY;
-global P2=0.7081253811493516;
+global P2=0.7104121522639678;
P2:<1;
-global W_1=0.05854805862606752;
+global W_1=0.05768869986246565;
W_1:<1;
-global W_2=3.640709855148721;
+global W_2=3.641844577696067;
W_2:>1;
-global P1=0.3747723001916182;
+global P1=0.3739141342952348;
P1:<1;
-global kappa=0.3597040016204291;
+global kappa=0.3597183898000834;
c.weights={1,3};
c.weights[0][0]:=P1;
@@ -658,33 +658,35 @@ TRY_NUMERIC_SEQUENCE_MATCH=0;
ACCEPT_ROOTED_TREES=0;
UseModel (theModel);
-Tree givenTree=(((U68496,U68497)Node3,(U68501,(U68502,(U68503,((((U68504,U68506)Node15,U68505)Node14,U68507)Node13,U68508)Node12)Node10)Node8)Node6)Node2,U68498,(U68499,U68500)Node6_1);
+Tree givenTree=(U68496,U68497,((U68498,(U68499,U68500)Node7)Node5,(U68501,(U68502,(U68503,((((U68504,U68506)Node19,U68505)Node18,U68507)Node17,U68508)Node16)Node14)Node12)Node10)Node4);
+
+givenTree.U68496.t=0.1820765235473329;
+givenTree.U68497.t=0.6321073455214453;
+givenTree.U68498.t=0.3343564501791256;
+givenTree.U68499.t=0.1876640197504523;
+givenTree.U68500.t=0.8005801879569905;
+givenTree.Node7.t=0.750108304945318;
+givenTree.Node5.t=0.189875002788432;
+givenTree.U68501.t=1.055571283712865;
+givenTree.U68502.t=1.747134065548263;
+givenTree.U68503.t=1.779098089271837;
+givenTree.U68504.t=0.1466122835681893;
+givenTree.U68506.t=0.5212546540008767;
+givenTree.Node19.t=0.4171959202637034;
+givenTree.U68505.t=0.2414131900046768;
+givenTree.Node18.t=0.2762126262689943;
+givenTree.U68507.t=0.4784362608407944;
+givenTree.Node17.t=0.006818214720109361;
+givenTree.U68508.t=1.945808887957573;
+givenTree.Node16.t=0.5086054961633685;
+givenTree.Node14.t=0.508023542650578;
+givenTree.Node12.t=0.1816851776949857;
+givenTree.Node10.t=0.394526379685196;
+givenTree.Node4.t=1.247106653589127;SetParameter (DEFER_CONSTRAINT_APPLICATION, 1, 0);
+SetParameter (DEFER_CONSTRAINT_APPLICATION, 0, 0);
-givenTree.U68496.t=0.1819372645612003;
-givenTree.U68497.t=0.6305701510272947;
-givenTree.Node3.t=1.244260990073252;
-givenTree.U68501.t=1.05208595250357;
-givenTree.U68502.t=1.746445328113835;
-givenTree.U68503.t=1.774938469552289;
-givenTree.U68504.t=0.1463724860825859;
-givenTree.U68506.t=0.5200057981767465;
-givenTree.Node15.t=0.4166914000694648;
-givenTree.U68505.t=0.241055361539026;
-givenTree.Node14.t=0.2754520695076969;
-givenTree.U68507.t=0.4771824473110387;
-givenTree.Node13.t=0.006305605103123437;
-givenTree.U68508.t=1.944259154790765;
-givenTree.Node12.t=0.5081406979260248;
-givenTree.Node10.t=0.5074584441922946;
-givenTree.Node8.t=0.1808898503811754;
-givenTree.Node6.t=0.3939031724767187;
-givenTree.Node2.t=0.1896556393922262;
-givenTree.U68498.t=0.3337924593752155;
-givenTree.U68499.t=0.1875734352334583;
-givenTree.U68500.t=0.7998185516002427;
-givenTree.Node6_1.t=0.7492860788646263;
DataSet ds = ReadDataFile(USE_NEXUS_FILE_DATA);
-DataSetFilter filteredData = CreateFilter(ds,3,"0-272","0,1,5-8,10,9,11,12,2-4","TAA,TAG,TGA");
+DataSetFilter filteredData = CreateFilter(ds,3,"0-272","0-8,10,9,11,12","TAA,TAG,TGA");
ASSUME_REVERSIBLE_MODELS=0;
USE_LAST_RESULTS=1;
LikelihoodFunction lf = (filteredData,givenTree);
View it on GitLab: https://salsa.debian.org/med-team/hyphy/-/commit/e5c8fd32984d059ec09233aa8f7e5cdb888c538c
--
View it on GitLab: https://salsa.debian.org/med-team/hyphy/-/commit/e5c8fd32984d059ec09233aa8f7e5cdb888c538c
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201221/84805600/attachment-0001.html>
More information about the debian-med-commit
mailing list