[med-svn] [libhmsbeagle] 03/03: Fix unclear sequence of execution in kernels4.cu
Andreas Tille
tille at debian.org
Sat Jun 17 18:42:19 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch debian/jessie-backports
in repository libhmsbeagle.
commit 9fb65c696dea390e1b10e90c9c65b300bb01859e
Author: Andreas Tille <tille at debian.org>
Date: Sat Jun 17 20:42:02 2017 +0200
Fix unclear sequence of execution in kernels4.cu
---
debian/changelog | 1 +
debian/patches/fix_kernels4.cu.patch | 138 +++++++++++++++++++++++++++++++++++
debian/patches/series | 1 +
3 files changed, 140 insertions(+)
diff --git a/debian/changelog b/debian/changelog
index b3da3de..b2032a4 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,7 @@
libhmsbeagle (2.1.2+20160831-5~bpo8+1) jessie-backports; urgency=medium
* Rebuild for jessie-backports.
+ * Fix unclear sequence of execution in kernels4.cu
-- Andreas Tille <tille at debian.org> Mon, 12 Jun 2017 20:45:36 +0200
diff --git a/debian/patches/fix_kernels4.cu.patch b/debian/patches/fix_kernels4.cu.patch
new file mode 100644
index 0000000..3d902f5
--- /dev/null
+++ b/debian/patches/fix_kernels4.cu.patch
@@ -0,0 +1,138 @@
+Author: Gunter Königsmann <gunter at peterpall.de>
+ James Cowgill <jcowgill at debian.org>
+Last-Update: Sat, 17 Jun 2017 12:08:21 +0100
+Description: Quoting Gunter Königsmann <gunter at peterpall.de>:
+ Even if these lines of code work, they should probably be
+ reformulated by upstream so that they are easier to read:
+ .
+ ++i assigns i the value i+1 and returns the new i. Then i is assigned
+ yet another new value by the "i =" in the same statement...
+ .
+ Perhaps the C compiler is as unsure as I am about which assignment
+ should take precedence.
+
+--- a/libhmsbeagle/GPU/kernels/kernels4.cu
++++ b/libhmsbeagle/GPU/kernels/kernels4.cu
+@@ -321,13 +321,13 @@
+ int patIdx16pat4 = multBy16(patIdx) | (tx & 0xC);\
+ sum1 = sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+ sum2 = sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix1[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+ FMA( sMatrix2[multBy4(i) | state], sPartials2[patIdx16pat4 | i], sum2);\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix1[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+ FMA( sMatrix2[multBy4(i) | state], sPartials2[patIdx16pat4 | i], sum2);\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix1[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+ FMA( sMatrix2[multBy4(i) | state], sPartials2[patIdx16pat4 | i], sum2);
+
+@@ -339,11 +339,11 @@
+ int i = pat;\
+ int patIdx16pat4 = multBy16(patIdx) | (tx & 0xC);\
+ sum2 = sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix2[multBy4(i) | state], sPartials2[patIdx16pat4 | i], sum2);\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix2[multBy4(i) | state], sPartials2[patIdx16pat4 | i], sum2);\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix2[multBy4(i) | state], sPartials2[patIdx16pat4 | i], sum2);
+
+ #define SUM_PARTIALS_SINGLE_4_GPU()\
+@@ -351,11 +351,11 @@
+ int i = pat;\
+ int patIdx16pat4 = multBy16(patIdx) | (tx & 0xC);\
+ sum1 = sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix1[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix1[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA( sMatrix1[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);
+
+ #define SUM_STATES_SINGLE_4_GPU()\
+@@ -373,15 +373,15 @@
+ sum1 = sMatrix1[ multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+ sumFirstDeriv = sMatrixFirstDeriv[ multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+ sumSecondDeriv = sMatrixSecondDeriv[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA(sMatrix1[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+ FMA(sMatrixFirstDeriv[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumFirstDeriv);\
+ FMA(sMatrixSecondDeriv[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumSecondDeriv);\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA(sMatrix1[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+ FMA(sMatrixFirstDeriv[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumFirstDeriv);\
+ FMA(sMatrixSecondDeriv[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumSecondDeriv);\
+- i = (++i) & 0x3;\
++ i = (i + 1) & 0x3;\
+ FMA(sMatrix1[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+ FMA(sMatrixFirstDeriv[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumFirstDeriv);\
+ FMA(sMatrixSecondDeriv[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumSecondDeriv);
+@@ -1194,15 +1194,15 @@ KW_GLOBAL_KERNEL void kernelPartialsPart
+ sum1 = sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+ sum2 = sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+ sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+ sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+ sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+
+@@ -1283,15 +1283,15 @@ KW_GLOBAL_KERNEL void kernelPartialsPart
+ sum1 = sMatrix1[i * 4 + state] * sPartials1[patIdx * 16 + pat * 4 + i];
+ sum2 = sMatrix2[i * 4 + state] * sPartials2[patIdx * 16 + pat * 4 + i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[i * 4 + state] * sPartials1[patIdx * 16 + pat * 4 + i];
+ sum2 += sMatrix2[i * 4 + state] * sPartials2[patIdx * 16 + pat * 4 + i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[i * 4 + state] * sPartials1[patIdx * 16 + pat * 4 + i];
+ sum2 += sMatrix2[i * 4 + state] * sPartials2[patIdx * 16 + pat * 4 + i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[i * 4 + state] * sPartials1[patIdx * 16 + pat * 4 + i];
+ sum2 += sMatrix2[i * 4 + state] * sPartials2[patIdx * 16 + pat * 4 + i];
+
+@@ -1358,15 +1358,15 @@ KW_GLOBAL_KERNEL void kernelPartialsPart
+ sum1 = sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+ sum2 = sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+ sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+ sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+
+- i = (++i) & 0x3;
++ i = (i + 1) & 0x3;
+ sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+ sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+
diff --git a/debian/patches/series b/debian/patches/series
index 7f4aa4d..d3243c9 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -2,3 +2,4 @@ enable_static.patch
# disable_cpu_sse_plugin.patch
gcc-4.7.patch
doxygen_update.patch
+fix_kernels4.cu.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libhmsbeagle.git
More information about the debian-med-commit
mailing list