[med-svn] [libhmsbeagle] 03/03: Fix unclear sequence of execution in kernels4.cu

Andreas Tille tille at debian.org
Sat Jun 17 18:42:19 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch debian/jessie-backports
in repository libhmsbeagle.

commit 9fb65c696dea390e1b10e90c9c65b300bb01859e
Author: Andreas Tille <tille at debian.org>
Date:   Sat Jun 17 20:42:02 2017 +0200

    Fix unclear sequence of execution in kernels4.cu
---
 debian/changelog                     |   1 +
 debian/patches/fix_kernels4.cu.patch | 138 +++++++++++++++++++++++++++++++++++
 debian/patches/series                |   1 +
 3 files changed, 140 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index b3da3de..b2032a4 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,7 @@
 libhmsbeagle (2.1.2+20160831-5~bpo8+1) jessie-backports; urgency=medium
 
   * Rebuild for jessie-backports.
+  * Fix unclear sequence of execution in kernels4.cu
 
  -- Andreas Tille <tille at debian.org>  Mon, 12 Jun 2017 20:45:36 +0200
 
diff --git a/debian/patches/fix_kernels4.cu.patch b/debian/patches/fix_kernels4.cu.patch
new file mode 100644
index 0000000..3d902f5
--- /dev/null
+++ b/debian/patches/fix_kernels4.cu.patch
@@ -0,0 +1,138 @@
+Author: Gunter Königsmann <gunter at peterpall.de>
+        James Cowgill <jcowgill at debian.org>
+Last-Update: Sat, 17 Jun 2017 12:08:21 +0100
+Description: Quoting Gunter Königsmann <gunter at peterpall.de>:
+  Even if these code lines would work they probably should be
+  reformulated by upstream so they are easier to read:
+  .
+  ++i assigns i the value i+1 and returns the new i. And then i is assigned
+  another new value by the "i =" in the same statement...
+  .
+  Perhaps the C compiler is as unsure which assignment should take
+  precedence as I am.
+
+--- a/libhmsbeagle/GPU/kernels/kernels4.cu
++++ b/libhmsbeagle/GPU/kernels/kernels4.cu
+@@ -321,13 +321,13 @@
+     int patIdx16pat4 = multBy16(patIdx) | (tx & 0xC);\
+     sum1 = sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+     sum2 = sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(   sMatrix1[multBy4(i) | state],  sPartials1[patIdx16pat4 | i], sum1);\
+     FMA(   sMatrix2[multBy4(i) | state],  sPartials2[patIdx16pat4 | i], sum2);\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(   sMatrix1[multBy4(i) | state],  sPartials1[patIdx16pat4 | i], sum1);\
+     FMA(   sMatrix2[multBy4(i) | state],  sPartials2[patIdx16pat4 | i], sum2);\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(   sMatrix1[multBy4(i) | state],  sPartials1[patIdx16pat4 | i], sum1);\
+     FMA(   sMatrix2[multBy4(i) | state],  sPartials2[patIdx16pat4 | i], sum2);
+ 
+@@ -339,11 +339,11 @@
+     int i = pat;\
+     int patIdx16pat4 = multBy16(patIdx) | (tx & 0xC);\
+     sum2  = sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(    sMatrix2[multBy4(i) | state],  sPartials2[patIdx16pat4 | i], sum2);\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(    sMatrix2[multBy4(i) | state],  sPartials2[patIdx16pat4 | i], sum2);\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(    sMatrix2[multBy4(i) | state],  sPartials2[patIdx16pat4 | i], sum2);
+ 
+ #define SUM_PARTIALS_SINGLE_4_GPU()\
+@@ -351,11 +351,11 @@
+     int i = pat;\
+     int patIdx16pat4 = multBy16(patIdx) | (tx & 0xC);\
+     sum1  = sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(    sMatrix1[multBy4(i) | state],  sPartials1[patIdx16pat4 | i], sum1);\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(    sMatrix1[multBy4(i) | state],  sPartials1[patIdx16pat4 | i], sum1);\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(    sMatrix1[multBy4(i) | state],  sPartials1[patIdx16pat4 | i], sum1);
+ 
+ #define SUM_STATES_SINGLE_4_GPU()\
+@@ -373,15 +373,15 @@
+     sum1           = sMatrix1[          multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+     sumFirstDeriv  = sMatrixFirstDeriv[ multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+     sumSecondDeriv = sMatrixSecondDeriv[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(sMatrix1[          multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+     FMA(sMatrixFirstDeriv[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumFirstDeriv);\
+     FMA(sMatrixSecondDeriv[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumSecondDeriv);\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(sMatrix1[          multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+     FMA(sMatrixFirstDeriv[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumFirstDeriv);\
+     FMA(sMatrixSecondDeriv[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumSecondDeriv);\
+-    i = (++i) & 0x3;\
++    i = (i + 1) & 0x3;\
+     FMA(sMatrix1[          multBy4(i) | state], sPartials1[patIdx16pat4 | i], sum1);\
+     FMA(sMatrixFirstDeriv[ multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumFirstDeriv);\
+     FMA(sMatrixSecondDeriv[multBy4(i) | state], sPartials1[patIdx16pat4 | i], sumSecondDeriv);
+@@ -1194,15 +1194,15 @@ KW_GLOBAL_KERNEL void kernelPartialsPart
+             sum1  = sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+             sum2  = sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+ 
+-            i = (++i) & 0x3;
++            i = (i + 1) & 0x3;
+             sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+             sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+ 
+-            i = (++i) & 0x3;
++            i = (i + 1) & 0x3;
+             sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+             sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+ 
+-            i = (++i) & 0x3;
++            i = (i + 1) & 0x3;
+             sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+             sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+             
+@@ -1283,15 +1283,15 @@ KW_GLOBAL_KERNEL void kernelPartialsPart
+         sum1  = sMatrix1[i * 4 + state] * sPartials1[patIdx * 16 + pat * 4 + i];
+         sum2  = sMatrix2[i * 4 + state] * sPartials2[patIdx * 16 + pat * 4 + i];
+ 
+-        i = (++i) & 0x3;
++        i = (i + 1) & 0x3;
+         sum1 += sMatrix1[i * 4 + state] * sPartials1[patIdx * 16 + pat * 4 + i];
+         sum2 += sMatrix2[i * 4 + state] * sPartials2[patIdx * 16 + pat * 4 + i];
+ 
+-        i = (++i) & 0x3;
++        i = (i + 1) & 0x3;
+         sum1 += sMatrix1[i * 4 + state] * sPartials1[patIdx * 16 + pat * 4 + i];
+         sum2 += sMatrix2[i * 4 + state] * sPartials2[patIdx * 16 + pat * 4 + i];
+ 
+-        i = (++i) & 0x3;
++        i = (i + 1) & 0x3;
+         sum1 += sMatrix1[i * 4 + state] * sPartials1[patIdx * 16 + pat * 4 + i];
+         sum2 += sMatrix2[i * 4 + state] * sPartials2[patIdx * 16 + pat * 4 + i];
+         
+@@ -1358,15 +1358,15 @@ KW_GLOBAL_KERNEL void kernelPartialsPart
+     sum1  = sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+     sum2  = sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+ 
+-    i = (++i) & 0x3;
++    i = (i + 1) & 0x3;
+     sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+     sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+ 
+-    i = (++i) & 0x3;
++    i = (i + 1) & 0x3;
+     sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+     sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+ 
+-    i = (++i) & 0x3;
++    i = (i + 1) & 0x3;
+     sum1 += sMatrix1[multBy4(i) | state] * sPartials1[patIdx16pat4 | i];
+     sum2 += sMatrix2[multBy4(i) | state] * sPartials2[patIdx16pat4 | i];
+     
diff --git a/debian/patches/series b/debian/patches/series
index 7f4aa4d..d3243c9 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -2,3 +2,4 @@ enable_static.patch
 # disable_cpu_sse_plugin.patch
 gcc-4.7.patch
 doxygen_update.patch
+fix_kernels4.cu.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libhmsbeagle.git



More information about the debian-med-commit mailing list