Bug#877316: #877316: clblas: Crashes on single-precision-only hardware, due to double-precision literals
Rebecca N. Palmer
rebecca_palmer at zoho.com
Sun Jan 27 17:54:47 GMT 2019
The attached patch fixes this issue in sgemm, which is enough to pass
the libgpuarray tests. (This is the only testing it's had, as the
clblas package doesn't appear to run its own tests.)
I don't know if any other operations (that libgpuarray doesn't use) are
affected, but there are suspicious-looking instances in at least her(2)
and rot(m)g; see attached list.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 877316.patch
Type: text/x-diff
Size: 3103 bytes
Desc: not available
URL: <http://alioth-lists.debian.net/pipermail/debian-science-maintainers/attachments/20190127/cfb307d9/attachment-0001.patch>
-------------- next part --------------
$ grep -rniE -e "[0-9]\.[0-9]*[^f]" ../clblas/src/library
The output of the above command, after some manual editing:
../clblas/src/library/blas/gens/clTemplates/dtrsm_gpu.cl:14:#define ZERO ( 0.0)
../clblas/src/library/blas/gens/clTemplates/dtrsm_gpu.cl:15:#define ONE ( 1.0)
../clblas/src/library/blas/gens/clTemplates/her2.cl:18://NOTE: THIS FILE IS NOT USED. SEE SYR2_HER2.CLT
../clblas/src/library/blas/gens/clTemplates/her2.cl:50: if( (alpha.even == 0.0) && (alpha.odd == 0.0) )
../clblas/src/library/blas/gens/clTemplates/her2.cl:156: /* HER2 defn: On output, if alpha not equal to 0.0, then imaginary part of A is set to zero. */
../clblas/src/library/blas/gens/clTemplates/her2.cl:158: res2.odd = (r == c) ? 0.0 : res2.odd;
../clblas/src/library/blas/gens/clTemplates/her2.cl:372: if( (alpha.even == 0.0) && (alpha.odd == 0.0) )
../clblas/src/library/blas/gens/clTemplates/her2.cl:479: /* HER2 defn: On output, if alpha not equal to 0.0, then imaginary part of A is set to zero. */
../clblas/src/library/blas/gens/clTemplates/her2.cl:481: res2.odd = (r == c) ? 0.0 : res2.odd;
../clblas/src/library/blas/gens/clTemplates/her.cl:129: these values. On output, if alpha not equal to 0.0, they are set to zero. */
../clblas/src/library/blas/gens/clTemplates/her.cl:131: res.odd = ((r == c) && (alpha != 0.0)) ? 0.0 : res.odd;
../clblas/src/library/blas/gens/clTemplates/her.cl:384: these values. On output, if alpha not equal to 0.0, they are set to zero. */
../clblas/src/library/blas/gens/clTemplates/her.cl:386: res.odd = ((r == c) && (alpha != 0.0)) ? 0.0 : res.odd;
../clblas/src/library/blas/gens/clTemplates/rotg.cl:28:#define ZERO (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/rotg.cl:29:#define PZERO (%PTYPE)0.0
../clblas/src/library/blas/gens/clTemplates/rotg.cl:56: Creg = 1.0;
../clblas/src/library/blas/gens/clTemplates/rotg.cl:71: ( (isnotequal(Creg, ZERO))? (1.0/Creg): 1.0 );
../clblas/src/library/blas/gens/clTemplates/rotg.cl:87: Sreg = (%TYPE)(1.0, 0.0);
../clblas/src/library/blas/gens/clTemplates/asum.cl:29: %TYPE asum = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/asum.cl:33: scratchBuff[0] = (%PTYPE)0.0;
../clblas/src/library/blas/gens/clTemplates/rotm.cl:28:#define ZERO (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/rotm.cl:29:#define ONE (%TYPE)1.0
../clblas/src/library/blas/gens/clTemplates/rotm.cl:30:#define TWO (%TYPE)2.0
../clblas/src/library/blas/gens/clTemplates/dtrsm_gpu192.cl:8:#define ZERO ( 0.0)
../clblas/src/library/blas/gens/clTemplates/dtrsm_gpu192.cl:9:#define ONE ( 1.0)
../clblas/src/library/blas/gens/clTemplates/trmv.cl:98: //float acc = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:99: %TYPE acc = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:100: %TYPE accTemp = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:128: // accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:148: %TYPE sumTemp= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:209: //float acc = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:210: %TYPE acc = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:211: %TYPE accTemp = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:248: //accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:349: //float acc = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:350: %TYPE acc = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:351: %TYPE accTemp = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:378: //accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:398: %TYPE sumTemp= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:460: //float acc = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:461: %TYPE acc = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:462: %TYPE accTemp = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:497: //accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:590: %TYPE accTemp= %INIT( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:637: %TYPE sum = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:638: %TYPE loadedA = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:678: //loadedA.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:775: %TYPE sum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:776: %TYPE accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:802: //accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:823: // float4 acc = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:824: %TYPE accTemp = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:871: //acc = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:872: %TYPE sum = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:873: %TYPE loadedA = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:910: //loadedA.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:68: sum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:69: localRed[ lId ] = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:109: reg1.odd = 0.0; // Imaginary part of diagonal is assumed to be zero
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:151: sum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:225: %TYPE thrSum = %MAKEVEC(0.0); //Private sum for each thread
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:273: %TYPE tempSum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/dot.cl:33: %TYPE dotP = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/reduction.cl:33: %TYPE redVal = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/reduction.cl:168: #define MIN 0x1.0p-1022 // Min in case of d/z (values from khronos site)
../clblas/src/library/blas/gens/clTemplates/reduction.cl:170: #define MIN 0x1.0p-126f // Min in case od s/c
../clblas/src/library/blas/gens/clTemplates/reduction.cl:237: %TYPE redVal = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/reduction.cl:275:#define ZERO (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/reduction.cl:317: %TYPE ssq = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/trsv.cl:63: %TYPE sum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:64: %TYPE xVal = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:65: %TYPE loadedA = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:164: %TYPE sum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:165: %TYPE xVal = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:166: %TYPE loadedA = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:286: %TYPE diagA = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:393: %TYPE diagA = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/syr2_her2.cl:175: res2.odd = (r == c) ? 0.0 : res2.odd;
../clblas/src/library/blas/gens/clTemplates/syr2_her2.cl:541: res2.odd = (r == c) ? 0.0 : res2.odd;
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:54: %TYPE sum = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:55: %TYPE loadedA = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:138: %TYPE sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:179: %TYPE sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:290: %TYPE accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:354: %TYPE sum = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:355: %TYPE loadedA = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:443: %TYPE sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:484: %TYPE sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:602: %TYPE accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:668: %TYPE sum = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:669: %TYPE loadedA = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:764: %TYPE sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:805: %TYPE sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:914: %TYPE accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:977: %TYPE sum = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:978: %TYPE loadedA = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1066: %TYPE sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1107: %TYPE sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1223: %TYPE accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1346: accTemp = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1366: sacc = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1451: accTemp = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1471: sacc = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:38: scratchBuff[0] = (%PTYPE)0.0;
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:44: %TYPE%V res = (%TYPE%V) 0.0;
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:74: nrm2_ptype = hypot( nrm2.even, nrm2.odd );
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:98:#define PZERO (%PTYPE)0.0
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:99:#define ZERO (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:100:#define VZERO (%TYPE%V)0.0
../clblas/src/library/blas/gens/clTemplates/syr_her.cl:48: if(alpha == 0.0)
../clblas/src/library/blas/gens/clTemplates/syr_her.cl:143: res.odd = (r == c) ? 0.0 : res.odd;
../clblas/src/library/blas/gens/clTemplates/syr_her.cl:328: if(alpha == 0.0)
../clblas/src/library/blas/gens/clTemplates/syr_her.cl:422: res.odd = (r == c) ? 0.0 : res.odd;
../clblas/src/library/blas/gens/clTemplates/iamax.cl:52: scratchBufVal[0] = (%PTYPE)0.0;
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:29:#define ZERO (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:30:#define ONE (%TYPE)1.0
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:31:#define TWO (%TYPE)2.0
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:33:#define GAM (%TYPE)4096.0
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:35:#define RGAMSQ (%TYPE)( 1.0 / GAMSQ )
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:289: plB[0] = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:290: plB[16] = CurrentOffSetB+16>=N?0.0:B[16];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:292: plA[0] = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:293: plA[16] = CurrentOffSetA+16>=M?0.0:A[16];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:634: plB[0] = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:635: plB[16] = CurrentOffSetB+16>=N?0.0:B[16*ldb];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:637: plA[0] = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:638: plA[16] = CurrentOffSetA+16>=M?0.0:A[16];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:976: plB[0] = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:977: plB[16] = CurrentOffSetB+16>=N?0.0:B[16*ldb];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:979: plA[0] = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:980: plA[16] = CurrentOffSetA+16>=M?0.0:A[16*lda];
../clblas/src/library/blas/gens/legacy/trsm_img.c:337: kgenAddStmt(ctx, "*y = trunc((-0.5 + sqrt(2.0 * n + 0.25)));\n");
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:45: revAlp = "div((double2)(-1., 0), alpha)";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:46: alp = "(double2)(1., 0)";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:50: revAlp = "-1. / alpha";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:51: alp = "1.";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:111: alp = "(double2)(1., 0)";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:115: alp = "1.";
../clblas/src/library/blas/gens/kprintf.cpp:1619: numCharsWritten = sprintf(dst,"%s.odd = 0.0f", id1);
../clblas/src/library/blas/gens/kprintf.cpp:1951: numCharsWritten = sprintf(dst, "\t %s = (isnotequal(%s, (%s)0.0))?\n", p3, p2, (get("%PTYPE").value));
../clblas/src/library/blas/gens/kprintf.cpp:2042: numCharsWritten = sprintf(dst, "\t %s = (isnotequal(%s, (%s)0.0))?\n", p3, p2, (get("%PTYPE").value));
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:37: return static_cast<T>(0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:44: return floatComplex(0.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:51: return doubleComplex(0.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:58: return static_cast<T>(1.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:65: return floatComplex(1.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:72: return doubleComplex(1.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:79: return static_cast<T>(2.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:86: return floatComplex(2.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:93: return doubleComplex(2.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:362: if ((CREAL(a) == 0.0) && (CIMAG(a) == 0.0))
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:363: return 0.0;
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:370: if ((CREAL(a) == 0.0) && (CIMAG(a) == 0.0))
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:371: return 0.0;
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:375:#define FLOAT_UPPER_BOUND pow(2.0, 23)
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:376:#define DOUBLE_UPPER_BOUND pow(2.0, 52)
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmPreCompileKernels.cpp:824: beta = 1.0;
../clblas/src/library/blas/xher2k.c:137: CIMAG( kargs->alpha.argFloatComplex ) *= -1.0;
../clblas/src/library/blas/xher2k.c:138: CREAL( kargs->beta.argFloatComplex ) = 1.0;
../clblas/src/library/blas/xher2k.c:139: CIMAG( kargs->beta.argFloatComplex ) = 0.0;
../clblas/src/library/blas/xher2k.c:143: CIMAG( kargs->alpha.argDoubleComplex ) *= -1.0;
../clblas/src/library/blas/xher2k.c:144: CREAL( kargs->beta.argDoubleComplex ) = 1.0;
../clblas/src/library/blas/xher2k.c:145: CIMAG( kargs->beta.argDoubleComplex ) = 0.0;
../clblas/src/library/blas/xher2k.c:184: CIMAG(fBeta) = 0.0f;
../clblas/src/library/blas/xher2k.c:192: CIMAG( kargs.alpha.argFloatComplex ) *= -1.0;
../clblas/src/library/blas/xher2k.c:230: CIMAG(fBeta) = 0.0f;
../clblas/src/library/blas/xher2k.c:239: CIMAG( kargs.alpha.argDoubleComplex ) *= -1.0;
../clblas/src/library/blas/xherk.c:158: CIMAG(fAlpha) = 0.0f;
../clblas/src/library/blas/xherk.c:160: CIMAG(fBeta) = 0.0f;
../clblas/src/library/blas/xherk.c:198: CIMAG(fAlpha) = 0.0f;
../clblas/src/library/blas/xherk.c:200: CIMAG(fBeta) = 0.0f;
../clblas/src/library/blas/functor/gpu_dtrsm.cc:361: double zero = 0.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm.cc:409: double neg_one = -1.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm.cc:410: double one = 1.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm.cc:411: double zero = 0.0 ;
../clblas/src/library/blas/functor/functor_xscal.cc:318: CIMAG(fAlpha) = 0.0f;
../clblas/src/library/blas/functor/functor_xscal.cc:372: CIMAG(fAlpha) = 0.0f;
../clblas/src/library/blas/functor/gpu_dtrsm192.cc:320: double zero = 0.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm192.cc:368: double neg_one = -1.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm192.cc:369: double one = 1.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm192.cc:370: double zero = 0.0 ;
../clblas/src/library/blas/xtbsv.c:699: kargs.alpha.argFloat = -1.0;
../clblas/src/library/blas/xtbsv.c:700: kargs.beta.argFloat = 1.0;
../clblas/src/library/blas/xtbsv.c:734: kargs.alpha.argDouble = -1.0;
../clblas/src/library/blas/xtbsv.c:735: kargs.beta.argDouble = 1.0;
../clblas/src/library/blas/xtbsv.c:771: CREAL(alpha) = -1.0;
../clblas/src/library/blas/xtbsv.c:772: CIMAG(alpha) = 0.0;
../clblas/src/library/blas/xtbsv.c:773: CREAL(beta) = 1.0;
../clblas/src/library/blas/xtbsv.c:774: CIMAG(beta) = 0.0;
../clblas/src/library/blas/xtbsv.c:813: CREAL(alpha) = -1.0;
../clblas/src/library/blas/xtbsv.c:814: CIMAG(alpha) = 0.0;
../clblas/src/library/blas/xtbsv.c:815: CREAL(beta) = 1.0;
../clblas/src/library/blas/xtbsv.c:816: CIMAG(beta) = 0.0;
../clblas/src/library/blas/xsymm.c:234: GEMMTArgs.beta.argFloat = 1.0f;
../clblas/src/library/blas/xsymm.c:235: GEMMDArgs.beta.argFloat = 1.0f;
../clblas/src/library/blas/xsymm.c:239: GEMMTArgs.beta.argDouble = 1.0;
../clblas/src/library/blas/xsymm.c:240: GEMMDArgs.beta.argDouble = 1.0;
../clblas/src/library/blas/xsymm.c:244: CREAL(cBeta) = 1.0f;
../clblas/src/library/blas/xsymm.c:245: CIMAG(cBeta) = 0.0f;
../clblas/src/library/blas/xsymm.c:251: CREAL(zBeta) = 1.0;
../clblas/src/library/blas/xsymm.c:252: CIMAG(zBeta) = 0.0;
../clblas/src/library/blas/gens/trsm.c:836: revAlp = "div((double2)(-1., 0), alpha)";
../clblas/src/library/blas/gens/trsm.c:837: alp = "(double2)(1., 0)";
../clblas/src/library/blas/gens/trsm.c:841: revAlp = "-1. / alpha";
../clblas/src/library/blas/gens/trsm.c:842: alp = "1.";
../clblas/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp:19:#define ZERO ( 0.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp:20:#define ONE ( 1.0) \n
../clblas/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp:44:#define ZERO ( 0.0) \n
../clblas/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp:45:#define ONE ( 1.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp:21:#define ZERO ( 0.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp:22:#define ONE ( 1.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp:19:#define ZERO ( 0.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp:20:#define ONE ( 1.0) \n
../clblas/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp:42:#define ZERO ( 0.0) \n
../clblas/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp:43:#define ONE ( 1.0) \n
../clblas/src/library/blas/xtrsm.cc:280: double zero = 0.0;
../clblas/src/library/blas/xtrsm.cc:615: double neg_one = -1.0;
../clblas/src/library/blas/xtrsm.cc:616: double one = 1.0;
../clblas/src/library/blas/xtrsm.cc:617: double zero = 0.0;
../clblas/src/library/blas/xtrsm.cc:1176: double neg_one = -1.0;
../clblas/src/library/blas/xtrsm.cc:1177: double one = 1.0;
../clblas/src/library/blas/xtrsm.cc:1178: double zero = 0.0;
../clblas/src/library/common/devinfo-cache.c:57: " sum = (float4)(0.0); \n"
../clblas/src/library/common/devinfo-cache.c:103: " sum = (float4)(0.0); \n"
Done (already handled by the attached patch):
../clblas/src/library/blas/AutoGemm/KernelOpenCL.py:366: zeroString = "(double2)(0.0, 0.0)"
../clblas/src/library/blas/AutoGemm/KernelOpenCL.py:368: zeroString = "0.0"
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp:89: plB[0] = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp:90: plB[16] = CurrentOffSetB+16>=N?0.0:B[16*ldb];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp:92: plA[0] = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp:93: plA[16] = CurrentOffSetA+16>=M?0.0:A[16];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp:88: plB[0] = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp:89: plB[16] = CurrentOffSetB+16>=N?0.0:B[16*ldb];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp:91: plA[0] = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp:92: plA[16] = CurrentOffSetA+16>=M?0.0:A[16*lda];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp:88: plB[0] = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp:89: plB[16] = CurrentOffSetB+16>=N?0.0:B[16];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp:91: plA[0] = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp:92: plA[16] = CurrentOffSetA+16>=M?0.0:A[16];
Presumed (but not checked) not to be kernel code:
../clblas/src/library/common/tests/t_dblock_kgen.c:116:const float boundMarker = 5.0;
../clblas/src/library/common/tests/t_dblock_kgen.c:788: imageWidth = fl4RowWidth(tdesc->dim.x * 3.5, tsize);
../clblas/src/library/tools/tune/dimension.c:91: step = 2.8f * step;
../clblas/src/library/tools/tune/subdim.c:411: const double K_INCREASE = 1.5;
../clblas/src/library/tools/tune/subdim.c:412: const double K_GLOBAL = 0.97;
../clblas/src/library/tools/tune/subdim.c:480: maxTime = fmax(2.1*midTime - sd->minTime, sd->minTime*5);
../clblas/src/library/tools/tune/subdim.c:519: double kgroup = 1.0;
../clblas/src/library/tools/tune/subdim.c:534: kgroup *= 1.1;
../clblas/src/library/tools/tune/subdim.c:548: if (vi->time == 0 && vi->weight >= 0.01 ) {
../clblas/src/library/tools/tune/subdim.c:720: sd->allVariant[i].minTime = 0.0;
../clblas/src/library/tools/tune/subdim.c:721: sd->allVariant[i].probableTime = 0.0;
../clblas/src/library/tools/tune/subdim.c:722: sd->allVariant[i].maxTime = 5000.0;
../clblas/src/library/tools/tune/storage_io.c:153: else if (bParam->time > 10000.0) {
../clblas/src/library/tools/tune/tune.c:632: args->alpha.argFloat = 1.0;
../clblas/src/library/tools/tune/tune.c:636: args->alpha.argDouble = 1.0;
../clblas/src/library/tools/tune/tune.c:640: args->alpha.argFloatComplex.s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:641: args->alpha.argFloatComplex.s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:643: args->beta.argFloatComplex.s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:647: args->alpha.argDoubleComplex.s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:648: args->alpha.argDoubleComplex.s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:650: args->beta.argDoubleComplex.s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:851: m->f[i] = 1.0;
../clblas/src/library/tools/tune/tune.c:891: mi->A.d[i] = 1.0;
../clblas/src/library/tools/tune/tune.c:894: mi->B.d[i] = 1.0;
../clblas/src/library/tools/tune/tune.c:900: mi->A.f2[i].s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:901: mi->A.f2[i].s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:904: mi->B.f2[i].s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:905: mi->B.f2[i].s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:911: mi->A.d2[i].s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:912: mi->A.d2[i].s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:915: mi->B.d2[i].s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:916: mi->B.d2[i].s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:1011: if (fabs(t - oldt) < 0.0001) {
../clblas/src/library/tools/tune/tune.c:1893: time[DIMARRAY_SMALL] = 5000.0;
../clblas/src/library/tools/tune/tune.c:1894: time[DIMARRAY_MIDDLE] = 5000.0;
../clblas/src/library/tools/tune/tune.c:1895: time[DIMARRAY_BIG] = 5000.0;
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:87: return static_cast<T>(0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:94: return floatComplex(0.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:101: return doubleComplex(0.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:108: return static_cast<T>(1.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:115: return floatComplex(1.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:122: return doubleComplex(1.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:129: return static_cast<T>(2.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:136: return floatComplex(2.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:143: return doubleComplex(2.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:412: if ((CREAL(a) == 0.0) && (CIMAG(a) == 0.0))
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:413: return 0.0;
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:420: if ((CREAL(a) == 0.0) && (CIMAG(a) == 0.0))
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:421: return 0.0;
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:425:#define FLOAT_UPPER_BOUND pow(2.0, 23)
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:426:#define DOUBLE_UPPER_BOUND pow(2.0, 52)
../clblas/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp:252: rule.startSize = sqrt(M*N)+0.5;
../clblas/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp:472:const float peakGflops = 5.24e3; // sp for W9100
../clblas/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp:474:const float peakGflops = 2.62e3; // dp for W9100
../clblas/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp:844: double gFlops = (1.0*totalFlops) / (1.0*totalNs);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:137: printf("MISMATCH C[%u][%u]: gpu= %4.1f + %4.1fi, cpu= %4.1f + %4.1fi\n",
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:142: printf("MISMATCH C[%u][%u]: gpu= %4.1f, cpu= %4.1f\n",
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:160:const float peakGflops = 5.24e3; // sp for W9100
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:162:const float peakGflops = 2.62e3; // dp for W9100
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:676: double gFlops = (1.0*totalFlops) / (1.0*timeNs);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:677: printf("%12llu flops in %12llu ns = %7.1f Gflop/s (%5.1f%% of peak)\n", totalFlops, timeNs, gFlops, 100*gFlops/peakGflops);
../clblas/src/library/blas/generic/solution_seq_make.c:771: /* This implementation assumes that alignment is the power of 2. */
../clblas/src/library/blas/generic/solution_seq_make.c:820: stepSize.y = (size_t)(size.y * (double)nrCU / totalCUs + 0.5);
../clblas/src/library/blas/generic/solution_seq_make.c:836: stepSize.x = (size_t)(size.x * (double)nrCU / totalCUs + 0.5);
../clblas/src/library/blas/generic/solution_seq_make.c:883: /* 1. Sort steps according to the number of CU they have */
../clblas/src/library/blas/generic/solution_seq_make.c:912: /* 2. Calculate rectangle sizes */
../clblas/src/library/blas/generic/solution_seq_make.c:935: stepSize.y = (size_t)(size.y * (double)nrCU / totalCUs + 0.5);
../clblas/src/library/blas/generic/solution_seq_make.c:950: stepSize.x = (size_t)(size.x * (double)nrCU / totalCUs + 0.5);
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:362: C.d[i] = 0.0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:363: C_naive.d[i] = 0.0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:374: C.f[i] = 0.0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:375: C_naive.f[i] = 0.0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:484: tmp.d2.s[0] = C_naive.d[(i * N + j) * 2] * alpha.d2.s[0] -
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:485: C_naive.d[(i * N + j) * 2 + 1] * alpha.d2.s[1];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:486: tmp.d2.s[1] = C_naive.d[(i * N + j) * 2] * alpha.d2.s[1] +
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:487: C_naive.d[(i * N + j) * 2 + 1] * alpha.d2.s[0];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:488: C_naive.d[(i * N + j) * 2] = tmp.d2.s[0];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:489: C_naive.d[(i * N + j) * 2 + 1] = tmp.d2.s[1];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:542: tmp.f2.s[0] = C_naive.f[(i * N + j) * 2] * alpha.f2.s[0] -
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:543: C_naive.f[(i * N + j) * 2 + 1] * alpha.f2.s[1];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:544: tmp.f2.s[1] = C_naive.f[(i * N + j) * 2] * alpha.f2.s[1] +
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:545: C_naive.f[(i * N + j) * 2 + 1] * alpha.f2.s[0];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:546: C_naive.f[(i * N + j) * 2] = tmp.f2.s[0];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:547: C_naive.f[(i * N + j) * 2 + 1] = tmp.f2.s[1];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:634: alpha.f2.s[0] = 1;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:635: alpha.f2.s[1] = 0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:639: alpha.d2.s[0] = 1;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:640: alpha.d2.s[1] = 0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:703: alpha.f2.s[0] = cmdAlpha;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:704: alpha.f2.s[1] = -cmdAlpha / 2;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:707: alpha.d2.s[0] = cmdAlpha;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:708: alpha.d2.s[1] = -cmdAlpha / 2;
../clblas/src/library/blas/gens/tests/t_tilemul.c:80: "types. Default is 1.\n"
../clblas/src/library/blas/gens/tests/t_tilemul.c:571: C.d[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:572: C_naive.d[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:583: C.f[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:584: C_naive.f[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:598: C.d[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:599: C_naive.d[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:610: C.f[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:611: C_naive.f[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:699: val.s[0] = C_naive.d2[i * N + j].s[0] * alpha.d2.s[0] -
../clblas/src/library/blas/gens/tests/t_tilemul.c:700: C_naive.d2[i * N + j].s[1] * alpha.d2.s[1];
../clblas/src/library/blas/gens/tests/t_tilemul.c:701: val.s[1] = C_naive.d2[i * N + j].s[0] * alpha.d2.s[1] +
../clblas/src/library/blas/gens/tests/t_tilemul.c:702: C_naive.d2[i * N + j].s[1] * alpha.d2.s[0];
../clblas/src/library/blas/gens/tests/t_tilemul.c:744: val.s[0] = C_naive.f2[i * N + j].s[0] * alpha.f2.s[0] -
../clblas/src/library/blas/gens/tests/t_tilemul.c:745: C_naive.f2[i * N + j].s[1] * alpha.f2.s[1];
../clblas/src/library/blas/gens/tests/t_tilemul.c:746: val.s[1] = C_naive.f2[i * N + j].s[0] * alpha.f2.s[1] +
../clblas/src/library/blas/gens/tests/t_tilemul.c:747: C_naive.f2[i * N + j].s[1] * alpha.f2.s[0];
../clblas/src/library/blas/gens/tests/t_tilemul.c:784: printf("(%4.1f, %4.1f) ", aik.s[0], aik.s[1]);
../clblas/src/library/blas/gens/tests/t_tilemul.c:789: printf("%4.1f ", aik);
../clblas/src/library/blas/gens/tests/t_tilemul.c:801: printf("(%4.1f, %4.1f) ", bkj.s[0], bkj.s[1]);
../clblas/src/library/blas/gens/tests/t_tilemul.c:806: printf("%4.1f ", bkj);
../clblas/src/library/blas/gens/tests/t_tilemul.c:816: printf("(%4.1f, %4.1f) ",
../clblas/src/library/blas/gens/tests/t_tilemul.c:821: printf("%4.1f ", C_naive.f[i * N + j]);
../clblas/src/library/blas/gens/tests/t_tilemul.c:831: printf("(%4.1f, %4.1f) ",
../clblas/src/library/blas/gens/tests/t_tilemul.c:835: printf("%4.1f ", C.f[i * N + j]);
../clblas/src/library/blas/gens/tests/t_tilemul.c:952: alpha.f2.s[0] = 1;
../clblas/src/library/blas/gens/tests/t_tilemul.c:953: alpha.f2.s[1] = 0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:957: alpha.d2.s[0] = 1;
../clblas/src/library/blas/gens/tests/t_tilemul.c:958: alpha.d2.s[1] = 0;
More information about the debian-science-maintainers mailing list