Bug#877316: #877316: clblas: Crashes on single-precision-only hardware, due to double-precision literals

Rebecca N. Palmer rebecca_palmer at zoho.com
Sun Jan 27 17:54:47 GMT 2019


The attached patch fixes this issue in sgemm, which is enough to pass 
the libgpuarray tests.  (This is the only testing it's had, as the 
clblas package doesn't appear to run its own tests.)

I don't know if any other operations (that libgpuarray doesn't use) are
affected, but there are suspicious-looking instances in at least her/her2
and rotg/rotmg; see the attached list.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 877316.patch
Type: text/x-diff
Size: 3103 bytes
Desc: not available
URL: <http://alioth-lists.debian.net/pipermail/debian-science-maintainers/attachments/20190127/cfb307d9/attachment-0001.patch>
-------------- next part --------------
$ grep -rniE -e "[0-9]\.[0-9]*[^f]" ../clblas/src/library
followed by some manual editing of the output:

../clblas/src/library/blas/gens/clTemplates/dtrsm_gpu.cl:14:#define ZERO              ( 0.0)
../clblas/src/library/blas/gens/clTemplates/dtrsm_gpu.cl:15:#define ONE               ( 1.0)
../clblas/src/library/blas/gens/clTemplates/her2.cl:18://NOTE: THIS FILE IS NOT USED. SEE SYR2_HER2.CLT
../clblas/src/library/blas/gens/clTemplates/her2.cl:50:	if( (alpha.even == 0.0) && (alpha.odd == 0.0) )
../clblas/src/library/blas/gens/clTemplates/her2.cl:156: /* HER2 defn: On output, if alpha not equal to 0.0, then imaginary part of A is set to zero. */
../clblas/src/library/blas/gens/clTemplates/her2.cl:158:			res2.odd = (r == c) ? 0.0 : res2.odd;
../clblas/src/library/blas/gens/clTemplates/her2.cl:372:	if( (alpha.even == 0.0) && (alpha.odd == 0.0) )
../clblas/src/library/blas/gens/clTemplates/her2.cl:479: /* HER2 defn: On output, if alpha not equal to 0.0, then imaginary part of A is set to zero. */
../clblas/src/library/blas/gens/clTemplates/her2.cl:481:			res2.odd = (r == c) ? 0.0 : res2.odd;
../clblas/src/library/blas/gens/clTemplates/her.cl:129:	these values. On output, if alpha not equal to 0.0, they are set to zero. */
../clblas/src/library/blas/gens/clTemplates/her.cl:131:			res.odd = ((r == c) && (alpha != 0.0)) ? 0.0 : res.odd;
../clblas/src/library/blas/gens/clTemplates/her.cl:384:	these values. On output, if alpha not equal to 0.0, they are set to zero. */
../clblas/src/library/blas/gens/clTemplates/her.cl:386:            res.odd = ((r == c) && (alpha != 0.0)) ? 0.0 : res.odd;
../clblas/src/library/blas/gens/clTemplates/rotg.cl:28:#define ZERO (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/rotg.cl:29:#define PZERO (%PTYPE)0.0
../clblas/src/library/blas/gens/clTemplates/rotg.cl:56:	            Creg = 1.0;
../clblas/src/library/blas/gens/clTemplates/rotg.cl:71:	                    ( (isnotequal(Creg, ZERO))? (1.0/Creg): 1.0 );
../clblas/src/library/blas/gens/clTemplates/rotg.cl:87:	            Sreg = (%TYPE)(1.0, 0.0);
../clblas/src/library/blas/gens/clTemplates/asum.cl:29:    %TYPE asum = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/asum.cl:33:            scratchBuff[0] = (%PTYPE)0.0;
../clblas/src/library/blas/gens/clTemplates/rotm.cl:28:#define ZERO    (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/rotm.cl:29:#define ONE     (%TYPE)1.0
../clblas/src/library/blas/gens/clTemplates/rotm.cl:30:#define TWO     (%TYPE)2.0
../clblas/src/library/blas/gens/clTemplates/dtrsm_gpu192.cl:8:#define ZERO              ( 0.0)
../clblas/src/library/blas/gens/clTemplates/dtrsm_gpu192.cl:9:#define ONE               ( 1.0)



../clblas/src/library/blas/gens/clTemplates/trmv.cl:98:		//float acc = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:99:		%TYPE acc 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:100:		%TYPE accTemp 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:128:				    // accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:148:		%TYPE sumTemp= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:209:			//float acc = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:210:			%TYPE acc 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:211:			%TYPE accTemp 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:248:            	        //accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:349:		//float acc = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:350:		%TYPE acc 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:351:		%TYPE accTemp 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:378:                    //accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:398:		%TYPE sumTemp= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:460:			//float acc = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:461:			%TYPE acc 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:462:			%TYPE accTemp 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:497:                        //accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:590:	%TYPE accTemp= %INIT( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:637:	%TYPE sum 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:638:	%TYPE loadedA 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:678:            	    //loadedA.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:775:			%TYPE sum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:776:			%TYPE accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:802:                	    //accTemp.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/trmv.cl:823:		// float4 acc = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:824:		%TYPE accTemp = %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:871:		//acc = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:872:		%TYPE sum 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:873:		%TYPE loadedA 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trmv.cl:910:                        //loadedA.odd = 0.0f;
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:68:        sum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:69:        localRed[ lId ] = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:109:                        reg1.odd = 0.0;                 // Imaginary part of diagonal is assumed to be zero
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:151:        sum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:225:    %TYPE thrSum = %MAKEVEC(0.0); //Private sum for each thread
../clblas/src/library/blas/gens/clTemplates/gbmv.cl:273:        %TYPE tempSum = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/dot.cl:33:    %TYPE dotP = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/reduction.cl:33:    %TYPE redVal = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/reduction.cl:168:    #define MIN 0x1.0p-1022         // Min in case of d/z (values from khronos site)
../clblas/src/library/blas/gens/clTemplates/reduction.cl:170:    #define MIN 0x1.0p-126f         // Min in case od s/c
../clblas/src/library/blas/gens/clTemplates/reduction.cl:237:    %TYPE redVal = (%TYPE) 0.0;
../clblas/src/library/blas/gens/clTemplates/reduction.cl:275:#define ZERO (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/reduction.cl:317:    %TYPE ssq = (%TYPE) 0.0;

../clblas/src/library/blas/gens/clTemplates/trsv.cl:63:    %TYPE sum     = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:64:    %TYPE xVal    = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:65:    %TYPE loadedA     = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:164:    %TYPE sum     = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:165:    %TYPE xVal    = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:166:    %TYPE loadedA     = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:286:        %TYPE diagA = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv.cl:393:        %TYPE diagA = %INIT(0.0);

../clblas/src/library/blas/gens/clTemplates/syr2_her2.cl:175:				res2.odd = (r == c) ? 0.0 : res2.odd;

../clblas/src/library/blas/gens/clTemplates/syr2_her2.cl:541:				res2.odd = (r == c) ? 0.0 : res2.odd;

../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:54:	%TYPE sum 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:55:	%TYPE loadedA 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:138:		%TYPE   sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:179:			%TYPE sumTemp 	    = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:290:				%TYPE   accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:354:	%TYPE sum 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:355:	%TYPE loadedA 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:443:		%TYPE   sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:484:			%TYPE sumTemp 	    = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:602:				%TYPE   accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:668:	%TYPE sum 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:669:	%TYPE loadedA 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:764:		%TYPE   sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:805:			%TYPE sumTemp 	    = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:914:				%TYPE   accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:977:	%TYPE sum 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:978:	%TYPE loadedA 	= %MAKEVEC( 0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1066:		%TYPE   sumTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1107:			%TYPE sumTemp 	    = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1223:				%TYPE   accTemp = %MAKEVEC(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1346:	accTemp = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1366:		sacc = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1451:	accTemp = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/trsv_gemv.cl:1471:		sacc = %INIT(0.0);
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:38:            scratchBuff[0] = (%PTYPE)0.0;
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:44:    %TYPE%V res = (%TYPE%V) 0.0;
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:74:        nrm2_ptype = hypot( nrm2.even, nrm2.odd );
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:98:#define PZERO (%PTYPE)0.0
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:99:#define ZERO (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/nrm2.cl:100:#define VZERO (%TYPE%V)0.0


../clblas/src/library/blas/gens/clTemplates/syr_her.cl:48:	if(alpha == 0.0)
../clblas/src/library/blas/gens/clTemplates/syr_her.cl:143:				res.odd = (r == c) ? 0.0 : res.odd;
../clblas/src/library/blas/gens/clTemplates/syr_her.cl:328:	if(alpha == 0.0)
../clblas/src/library/blas/gens/clTemplates/syr_her.cl:422:				res.odd = (r == c) ? 0.0 : res.odd;
../clblas/src/library/blas/gens/clTemplates/iamax.cl:52:            scratchBufVal[0] = (%PTYPE)0.0;

../clblas/src/library/blas/gens/clTemplates/rotmg.cl:29:#define ZERO    (%TYPE)0.0
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:30:#define ONE     (%TYPE)1.0
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:31:#define TWO     (%TYPE)2.0
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:33:#define GAM     (%TYPE)4096.0
../clblas/src/library/blas/gens/clTemplates/rotmg.cl:35:#define RGAMSQ  (%TYPE)( 1.0 / GAMSQ )
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:289:        plB[0]  = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:290:        plB[16] = CurrentOffSetB+16>=N?0.0:B[16];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:292:	    plA[0]  = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:293:        plA[16] = CurrentOffSetA+16>=M?0.0:A[16];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:634:        plB[0]  = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:635:        plB[16] = CurrentOffSetB+16>=N?0.0:B[16*ldb];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:637:	    plA[0]  = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:638:        plA[16] = CurrentOffSetA+16>=M?0.0:A[16];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:976:      plB[0]  = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:977:      plB[16] = CurrentOffSetB+16>=N?0.0:B[16*ldb];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:979:      plA[0]  = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/gens/clTemplates/sgemm_gcn_SmallMatrices.cl:980:      plA[16] = CurrentOffSetA+16>=M?0.0:A[16*lda];


../clblas/src/library/blas/gens/legacy/trsm_img.c:337:        kgenAddStmt(ctx, "*y = trunc((-0.5 + sqrt(2.0 * n + 0.25)));\n");
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:45:            revAlp = "div((double2)(-1., 0), alpha)";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:46:            alp = "(double2)(1., 0)";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:50:        revAlp = "-1. / alpha";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:51:        alp = "1.";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:111:            alp = "(double2)(1., 0)";
../clblas/src/library/blas/gens/legacy/trsm_kgen_legacy.c:115:        alp = "1.";


../clblas/src/library/blas/gens/kprintf.cpp:1619:        numCharsWritten = sprintf(dst,"%s.odd = 0.0f", id1);
../clblas/src/library/blas/gens/kprintf.cpp:1951:                                        numCharsWritten = sprintf(dst, "\t %s = (isnotequal(%s, (%s)0.0))?\n", p3, p2, (get("%PTYPE").value));
../clblas/src/library/blas/gens/kprintf.cpp:2042:                                        numCharsWritten = sprintf(dst, "\t %s = (isnotequal(%s, (%s)0.0))?\n", p3, p2, (get("%PTYPE").value));


../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:37:    return static_cast<T>(0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:44:    return floatComplex(0.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:51:    return doubleComplex(0.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:58:    return static_cast<T>(1.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:65:    return floatComplex(1.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:72:    return doubleComplex(1.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:79:    return static_cast<T>(2.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:86:    return floatComplex(2.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:93:    return doubleComplex(2.0, 0.0);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:362:    if ((CREAL(a) == 0.0) && (CIMAG(a) == 0.0))
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:363:        return 0.0;
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:370:    if ((CREAL(a) == 0.0) && (CIMAG(a) == 0.0))
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:371:        return 0.0;
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:375:#define FLOAT_UPPER_BOUND   pow(2.0, 23)
../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h:376:#define DOUBLE_UPPER_BOUND  pow(2.0, 52)


../clblas/src/library/blas/AutoGemm/AutoGemmTools/AutoGemmPreCompileKernels.cpp:824:	  beta = 1.0;
../clblas/src/library/blas/xher2k.c:137:            CIMAG( kargs->alpha.argFloatComplex ) *= -1.0;
../clblas/src/library/blas/xher2k.c:138:            CREAL( kargs->beta.argFloatComplex ) = 1.0;
../clblas/src/library/blas/xher2k.c:139:            CIMAG( kargs->beta.argFloatComplex ) = 0.0;
../clblas/src/library/blas/xher2k.c:143:            CIMAG( kargs->alpha.argDoubleComplex ) *= -1.0;
../clblas/src/library/blas/xher2k.c:144:            CREAL( kargs->beta.argDoubleComplex ) = 1.0;
../clblas/src/library/blas/xher2k.c:145:            CIMAG( kargs->beta.argDoubleComplex ) = 0.0;
../clblas/src/library/blas/xher2k.c:184:    CIMAG(fBeta)  = 0.0f;
../clblas/src/library/blas/xher2k.c:192:        CIMAG( kargs.alpha.argFloatComplex ) *= -1.0;
../clblas/src/library/blas/xher2k.c:230:    CIMAG(fBeta)  = 0.0f;
../clblas/src/library/blas/xher2k.c:239:        CIMAG( kargs.alpha.argDoubleComplex ) *= -1.0;
../clblas/src/library/blas/xherk.c:158:    CIMAG(fAlpha) = 0.0f;
../clblas/src/library/blas/xherk.c:160:    CIMAG(fBeta)  = 0.0f;
../clblas/src/library/blas/xherk.c:198:    CIMAG(fAlpha) = 0.0f;
../clblas/src/library/blas/xherk.c:200:    CIMAG(fBeta)  = 0.0f;




../clblas/src/library/blas/functor/gpu_dtrsm.cc:361:  double zero = 0.0 ; 
../clblas/src/library/blas/functor/gpu_dtrsm.cc:409:  double neg_one = -1.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm.cc:410:  double one     =  1.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm.cc:411:  double zero    =  0.0 ; 
../clblas/src/library/blas/functor/functor_xscal.cc:318:  CIMAG(fAlpha) = 0.0f;
../clblas/src/library/blas/functor/functor_xscal.cc:372:  CIMAG(fAlpha) = 0.0f;


../clblas/src/library/blas/functor/gpu_dtrsm192.cc:320:  double zero = 0.0 ; 
../clblas/src/library/blas/functor/gpu_dtrsm192.cc:368:  double neg_one = -1.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm192.cc:369:  double one     =  1.0 ;
../clblas/src/library/blas/functor/gpu_dtrsm192.cc:370:  double zero    =  0.0 ; 

../clblas/src/library/blas/xtbsv.c:699:    kargs.alpha.argFloat = -1.0;
../clblas/src/library/blas/xtbsv.c:700:    kargs.beta.argFloat = 1.0;
../clblas/src/library/blas/xtbsv.c:734:    kargs.alpha.argDouble = -1.0;
../clblas/src/library/blas/xtbsv.c:735:    kargs.beta.argDouble = 1.0;
../clblas/src/library/blas/xtbsv.c:771:    CREAL(alpha) = -1.0;
../clblas/src/library/blas/xtbsv.c:772:    CIMAG(alpha) = 0.0;
../clblas/src/library/blas/xtbsv.c:773:    CREAL(beta) = 1.0;
../clblas/src/library/blas/xtbsv.c:774:    CIMAG(beta) = 0.0;
../clblas/src/library/blas/xtbsv.c:813:    CREAL(alpha) = -1.0;
../clblas/src/library/blas/xtbsv.c:814:    CIMAG(alpha) = 0.0;
../clblas/src/library/blas/xtbsv.c:815:    CREAL(beta) = 1.0;
../clblas/src/library/blas/xtbsv.c:816:    CIMAG(beta) = 0.0;
../clblas/src/library/blas/xsymm.c:234:            GEMMTArgs.beta.argFloat = 1.0f;
../clblas/src/library/blas/xsymm.c:235:            GEMMDArgs.beta.argFloat = 1.0f;
../clblas/src/library/blas/xsymm.c:239:            GEMMTArgs.beta.argDouble = 1.0;
../clblas/src/library/blas/xsymm.c:240:            GEMMDArgs.beta.argDouble = 1.0;
../clblas/src/library/blas/xsymm.c:244:            CREAL(cBeta) = 1.0f;
../clblas/src/library/blas/xsymm.c:245:            CIMAG(cBeta) = 0.0f;
../clblas/src/library/blas/xsymm.c:251:            CREAL(zBeta) = 1.0;
../clblas/src/library/blas/xsymm.c:252:            CIMAG(zBeta) = 0.0;



../clblas/src/library/blas/gens/trsm.c:836:            revAlp = "div((double2)(-1., 0), alpha)";
../clblas/src/library/blas/gens/trsm.c:837:            alp = "(double2)(1., 0)";
../clblas/src/library/blas/gens/trsm.c:841:        revAlp = "-1. / alpha";
../clblas/src/library/blas/gens/trsm.c:842:        alp = "1.";



../clblas/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp:19:#define ZERO              ( 0.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp:20:#define ONE               ( 1.0) \n
../clblas/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp:44:#define ZERO              ( 0.0) \n
../clblas/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp:45:#define ONE               ( 1.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp:21:#define ZERO              ( 0.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp:22:#define ONE               ( 1.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp:19:#define ZERO              ( 0.0) \n
../clblas/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp:20:#define ONE               ( 1.0) \n
../clblas/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp:42:#define ZERO              ( 0.0) \n
../clblas/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp:43:#define ONE               ( 1.0) \n
../clblas/src/library/blas/xtrsm.cc:280:	double zero = 0.0;
../clblas/src/library/blas/xtrsm.cc:615:			double neg_one = -1.0;
../clblas/src/library/blas/xtrsm.cc:616:			double one = 1.0;
../clblas/src/library/blas/xtrsm.cc:617:			double zero = 0.0;
../clblas/src/library/blas/xtrsm.cc:1176:	double neg_one = -1.0;
../clblas/src/library/blas/xtrsm.cc:1177:	double one = 1.0;
../clblas/src/library/blas/xtrsm.cc:1178:	double zero = 0.0;

../clblas/src/library/common/devinfo-cache.c:57:    "    sum = (float4)(0.0);                                       \n"
../clblas/src/library/common/devinfo-cache.c:103:    "    sum = (float4)(0.0);                                       \n"

Done (fixed by the attached patch):

../clblas/src/library/blas/AutoGemm/KernelOpenCL.py:366:    zeroString = "(double2)(0.0, 0.0)"
../clblas/src/library/blas/AutoGemm/KernelOpenCL.py:368:    zeroString = "0.0"
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp:89:        plB[0]  = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp:90:        plB[16] = CurrentOffSetB+16>=N?0.0:B[16*ldb];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp:92:	    plA[0]  = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp:93:        plA[16] = CurrentOffSetA+16>=M?0.0:A[16];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp:88:      plB[0]  = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp:89:      plB[16] = CurrentOffSetB+16>=N?0.0:B[16*ldb];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp:91:      plA[0]  = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp:92:      plA[16] = CurrentOffSetA+16>=M?0.0:A[16*lda];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp:88:        plB[0]  = CurrentOffSetB>=N?0.0:B[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp:89:        plB[16] = CurrentOffSetB+16>=N?0.0:B[16];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp:91:	    plA[0]  = CurrentOffSetA>=M?0.0:A[0];
../clblas/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp:92:        plA[16] = CurrentOffSetA+16>=M?0.0:A[16];

Presumed (but not checked) not to be kernel code:

../clblas/src/library/common/tests/t_dblock_kgen.c:116:const float boundMarker = 5.0;
../clblas/src/library/common/tests/t_dblock_kgen.c:788:        imageWidth = fl4RowWidth(tdesc->dim.x * 3.5, tsize);

../clblas/src/library/tools/tune/dimension.c:91:            step = 2.8f * step;
../clblas/src/library/tools/tune/subdim.c:411:    const double K_INCREASE = 1.5;
../clblas/src/library/tools/tune/subdim.c:412:    const double K_GLOBAL = 0.97;
../clblas/src/library/tools/tune/subdim.c:480:    maxTime = fmax(2.1*midTime - sd->minTime,  sd->minTime*5);
../clblas/src/library/tools/tune/subdim.c:519:            double kgroup = 1.0;
../clblas/src/library/tools/tune/subdim.c:534:                        kgroup *= 1.1;
../clblas/src/library/tools/tune/subdim.c:548:        if (vi->time == 0 && vi->weight >= 0.01 ) {
../clblas/src/library/tools/tune/subdim.c:720:        sd->allVariant[i].minTime = 0.0;
../clblas/src/library/tools/tune/subdim.c:721:        sd->allVariant[i].probableTime = 0.0;
../clblas/src/library/tools/tune/subdim.c:722:        sd->allVariant[i].maxTime = 5000.0;

../clblas/src/library/tools/tune/storage_io.c:153:    else if (bParam->time > 10000.0) {

../clblas/src/library/tools/tune/tune.c:632:        args->alpha.argFloat = 1.0;
../clblas/src/library/tools/tune/tune.c:636:        args->alpha.argDouble = 1.0;
../clblas/src/library/tools/tune/tune.c:640:        args->alpha.argFloatComplex.s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:641:        args->alpha.argFloatComplex.s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:643:        args->beta.argFloatComplex.s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:647:        args->alpha.argDoubleComplex.s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:648:        args->alpha.argDoubleComplex.s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:650:        args->beta.argDoubleComplex.s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:851:        m->f[i] = 1.0;
../clblas/src/library/tools/tune/tune.c:891:                mi->A.d[i] = 1.0;
../clblas/src/library/tools/tune/tune.c:894:                mi->B.d[i] = 1.0;
../clblas/src/library/tools/tune/tune.c:900:                mi->A.f2[i].s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:901:                mi->A.f2[i].s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:904:                mi->B.f2[i].s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:905:                mi->B.f2[i].s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:911:                mi->A.d2[i].s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:912:                mi->A.d2[i].s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:915:                mi->B.d2[i].s[0] = 1.0;
../clblas/src/library/tools/tune/tune.c:916:                mi->B.d2[i].s[1] = 0.0;
../clblas/src/library/tools/tune/tune.c:1011:        if (fabs(t - oldt) < 0.0001) {
../clblas/src/library/tools/tune/tune.c:1893:    time[DIMARRAY_SMALL] = 5000.0;
../clblas/src/library/tools/tune/tune.c:1894:    time[DIMARRAY_MIDDLE] = 5000.0;
../clblas/src/library/tools/tune/tune.c:1895:    time[DIMARRAY_BIG] = 5000.0;
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:87:    return static_cast<T>(0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:94:    return floatComplex(0.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:101:    return doubleComplex(0.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:108:    return static_cast<T>(1.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:115:    return floatComplex(1.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:122:    return doubleComplex(1.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:129:    return static_cast<T>(2.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:136:    return floatComplex(2.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:143:    return doubleComplex(2.0, 0.0);
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:412:    if ((CREAL(a) == 0.0) && (CIMAG(a) == 0.0))
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:413:        return 0.0;
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:420:    if ((CREAL(a) == 0.0) && (CIMAG(a) == 0.0))
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:421:        return 0.0;
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:425:#define FLOAT_UPPER_BOUND   pow(2.0, 23)
../clblas/src/library/tools/ktest/naive/naive_blas.cpp:426:#define DOUBLE_UPPER_BOUND  pow(2.0, 52)



../clblas/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp:252:      rule.startSize = sqrt(M*N)+0.5;
../clblas/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp:472:const float peakGflops = 5.24e3; // sp for W9100
../clblas/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp:474:const float peakGflops = 2.62e3; // dp for W9100
../clblas/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp:844:    double gFlops = (1.0*totalFlops) / (1.0*totalNs);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:137:                printf("MISMATCH C[%u][%u]: gpu= %4.1f + %4.1fi, cpu= %4.1f + %4.1fi\n",
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:142:                printf("MISMATCH C[%u][%u]: gpu= %4.1f, cpu= %4.1f\n",
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:160:const float peakGflops = 5.24e3; // sp for W9100
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:162:const float peakGflops = 2.62e3; // dp for W9100
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:676:      double gFlops = (1.0*totalFlops) / (1.0*timeNs);
../clblas/src/library/blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp:677:      printf("%12llu flops in %12llu ns = %7.1f Gflop/s (%5.1f%% of peak)\n", totalFlops, timeNs, gFlops, 100*gFlops/peakGflops);

../clblas/src/library/blas/generic/solution_seq_make.c:771:    /* This implementation assumes that alignment is the power of 2. */
../clblas/src/library/blas/generic/solution_seq_make.c:820:                stepSize.y = (size_t)(size.y * (double)nrCU / totalCUs + 0.5);
../clblas/src/library/blas/generic/solution_seq_make.c:836:                stepSize.x = (size_t)(size.x * (double)nrCU / totalCUs + 0.5);
../clblas/src/library/blas/generic/solution_seq_make.c:883:     /* 1. Sort steps according to the number of CU they have */
../clblas/src/library/blas/generic/solution_seq_make.c:912:     /* 2. Calculate rectangle sizes */
../clblas/src/library/blas/generic/solution_seq_make.c:935:                 stepSize.y = (size_t)(size.y * (double)nrCU / totalCUs + 0.5);
../clblas/src/library/blas/generic/solution_seq_make.c:950:                 stepSize.x = (size_t)(size.x * (double)nrCU / totalCUs + 0.5);





../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:362:            C.d[i] = 0.0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:363:            C_naive.d[i] = 0.0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:374:            C.f[i] = 0.0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:375:            C_naive.f[i] = 0.0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:484:                    tmp.d2.s[0] = C_naive.d[(i * N + j) * 2] * alpha.d2.s[0] -
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:485:                                  C_naive.d[(i * N + j) * 2 + 1] * alpha.d2.s[1];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:486:                    tmp.d2.s[1] = C_naive.d[(i * N + j) * 2] * alpha.d2.s[1] +
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:487:                                  C_naive.d[(i * N + j) * 2 + 1] * alpha.d2.s[0];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:488:                    C_naive.d[(i * N + j) * 2] = tmp.d2.s[0];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:489:                    C_naive.d[(i * N + j) * 2 + 1] = tmp.d2.s[1];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:542:                    tmp.f2.s[0] = C_naive.f[(i * N + j) * 2] * alpha.f2.s[0] -
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:543:                                  C_naive.f[(i * N + j) * 2 + 1] * alpha.f2.s[1];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:544:                    tmp.f2.s[1] = C_naive.f[(i * N + j) * 2] * alpha.f2.s[1] +
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:545:                                  C_naive.f[(i * N + j) * 2 + 1] * alpha.f2.s[0];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:546:                    C_naive.f[(i * N + j) * 2] = tmp.f2.s[0];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:547:                    C_naive.f[(i * N + j) * 2 + 1] = tmp.f2.s[1];
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:634:        alpha.f2.s[0] = 1;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:635:        alpha.f2.s[1] = 0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:639:        alpha.d2.s[0] = 1;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:640:        alpha.d2.s[1] = 0;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:703:            alpha.f2.s[0] = cmdAlpha;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:704:            alpha.f2.s[1] = -cmdAlpha / 2;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:707:            alpha.d2.s[0] = cmdAlpha;
../clblas/src/library/blas/gens/legacy/tests/t_blkmul.c:708:            alpha.d2.s[1] = -cmdAlpha / 2;


../clblas/src/library/blas/gens/tests/t_tilemul.c:80:            "types. Default is 1.\n"
../clblas/src/library/blas/gens/tests/t_tilemul.c:571:            C.d[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:572:            C_naive.d[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:583:            C.f[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:584:            C_naive.f[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:598:            C.d[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:599:            C_naive.d[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:610:            C.f[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:611:            C_naive.f[i] = 0.0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:699:                    val.s[0] = C_naive.d2[i * N + j].s[0] * alpha.d2.s[0] -
../clblas/src/library/blas/gens/tests/t_tilemul.c:700:                            C_naive.d2[i * N + j].s[1] * alpha.d2.s[1];
../clblas/src/library/blas/gens/tests/t_tilemul.c:701:                    val.s[1] = C_naive.d2[i * N + j].s[0] * alpha.d2.s[1] +
../clblas/src/library/blas/gens/tests/t_tilemul.c:702:                            C_naive.d2[i * N + j].s[1] * alpha.d2.s[0];
../clblas/src/library/blas/gens/tests/t_tilemul.c:744:                    val.s[0] = C_naive.f2[i * N + j].s[0] * alpha.f2.s[0] -
../clblas/src/library/blas/gens/tests/t_tilemul.c:745:                            C_naive.f2[i * N + j].s[1] * alpha.f2.s[1];
../clblas/src/library/blas/gens/tests/t_tilemul.c:746:                    val.s[1] = C_naive.f2[i * N + j].s[0] * alpha.f2.s[1] +
../clblas/src/library/blas/gens/tests/t_tilemul.c:747:                            C_naive.f2[i * N + j].s[1] * alpha.f2.s[0];
../clblas/src/library/blas/gens/tests/t_tilemul.c:784:                        printf("(%4.1f, %4.1f) ", aik.s[0], aik.s[1]);
../clblas/src/library/blas/gens/tests/t_tilemul.c:789:                        printf("%4.1f ", aik);
../clblas/src/library/blas/gens/tests/t_tilemul.c:801:                        printf("(%4.1f, %4.1f) ", bkj.s[0], bkj.s[1]);
../clblas/src/library/blas/gens/tests/t_tilemul.c:806:                        printf("%4.1f ", bkj);
../clblas/src/library/blas/gens/tests/t_tilemul.c:816:                        printf("(%4.1f, %4.1f) ",
../clblas/src/library/blas/gens/tests/t_tilemul.c:821:                        printf("%4.1f ", C_naive.f[i * N + j]);
../clblas/src/library/blas/gens/tests/t_tilemul.c:831:                        printf("(%4.1f, %4.1f) ",
../clblas/src/library/blas/gens/tests/t_tilemul.c:835:                        printf("%4.1f ", C.f[i * N + j]);
../clblas/src/library/blas/gens/tests/t_tilemul.c:952:                alpha.f2.s[0] = 1;
../clblas/src/library/blas/gens/tests/t_tilemul.c:953:                alpha.f2.s[1] = 0;
../clblas/src/library/blas/gens/tests/t_tilemul.c:957:                alpha.d2.s[0] = 1;
../clblas/src/library/blas/gens/tests/t_tilemul.c:958:                alpha.d2.s[1] = 0;




More information about the debian-science-maintainers mailing list