[Pkg-opencl-devel] [beignet] 45/66: Imported Upstream version 0.2+git20130710+613e829
Andreas Beckmann
anbe at moszumanska.debian.org
Fri Oct 31 07:27:07 UTC 2014
This is an automated email from the git hooks/post-receive script.
anbe pushed a commit to branch master
in repository beignet.
commit 00202174a6b5a23e3c4157b204fe0dac1f8d1f9b
Author: Simon Richter <sjr at debian.org>
Date: Wed Jul 10 15:25:14 2013 +0200
Imported Upstream version 0.2+git20130710+613e829
---
backend/src/llvm/llvm_gen_backend.cpp | 17 +++++++++++
backend/src/ocl_stdlib.h | 53 +++++++++++++++++++++++++++++++++++
kernels/compiler_degrees.cl | 4 +++
kernels/compiler_global_constant.cl | 3 +-
kernels/compiler_mad24.cl | 4 +++
kernels/compiler_mul24.cl | 4 +++
kernels/compiler_radians.cl | 4 +++
src/cl_gt_device.h | 2 +-
utests/CMakeLists.txt | 4 +++
utests/compiler_degrees.cpp | 32 +++++++++++++++++++++
utests/compiler_mad24.cpp | 41 +++++++++++++++++++++++++++
utests/compiler_mul24.cpp | 36 ++++++++++++++++++++++++
utests/compiler_radians.cpp | 32 +++++++++++++++++++++
13 files changed, 234 insertions(+), 2 deletions(-)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index cebe0f4..c8c5484 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -818,6 +818,23 @@ namespace gbe
uint16_t reg = unit.getConstantSet().getConstant(name).getReg();
return ir::Register(reg);
}
+ if (isa<ConstantExpr>(CPV)) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV);
+ GBE_ASSERT(CE->isGEPWithNoNotionalOverIndexing());
+ auto pointer = CE->getOperand(0);
+ auto offset1 = dyn_cast<ConstantInt>(CE->getOperand(1));
+ GBE_ASSERT(offset1->getZExtValue() == 0);
+ auto offset2 = dyn_cast<ConstantInt>(CE->getOperand(2));
+ int type_size = pointer->getType()->getTypeID() == Type::TypeID::DoubleTyID ? sizeof(double) : sizeof(int);
+ int type_offset = offset2->getSExtValue() * type_size;
+ auto pointer_name = pointer->getName().str();
+ ir::Register pointer_reg = ir::Register(unit.getConstantSet().getConstant(pointer_name).getReg());
+ ir::Register offset_reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+ ctx.LOADI(ir::Type::TYPE_S32, offset_reg, ctx.newIntegerImmediate(type_offset, ir::Type::TYPE_S32));
+ ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+ ctx.ADD(ir::Type::TYPE_S32, reg, pointer_reg, offset_reg);
+ return reg;
+ }
const ir::ImmediateIndex immIndex = this->newImmediate(CPV, elemID);
const ir::Immediate imm = ctx.getImmediate(immIndex);
const ir::Register reg = ctx.reg(getFamily(imm.type));
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 09f92d0..0c78c8e 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4408,6 +4408,46 @@ DEF(16)
#undef DEC8
#undef DEC16
+INLINE_OVERLOADABLE int mul24(int a, int b) { return ((a << 8) >> 8) * ((b << 8) >> 8); }
+INLINE_OVERLOADABLE uint mul24(uint a, uint b) { return (a & 0xFFFFFF) * (b & 0xFFFFFF); }
+#define DEC2(type) INLINE_OVERLOADABLE type##2 mul24(type##2 a, type##2 b) { return (type##2)(mul24(a.s0, b.s0), mul24(a.s1, b.s1)); }
+#define DEC3(type) INLINE_OVERLOADABLE type##3 mul24(type##3 a, type##3 b) { return (type##3)(mul24(a.s0, b.s0), mul24(a.s1, b.s1), mul24(a.s2, b.s2)); }
+#define DEC4(type) INLINE_OVERLOADABLE type##4 mul24(type##4 a, type##4 b) { return (type##4)(mul24(a.s0, b.s0), mul24(a.s1, b.s1), mul24(a.s2, b.s2), mul24(a.s3, b.s3)); }
+#define DEC8(type) INLINE_OVERLOADABLE type##8 mul24(type##8 a, type##8 b) { return (type##8)(mul24(a.s0, b.s0), mul24(a.s1, b.s1), mul24(a.s2, b.s2), mul24(a.s3, b.s3), mul24(a.s4, b.s4), mul24(a.s5, b.s5), mul24(a.s6, b.s6), mul24(a.s7, b.s7)); }
+#define DEC16(type) INLINE_OVERLOADABLE type##16 mul24(type##16 a, type##16 b) { return (type##16)(mul24(a.s0, b.s0), mul24(a.s1, b.s1), mul24(a.s2, b.s2), mul24(a.s3, b.s3), mul24(a.s4, b.s4), mul24(a.s5, b.s5), mul24(a.s6, b.s6), mul24(a.s7, b.s7), mul24(a.s8, b.s8), mul24(a.s9, b.s9), mul24(a.sa, b.sa), mul24(a.sb, b.sb), mul24(a.sc, b.sc), mul24(a.sd, b.sd), mul24(a.se, b.se), mul24(a.sf, b.sf)); }
+#define DEF(n) DEC##n(int); DEC##n(uint)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
+INLINE_OVERLOADABLE int mad24(int a, int b, int c) { return mul24(a, b) + c; }
+INLINE_OVERLOADABLE uint mad24(uint a, uint b, uint c) { return mul24(a, b) + c; }
+#define DEC2(type) INLINE_OVERLOADABLE type##2 mad24(type##2 a, type##2 b, type##2 c) { return (type##2)(mad24(a.s0, b.s0, c.s0), mad24(a.s1, b.s1, c.s1)); }
+#define DEC3(type) INLINE_OVERLOADABLE type##3 mad24(type##3 a, type##3 b, type##3 c) { return (type##3)(mad24(a.s0, b.s0, c.s0), mad24(a.s1, b.s1, c.s1), mad24(a.s2, b.s2, c.s2)); }
+#define DEC4(type) INLINE_OVERLOADABLE type##4 mad24(type##4 a, type##4 b, type##4 c) { return (type##4)(mad24(a.s0, b.s0, c.s0), mad24(a.s1, b.s1, c.s1), mad24(a.s2, b.s2, c.s2), mad24(a.s3, b.s3, c.s3)); }
+#define DEC8(type) INLINE_OVERLOADABLE type##8 mad24(type##8 a, type##8 b, type##8 c) { return (type##8)(mad24(a.s0, b.s0, c.s0), mad24(a.s1, b.s1, c.s1), mad24(a.s2, b.s2, c.s2), mad24(a.s3, b.s3, c.s3), mad24(a.s4, b.s4, c.s4), mad24(a.s5, b.s5, c.s5), mad24(a.s6, b.s6, c.s6), mad24(a.s7, b.s7, c.s7)); }
+#define DEC16(type) INLINE_OVERLOADABLE type##16 mad24(type##16 a, type##16 b, type##16 c) { return (type##16)(mad24(a.s0, b.s0, c.s0), mad24(a.s1, b.s1, c.s1), mad24(a.s2, b.s2, c.s2), mad24(a.s3, b.s3, c.s3), mad24(a.s4, b.s4, c.s4), mad24(a.s5, b.s5, c.s5), mad24(a.s6, b.s6, c.s6), mad24(a.s7, b.s7, c.s7), mad24(a.s8, b.s8, c.s8), mad24(a.s9, b.s9, c.s9), mad24(a.sa, b.sa, c.sa), mad24(a.sb, b.sb, c.sb), mad24(a.sc, b.sc, c.sc), mad24(a.sd, b.sd, c.sd), mad24(a.se, b.se, c.se), mad24(a.sf, b.sf, c.sf)); }
+#define DEF(n) DEC##n(int); DEC##n(uint)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
INLINE_OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
INLINE_OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); }
INLINE_OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); }
@@ -5019,6 +5059,19 @@ DECL_MIN_MAX_CLAMP(unsigned short)
DECL_MIN_MAX_CLAMP(unsigned char)
#undef DECL_MIN_MAX_CLAMP
+INLINE_OVERLOADABLE float degrees(float radians) { return (180 / M_PI_F) * radians; }
+INLINE_OVERLOADABLE float2 degrees(float2 r) { return (float2)(degrees(r.s0), degrees(r.s1)); }
+INLINE_OVERLOADABLE float3 degrees(float3 r) { return (float3)(degrees(r.s0), degrees(r.s1), degrees(r.s2)); }
+INLINE_OVERLOADABLE float4 degrees(float4 r) { return (float4)(degrees(r.s0), degrees(r.s1), degrees(r.s2), degrees(r.s3)); }
+INLINE_OVERLOADABLE float8 degrees(float8 r) { return (float8)(degrees(r.s0), degrees(r.s1), degrees(r.s2), degrees(r.s3), degrees(r.s4), degrees(r.s5), degrees(r.s6), degrees(r.s7)); }
+INLINE_OVERLOADABLE float16 degrees(float16 r) { return (float16)(degrees(r.s0), degrees(r.s1), degrees(r.s2), degrees(r.s3), degrees(r.s4), degrees(r.s5), degrees(r.s6), degrees(r.s7), degrees(r.s8), degrees(r.s9), degrees(r.sa), degrees(r.sb), degrees(r.sc), degrees(r.sd), degrees(r.se), degrees(r.sf)); }
+INLINE_OVERLOADABLE float radians(float degrees) { return (M_PI_F / 180) * degrees; }
+INLINE_OVERLOADABLE float2 radians(float2 r) { return (float2)(radians(r.s0), radians(r.s1)); }
+INLINE_OVERLOADABLE float3 radians(float3 r) { return (float3)(radians(r.s0), radians(r.s1), radians(r.s2)); }
+INLINE_OVERLOADABLE float4 radians(float4 r) { return (float4)(radians(r.s0), radians(r.s1), radians(r.s2), radians(r.s3)); }
+INLINE_OVERLOADABLE float8 radians(float8 r) { return (float8)(radians(r.s0), radians(r.s1), radians(r.s2), radians(r.s3), radians(r.s4), radians(r.s5), radians(r.s6), radians(r.s7)); }
+INLINE_OVERLOADABLE float16 radians(float16 r) { return (float16)(radians(r.s0), radians(r.s1), radians(r.s2), radians(r.s3), radians(r.s4), radians(r.s5), radians(r.s6), radians(r.s7), radians(r.s8), radians(r.s9), radians(r.sa), radians(r.sb), radians(r.sc), radians(r.sd), radians(r.se), radians(r.sf)); }
+
INLINE_OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) { return max(a,b); }
INLINE_OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) { return min(a,b); }
INLINE_OVERLOADABLE float __gen_ocl_internal_maxmag(float x, float y) {
diff --git a/kernels/compiler_degrees.cl b/kernels/compiler_degrees.cl
new file mode 100644
index 0000000..5fad995
--- /dev/null
+++ b/kernels/compiler_degrees.cl
@@ -0,0 +1,4 @@
+kernel void compiler_degrees(global float *src, global float *dst) {
+ int i = get_global_id(0);
+ dst[i] = degrees(src[i]);
+}
diff --git a/kernels/compiler_global_constant.cl b/kernels/compiler_global_constant.cl
index 5e2e0b4..5db58d6 100644
--- a/kernels/compiler_global_constant.cl
+++ b/kernels/compiler_global_constant.cl
@@ -1,9 +1,10 @@
constant int m[3] = {71,72,73};
constant int n = 1;
+constant int o[3] = {1, 1, 1};
__kernel void
compiler_global_constant(__global int *dst, int e, int r)
{
int id = (int)get_global_id(0);
- dst[id] = m[id%3] * n + e + r;
+ dst[id] = m[id%3] * n * o[2] + e + r;
}
diff --git a/kernels/compiler_mad24.cl b/kernels/compiler_mad24.cl
new file mode 100644
index 0000000..04bb2c5
--- /dev/null
+++ b/kernels/compiler_mad24.cl
@@ -0,0 +1,4 @@
+kernel void compiler_mad24(global int *src1, global int *src2, global int *src3, global int *dst) {
+ int i = get_global_id(0);
+ dst[i] = mad24(src1[i], src2[i], src3[i]);
+}
diff --git a/kernels/compiler_mul24.cl b/kernels/compiler_mul24.cl
new file mode 100644
index 0000000..b69dda0
--- /dev/null
+++ b/kernels/compiler_mul24.cl
@@ -0,0 +1,4 @@
+kernel void compiler_mul24(global int *src1, global int *src2, global int *dst) {
+ int i = get_global_id(0);
+ dst[i] = mul24(src1[i], src2[i]);
+}
diff --git a/kernels/compiler_radians.cl b/kernels/compiler_radians.cl
new file mode 100644
index 0000000..1f79481
--- /dev/null
+++ b/kernels/compiler_radians.cl
@@ -0,0 +1,4 @@
+kernel void compiler_radians(global float *src, global float *dst) {
+ int i = get_global_id(0);
+ dst[i] = radians(src[i]);
+}
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index a535452..8fe863a 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -70,7 +70,7 @@ DECL_INFO_STRING(name, "Intel HD Graphics Family")
DECL_INFO_STRING(vendor, "Intel")
DECL_INFO_STRING(version, OCL_VERSION_STRING)
DECL_INFO_STRING(profile, "FULL_PROFILE")
-DECL_INFO_STRING(opencl_c_version, "OpenCL 1.10")
+DECL_INFO_STRING(opencl_c_version, "OpenCL C 1.1")
DECL_INFO_STRING(extensions, "")
DECL_INFO_STRING(built_in_kernels, "")
DECL_INFO_STRING(driver_version, LIBCL_VERSION_STRING)
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index fafacb5..621acad 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -30,6 +30,7 @@ set (utests_sources
compiler_copy_image.cpp
compiler_copy_image_3d.cpp
compiler_copy_buffer_row.cpp
+ compiler_degrees.cpp
compiler_step.cpp
compiler_fabs.cpp
compiler_abs.cpp
@@ -56,7 +57,10 @@ set (utests_sources
compiler_lower_return2.cpp
compiler_mad_hi.cpp
compiler_mul_hi.cpp
+ compiler_mad24.cpp
+ compiler_mul24.cpp
compiler_multiple_kernels.cpp
+ compiler_radians.cpp
compiler_rhadd.cpp
compiler_rotate.cpp
compiler_saturate.cpp
diff --git a/utests/compiler_degrees.cpp b/utests/compiler_degrees.cpp
new file mode 100644
index 0000000..7a17ca7
--- /dev/null
+++ b/utests/compiler_degrees.cpp
@@ -0,0 +1,32 @@
+#include "utest_helper.hpp"
+
+void compiler_degrees(void)
+{
+ const int n = 32;
+ float src[n];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_degrees");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(0);
+ for (int i = 0; i < n; ++i) {
+ src[i] = ((float *)buf_data[0])[i] = rand() * 0.01f;
+ }
+ OCL_UNMAP_BUFFER(0);
+
+ OCL_NDRANGE(1);
+
+ OCL_MAP_BUFFER(1);
+ for (int i = 0; i < n; ++i) {
+ OCL_ASSERT(((float *)buf_data[1])[i] == src[i] * (180 / 3.141592653589793F));
+ }
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_degrees);
diff --git a/utests/compiler_mad24.cpp b/utests/compiler_mad24.cpp
new file mode 100644
index 0000000..a3890a1
--- /dev/null
+++ b/utests/compiler_mad24.cpp
@@ -0,0 +1,41 @@
+#include "utest_helper.hpp"
+
+void compiler_mad24(void)
+{
+ const int n = 32;
+ int src1[n], src2[n], src3[n];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_mad24");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
+ OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ OCL_MAP_BUFFER(2);
+ for (int i = 0; i < n; ++i) {
+ src1[i] = ((int*)buf_data[0])[i] = rand();
+ src2[i] = ((int*)buf_data[1])[i] = rand();
+ src3[i] = ((int*)buf_data[2])[i] = rand();
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_UNMAP_BUFFER(2);
+
+ OCL_NDRANGE(1);
+
+ OCL_MAP_BUFFER(3);
+ for (int i = 0; i < n; ++i)
+ OCL_ASSERT(((int*)buf_data[3])[i] == ((src1[i] << 8) >> 8) * ((src2[i] << 8) >> 8) + src3[i]);
+ OCL_UNMAP_BUFFER(3);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_mad24);
diff --git a/utests/compiler_mul24.cpp b/utests/compiler_mul24.cpp
new file mode 100644
index 0000000..8a36947
--- /dev/null
+++ b/utests/compiler_mul24.cpp
@@ -0,0 +1,36 @@
+#include "utest_helper.hpp"
+
+void compiler_mul24(void)
+{
+ const int n = 32;
+ int src1[n], src2[n];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_mul24");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ for (int i = 0; i < n; ++i) {
+ src1[i] = ((int*)buf_data[0])[i] = rand();
+ src2[i] = ((int*)buf_data[1])[i] = rand();
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+
+ OCL_NDRANGE(1);
+
+ OCL_MAP_BUFFER(2);
+ for (int i = 0; i < n; ++i)
+ OCL_ASSERT(((int*)buf_data[2])[i] == ((src1[i] << 8) >> 8) * ((src2[i] << 8) >> 8));
+ OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_mul24);
diff --git a/utests/compiler_radians.cpp b/utests/compiler_radians.cpp
new file mode 100644
index 0000000..882477e
--- /dev/null
+++ b/utests/compiler_radians.cpp
@@ -0,0 +1,32 @@
+#include "utest_helper.hpp"
+
+void compiler_radians(void)
+{
+ const int n = 32;
+ float src[n];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_radians");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(0);
+ for (int i = 0; i < n; ++i) {
+ src[i] = ((float *)buf_data[0])[i] = rand() * 0.01f;
+ }
+ OCL_UNMAP_BUFFER(0);
+
+ OCL_NDRANGE(1);
+
+ OCL_MAP_BUFFER(1);
+ for (int i = 0; i < n; ++i) {
+ OCL_ASSERT(((float *)buf_data[1])[i] == src[i] * (3.141592653589793F / 180));
+ }
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_radians);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git
More information about the Pkg-opencl-devel
mailing list