[Pkg-opencl-devel] [beignet] 27/47: Imported Debian patch 0.2+git20130710+613e829-1
Andreas Beckmann
anbe at moszumanska.debian.org
Fri Oct 31 21:45:49 UTC 2014
This is an automated email from the git hooks/post-receive script.
anbe pushed a commit to branch master
in repository beignet.
commit de1333ecf9f1986e0b803bd5679f183fbb844b8e
Author: Simon Richter <sjr at debian.org>
Date: Wed Jul 10 15:25:14 2013 +0200
Imported Debian patch 0.2+git20130710+613e829-1
---
debian/changelog | 6 +
...0001-support-built-in-function-smoothstep.patch | 39 +++
debian/patches/0002-test-function-smoothstep.patch | 105 ++++++++
.../0003-support-built-in-function-bitselect.patch | 51 ++++
.../0004-test-built-in-function-bitselect.patch | 97 +++++++
.../0005-add-built-in-function-mad_sat.patch | 78 ++++++
debian/patches/0006-test-function-mad_sat.patch | 91 +++++++
debian/patches/0007-built-in-function-sign.patch | 44 ++++
.../patches/0008-test-built-in-function-sign.patch | 92 +++++++
...ectorial-built-in-functions-min-max-clamp.patch | 68 +++++
.../0010-improve-clCreateContext-conformance.patch | 87 ++++++
...-clEnqueueMapBuffer-and-clCreateBuffer-AP.patch | 293 +++++++++++++++++++++
debian/patches/0012-support-clGetImageInfo.patch | 115 ++++++++
.../0013-Add-vector-argument-test-case.patch | 75 ++++++
...d-OpenCL-1.2-definitions-required-for-ICD.patch | 97 +++++++
debian/patches/series | 14 +-
16 files changed, 1351 insertions(+), 1 deletion(-)
diff --git a/debian/changelog b/debian/changelog
index 2a809ab..6e17b80 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+beignet (0.2+git20130710+613e829-1) experimental; urgency=low
+
+ * New upstream release
+
+ -- Simon Richter <sjr at debian.org> Wed, 10 Jul 2013 15:25:14 +0200
+
beignet (0.2-2) unstable; urgency=low
* Patch OpenCL C version
diff --git a/debian/patches/0001-support-built-in-function-smoothstep.patch b/debian/patches/0001-support-built-in-function-smoothstep.patch
new file mode 100644
index 0000000..86886a1
--- /dev/null
+++ b/debian/patches/0001-support-built-in-function-smoothstep.patch
@@ -0,0 +1,39 @@
+From 0b85fd00b45b03468f950beacb9c6658f7187c5f Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Tue, 9 Jul 2013 09:41:58 +0800
+Subject: [PATCH 01/14] support built-in function "smoothstep"
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ backend/src/ocl_stdlib.h | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
+index 0c78c8e..ce03ed8 100644
+--- a/backend/src/ocl_stdlib.h
++++ b/backend/src/ocl_stdlib.h
+@@ -5072,6 +5072,11 @@ INLINE_OVERLOADABLE float4 radians(float4 r) { return (float4)(radians(r.s0), ra
+ INLINE_OVERLOADABLE float8 radians(float8 r) { return (float8)(radians(r.s0), radians(r.s1), radians(r.s2), radians(r.s3), radians(r.s4), radians(r.s5), radians(r.s6), radians(r.s7)); }
+ INLINE_OVERLOADABLE float16 radians(float16 r) { return (float16)(radians(r.s0), radians(r.s1), radians(r.s2), radians(r.s3), radians(r.s4), radians(r.s5), radians(r.s6), radians(r.s7), radians(r.s8), radians(r.s9), radians(r.sa), radians(r.sb), radians(r.sc), radians(r.sd), radians(r.se), radians(r.sf)); }
+
++INLINE_OVERLOADABLE float smoothstep(float e0, float e1, float x) {
++ x = clamp((x - e0) / (e1 - e0), 0.f, 1.f);
++ return x * x * (3 - 2 * x);
++}
++
+ INLINE_OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) { return max(a,b); }
+ INLINE_OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) { return min(a,b); }
+ INLINE_OVERLOADABLE float __gen_ocl_internal_maxmag(float x, float y) {
+@@ -5423,6 +5428,7 @@ DECL_VECTOR_2OP(rootn, float, int);
+ }
+ DECL_VECTOR_3OP(mad, float);
+ DECL_VECTOR_3OP(mix, float);
++DECL_VECTOR_3OP(smoothstep, float);
+ #undef DECL_VECTOR_3OP
+
+ // mix requires more variants
+--
+1.7.10.4
+
diff --git a/debian/patches/0002-test-function-smoothstep.patch b/debian/patches/0002-test-function-smoothstep.patch
new file mode 100644
index 0000000..9b68aaf
--- /dev/null
+++ b/debian/patches/0002-test-function-smoothstep.patch
@@ -0,0 +1,105 @@
+From 95927c71477fc34063a33d4511b9bba5bebd6971 Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Tue, 9 Jul 2013 09:41:59 +0800
+Subject: [PATCH 02/14] test function "smoothstep"
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ kernels/compiler_smoothstep.cl | 4 +++
+ utests/CMakeLists.txt | 1 +
+ utests/compiler_smoothstep.cpp | 58 ++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 63 insertions(+)
+ create mode 100644 kernels/compiler_smoothstep.cl
+ create mode 100644 utests/compiler_smoothstep.cpp
+
+diff --git a/kernels/compiler_smoothstep.cl b/kernels/compiler_smoothstep.cl
+new file mode 100644
+index 0000000..d3b7da4
+--- /dev/null
++++ b/kernels/compiler_smoothstep.cl
+@@ -0,0 +1,4 @@
++kernel void compiler_smoothstep(global float *src1, global float *src2, global float *src3, global float *dst) {
++ int i = get_global_id(0);
++ dst[i] = smoothstep(src1[i], src2[i], src3[i]);
++}
+diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
+index 621acad..26c0540 100644
+--- a/utests/CMakeLists.txt
++++ b/utests/CMakeLists.txt
+@@ -67,6 +67,7 @@ set (utests_sources
+ compiler_saturate_sub.cpp
+ compiler_shift_right.cpp
+ compiler_short_scatter.cpp
++ compiler_smoothstep.cpp
+ compiler_uint2_copy.cpp
+ compiler_uint3_copy.cpp
+ compiler_uint8_copy.cpp
+diff --git a/utests/compiler_smoothstep.cpp b/utests/compiler_smoothstep.cpp
+new file mode 100644
+index 0000000..760063b
+--- /dev/null
++++ b/utests/compiler_smoothstep.cpp
+@@ -0,0 +1,58 @@
++#include <cmath>
++#include "utest_helper.hpp"
++
++float cpu(float e0, float e1, float x)
++{
++ x = (x - e0) / (e1 - e0);
++ if (x >= 1)
++ x = 1.f;
++ if (x <= 0)
++ x = 0.f;
++ return x * x * (3 - 2 * x);
++}
++
++void compiler_smoothstep(void)
++{
++ const int n = 32;
++ float src1[n], src2[n], src3[n];
++
++ // Setup kernel and buffers
++ OCL_CREATE_KERNEL("compiler_smoothstep");
++ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
++ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
++ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL);
++ OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(float), NULL);
++ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
++ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
++ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
++ OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
++ globals[0] = n;
++ locals[0] = 16;
++
++ OCL_MAP_BUFFER(0);
++ OCL_MAP_BUFFER(1);
++ OCL_MAP_BUFFER(2);
++ for (int i = 0; i < n; ++i) {
++ float a = 0.1f * (rand() & 15) - 0.75f;
++ float b = a + 0.1f * (rand() & 15);
++ float c = 0.1f * (rand() & 15) - 0.75f;
++ src1[i] = ((float*)buf_data[0])[i] = a;
++ src2[i] = ((float*)buf_data[1])[i] = b;
++ src3[i] = ((float*)buf_data[2])[i] = c;
++ }
++ OCL_UNMAP_BUFFER(0);
++ OCL_UNMAP_BUFFER(1);
++ OCL_UNMAP_BUFFER(2);
++
++ OCL_NDRANGE(1);
++
++ OCL_MAP_BUFFER(3);
++ for (int i = 0; i < n; ++i) {
++ float a = ((float*)buf_data[3])[i];
++ float b = cpu(src1[i], src2[i], src3[i]);
++ OCL_ASSERT(fabsf(a - b) < 1e-4f);
++ }
++ OCL_UNMAP_BUFFER(3);
++}
++
++MAKE_UTEST_FROM_FUNCTION(compiler_smoothstep);
+--
+1.7.10.4
+
diff --git a/debian/patches/0003-support-built-in-function-bitselect.patch b/debian/patches/0003-support-built-in-function-bitselect.patch
new file mode 100644
index 0000000..1005039
--- /dev/null
+++ b/debian/patches/0003-support-built-in-function-bitselect.patch
@@ -0,0 +1,51 @@
+From 66d4d36b06eaaa7c49131f93617b433e2583b701 Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 09:29:41 +0800
+Subject: [PATCH 03/14] support built-in function "bitselect"
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ backend/src/ocl_stdlib.h | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
+index ce03ed8..ed96e5e 100644
+--- a/backend/src/ocl_stdlib.h
++++ b/backend/src/ocl_stdlib.h
+@@ -4290,6 +4290,31 @@ DEC(16);
+ #undef DEC4
+ #undef DEC8
+ #undef DEC16
++
++#define DEF(type) INLINE_OVERLOADABLE type bitselect(type a, type b, type c) { return (a & ~c) | (b & c); }
++DEF(char); DEF(uchar); DEF(short); DEF(ushort); DEF(int); DEF(uint)
++#undef DEF
++INLINE_OVERLOADABLE float bitselect(float a, float b, float c) {
++ return as_float(bitselect(as_int(a), as_int(b), as_int(c)));
++}
++#define DEC2(type) INLINE_OVERLOADABLE type##2 bitselect(type##2 a, type##2 b, type##2 c) { return (type##2)(bitselect(a.s0, b.s0, c.s0), bitselect(a.s1, b.s1, c.s1)); }
++#define DEC3(type) INLINE_OVERLOADABLE type##3 bitselect(type##3 a, type##3 b, type##3 c) { return (type##3)(bitselect(a.s0, b.s0, c.s0), bitselect(a.s1, b.s1, c.s1), bitselect(a.s2, b.s2, c.s2)); }
++#define DEC4(type) INLINE_OVERLOADABLE type##4 bitselect(type##4 a, type##4 b, type##4 c) { return (type##4)(bitselect(a.s0, b.s0, c.s0), bitselect(a.s1, b.s1, c.s1), bitselect(a.s2, b.s2, c.s2), bitselect(a.s3, b.s3, c.s3)); }
++#define DEC8(type) INLINE_OVERLOADABLE type##8 bitselect(type##8 a, type##8 b, type##8 c) { return (type##8)(bitselect(a.s0, b.s0, c.s0), bitselect(a.s1, b.s1, c.s1), bitselect(a.s2, b.s2, c.s2), bitselect(a.s3, b.s3, c.s3), bitselect(a.s4, b.s4, c.s4), bitselect(a.s5, b.s5, c.s5), bitselect(a.s6, b.s6, c.s6), bitselect(a.s7, b.s7, c.s7)); }
++#define DEC16(type) INLINE_OVERLOADABLE type##16 bitselect(type##16 a, type##16 b, type##16 c) { return (type##16)(bitselect(a.s0, b.s0, c.s0), bitselect(a.s1, b.s1, c.s1), bitselect(a.s2, b.s2, c.s2), bitselect(a.s3, b.s3, c.s3), bitselect(a.s4, b.s4, c.s4), bitselect(a.s5, b.s5, c.s5), bitselect(a.s6, b.s6, c.s6), bitselect(a.s7, b.s7, c.s7), bitselect(a.s8, b.s8, c.s8), bitselect(a.s9, b.s9, c.s9), bitselect(a.sa, b.sa, c.sa), bitselect(a.sb, b.sb, c.sb), bitselect(a.sc, b.sc, c.sc), [...]
++#define DEF(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint); DEC##n(float)
++DEF(2)
++DEF(3)
++DEF(4)
++DEF(8)
++DEF(16)
++#undef DEF
++#undef DEC2
++#undef DEC3
++#undef DEC4
++#undef DEC8
++#undef DEC16
++
+ /////////////////////////////////////////////////////////////////////////////
+ // Integer built-in functions
+ /////////////////////////////////////////////////////////////////////////////
+--
+1.7.10.4
+
diff --git a/debian/patches/0004-test-built-in-function-bitselect.patch b/debian/patches/0004-test-built-in-function-bitselect.patch
new file mode 100644
index 0000000..3744bb3
--- /dev/null
+++ b/debian/patches/0004-test-built-in-function-bitselect.patch
@@ -0,0 +1,97 @@
+From bf5863c0f76601451637f5a029166a8af56a2d6a Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 09:29:42 +0800
+Subject: [PATCH 04/14] test built-in function "bitselect"
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ kernels/builtin_bitselect.cl | 4 ++++
+ utests/CMakeLists.txt | 1 +
+ utests/builtin_bitselect.cpp | 50 ++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 55 insertions(+)
+ create mode 100644 kernels/builtin_bitselect.cl
+ create mode 100644 utests/builtin_bitselect.cpp
+
+diff --git a/kernels/builtin_bitselect.cl b/kernels/builtin_bitselect.cl
+new file mode 100644
+index 0000000..9b60cbe
+--- /dev/null
++++ b/kernels/builtin_bitselect.cl
+@@ -0,0 +1,4 @@
++kernel void builtin_bitselect(global float *src1, global float *src2, global float *src3, global float *dst) {
++ int i = get_global_id(0);
++ dst[i] = bitselect(src1[i], src2[i], src3[i]);
++}
+diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
+index 26c0540..2e18b2c 100644
+--- a/utests/CMakeLists.txt
++++ b/utests/CMakeLists.txt
+@@ -100,6 +100,7 @@ set (utests_sources
+ compiler_vector_load_store.cpp
+ compiler_cl_finish.cpp
+ get_cl_info.cpp
++ builtin_bitselect.cpp
+ buildin_work_dim.cpp
+ builtin_global_size.cpp
+ runtime_createcontext.cpp
+diff --git a/utests/builtin_bitselect.cpp b/utests/builtin_bitselect.cpp
+new file mode 100644
+index 0000000..37fb8df
+--- /dev/null
++++ b/utests/builtin_bitselect.cpp
+@@ -0,0 +1,50 @@
++#include "utest_helper.hpp"
++
++int as_int(float f) {
++ void *p = &f;
++ return *(int *)p;
++}
++
++int cpu(int a, int b, int c) {
++ return (a & ~c) | (b & c);
++}
++
++void builtin_bitselect(void)
++{
++ const int n = 32;
++ float src1[n], src2[n], src3[n];
++
++ // Setup kernel and buffers
++ OCL_CREATE_KERNEL("builtin_bitselect");
++ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
++ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
++ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL);
++ OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(float), NULL);
++ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
++ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
++ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
++ OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
++ globals[0] = n;
++ locals[0] = 16;
++
++ OCL_MAP_BUFFER(0);
++ OCL_MAP_BUFFER(1);
++ OCL_MAP_BUFFER(2);
++ for (int i = 0; i < n; ++i) {
++ src1[i] = ((float*)buf_data[0])[i] = rand() * 0.1f;
++ src2[i] = ((float*)buf_data[1])[i] = rand() * 0.1f;
++ src3[i] = ((float*)buf_data[2])[i] = rand() * 0.1f;
++ }
++ OCL_UNMAP_BUFFER(0);
++ OCL_UNMAP_BUFFER(1);
++ OCL_UNMAP_BUFFER(2);
++
++ OCL_NDRANGE(1);
++
++ OCL_MAP_BUFFER(3);
++ for (int i = 0; i < n; ++i)
++ OCL_ASSERT(((int*)buf_data[3])[i] == cpu(as_int(src1[i]), as_int(src2[i]), as_int(src3[i])));
++ OCL_UNMAP_BUFFER(3);
++}
++
++MAKE_UTEST_FROM_FUNCTION(builtin_bitselect);
+--
+1.7.10.4
+
diff --git a/debian/patches/0005-add-built-in-function-mad_sat.patch b/debian/patches/0005-add-built-in-function-mad_sat.patch
new file mode 100644
index 0000000..19e111e
--- /dev/null
+++ b/debian/patches/0005-add-built-in-function-mad_sat.patch
@@ -0,0 +1,78 @@
+From 5c3d525d759789fbc130e408a03ff7d9d1b6b213 Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 10:09:39 +0800
+Subject: [PATCH 05/14] add built-in function "mad_sat"
+To: beignet at lists.freedesktop.org
+
+currently only "char, uchar, short, ushort" types of arguments are supported
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ backend/src/ocl_stdlib.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 50 insertions(+)
+
+diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
+index ed96e5e..d1f963a 100644
+--- a/backend/src/ocl_stdlib.h
++++ b/backend/src/ocl_stdlib.h
+@@ -4473,6 +4473,56 @@ DEF(16)
+ #undef DEC8
+ #undef DEC16
+
++INLINE_OVERLOADABLE char mad_sat(char a, char b, char c) {
++ int x = (int)a * (int)b + (int)c;
++ if (x > 127)
++ x = 127;
++ if (x < -128)
++ x = -128;
++ return x;
++}
++
++INLINE_OVERLOADABLE uchar mad_sat(uchar a, uchar b, uchar c) {
++ uint x = (uint)a * (uint)b + (uint)c;
++ if (x > 255)
++ x = 255;
++ return x;
++}
++
++INLINE_OVERLOADABLE short mad_sat(short a, short b, short c) {
++ int x = (int)a * (int)b + (int)c;
++ if (x > 32767)
++ x = 32767;
++ if (x < -32768)
++ x = -32768;
++ return x;
++}
++
++INLINE_OVERLOADABLE ushort mad_sat(ushort a, ushort b, ushort c) {
++ uint x = (uint)a * (uint)b + (uint)c;
++ if (x > 65535)
++ x = 65535;
++ return x;
++}
++
++#define DEC2(type) INLINE_OVERLOADABLE type##2 mad_sat(type##2 a, type##2 b, type##2 c) { return (type##2)(mad_sat(a.s0, b.s0, c.s0), mad_sat(a.s1, b.s1, c.s1)); }
++#define DEC3(type) INLINE_OVERLOADABLE type##3 mad_sat(type##3 a, type##3 b, type##3 c) { return (type##3)(mad_sat(a.s0, b.s0, c.s0), mad_sat(a.s1, b.s1, c.s1), mad_sat(a.s2, b.s2, c.s2)); }
++#define DEC4(type) INLINE_OVERLOADABLE type##4 mad_sat(type##4 a, type##4 b, type##4 c) { return (type##4)(mad_sat(a.s0, b.s0, c.s0), mad_sat(a.s1, b.s1, c.s1), mad_sat(a.s2, b.s2, c.s2), mad_sat(a.s3, b.s3, c.s3)); }
++#define DEC8(type) INLINE_OVERLOADABLE type##8 mad_sat(type##8 a, type##8 b, type##8 c) { return (type##8)(mad_sat(a.s0, b.s0, c.s0), mad_sat(a.s1, b.s1, c.s1), mad_sat(a.s2, b.s2, c.s2), mad_sat(a.s3, b.s3, c.s3), mad_sat(a.s4, b.s4, c.s4), mad_sat(a.s5, b.s5, c.s5), mad_sat(a.s6, b.s6, c.s6), mad_sat(a.s7, b.s7, c.s7)); }
++#define DEC16(type) INLINE_OVERLOADABLE type##16 mad_sat(type##16 a, type##16 b, type##16 c) { return (type##16)(mad_sat(a.s0, b.s0, c.s0), mad_sat(a.s1, b.s1, c.s1), mad_sat(a.s2, b.s2, c.s2), mad_sat(a.s3, b.s3, c.s3), mad_sat(a.s4, b.s4, c.s4), mad_sat(a.s5, b.s5, c.s5), mad_sat(a.s6, b.s6, c.s6), mad_sat(a.s7, b.s7, c.s7), mad_sat(a.s8, b.s8, c.s8), mad_sat(a.s9, b.s9, c.s9), mad_sat(a.sa, b.sa, c.sa), mad_sat(a.sb, b.sb, c.sb), mad_sat(a.sc, b.sc, c.sc), mad_sat(a.sd, b.sd, c.sd), [...]
++#define DEF(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort)
++DEF(2)
++DEF(3)
++DEF(4)
++DEF(8)
++DEF(16)
++#undef DEF
++#undef DEC2
++#undef DEC3
++#undef DEC4
++#undef DEC8
++#undef DEC16
++
+ INLINE_OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
+ INLINE_OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); }
+ INLINE_OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); }
+--
+1.7.10.4
+
diff --git a/debian/patches/0006-test-function-mad_sat.patch b/debian/patches/0006-test-function-mad_sat.patch
new file mode 100644
index 0000000..80e967e
--- /dev/null
+++ b/debian/patches/0006-test-function-mad_sat.patch
@@ -0,0 +1,91 @@
+From 8d57036849621ac975141b9b67bcb7c1900c7ed1 Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 10:09:40 +0800
+Subject: [PATCH 06/14] test function "mad_sat"
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ kernels/builtin_mad_sat.cl | 4 ++++
+ utests/CMakeLists.txt | 1 +
+ utests/builtin_mad_sat.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 49 insertions(+)
+ create mode 100644 kernels/builtin_mad_sat.cl
+ create mode 100644 utests/builtin_mad_sat.cpp
+
+diff --git a/kernels/builtin_mad_sat.cl b/kernels/builtin_mad_sat.cl
+new file mode 100644
+index 0000000..1739a4d
+--- /dev/null
++++ b/kernels/builtin_mad_sat.cl
+@@ -0,0 +1,4 @@
++kernel void builtin_mad_sat(global short *src1, global short *src2, global short *src3, global short *dst) {
++ short i = get_global_id(0);
++ dst[i] = mad_sat(src1[i], src2[i], src3[i]);
++}
+diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
+index 2e18b2c..1cdbd24 100644
+--- a/utests/CMakeLists.txt
++++ b/utests/CMakeLists.txt
+@@ -101,6 +101,7 @@ set (utests_sources
+ compiler_cl_finish.cpp
+ get_cl_info.cpp
+ builtin_bitselect.cpp
++ builtin_mad_sat.cpp
+ buildin_work_dim.cpp
+ builtin_global_size.cpp
+ runtime_createcontext.cpp
+diff --git a/utests/builtin_mad_sat.cpp b/utests/builtin_mad_sat.cpp
+new file mode 100644
+index 0000000..ed9a558
+--- /dev/null
++++ b/utests/builtin_mad_sat.cpp
+@@ -0,0 +1,44 @@
++#include "utest_helper.hpp"
++
++void builtin_mad_sat(void)
++{
++ const int n = 32;
++ short src1[n], src2[n], src3[n];
++srand(0);
++ // Setup kernel and buffers
++ OCL_CREATE_KERNEL("builtin_mad_sat");
++ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(short), NULL);
++ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
++ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(short), NULL);
++ OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(short), NULL);
++ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
++ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
++ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
++ OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
++ globals[0] = n;
++ locals[0] = 16;
++
++ OCL_MAP_BUFFER(0);
++ OCL_MAP_BUFFER(1);
++ OCL_MAP_BUFFER(2);
++ for (int i = 0; i < n; ++i) {
++ src1[i] = ((short*)buf_data[0])[i] = rand();
++ src2[i] = ((short*)buf_data[1])[i] = rand();
++ src3[i] = ((short*)buf_data[2])[i] = rand();
++ }
++ OCL_UNMAP_BUFFER(0);
++ OCL_UNMAP_BUFFER(1);
++ OCL_UNMAP_BUFFER(2);
++
++ OCL_NDRANGE(1);
++
++ OCL_MAP_BUFFER(3);
++ for (int i = 0; i < n; ++i) {
++ int a = (int)src1[i] * (int)src2[i] + (int)src3[i];
++ a = a > 0x7FFF ? 0x7FFF : (a < -0x8000 ? -0x8000 : a);
++ OCL_ASSERT(((short*)buf_data[3])[i] == (short)a);
++ }
++ OCL_UNMAP_BUFFER(3);
++}
++
++MAKE_UTEST_FROM_FUNCTION(builtin_mad_sat);
+--
+1.7.10.4
+
diff --git a/debian/patches/0007-built-in-function-sign.patch b/debian/patches/0007-built-in-function-sign.patch
new file mode 100644
index 0000000..598d06d
--- /dev/null
+++ b/debian/patches/0007-built-in-function-sign.patch
@@ -0,0 +1,44 @@
+From c093b0644719ba8be67cf009a4e0adeb5daba8aa Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 12:42:02 +0800
+Subject: [PATCH 07/14] built-in function "sign"
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ backend/src/ocl_stdlib.h | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
+index d1f963a..c806b08 100644
+--- a/backend/src/ocl_stdlib.h
++++ b/backend/src/ocl_stdlib.h
+@@ -5152,6 +5152,16 @@ INLINE_OVERLOADABLE float smoothstep(float e0, float e1, float x) {
+ return x * x * (3 - 2 * x);
+ }
+
++INLINE_OVERLOADABLE float sign(float x) {
++ if(x > 0)
++ return 1;
++ if(x < 0)
++ return -1;
++ if(x == -0.f)
++ return -0.f;
++ return 0.f;
++}
++
+ INLINE_OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) { return max(a,b); }
+ INLINE_OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) { return min(a,b); }
+ INLINE_OVERLOADABLE float __gen_ocl_internal_maxmag(float x, float y) {
+@@ -5357,6 +5367,7 @@ DECL_UNTYPED_RW_ALL(float)
+ dst.s89abcdef = NAME(v.s89abcdef);\
+ return dst;\
+ }
++DECL_VECTOR_1OP(sign, float);
+ DECL_VECTOR_1OP(native_cos, float);
+ DECL_VECTOR_1OP(__gen_ocl_internal_cospi, float);
+ DECL_VECTOR_1OP(__gen_ocl_internal_cosh, float);
+--
+1.7.10.4
+
diff --git a/debian/patches/0008-test-built-in-function-sign.patch b/debian/patches/0008-test-built-in-function-sign.patch
new file mode 100644
index 0000000..e9cd7d1
--- /dev/null
+++ b/debian/patches/0008-test-built-in-function-sign.patch
@@ -0,0 +1,92 @@
+From 93484fc1236af3323535914f6794fbc77844f8fe Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 12:42:03 +0800
+Subject: [PATCH 08/14] test built-in function "sign"
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ kernels/builtin_sign.cl | 4 ++++
+ utests/CMakeLists.txt | 1 +
+ utests/builtin_sign.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 50 insertions(+)
+ create mode 100644 kernels/builtin_sign.cl
+ create mode 100644 utests/builtin_sign.cpp
+
+diff --git a/kernels/builtin_sign.cl b/kernels/builtin_sign.cl
+new file mode 100644
+index 0000000..ff9a66b
+--- /dev/null
++++ b/kernels/builtin_sign.cl
+@@ -0,0 +1,4 @@
++kernel void builtin_sign(global float *src, global float *dst) {
++ int i = get_global_id(0);
++ dst[i] = sign(src[i]);
++}
+diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
+index 1cdbd24..14ff6b1 100644
+--- a/utests/CMakeLists.txt
++++ b/utests/CMakeLists.txt
+@@ -102,6 +102,7 @@ set (utests_sources
+ get_cl_info.cpp
+ builtin_bitselect.cpp
+ builtin_mad_sat.cpp
++ builtin_sign.cpp
+ buildin_work_dim.cpp
+ builtin_global_size.cpp
+ runtime_createcontext.cpp
+diff --git a/utests/builtin_sign.cpp b/utests/builtin_sign.cpp
+new file mode 100644
+index 0000000..7014790
+--- /dev/null
++++ b/utests/builtin_sign.cpp
+@@ -0,0 +1,45 @@
++#include <cmath>
++#include "utest_helper.hpp"
++
++void builtin_sign(void)
++{
++ const int n = 32;
++ float src[n];
++
++ // Setup kernel and buffers
++ OCL_CREATE_KERNEL("builtin_sign");
++ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
++ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
++ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
++ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
++ globals[0] = n;
++ locals[0] = 16;
++
++ OCL_MAP_BUFFER(0);
++ src[0] = nanf("");
++ src[1] = 0.f;
++ src[2] = -0.f;
++ for (int i = 3; i < n; ++i) {
++ src[i] = ((float*)buf_data[0])[i] = (rand() & 15) * 0.1 - 0.75;
++ }
++ OCL_UNMAP_BUFFER(0);
++
++ OCL_NDRANGE(1);
++
++ OCL_MAP_BUFFER(1);
++ float *dst = (float*)buf_data[1];
++ OCL_ASSERT(dst[0] == 0);
++ OCL_ASSERT(dst[1] == 0.f);
++ OCL_ASSERT(dst[2] == -0.f);
++ for (int i = 3; i < n; ++i) {
++ if (src[i] == 0.f)
++ OCL_ASSERT(dst[i] == 0.f);
++ else if (src[i] == -0.f)
++ OCL_ASSERT(dst[i] == -0.f);
++ else
++ OCL_ASSERT(dst[i] == (src[i] > 0 ? 1 : -1));
++ }
++ OCL_UNMAP_BUFFER(1);
++}
++
++MAKE_UTEST_FROM_FUNCTION(builtin_sign);
+--
+1.7.10.4
+
diff --git a/debian/patches/0009-fix-vectorial-built-in-functions-min-max-clamp.patch b/debian/patches/0009-fix-vectorial-built-in-functions-min-max-clamp.patch
new file mode 100644
index 0000000..24199f0
--- /dev/null
+++ b/debian/patches/0009-fix-vectorial-built-in-functions-min-max-clamp.patch
@@ -0,0 +1,68 @@
+From f3d775a8716307f32fb9c12cfa7a3e6404537f22 Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 12:38:58 +0800
+Subject: [PATCH 09/14] fix vectorial built-in functions "min, max, clamp"
+To: beignet at lists.freedesktop.org
+
+vectorial versions of "min,max,clamp" was missing.
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ backend/src/ocl_stdlib.h | 40 ++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 40 insertions(+)
+
+diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
+index c806b08..fbdc703 100644
+--- a/backend/src/ocl_stdlib.h
++++ b/backend/src/ocl_stdlib.h
+@@ -5133,6 +5133,46 @@ DECL_MIN_MAX_CLAMP(uint)
+ DECL_MIN_MAX_CLAMP(unsigned short)
+ DECL_MIN_MAX_CLAMP(unsigned char)
+ #undef DECL_MIN_MAX_CLAMP
++#define DEC2(func, type) INLINE_OVERLOADABLE type##2 func(type##2 a, type##2 b) { return (type##2)(func(a.s0, b.s0), func(a.s1, b.s1)); }
++#define DEC3(func, type) INLINE_OVERLOADABLE type##3 func(type##3 a, type##3 b) { return (type##3)(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2)); }
++#define DEC4(func, type) INLINE_OVERLOADABLE type##4 func(type##4 a, type##4 b) { return (type##4)(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3)); }
++#define DEC8(func, type) INLINE_OVERLOADABLE type##8 func(type##8 a, type##8 b) { return (type##8)(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7)); }
++#define DEC16(func, type) INLINE_OVERLOADABLE type##16 func(type##16 a, type##16 b) { return (type##16)(func(a.s0, b.s0), func(a.s1, b.s1), func(a.s2, b.s2), func(a.s3, b.s3), func(a.s4, b.s4), func(a.s5, b.s5), func(a.s6, b.s6), func(a.s7, b.s7), func(a.s8, b.s8), func(a.s9, b.s9), func(a.sa, b.sa), func(a.sb, b.sb), func(a.sc, b.sc), func(a.sd, b.sd), func(a.se, b.se), func(a.sf, b.sf)); }
++#define DEF(func, n) DEC##n(func, char); DEC##n(func, uchar); DEC##n(func, short); DEC##n(func, ushort); DEC##n(func, int); DEC##n(func, uint)
++DEF(max, 2)
++DEF(max, 3)
++DEF(max, 4)
++DEF(max, 8)
++DEF(max, 16)
++DEF(min, 2)
++DEF(min, 3)
++DEF(min, 4)
++DEF(min, 8)
++DEF(min, 16)
++#undef DEF
++#undef DEC2
++#undef DEC3
++#undef DEC4
++#undef DEC8
++#undef DEC16
++
++#define DEC2(type) INLINE_OVERLOADABLE type##2 clamp(type##2 a, type##2 b, type##2 c) { return (type##2)(clamp(a.s0, b.s0, c.s0), clamp(a.s1, b.s1, c.s1)); }
++#define DEC3(type) INLINE_OVERLOADABLE type##3 clamp(type##3 a, type##3 b, type##3 c) { return (type##3)(clamp(a.s0, b.s0, c.s0), clamp(a.s1, b.s1, c.s1), clamp(a.s2, b.s2, c.s2)); }
++#define DEC4(type) INLINE_OVERLOADABLE type##4 clamp(type##4 a, type##4 b, type##4 c) { return (type##4)(clamp(a.s0, b.s0, c.s0), clamp(a.s1, b.s1, c.s1), clamp(a.s2, b.s2, c.s2), clamp(a.s3, b.s3, c.s3)); }
++#define DEC8(type) INLINE_OVERLOADABLE type##8 clamp(type##8 a, type##8 b, type##8 c) { return (type##8)(clamp(a.s0, b.s0, c.s0), clamp(a.s1, b.s1, c.s1), clamp(a.s2, b.s2, c.s2), clamp(a.s3, b.s3, c.s3), clamp(a.s4, b.s4, c.s4), clamp(a.s5, b.s5, c.s5), clamp(a.s6, b.s6, c.s6), clamp(a.s7, b.s7, c.s7)); }
++#define DEC16(type) INLINE_OVERLOADABLE type##16 clamp(type##16 a, type##16 b, type##16 c) { return (type##16)(clamp(a.s0, b.s0, c.s0), clamp(a.s1, b.s1, c.s1), clamp(a.s2, b.s2, c.s2), clamp(a.s3, b.s3, c.s3), clamp(a.s4, b.s4, c.s4), clamp(a.s5, b.s5, c.s5), clamp(a.s6, b.s6, c.s6), clamp(a.s7, b.s7, c.s7), clamp(a.s8, b.s8, c.s8), clamp(a.s9, b.s9, c.s9), clamp(a.sa, b.sa, c.sa), clamp(a.sb, b.sb, c.sb), clamp(a.sc, b.sc, c.sc), clamp(a.sd, b.sd, c.sd), clamp(a.se, b.se, c.se), clamp [...]
++#define DEF(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint); DEC##n(float)
++DEF(2)
++DEF(3)
++DEF(4)
++DEF(8)
++DEF(16)
++#undef DEF
++#undef DEC2
++#undef DEC3
++#undef DEC4
++#undef DEC8
++#undef DEC16
+
+ INLINE_OVERLOADABLE float degrees(float radians) { return (180 / M_PI_F) * radians; }
+ INLINE_OVERLOADABLE float2 degrees(float2 r) { return (float2)(degrees(r.s0), degrees(r.s1)); }
+--
+1.7.10.4
+
diff --git a/debian/patches/0010-improve-clCreateContext-conformance.patch b/debian/patches/0010-improve-clCreateContext-conformance.patch
new file mode 100644
index 0000000..5f7469a
--- /dev/null
+++ b/debian/patches/0010-improve-clCreateContext-conformance.patch
@@ -0,0 +1,87 @@
+From f65dba3126bdf735c74820e04f75d91b4000264f Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 13:22:01 +0800
+Subject: [PATCH 10/14] improve clCreateContext conformance
+To: beignet at lists.freedesktop.org
+
+OpenCL specification says "clCreateContext" function returns
+CL_INVALID_PROPERTY if the same property name appears more than once.
+
+But "clCreateContext" did not follow that.
+
+This patch uses a local temp integer to track whether a property name has
+appeared. If so, returns CL_INVALID_PROPERTY.
+
+This patch makes Piglit test case "clCreateContext" pass.
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ src/cl_context.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/src/cl_context.c b/src/cl_context.c
+index 338706b..a48436c 100644
+--- a/src/cl_context.c
++++ b/src/cl_context.c
+@@ -36,10 +36,22 @@
+ #include <assert.h>
+ #include <string.h>
+
++#define CHECK(var) \
++ if (var) \
++ return CL_INVALID_PROPERTY; \
++ else \
++ var = 1;
++
+ static cl_int
+ cl_context_properties_process(const cl_context_properties *prop,
+ struct _cl_context_prop *cl_props, cl_uint * prop_len)
+ {
++ int set_cl_context_platform = 0,
++ set_cl_gl_context_khr = 0,
++ set_cl_egl_display_khr = 0,
++ set_cl_glx_display_khr = 0,
++ set_cl_wgl_hdc_khr = 0,
++ set_cl_cgl_sharegroup_khr = 0;
+ cl_int err = CL_SUCCESS;
+
+ cl_props->gl_type = CL_GL_NOSHARE;
+@@ -52,6 +64,7 @@ cl_context_properties_process(const cl_context_properties *prop,
+ while(*prop) {
+ switch (*prop) {
+ case CL_CONTEXT_PLATFORM:
++ CHECK (set_cl_context_platform);
+ cl_props->platform_id = *(prop + 1);
+ if (UNLIKELY((cl_platform_id) cl_props->platform_id != intel_platform)) {
+ err = CL_INVALID_PLATFORM;
+@@ -59,21 +72,26 @@ cl_context_properties_process(const cl_context_properties *prop,
+ }
+ break;
+ case CL_GL_CONTEXT_KHR:
++ CHECK (set_cl_gl_context_khr);
+ cl_props->gl_context = *(prop + 1);
+ break;
+ case CL_EGL_DISPLAY_KHR:
++ CHECK (set_cl_egl_display_khr);
+ cl_props->gl_type = CL_GL_EGL_DISPLAY;
+ cl_props->egl_display = *(prop + 1);
+ break;
+ case CL_GLX_DISPLAY_KHR:
++ CHECK (set_cl_glx_display_khr);
+ cl_props->gl_type = CL_GL_GLX_DISPLAY;
+ cl_props->glx_display = *(prop + 1);
+ break;
+ case CL_WGL_HDC_KHR:
++ CHECK (set_cl_wgl_hdc_khr);
+ cl_props->gl_type = CL_GL_WGL_HDC;
+ cl_props->wgl_hdc = *(prop + 1);
+ break;
+ case CL_CGL_SHAREGROUP_KHR:
++ CHECK (set_cl_cgl_sharegroup_khr);
+ cl_props->gl_type = CL_GL_CGL_SHAREGROUP;
+ cl_props->cgl_sharegroup = *(prop + 1);
+ break;
+--
+1.7.10.4
+
diff --git a/debian/patches/0011-Improve-the-clEnqueueMapBuffer-and-clCreateBuffer-AP.patch b/debian/patches/0011-Improve-the-clEnqueueMapBuffer-and-clCreateBuffer-AP.patch
new file mode 100644
index 0000000..f198ebd
--- /dev/null
+++ b/debian/patches/0011-Improve-the-clEnqueueMapBuffer-and-clCreateBuffer-AP.patch
@@ -0,0 +1,293 @@
+From 291dcddec786533226185f9fcd10fada1dc10fa0 Mon Sep 17 00:00:00 2001
+From: Junyan He <junyan.he at linux.intel.com>
+Date: Wed, 10 Jul 2013 15:50:44 +0800
+Subject: [PATCH 11/14] Improve the clEnqueueMapBuffer and clCreateBuffer API
+To: beignet at lists.freedesktop.org
+
+In clCreateBuffer API, add the CL_MEM_ALLOC_HOST_PTR and
+CL_MEM_USE_HOST_PTR flag support.
+CL_MEM_ALLOC_HOST_PTR flag seem nothings special to do.
+CL_MEM_USE_HOST_PTR flag will request clEnqueueMapBuffer API:
+1> The host_ptr specified in clCreateBuffer is guaranteed to
+contain the latest bits in the region being mapped when the
+clEnqueueMapBuffer command has completed.
+2> The pointer value returned by clEnqueueMapBuffer will be
+derived from the host_ptr specified when the buffer object is created.
+
+We improve the clEnqueueMapBuffer to setup a map for the mapped
+address and do the data sync problem based on the address when
+mapped and unmapped.
+
+Signed-off-by: Junyan He <junyan.he at linux.intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ src/cl_api.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
+ src/cl_mem.c | 43 ++++++++++++++++----
+ src/cl_mem.h | 10 +++++
+ 3 files changed, 169 insertions(+), 9 deletions(-)
+
+diff --git a/src/cl_api.c b/src/cl_api.c
+index dc52f0a..115c07a 100644
+--- a/src/cl_api.c
++++ b/src/cl_api.c
+@@ -1467,7 +1467,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
+ cl_int * errcode_ret)
+ {
+ void *ptr = NULL;
++ void *mem_ptr = NULL;
+ cl_int err = CL_SUCCESS;
++ int slot = -1;
+
+ CHECK_QUEUE(command_queue);
+ CHECK_MEM(buffer);
+@@ -1500,10 +1502,66 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
+
+ ptr = (char*)ptr + offset;
+
++ if(buffer->flags & CL_MEM_USE_HOST_PTR) {
++ assert(buffer->host_ptr);
++ memcpy(buffer->host_ptr + offset, ptr, size);
++ mem_ptr = buffer->host_ptr + offset;
++ } else {
++ mem_ptr = ptr;
++ }
++
++ /* Record the mapped address. */
++ if (!buffer->mapped_ptr_sz) {
++ buffer->mapped_ptr_sz = 16;
++ buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
++ sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
++ if (!buffer->mapped_ptr) {
++ cl_mem_unmap_auto (buffer);
++ err = CL_OUT_OF_HOST_MEMORY;
++ ptr = NULL;
++ goto error;
++ }
++
++ memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
++ slot = 0;
++ } else {
++ int i = 0;
++ for (; i < buffer->mapped_ptr_sz; i++) {
++ if (buffer->mapped_ptr[i].ptr == NULL) {
++ slot = i;
++ break;
++ }
++ }
++
++ if (i == buffer->mapped_ptr_sz) {
++ cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
++ sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
++ if (!new_ptr) {
++ cl_mem_unmap_auto (buffer);
++ err = CL_OUT_OF_HOST_MEMORY;
++ ptr = NULL;
++ goto error;
++ }
++ memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
++ memcpy(new_ptr, buffer->mapped_ptr,
++ buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
++ slot = buffer->mapped_ptr_sz;
++ buffer->mapped_ptr_sz *= 2;
++ free(buffer->mapped_ptr);
++ buffer->mapped_ptr = new_ptr;
++ }
++ }
++
++ assert(slot != -1);
++ buffer->mapped_ptr[slot].ptr = mem_ptr;
++ buffer->mapped_ptr[slot].v_ptr = ptr;
++ buffer->mapped_ptr[slot].size = size;
++ buffer->map_ref++;
++
+ error:
+ if (errcode_ret)
+ *errcode_ret = err;
+- return ptr;
++ return mem_ptr;
+ }
+
+ void *
+@@ -1578,7 +1636,70 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
+ const cl_event * event_wait_list,
+ cl_event * event)
+ {
+- return cl_mem_unmap_auto(memobj);
++ cl_int err = CL_SUCCESS;
++ int i;
++ size_t mapped_size = 0;
++ void * v_ptr = NULL;
++
++ CHECK_QUEUE(command_queue);
++ CHECK_MEM(memobj);
++ if (command_queue->ctx != memobj->ctx) {
++ err = CL_INVALID_CONTEXT;
++ goto error;
++ }
++
++ assert(memobj->mapped_ptr_sz >= memobj->map_ref);
++ INVALID_VALUE_IF(!mapped_ptr);
++ for (i = 0; i < memobj->mapped_ptr_sz; i++) {
++ if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
++ memobj->mapped_ptr[i].ptr = NULL;
++ mapped_size = memobj->mapped_ptr[i].size;
++ v_ptr = memobj->mapped_ptr[i].v_ptr;
++ memobj->mapped_ptr[i].size = 0;
++ memobj->mapped_ptr[i].v_ptr = NULL;
++ memobj->map_ref--;
++ break;
++ }
++ }
++ /* can not find a mapped address? */
++ INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
++
++ if (memobj->flags & CL_MEM_USE_HOST_PTR) {
++ assert(mapped_ptr >= memobj->host_ptr &&
++ mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
++ /* Sync the data. */
++ memcpy(v_ptr, mapped_ptr, mapped_size);
++ } else {
++ assert(v_ptr == mapped_ptr);
++ }
++
++ cl_mem_unmap_auto(memobj);
++
++ /* shrink the mapped slot. */
++ if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
++ int j = 0;
++ cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
++ sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
++ if (!new_ptr) {
++ /* Just do nothing. */
++ goto error;
++ }
++ memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
++
++ for (i = 0; i < memobj->mapped_ptr_sz; i++) {
++ if (memobj->mapped_ptr[i].ptr) {
++ new_ptr[j] = memobj->mapped_ptr[i];
++ j++;
++ assert(j < memobj->mapped_ptr_sz/2);
++ }
++ }
++ memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
++ free(memobj->mapped_ptr);
++ memobj->mapped_ptr = new_ptr;
++ }
++
++error:
++ return err;
+ }
+
+ cl_int
+diff --git a/src/cl_mem.c b/src/cl_mem.c
+index 5465aa9..ce70305 100644
+--- a/src/cl_mem.c
++++ b/src/cl_mem.c
+@@ -106,10 +106,6 @@ cl_mem_allocate(cl_context ctx,
+ cl_ulong max_mem_size;
+
+ assert(ctx);
+- FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR,
+- "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
+- FATAL_IF (flags & CL_MEM_USE_HOST_PTR,
+- "CL_MEM_USE_HOST_PTR unsupported"); /* XXX */
+
+ if ((err = cl_get_device_info(ctx->device,
+ CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+@@ -172,11 +168,35 @@ cl_mem_new(cl_context ctx,
+ void *data,
+ cl_int *errcode_ret)
+ {
++ /* Possible mem type combination:
++ CL_MEM_ALLOC_HOST_PTR
++ CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR
++ CL_MEM_USE_HOST_PTR
++ CL_MEM_COPY_HOST_PTR */
++
+ cl_int err = CL_SUCCESS;
+ cl_mem mem = NULL;
+
+- /* Check flags consistency */
+- if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
++ /* This flag is valid only if host_ptr is not NULL */
++ if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR ||
++ flags & CL_MEM_USE_HOST_PTR) &&
++ data == NULL)) {
++ err = CL_INVALID_HOST_PTR;
++ goto error;
++ }
++
++ /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR
++ are mutually exclusive. */
++ if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR &&
++ flags & CL_MEM_USE_HOST_PTR)) {
++ err = CL_INVALID_HOST_PTR;
++ goto error;
++ }
++
++ /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR
++ are mutually exclusive. */
++ if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR &&
++ flags & CL_MEM_USE_HOST_PTR)) {
+ err = CL_INVALID_HOST_PTR;
+ goto error;
+ }
+@@ -187,9 +207,12 @@ cl_mem_new(cl_context ctx,
+ goto error;
+
+ /* Copy the data if required */
+- if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
++ if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
+ cl_buffer_subdata(mem->bo, 0, sz, data);
+
++ if (flags & CL_MEM_USE_HOST_PTR)
++ mem->host_ptr = data;
++
+ exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+@@ -418,6 +441,12 @@ cl_mem_delete(cl_mem mem)
+ pthread_mutex_unlock(&mem->ctx->buffer_lock);
+ cl_context_delete(mem->ctx);
+
++ /* Someone still mapped? */
++ assert(mem->map_ref);
++
++ if (mem->mapped_ptr)
++ free(mem->mapped_ptr);
++
+ cl_free(mem);
+ }
+
+diff --git a/src/cl_mem.h b/src/cl_mem.h
+index c204992..6d98698 100644
+--- a/src/cl_mem.h
++++ b/src/cl_mem.h
+@@ -49,6 +49,12 @@ typedef enum cl_image_tiling {
+ CL_TILE_Y = 2
+ } cl_image_tiling_t;
+
++typedef struct _cl_mapped_ptr {
++ void * ptr;
++ void * v_ptr;
++ size_t size;
++}cl_mapped_ptr;
++
+ /* Used for buffers and images */
+ struct _cl_mem {
+ DEFINE_ICD(dispatch)
+@@ -68,6 +74,10 @@ struct _cl_mem {
+ uint32_t intel_fmt; /* format to provide in the surface state */
+ uint32_t bpp; /* number of bytes per pixel */
+ cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */
++ void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */
++ cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
++ int mapped_ptr_sz; /* The array size of mapped_ptr. */
++ int map_ref; /* The mapped count. */
+ };
+
+ /* Query information about a memory object */
+--
+1.7.10.4
+
diff --git a/debian/patches/0012-support-clGetImageInfo.patch b/debian/patches/0012-support-clGetImageInfo.patch
new file mode 100644
index 0000000..d20c7a0
--- /dev/null
+++ b/debian/patches/0012-support-clGetImageInfo.patch
@@ -0,0 +1,115 @@
+From 7ad035f67bfc65e53cd1d67cfec0ca958f44f497 Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 10 Jul 2013 14:20:57 +0800
+Subject: [PATCH 12/14] support clGetImageInfo
+To: beignet at lists.freedesktop.org
+
+clGetImageInfo() is an OpenCL API. It returns information of an image.
+
+This patch makes Piglit test case "clGetImageInfo" pass.
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ src/cl_api.c | 7 +++++--
+ src/cl_mem.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ src/cl_mem.h | 3 +++
+ 3 files changed, 59 insertions(+), 2 deletions(-)
+
+diff --git a/src/cl_api.c b/src/cl_api.c
+index 115c07a..20cbc1e 100644
+--- a/src/cl_api.c
++++ b/src/cl_api.c
+@@ -573,8 +573,11 @@ clGetImageInfo(cl_mem image,
+ void * param_value,
+ size_t * param_value_size_ret)
+ {
+- NOT_IMPLEMENTED;
+- return 0;
++ return cl_get_image_info(image,
++ param_name,
++ param_value_size,
++ param_value,
++ param_value_size_ret);
+ }
+
+ cl_int
+diff --git a/src/cl_mem.c b/src/cl_mem.c
+index ce70305..6167dd6 100644
+--- a/src/cl_mem.c
++++ b/src/cl_mem.c
+@@ -90,6 +90,57 @@ cl_get_mem_object_info(cl_mem mem,
+ return CL_SUCCESS;
+ }
+
++LOCAL cl_int
++cl_get_image_info(cl_mem mem,
++ cl_image_info param_name,
++ size_t param_value_size,
++ void *param_value,
++ size_t *param_value_size_ret)
++{
++ if(!mem || !mem->is_image)
++ return CL_INVALID_MEM_OBJECT;
++
++ switch(param_name)
++ {
++ FIELD_SIZE(IMAGE_FORMAT, cl_image_format);
++ FIELD_SIZE(IMAGE_ELEMENT_SIZE, size_t);
++ FIELD_SIZE(IMAGE_ROW_PITCH, size_t);
++ FIELD_SIZE(IMAGE_SLICE_PITCH, size_t);
++ FIELD_SIZE(IMAGE_WIDTH, size_t);
++ FIELD_SIZE(IMAGE_HEIGHT, size_t);
++ FIELD_SIZE(IMAGE_DEPTH, size_t);
++ default:
++ return CL_INVALID_VALUE;
++ }
++
++ switch(param_name)
++ {
++ case CL_IMAGE_FORMAT:
++ *(cl_image_format *)param_value = mem->fmt;
++ break;
++ case CL_IMAGE_ELEMENT_SIZE:
++ *(size_t *)param_value = mem->bpp;
++ break;
++ case CL_IMAGE_ROW_PITCH:
++ *(size_t *)param_value = mem->row_pitch;
++ break;
++ case CL_IMAGE_SLICE_PITCH:
++ *(size_t *)param_value = mem->slice_pitch;
++ break;
++ case CL_IMAGE_WIDTH:
++ *(size_t *)param_value = mem->w;
++ break;
++ case CL_IMAGE_HEIGHT:
++ *(size_t *)param_value = mem->h;
++ break;
++ case CL_IMAGE_DEPTH:
++ *(size_t *)param_value = mem->depth;
++ break;
++ }
++
++ return CL_SUCCESS;
++}
++
+ #undef FIELD_SIZE
+
+ static cl_mem
+diff --git a/src/cl_mem.h b/src/cl_mem.h
+index 6d98698..66518a6 100644
+--- a/src/cl_mem.h
++++ b/src/cl_mem.h
+@@ -83,6 +83,9 @@ struct _cl_mem {
+ /* Query information about a memory object */
+ extern cl_int cl_get_mem_object_info(cl_mem, cl_mem_info, size_t, void *, size_t *);
+
++/* Query information about an image */
++extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t *);
++
+ /* Create a new memory object and initialize it with possible user data */
+ extern cl_mem cl_mem_new(cl_context, cl_mem_flags, size_t, void*, cl_int*);
+
+--
+1.7.10.4
+
diff --git a/debian/patches/0013-Add-vector-argument-test-case.patch b/debian/patches/0013-Add-vector-argument-test-case.patch
new file mode 100644
index 0000000..faa365a
--- /dev/null
+++ b/debian/patches/0013-Add-vector-argument-test-case.patch
@@ -0,0 +1,75 @@
+From 4a6741f9031cb6d105023a01401504dbac95695a Mon Sep 17 00:00:00 2001
+From: Yang Rong <rong.r.yang at intel.com>
+Date: Thu, 16 May 2013 12:36:35 +0800
+Subject: [PATCH 13/14] Add vector argument test case.
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Yang Rong <rong.r.yang at intel.com>
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ kernels/compiler_function_argument2.cl | 6 ++++++
+ utests/CMakeLists.txt | 1 +
+ utests/compiler_function_argument2.cpp | 26 ++++++++++++++++++++++++++
+ 3 files changed, 33 insertions(+)
+ create mode 100644 kernels/compiler_function_argument2.cl
+ create mode 100644 utests/compiler_function_argument2.cpp
+
+diff --git a/kernels/compiler_function_argument2.cl b/kernels/compiler_function_argument2.cl
+new file mode 100644
+index 0000000..0985dbd
+--- /dev/null
++++ b/kernels/compiler_function_argument2.cl
+@@ -0,0 +1,6 @@
++__kernel void
++compiler_function_argument2(__global int *dst, int4 value)
++{
++ int id = (int)get_global_id(0);
++ dst[id] = value.w;
++}
+diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
+index 14ff6b1..ea034cc 100644
+--- a/utests/CMakeLists.txt
++++ b/utests/CMakeLists.txt
+@@ -41,6 +41,7 @@ set (utests_sources
+ compiler_fill_image_3d_2.cpp
+ compiler_function_argument0.cpp
+ compiler_function_argument1.cpp
++ compiler_function_argument2.cpp
+ compiler_function_argument.cpp
+ compiler_function_constant0.cpp
+ compiler_function_constant1.cpp
+diff --git a/utests/compiler_function_argument2.cpp b/utests/compiler_function_argument2.cpp
+new file mode 100644
+index 0000000..1e398a9
+--- /dev/null
++++ b/utests/compiler_function_argument2.cpp
+@@ -0,0 +1,26 @@
++#include "utest_helper.hpp"
++
++struct int4 {int x,y,z,w;};
++void compiler_function_argument2(void)
++{
++ const size_t n = 2048;
++ const int4 value = {31, 32, 33, 34};
++
++ // Setup kernel and buffers
++ OCL_CREATE_KERNEL("compiler_function_argument2");
++ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
++ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
++ OCL_SET_ARG(1, sizeof(int4), &value);
++
++ // Run the kernel
++ globals[0] = n;
++ locals[0] = 16;
++ OCL_NDRANGE(1);
++ OCL_MAP_BUFFER(0);
++
++ // Check results
++ for (uint32_t i = 0; i < n; ++i)
++ OCL_ASSERT(((int*)buf_data[0])[i] == value.w);
++}
++
++MAKE_UTEST_FROM_FUNCTION(compiler_function_argument2);
+--
+1.7.10.4
+
diff --git a/debian/patches/0014-Readd-OpenCL-1.2-definitions-required-for-ICD.patch b/debian/patches/0014-Readd-OpenCL-1.2-definitions-required-for-ICD.patch
new file mode 100644
index 0000000..bdd657c
--- /dev/null
+++ b/debian/patches/0014-Readd-OpenCL-1.2-definitions-required-for-ICD.patch
@@ -0,0 +1,97 @@
+From 9ce43445e101f8a48adb4a641d7f601f226d0362 Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Wed, 19 Jun 2013 11:30:36 +0200
+Subject: [PATCH 14/14] Readd OpenCL 1.2 definitions required for ICD
+To: beignet at lists.freedesktop.org
+
+The definition for the ICD dispatch table requires a few additional
+definitions from OpenCL 1.2.
+
+Signed-off-by: Simon Richter <Simon.Richter at hogyros.de>
+---
+ include/CL/cl.h | 15 +++++++++++++++
+ include/CL/cl_platform.h | 2 ++
+ src/cl_mem.h | 12 ------------
+ 3 files changed, 17 insertions(+), 12 deletions(-)
+
+diff --git a/include/CL/cl.h b/include/CL/cl.h
+index 4355e74..a7f25d1 100644
+--- a/include/CL/cl.h
++++ b/include/CL/cl.h
+@@ -67,6 +67,7 @@ typedef cl_uint cl_channel_type;
+ typedef cl_bitfield cl_mem_flags;
+ typedef cl_uint cl_mem_object_type;
+ typedef cl_uint cl_mem_info;
++typedef cl_bitfield cl_mem_migration_flags;
+ typedef cl_uint cl_image_info;
+ typedef cl_uint cl_buffer_create_type;
+ typedef cl_uint cl_addressing_mode;
+@@ -75,8 +76,10 @@ typedef cl_uint cl_sampler_info;
+ typedef cl_bitfield cl_map_flags;
+ typedef cl_uint cl_program_info;
+ typedef cl_uint cl_program_build_info;
++typedef intptr_t cl_device_partition_property;
+ typedef cl_int cl_build_status;
+ typedef cl_uint cl_kernel_info;
++typedef cl_uint cl_kernel_arg_info;
+ typedef cl_uint cl_kernel_work_group_info;
+ typedef cl_uint cl_event_info;
+ typedef cl_uint cl_command_type;
+@@ -87,6 +90,18 @@ typedef struct _cl_image_format {
+ cl_channel_type image_channel_data_type;
+ } cl_image_format;
+
++typedef struct _cl_image_desc {
++ cl_mem_object_type image_type;
++ size_t image_width;
++ size_t image_height;
++ size_t image_depth;
++ size_t image_array_size;
++ size_t image_row_pitch;
++ size_t image_slice_pitch;
++ cl_uint num_mip_levels;
++ cl_uint num_samples;
++ cl_mem buffer;
++} cl_image_desc;
+
+ typedef struct _cl_buffer_region {
+ size_t origin;
+diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h
+index 043b048..9a2f17a 100644
+--- a/include/CL/cl_platform.h
++++ b/include/CL/cl_platform.h
+@@ -58,6 +58,8 @@ extern "C" {
+ #define CL_EXT_SUFFIX__VERSION_1_0
+ #define CL_API_SUFFIX__VERSION_1_1
+ #define CL_EXT_SUFFIX__VERSION_1_1
++ #define CL_API_SUFFIX__VERSION_1_2
++ #define CL_EXT_SUFFIX__VERSION_1_2
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #endif
+
+diff --git a/src/cl_mem.h b/src/cl_mem.h
+index 66518a6..d00743f 100644
+--- a/src/cl_mem.h
++++ b/src/cl_mem.h
+@@ -29,18 +29,6 @@
+ #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
+ #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
+ #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
+-typedef struct _cl_image_desc {
+- cl_mem_object_type image_type;
+- size_t image_width;
+- size_t image_height;
+- size_t image_depth;
+- size_t image_array_size;
+- size_t image_row_pitch;
+- size_t image_slice_pitch;
+- cl_uint num_mip_levels;
+- cl_uint num_samples;
+- cl_mem buffer;
+-} cl_image_desc;
+ #endif
+
+ typedef enum cl_image_tiling {
+--
+1.7.10.4
+
diff --git a/debian/patches/series b/debian/patches/series
index a4fd86a..5674dd4 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -3,4 +3,16 @@ flags
khronos
deprecated-in-utest
private
-opencl-c-version
+0001-support-built-in-function-smoothstep.patch
+0002-test-function-smoothstep.patch
+0003-support-built-in-function-bitselect.patch
+0004-test-built-in-function-bitselect.patch
+0005-add-built-in-function-mad_sat.patch
+0006-test-function-mad_sat.patch
+0007-built-in-function-sign.patch
+0008-test-built-in-function-sign.patch
+0009-fix-vectorial-built-in-functions-min-max-clamp.patch
+0010-improve-clCreateContext-conformance.patch
+0011-Improve-the-clEnqueueMapBuffer-and-clCreateBuffer-AP.patch
+0012-support-clGetImageInfo.patch
+0013-Add-vector-argument-test-case.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git
More information about the Pkg-opencl-devel
mailing list