[Pkg-opencl-devel] [beignet] 19/47: Imported Debian patch 0.1+git20130619+42967d2-1

Andreas Beckmann anbe at moszumanska.debian.org
Fri Oct 31 21:45:48 UTC 2014


This is an automated email from the git hooks/post-receive script.

anbe pushed a commit to branch master
in repository beignet.

commit 35e5a1c6b34c9f127dee7f94b6cbd11385e901e6
Author: Simon Richter <sjr at debian.org>
Date:   Wed Jun 19 20:48:03 2013 +0200

    Imported Debian patch 0.1+git20130619+42967d2-1
---
 debian/changelog                                   |    8 +
 debian/control                                     |    7 +-
 ...0001-Generate-all-supported-as_-functions.patch |   68 +-
 .../0002-Define-all-convert_-functions.patch       |   43 +-
 ...ng-and-ulong-types-to-generated-functions.patch |  141 +-
 .../0004-Add-vector-argument-test-case.patch       |   35 +-
 .../0005-Fix-several-typos-in-unit-test.patch      |   58 +
 debian/patches/0006-Support-64-bit-float.patch     |  842 ++++
 .../patches/0007-test-case-for-64-bit-float.patch  |  159 +
 ...d-OpenCL-1.2-definitions-required-for-ICD.patch |   95 +
 ...hr_fp64-extension-for-OpenCL-stdlib-heade.patch |   33 +
 .../patches/0010-Define-double-vector-types.patch  |   23 +
 ...ration-of-convert_-and-as_-functions-for-.patch | 1430 ++++++
 .../0012-GBE-Fixed-one-bug-in-scalarize-pass.patch |   40 +
 debian/patches/debug                               |    6 +-
 debian/patches/deprecated-in-utest                 |   17 +
 debian/patches/flags                               |   12 +-
 debian/patches/khronos                             | 4724 ++++++++++++++++++--
 debian/patches/private                             |   14 +-
 debian/patches/series                              |    8 +
 debian/source/include-binaries                     |  152 +
 21 files changed, 7268 insertions(+), 647 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 37da78a..bc2b5ab 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,11 @@
+beignet (0.1+git20130619+42967d2-1) unstable; urgency=low
+
+  * New upstream release
+  * Build against Mesa 9
+  * Enable GL sharing extension
+
+ -- Simon Richter <sjr at debian.org>  Wed, 19 Jun 2013 20:48:03 +0200
+
 beignet (0.1+git20130614+89b5e40-2) unstable; urgency=low
 
   * Add Ubuntu support
diff --git a/debian/control b/debian/control
index a426429..ca0d315 100644
--- a/debian/control
+++ b/debian/control
@@ -7,10 +7,9 @@ Build-Depends: debhelper (>= 9), cmake, pkg-config,
  llvm-dev (>= 1:3.2),
  libclang-dev (>= 1:3.2) | libclang-dev (>= 3.2),
  libclang-dev (>= 1:3.2) | libclang-dev (<< 1:0),
- libclang-dev (>= 1:3.2) | libgl1-mesa-dev (>= 9),
- libclang-dev (>= 1:3.2) | libegl1-mesa-dev (>= 9),
- libclang-dev (>= 1:3.2) | libgbm-dev (>= 9)
-Build-Conflicts: libegl1-mesa-dev (<< 9), libgbm-dev (<< 9)
+ libgl1-mesa-dev (>= 9),
+ libegl1-mesa-dev (>= 9),
+ libgbm-dev (>= 9)
 Standards-Version: 3.9.4
 Section: libs
 Homepage: http://cgit.freedesktop.org/beignet/
diff --git a/debian/patches/0001-Generate-all-supported-as_-functions.patch b/debian/patches/0001-Generate-all-supported-as_-functions.patch
index 45276c2..d2a8b75 100644
--- a/debian/patches/0001-Generate-all-supported-as_-functions.patch
+++ b/debian/patches/0001-Generate-all-supported-as_-functions.patch
@@ -1,7 +1,7 @@
-From a1926ba22c15aee973d651d700fdc7b94cd8bf4d Mon Sep 17 00:00:00 2001
+From d276ed9d54e7026a777c80048a91e8dd078c4319 Mon Sep 17 00:00:00 2001
 From: Simon Richter <Simon.Richter at hogyros.de>
 Date: Mon, 13 May 2013 22:43:34 +0200
-Subject: [PATCH 1/4] Generate all supported as_* functions
+Subject: [PATCH 01/12] Generate all supported as_* functions
 To: beignet at lists.freedesktop.org
 
 This adds support for all reinterpreting type conversions currently
@@ -21,11 +21,10 @@ update_as.sh script.
  create mode 100755 backend/src/update.sh
  create mode 100755 backend/src/update_as.sh
 
-diff --git a/backend/src/gen_as.sh b/backend/src/gen_as.sh
-new file mode 100755
-index 0000000..76fedf8
---- /dev/null
-+++ b/backend/src/gen_as.sh
+Index: beignet-0.1+git20130619+42967d2/backend/src/gen_as.sh
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/backend/src/gen_as.sh	2013-06-19 21:04:37.838666758 +0200
 @@ -0,0 +1,83 @@
 +#! /bin/sh -e
 +
@@ -110,11 +109,10 @@ index 0000000..76fedf8
 +        done
 +
 +done
-diff --git a/backend/src/genconfig.sh b/backend/src/genconfig.sh
-new file mode 100644
-index 0000000..60edafd
---- /dev/null
-+++ b/backend/src/genconfig.sh
+Index: beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh	2013-06-19 21:04:37.838666758 +0200
 @@ -0,0 +1,11 @@
 +#! /bin/false
 +# This is to be sourced by the generation scripts
@@ -127,17 +125,22 @@ index 0000000..60edafd
 +
 +## No user serviceable parts below here
 +
-diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
-index 46b81e1..d00de44 100644
---- a/backend/src/ocl_stdlib.h
-+++ b/backend/src/ocl_stdlib.h
-@@ -91,15 +91,1138 @@ typedef size_t __event_t;
+Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:22.050667462 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:37.838666758 +0200
+@@ -91,15 +91,1138 @@
  /////////////////////////////////////////////////////////////////////////////
  // OpenCL conversions & type casting
  /////////////////////////////////////////////////////////////////////////////
 -union type_cast_4_b {
 -  float f;
 -  uchar4 u4;
+-};
+-uchar4 INLINE_OVERLOADABLE as_uchar4(float f) {
+-    union type_cast_4_b u;
+-    u.f = f;
+-    return u.u4;
 +
 +// ##BEGIN_AS##
 +union _type_cast_1_b {
@@ -1104,11 +1107,7 @@ index 46b81e1..d00de44 100644
 +  short16 _short16;
 +  ushort16 _ushort16;
 +  float8 _float8;
- };
--uchar4 INLINE_OVERLOADABLE as_uchar4(float f) {
--    union type_cast_4_b u;
--    u.f = f;
--    return u.u4;
++};
 +
 +INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
 +  union _type_cast_32_b u;
@@ -1138,7 +1137,7 @@ index 46b81e1..d00de44 100644
 +  union _type_cast_32_b u;
 +  u._uint8 = v;
 +  return u._int8;
- }
++}
 +
 +INLINE OVERLOADABLE short16 as_short16(uint8 v) {
 +  union _type_cast_32_b u;
@@ -1270,26 +1269,24 @@ index 46b81e1..d00de44 100644
 +  union _type_cast_64_b u;
 +  u._float16 = v;
 +  return u._uint16;
-+}
+ }
 +
 +// ##END_AS##
 +
  #define DEF(type, n, type2) type##n INLINE_OVERLOADABLE convert_##type##n(type2##n d) { \
      return (type##n)((type)(d.s0), (type)(d.s1), (type)(d.s2), (type)(d.s3)); \
   }
-diff --git a/backend/src/update.sh b/backend/src/update.sh
-new file mode 100755
-index 0000000..4f9af8c
---- /dev/null
-+++ b/backend/src/update.sh
+Index: beignet-0.1+git20130619+42967d2/backend/src/update.sh
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/backend/src/update.sh	2013-06-19 21:04:37.838666758 +0200
 @@ -0,0 +1,2 @@
 +#! /bin/sh -e
 +./update_as.sh
-diff --git a/backend/src/update_as.sh b/backend/src/update_as.sh
-new file mode 100755
-index 0000000..54b4191
---- /dev/null
-+++ b/backend/src/update_as.sh
+Index: beignet-0.1+git20130619+42967d2/backend/src/update_as.sh
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/backend/src/update_as.sh	2013-06-19 21:04:37.838666758 +0200
 @@ -0,0 +1,11 @@
 +#! /bin/sh -e
 +
@@ -1302,6 +1299,3 @@ index 0000000..54b4191
 +exec >&2
 +
 +mv $STDLIB_HEADER.tmp $STDLIB_HEADER
--- 
-1.7.10.4
-
diff --git a/debian/patches/0002-Define-all-convert_-functions.patch b/debian/patches/0002-Define-all-convert_-functions.patch
index a0e5c20..9db6b85 100644
--- a/debian/patches/0002-Define-all-convert_-functions.patch
+++ b/debian/patches/0002-Define-all-convert_-functions.patch
@@ -1,7 +1,7 @@
-From 1900bf07f138edbf956e01618c304a7b10c59a9b Mon Sep 17 00:00:00 2001
+From 05cfdc1a307a209295263322121b63c5d244d613 Mon Sep 17 00:00:00 2001
 From: Simon Richter <Simon.Richter at hogyros.de>
 Date: Tue, 14 May 2013 17:04:56 +0200
-Subject: [PATCH 2/4] Define all convert_* functions.
+Subject: [PATCH 02/12] Define all convert_* functions.
 To: beignet at lists.freedesktop.org
 
 These functions convert between vectors of the same length by casting each
@@ -15,11 +15,10 @@ member in turn.
  create mode 100755 backend/src/gen_convert.sh
  create mode 100755 backend/src/update_convert.sh
 
-diff --git a/backend/src/gen_convert.sh b/backend/src/gen_convert.sh
-new file mode 100755
-index 0000000..74fc73c
---- /dev/null
-+++ b/backend/src/gen_convert.sh
+Index: beignet-0.1+git20130619+42967d2/backend/src/gen_convert.sh
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/backend/src/gen_convert.sh	2013-06-19 21:04:39.250666695 +0200
 @@ -0,0 +1,52 @@
 +#! /bin/sh -e
 +
@@ -73,11 +72,11 @@ index 0000000..74fc73c
 +                done
 +        done
 +done
-diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
-index d00de44..913917f 100644
---- a/backend/src/ocl_stdlib.h
-+++ b/backend/src/ocl_stdlib.h
-@@ -1223,29 +1223,849 @@ INLINE OVERLOADABLE uint16 as_uint16(float16 v) {
+Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:37.838666758 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:39.250666695 +0200
+@@ -1223,29 +1223,849 @@
  
  // ##END_AS##
  
@@ -950,19 +949,18 @@ index d00de44..913917f 100644
  /////////////////////////////////////////////////////////////////////////////
  // OpenCL preprocessor directives & macros
  /////////////////////////////////////////////////////////////////////////////
-diff --git a/backend/src/update.sh b/backend/src/update.sh
-index 4f9af8c..0e5f8c0 100755
---- a/backend/src/update.sh
-+++ b/backend/src/update.sh
+Index: beignet-0.1+git20130619+42967d2/backend/src/update.sh
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/update.sh	2013-06-19 21:04:37.838666758 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/update.sh	2013-06-19 21:04:39.250666695 +0200
 @@ -1,2 +1,3 @@
  #! /bin/sh -e
  ./update_as.sh
 +./update_convert.sh
-diff --git a/backend/src/update_convert.sh b/backend/src/update_convert.sh
-new file mode 100755
-index 0000000..f1fcd36
---- /dev/null
-+++ b/backend/src/update_convert.sh
+Index: beignet-0.1+git20130619+42967d2/backend/src/update_convert.sh
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/backend/src/update_convert.sh	2013-06-19 21:04:39.250666695 +0200
 @@ -0,0 +1,11 @@
 +#! /bin/sh -e
 +
@@ -975,6 +973,3 @@ index 0000000..f1fcd36
 +exec >&2
 +
 +mv $STDLIB_HEADER.tmp $STDLIB_HEADER
--- 
-1.7.10.4
-
diff --git a/debian/patches/0003-Add-long-and-ulong-types-to-generated-functions.patch b/debian/patches/0003-Add-long-and-ulong-types-to-generated-functions.patch
index 7e7f81d..83358df 100644
--- a/debian/patches/0003-Add-long-and-ulong-types-to-generated-functions.patch
+++ b/debian/patches/0003-Add-long-and-ulong-types-to-generated-functions.patch
@@ -1,7 +1,7 @@
-From 44161ff1568479390464c0b0a282f5aeeb86915d Mon Sep 17 00:00:00 2001
+From f3c047e020d2e9f6d84cbcc10b2391d980572e3d Mon Sep 17 00:00:00 2001
 From: Simon Richter <Simon.Richter at hogyros.de>
 Date: Tue, 14 May 2013 17:04:57 +0200
-Subject: [PATCH 3/4] Add long and ulong types to generated functions.
+Subject: [PATCH 03/12] Add long and ulong types to generated functions.
 To: beignet at lists.freedesktop.org
 
 This enables all generated functions for 64 bit integers.
@@ -10,10 +10,10 @@ This enables all generated functions for 64 bit integers.
  backend/src/ocl_stdlib.h | 1248 +++++++++++++++++++++++++++++++++++++++++++++-
  2 files changed, 1234 insertions(+), 16 deletions(-)
 
-diff --git a/backend/src/genconfig.sh b/backend/src/genconfig.sh
-index 60edafd..a3ba3f9 100644
---- a/backend/src/genconfig.sh
-+++ b/backend/src/genconfig.sh
+Index: beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/genconfig.sh	2013-06-19 21:04:37.838666758 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh	2013-06-19 21:04:40.598666635 +0200
 @@ -2,7 +2,7 @@
  # This is to be sourced by the generation scripts
  
@@ -23,11 +23,11 @@ index 60edafd..a3ba3f9 100644
  
  # Supported vector lengths
  VECTOR_LENGTHS="1 2 3 4 8 16"
-diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
-index 913917f..2f55184 100644
---- a/backend/src/ocl_stdlib.h
-+++ b/backend/src/ocl_stdlib.h
-@@ -486,6 +486,8 @@ INLINE OVERLOADABLE short3 as_short3(ushort3 v) {
+Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:39.250666695 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:40.602666635 +0200
+@@ -486,6 +486,8 @@
  }
  
  union _type_cast_8_b {
@@ -36,7 +36,7 @@ index 913917f..2f55184 100644
    int2 _int2;
    uint2 _uint2;
    short4 _short4;
-@@ -495,6 +497,114 @@ union _type_cast_8_b {
+@@ -495,6 +497,114 @@
    float2 _float2;
  };
  
@@ -151,7 +151,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE uint2 as_uint2(int2 v) {
    union _type_cast_8_b u;
    u._int2 = v;
-@@ -531,6 +641,18 @@ INLINE OVERLOADABLE float2 as_float2(int2 v) {
+@@ -531,6 +641,18 @@
    return u._float2;
  }
  
@@ -170,7 +170,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 as_int2(uint2 v) {
    union _type_cast_8_b u;
    u._uint2 = v;
-@@ -567,6 +689,18 @@ INLINE OVERLOADABLE float2 as_float2(uint2 v) {
+@@ -567,6 +689,18 @@
    return u._float2;
  }
  
@@ -189,7 +189,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 as_int2(short4 v) {
    union _type_cast_8_b u;
    u._short4 = v;
-@@ -603,6 +737,18 @@ INLINE OVERLOADABLE float2 as_float2(short4 v) {
+@@ -603,6 +737,18 @@
    return u._float2;
  }
  
@@ -208,7 +208,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 as_int2(ushort4 v) {
    union _type_cast_8_b u;
    u._ushort4 = v;
-@@ -639,6 +785,18 @@ INLINE OVERLOADABLE float2 as_float2(ushort4 v) {
+@@ -639,6 +785,18 @@
    return u._float2;
  }
  
@@ -227,7 +227,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 as_int2(char8 v) {
    union _type_cast_8_b u;
    u._char8 = v;
-@@ -675,6 +833,18 @@ INLINE OVERLOADABLE float2 as_float2(char8 v) {
+@@ -675,6 +833,18 @@
    return u._float2;
  }
  
@@ -246,7 +246,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 as_int2(uchar8 v) {
    union _type_cast_8_b u;
    u._uchar8 = v;
-@@ -711,6 +881,18 @@ INLINE OVERLOADABLE float2 as_float2(uchar8 v) {
+@@ -711,6 +881,18 @@
    return u._float2;
  }
  
@@ -265,7 +265,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 as_int2(float2 v) {
    union _type_cast_8_b u;
    u._float2 = v;
-@@ -790,6 +972,8 @@ INLINE OVERLOADABLE uint3 as_uint3(float3 v) {
+@@ -790,6 +972,8 @@
  }
  
  union _type_cast_16_b {
@@ -274,7 +274,7 @@ index 913917f..2f55184 100644
    int4 _int4;
    uint4 _uint4;
    short8 _short8;
-@@ -799,6 +983,114 @@ union _type_cast_16_b {
+@@ -799,6 +983,114 @@
    float4 _float4;
  };
  
@@ -389,7 +389,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE uint4 as_uint4(int4 v) {
    union _type_cast_16_b u;
    u._int4 = v;
-@@ -835,6 +1127,18 @@ INLINE OVERLOADABLE float4 as_float4(int4 v) {
+@@ -835,6 +1127,18 @@
    return u._float4;
  }
  
@@ -408,7 +408,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 as_int4(uint4 v) {
    union _type_cast_16_b u;
    u._uint4 = v;
-@@ -871,6 +1175,18 @@ INLINE OVERLOADABLE float4 as_float4(uint4 v) {
+@@ -871,6 +1175,18 @@
    return u._float4;
  }
  
@@ -427,7 +427,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 as_int4(short8 v) {
    union _type_cast_16_b u;
    u._short8 = v;
-@@ -907,6 +1223,18 @@ INLINE OVERLOADABLE float4 as_float4(short8 v) {
+@@ -907,6 +1223,18 @@
    return u._float4;
  }
  
@@ -446,7 +446,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 as_int4(ushort8 v) {
    union _type_cast_16_b u;
    u._ushort8 = v;
-@@ -943,6 +1271,18 @@ INLINE OVERLOADABLE float4 as_float4(ushort8 v) {
+@@ -943,6 +1271,18 @@
    return u._float4;
  }
  
@@ -465,7 +465,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 as_int4(char16 v) {
    union _type_cast_16_b u;
    u._char16 = v;
-@@ -979,6 +1319,18 @@ INLINE OVERLOADABLE float4 as_float4(char16 v) {
+@@ -979,6 +1319,18 @@
    return u._float4;
  }
  
@@ -484,7 +484,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 as_int4(uchar16 v) {
    union _type_cast_16_b u;
    u._uchar16 = v;
-@@ -1015,6 +1367,18 @@ INLINE OVERLOADABLE float4 as_float4(uchar16 v) {
+@@ -1015,6 +1367,18 @@
    return u._float4;
  }
  
@@ -503,7 +503,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 as_int4(float4 v) {
    union _type_cast_16_b u;
    u._float4 = v;
-@@ -1051,7 +1415,26 @@ INLINE OVERLOADABLE uchar16 as_uchar16(float4 v) {
+@@ -1051,7 +1415,26 @@
    return u._uchar16;
  }
  
@@ -530,7 +530,7 @@ index 913917f..2f55184 100644
    int8 _int8;
    uint8 _uint8;
    short16 _short16;
-@@ -1059,30 +1442,126 @@ union _type_cast_32_b {
+@@ -1059,30 +1442,126 @@
    float8 _float8;
  };
  
@@ -664,7 +664,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 as_int8(uint8 v) {
    union _type_cast_32_b u;
    u._uint8 = v;
-@@ -1107,6 +1586,18 @@ INLINE OVERLOADABLE float8 as_float8(uint8 v) {
+@@ -1107,6 +1586,18 @@
    return u._float8;
  }
  
@@ -683,7 +683,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 as_int8(short16 v) {
    union _type_cast_32_b u;
    u._short16 = v;
-@@ -1131,6 +1622,18 @@ INLINE OVERLOADABLE float8 as_float8(short16 v) {
+@@ -1131,6 +1622,18 @@
    return u._float8;
  }
  
@@ -702,7 +702,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 as_int8(ushort16 v) {
    union _type_cast_32_b u;
    u._ushort16 = v;
-@@ -1155,6 +1658,18 @@ INLINE OVERLOADABLE float8 as_float8(ushort16 v) {
+@@ -1155,6 +1658,18 @@
    return u._float8;
  }
  
@@ -721,7 +721,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 as_int8(float8 v) {
    union _type_cast_32_b u;
    u._float8 = v;
-@@ -1180,11 +1695,73 @@ INLINE OVERLOADABLE ushort16 as_ushort16(float8 v) {
+@@ -1180,11 +1695,73 @@
  }
  
  union _type_cast_64_b {
@@ -795,7 +795,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE uint16 as_uint16(int16 v) {
    union _type_cast_64_b u;
    u._int16 = v;
-@@ -1197,6 +1774,18 @@ INLINE OVERLOADABLE float16 as_float16(int16 v) {
+@@ -1197,6 +1774,18 @@
    return u._float16;
  }
  
@@ -814,7 +814,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int16 as_int16(uint16 v) {
    union _type_cast_64_b u;
    u._uint16 = v;
-@@ -1209,6 +1798,18 @@ INLINE OVERLOADABLE float16 as_float16(uint16 v) {
+@@ -1209,6 +1798,18 @@
    return u._float16;
  }
  
@@ -833,7 +833,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int16 as_int16(float16 v) {
    union _type_cast_64_b u;
    u._float16 = v;
-@@ -1221,9 +1822,98 @@ INLINE OVERLOADABLE uint16 as_uint16(float16 v) {
+@@ -1221,9 +1822,98 @@
    return u._uint16;
  }
  
@@ -932,7 +932,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE uint2 convert_uint2(int2 v) {
    return (uint2)((uint)(v.s0), (uint)(v.s1));
  }
-@@ -1248,6 +1938,14 @@ INLINE OVERLOADABLE float2 convert_float2(int2 v) {
+@@ -1248,6 +1938,14 @@
    return (float2)((float)(v.s0), (float)(v.s1));
  }
  
@@ -947,7 +947,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 convert_int2(uint2 v) {
    return (int2)((int)(v.s0), (int)(v.s1));
  }
-@@ -1272,6 +1970,14 @@ INLINE OVERLOADABLE float2 convert_float2(uint2 v) {
+@@ -1272,6 +1970,14 @@
    return (float2)((float)(v.s0), (float)(v.s1));
  }
  
@@ -962,7 +962,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 convert_int2(short2 v) {
    return (int2)((int)(v.s0), (int)(v.s1));
  }
-@@ -1296,6 +2002,14 @@ INLINE OVERLOADABLE float2 convert_float2(short2 v) {
+@@ -1296,6 +2002,14 @@
    return (float2)((float)(v.s0), (float)(v.s1));
  }
  
@@ -977,7 +977,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 convert_int2(ushort2 v) {
    return (int2)((int)(v.s0), (int)(v.s1));
  }
-@@ -1320,6 +2034,14 @@ INLINE OVERLOADABLE float2 convert_float2(ushort2 v) {
+@@ -1320,6 +2034,14 @@
    return (float2)((float)(v.s0), (float)(v.s1));
  }
  
@@ -992,7 +992,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 convert_int2(char2 v) {
    return (int2)((int)(v.s0), (int)(v.s1));
  }
-@@ -1344,6 +2066,14 @@ INLINE OVERLOADABLE float2 convert_float2(char2 v) {
+@@ -1344,6 +2066,14 @@
    return (float2)((float)(v.s0), (float)(v.s1));
  }
  
@@ -1007,7 +1007,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 convert_int2(uchar2 v) {
    return (int2)((int)(v.s0), (int)(v.s1));
  }
-@@ -1368,6 +2098,14 @@ INLINE OVERLOADABLE float2 convert_float2(uchar2 v) {
+@@ -1368,6 +2098,14 @@
    return (float2)((float)(v.s0), (float)(v.s1));
  }
  
@@ -1022,7 +1022,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int2 convert_int2(float2 v) {
    return (int2)((int)(v.s0), (int)(v.s1));
  }
-@@ -1376,20 +2114,92 @@ INLINE OVERLOADABLE uint2 convert_uint2(float2 v) {
+@@ -1376,20 +2114,92 @@
    return (uint2)((uint)(v.s0), (uint)(v.s1));
  }
  
@@ -1123,7 +1123,7 @@ index 913917f..2f55184 100644
  }
  
  INLINE OVERLOADABLE uint3 convert_uint3(int3 v) {
-@@ -1416,6 +2226,14 @@ INLINE OVERLOADABLE float3 convert_float3(int3 v) {
+@@ -1416,6 +2226,14 @@
    return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
  }
  
@@ -1138,7 +1138,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int3 convert_int3(uint3 v) {
    return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
  }
-@@ -1440,6 +2258,14 @@ INLINE OVERLOADABLE float3 convert_float3(uint3 v) {
+@@ -1440,6 +2258,14 @@
    return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
  }
  
@@ -1153,7 +1153,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int3 convert_int3(short3 v) {
    return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
  }
-@@ -1464,6 +2290,14 @@ INLINE OVERLOADABLE float3 convert_float3(short3 v) {
+@@ -1464,6 +2290,14 @@
    return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
  }
  
@@ -1168,7 +1168,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int3 convert_int3(ushort3 v) {
    return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
  }
-@@ -1488,6 +2322,14 @@ INLINE OVERLOADABLE float3 convert_float3(ushort3 v) {
+@@ -1488,6 +2322,14 @@
    return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
  }
  
@@ -1183,7 +1183,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int3 convert_int3(char3 v) {
    return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
  }
-@@ -1512,6 +2354,14 @@ INLINE OVERLOADABLE float3 convert_float3(char3 v) {
+@@ -1512,6 +2354,14 @@
    return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
  }
  
@@ -1198,7 +1198,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int3 convert_int3(uchar3 v) {
    return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
  }
-@@ -1536,6 +2386,14 @@ INLINE OVERLOADABLE float3 convert_float3(uchar3 v) {
+@@ -1536,6 +2386,14 @@
    return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
  }
  
@@ -1213,7 +1213,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int3 convert_int3(float3 v) {
    return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
  }
-@@ -1560,6 +2418,78 @@ INLINE OVERLOADABLE uchar3 convert_uchar3(float3 v) {
+@@ -1560,6 +2418,78 @@
    return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
  }
  
@@ -1292,7 +1292,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE uint4 convert_uint4(int4 v) {
    return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
  }
-@@ -1584,6 +2514,14 @@ INLINE OVERLOADABLE float4 convert_float4(int4 v) {
+@@ -1584,6 +2514,14 @@
    return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
  }
  
@@ -1307,7 +1307,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 convert_int4(uint4 v) {
    return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
  }
-@@ -1608,6 +2546,14 @@ INLINE OVERLOADABLE float4 convert_float4(uint4 v) {
+@@ -1608,6 +2546,14 @@
    return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
  }
  
@@ -1322,7 +1322,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 convert_int4(short4 v) {
    return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
  }
-@@ -1632,6 +2578,14 @@ INLINE OVERLOADABLE float4 convert_float4(short4 v) {
+@@ -1632,6 +2578,14 @@
    return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
  }
  
@@ -1337,7 +1337,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 convert_int4(ushort4 v) {
    return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
  }
-@@ -1656,6 +2610,14 @@ INLINE OVERLOADABLE float4 convert_float4(ushort4 v) {
+@@ -1656,6 +2610,14 @@
    return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
  }
  
@@ -1352,7 +1352,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 convert_int4(char4 v) {
    return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
  }
-@@ -1680,6 +2642,14 @@ INLINE OVERLOADABLE float4 convert_float4(char4 v) {
+@@ -1680,6 +2642,14 @@
    return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
  }
  
@@ -1367,7 +1367,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 convert_int4(uchar4 v) {
    return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
  }
-@@ -1704,6 +2674,14 @@ INLINE OVERLOADABLE float4 convert_float4(uchar4 v) {
+@@ -1704,6 +2674,14 @@
    return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
  }
  
@@ -1382,7 +1382,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int4 convert_int4(float4 v) {
    return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
  }
-@@ -1728,6 +2706,78 @@ INLINE OVERLOADABLE uchar4 convert_uchar4(float4 v) {
+@@ -1728,6 +2706,78 @@
    return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
  }
  
@@ -1461,7 +1461,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE uint8 convert_uint8(int8 v) {
    return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
  }
-@@ -1752,6 +2802,14 @@ INLINE OVERLOADABLE float8 convert_float8(int8 v) {
+@@ -1752,6 +2802,14 @@
    return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
  }
  
@@ -1476,7 +1476,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 convert_int8(uint8 v) {
    return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
  }
-@@ -1776,6 +2834,14 @@ INLINE OVERLOADABLE float8 convert_float8(uint8 v) {
+@@ -1776,6 +2834,14 @@
    return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
  }
  
@@ -1491,7 +1491,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 convert_int8(short8 v) {
    return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
  }
-@@ -1800,6 +2866,14 @@ INLINE OVERLOADABLE float8 convert_float8(short8 v) {
+@@ -1800,6 +2866,14 @@
    return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
  }
  
@@ -1506,7 +1506,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 convert_int8(ushort8 v) {
    return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
  }
-@@ -1824,6 +2898,14 @@ INLINE OVERLOADABLE float8 convert_float8(ushort8 v) {
+@@ -1824,6 +2898,14 @@
    return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
  }
  
@@ -1521,7 +1521,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 convert_int8(char8 v) {
    return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
  }
-@@ -1848,6 +2930,14 @@ INLINE OVERLOADABLE float8 convert_float8(char8 v) {
+@@ -1848,6 +2930,14 @@
    return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
  }
  
@@ -1536,7 +1536,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 convert_int8(uchar8 v) {
    return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
  }
-@@ -1872,6 +2962,14 @@ INLINE OVERLOADABLE float8 convert_float8(uchar8 v) {
+@@ -1872,6 +2962,14 @@
    return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
  }
  
@@ -1551,7 +1551,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int8 convert_int8(float8 v) {
    return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
  }
-@@ -1896,6 +2994,78 @@ INLINE OVERLOADABLE uchar8 convert_uchar8(float8 v) {
+@@ -1896,6 +2994,78 @@
    return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
  }
  
@@ -1630,7 +1630,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE uint16 convert_uint16(int16 v) {
    return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
  }
-@@ -1920,6 +3090,14 @@ INLINE OVERLOADABLE float16 convert_float16(int16 v) {
+@@ -1920,6 +3090,14 @@
    return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
  }
  
@@ -1645,7 +1645,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int16 convert_int16(uint16 v) {
    return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
  }
-@@ -1944,6 +3122,14 @@ INLINE OVERLOADABLE float16 convert_float16(uint16 v) {
+@@ -1944,6 +3122,14 @@
    return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
  }
  
@@ -1660,7 +1660,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int16 convert_int16(short16 v) {
    return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
  }
-@@ -1968,6 +3154,14 @@ INLINE OVERLOADABLE float16 convert_float16(short16 v) {
+@@ -1968,6 +3154,14 @@
    return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
  }
  
@@ -1675,7 +1675,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int16 convert_int16(ushort16 v) {
    return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
  }
-@@ -1992,6 +3186,14 @@ INLINE OVERLOADABLE float16 convert_float16(ushort16 v) {
+@@ -1992,6 +3186,14 @@
    return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
  }
  
@@ -1690,7 +1690,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int16 convert_int16(char16 v) {
    return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
  }
-@@ -2016,6 +3218,14 @@ INLINE OVERLOADABLE float16 convert_float16(char16 v) {
+@@ -2016,6 +3218,14 @@
    return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
  }
  
@@ -1705,7 +1705,7 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int16 convert_int16(uchar16 v) {
    return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
  }
-@@ -2040,6 +3250,14 @@ INLINE OVERLOADABLE float16 convert_float16(uchar16 v) {
+@@ -2040,6 +3250,14 @@
    return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
  }
  
@@ -1720,6 +1720,3 @@ index 913917f..2f55184 100644
  INLINE OVERLOADABLE int16 convert_int16(float16 v) {
    return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
  }
--- 
-1.7.10.4
-
diff --git a/debian/patches/0004-Add-vector-argument-test-case.patch b/debian/patches/0004-Add-vector-argument-test-case.patch
index cf43d98..ed1564d 100644
--- a/debian/patches/0004-Add-vector-argument-test-case.patch
+++ b/debian/patches/0004-Add-vector-argument-test-case.patch
@@ -1,7 +1,7 @@
-From e9f476243902f2f3989c880030b267c384d7c040 Mon Sep 17 00:00:00 2001
+From b5563b40490e799465a597dab817c9e603c24028 Mon Sep 17 00:00:00 2001
 From: Yang Rong <rong.r.yang at intel.com>
 Date: Thu, 16 May 2013 12:36:35 +0800
-Subject: [PATCH 4/4] Add vector argument test case.
+Subject: [PATCH 04/12] Add vector argument test case.
 To: beignet at lists.freedesktop.org
 
 Signed-off-by: Yang Rong <rong.r.yang at intel.com>
@@ -13,11 +13,10 @@ Signed-off-by: Yang Rong <rong.r.yang at intel.com>
  create mode 100644 kernels/compiler_function_argument2.cl
  create mode 100644 utests/compiler_function_argument2.cpp
 
-diff --git a/kernels/compiler_function_argument2.cl b/kernels/compiler_function_argument2.cl
-new file mode 100644
-index 0000000..0985dbd
---- /dev/null
-+++ b/kernels/compiler_function_argument2.cl
+Index: beignet-0.1+git20130619+42967d2/kernels/compiler_function_argument2.cl
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/kernels/compiler_function_argument2.cl	2013-06-19 21:04:43.270666516 +0200
 @@ -0,0 +1,6 @@
 +__kernel void
 +compiler_function_argument2(__global int *dst, int4 value)
@@ -25,11 +24,11 @@ index 0000000..0985dbd
 +  int id = (int)get_global_id(0);
 +  dst[id] = value.w;
 +}
-diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
-index e5c03ee..f0bbe46 100644
---- a/utests/CMakeLists.txt
-+++ b/utests/CMakeLists.txt
-@@ -34,6 +34,7 @@ set (utests_sources
+Index: beignet-0.1+git20130619+42967d2/utests/CMakeLists.txt
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/utests/CMakeLists.txt	2013-06-19 21:03:26.278669949 +0200
++++ beignet-0.1+git20130619+42967d2/utests/CMakeLists.txt	2013-06-19 21:04:43.270666516 +0200
+@@ -34,6 +34,7 @@
    compiler_fill_image_3d_2.cpp
    compiler_function_argument0.cpp
    compiler_function_argument1.cpp
@@ -37,11 +36,10 @@ index e5c03ee..f0bbe46 100644
    compiler_function_argument.cpp
    compiler_function_constant0.cpp
    compiler_function_constant1.cpp
-diff --git a/utests/compiler_function_argument2.cpp b/utests/compiler_function_argument2.cpp
-new file mode 100644
-index 0000000..1e398a9
---- /dev/null
-+++ b/utests/compiler_function_argument2.cpp
+Index: beignet-0.1+git20130619+42967d2/utests/compiler_function_argument2.cpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/utests/compiler_function_argument2.cpp	2013-06-19 21:04:43.270666516 +0200
 @@ -0,0 +1,26 @@
 +#include "utest_helper.hpp"
 +
@@ -69,6 +67,3 @@ index 0000000..1e398a9
 +}
 +
 +MAKE_UTEST_FROM_FUNCTION(compiler_function_argument2);
--- 
-1.7.10.4
-
diff --git a/debian/patches/0005-Fix-several-typos-in-unit-test.patch b/debian/patches/0005-Fix-several-typos-in-unit-test.patch
new file mode 100644
index 0000000..ac23650
--- /dev/null
+++ b/debian/patches/0005-Fix-several-typos-in-unit-test.patch
@@ -0,0 +1,58 @@
+From 050e16612260137274a71e3abe3bbcf607cc2f86 Mon Sep 17 00:00:00 2001
+From: Ruiling Song <ruiling.song at intel.com>
+Date: Wed, 19 Jun 2013 10:04:54 +0800
+Subject: [PATCH 05/12] Fix several typos in unit test.
+To: beignet at lists.freedesktop.org
+
+compiler_sub_bytes and compiler_sub_shorts
+
+Signed-off-by: Ruiling Song <ruiling.song at intel.com>
+---
+ utests/compiler_sub_bytes.cpp  |    4 ++--
+ utests/compiler_sub_shorts.cpp |    4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+Index: beignet-0.1+git20130619+42967d2/utests/compiler_sub_bytes.cpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/utests/compiler_sub_bytes.cpp	2013-06-19 21:03:25.994669961 +0200
++++ beignet-0.1+git20130619+42967d2/utests/compiler_sub_bytes.cpp	2013-06-19 21:04:44.686666453 +0200
+@@ -11,7 +11,7 @@
+   for (uint32_t i = 0; i < n; ++i) ((int8_t*)buf_data[0])[i] = (int8_t) rand();
+   for (uint32_t i = 0; i < n; ++i) ((int8_t*)buf_data[1])[i] = (int8_t) rand();
+   OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(int8_t), buf_data[0]);
+-  OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int8_t), buf_data[0]);
++  OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int8_t), buf_data[1]);
+   OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int8_t), NULL);
+ 
+   // Run the kernel
+@@ -25,7 +25,7 @@
+   // Check result
+   OCL_MAP_BUFFER(2);
+   for (uint32_t i = 0; i < n; ++i)
+-    OCL_ASSERT(((int8_t*)buf_data[2])[i] = ((int8_t*)buf_data[0])[i] - ((int8_t*)buf_data[1])[i]);
++    OCL_ASSERT(((int8_t*)buf_data[2])[i] == (int8_t)(((int8_t*)buf_data[0])[i] - ((int8_t*)buf_data[1])[i]));
+   free(buf_data[0]);
+   free(buf_data[1]);
+   buf_data[0] = buf_data[1] = NULL;
+Index: beignet-0.1+git20130619+42967d2/utests/compiler_sub_shorts.cpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/utests/compiler_sub_shorts.cpp	2013-06-19 21:03:25.994669961 +0200
++++ beignet-0.1+git20130619+42967d2/utests/compiler_sub_shorts.cpp	2013-06-19 21:04:44.686666453 +0200
+@@ -11,7 +11,7 @@
+   for (uint32_t i = 0; i < n; ++i) ((int16_t*)buf_data[0])[i] = (int16_t) rand();
+   for (uint32_t i = 0; i < n; ++i) ((int16_t*)buf_data[1])[i] = (int16_t) rand();
+   OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(int16_t), buf_data[0]);
+-  OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int16_t), buf_data[0]);
++  OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int16_t), buf_data[1]);
+   OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int16_t), NULL);
+ 
+   // Run the kernel
+@@ -25,7 +25,7 @@
+   // Check result
+   OCL_MAP_BUFFER(2);
+   for (uint32_t i = 0; i < n; ++i)
+-    OCL_ASSERT(((int16_t*)buf_data[2])[i] = ((int16_t*)buf_data[0])[i] - ((int16_t*)buf_data[1])[i]);
++    OCL_ASSERT(((int16_t*)buf_data[2])[i] == (int16_t)(((int16_t*)buf_data[0])[i] - ((int16_t*)buf_data[1])[i]));
+   free(buf_data[0]);
+   free(buf_data[1]);
+   buf_data[0] = buf_data[1] = NULL;
diff --git a/debian/patches/0006-Support-64-bit-float.patch b/debian/patches/0006-Support-64-bit-float.patch
new file mode 100644
index 0000000..9ca7880
--- /dev/null
+++ b/debian/patches/0006-Support-64-bit-float.patch
@@ -0,0 +1,842 @@
+From 3f27c4e6648ee4f98f27de6beaba713ee3c35985 Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 19 Jun 2013 12:40:35 +0800
+Subject: [PATCH 06/12] Support 64-bit float
+To: beignet at lists.freedesktop.org
+
+support arithmetic, store, load, and 64-bit float immediate
+
+example:
+
+  kernel void f(global double *src, global double *dst) {
+    int i = get_global_id(0);
+    double d = 1.234567890123456789;
+    dst[i] = d * (src[i] + d);
+  }
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+---
+ backend/src/backend/gen_context.cpp                |   17 +++
+ backend/src/backend/gen_context.hpp                |    2 +
+ backend/src/backend/gen_defs.hpp                   |    2 +
+ backend/src/backend/gen_encoder.cpp                |  140 ++++++++++++++++++-
+ backend/src/backend/gen_encoder.hpp                |    6 +
+ .../src/backend/gen_insn_gen7_schedule_info.hxx    |    2 +
+ backend/src/backend/gen_insn_selection.cpp         |  146 ++++++++++++++++++--
+ backend/src/backend/gen_insn_selection.hxx         |    4 +
+ backend/src/backend/gen_reg_allocation.cpp         |    1 -
+ backend/src/backend/gen_register.hpp               |   90 +++++++++++-
+ 10 files changed, 393 insertions(+), 17 deletions(-)
+
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_context.cpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_context.cpp	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_context.cpp	2013-06-19 21:04:46.030666393 +0200
+@@ -139,6 +139,7 @@
+     const GenRegister src = ra->genReg(insn.src(0));
+     switch (insn.opcode) {
+       case SEL_OP_MOV: p->MOV(dst, src); break;
++      case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src); break;
+       case SEL_OP_NOT: p->NOT(dst, src); break;
+       case SEL_OP_RNDD: p->RNDD(dst, src); break;
+       case SEL_OP_RNDU: p->RNDU(dst, src); break;
+@@ -153,6 +154,7 @@
+     const GenRegister src0 = ra->genReg(insn.src(0));
+     const GenRegister src1 = ra->genReg(insn.src(1));
+     switch (insn.opcode) {
++      case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
+       case SEL_OP_SEL:  p->SEL(dst, src0, src1); break;
+       case SEL_OP_AND:  p->AND(dst, src0, src1); break;
+       case SEL_OP_OR:   p->OR (dst, src0, src1);  break;
+@@ -269,6 +271,14 @@
+     p->pop();
+   }
+ 
++  void GenContext::emitReadFloat64Instruction(const SelectionInstruction &insn) {
++    const GenRegister dst = ra->genReg(insn.dst(0));
++    const GenRegister src = ra->genReg(insn.src(0));
++    const uint32_t bti = insn.extra.function;
++    const uint32_t elemNum = insn.extra.elem;
++    p->READ_FLOAT64(dst, src, bti, elemNum);
++  }
++
+   void GenContext::emitUntypedReadInstruction(const SelectionInstruction &insn) {
+     const GenRegister dst = ra->genReg(insn.dst(0));
+     const GenRegister src = ra->genReg(insn.src(0));
+@@ -277,6 +287,13 @@
+     p->UNTYPED_READ(dst, src, bti, elemNum);
+   }
+ 
++  void GenContext::emitWriteFloat64Instruction(const SelectionInstruction &insn) {
++    const GenRegister src = ra->genReg(insn.src(0));
++    const uint32_t bti = insn.extra.function;
++    const uint32_t elemNum = insn.extra.elem;
++    p->WRITE_FLOAT64(src, bti, elemNum);
++  }
++
+   void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
+     const GenRegister src = ra->genReg(insn.src(0));
+     const uint32_t bti = insn.extra.function;
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_context.hpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_context.hpp	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_context.hpp	2013-06-19 21:04:46.030666393 +0200
+@@ -87,6 +87,8 @@
+     void emitBarrierInstruction(const SelectionInstruction &insn);
+     void emitFenceInstruction(const SelectionInstruction &insn);
+     void emitMathInstruction(const SelectionInstruction &insn);
++    void emitReadFloat64Instruction(const SelectionInstruction &insn);
++    void emitWriteFloat64Instruction(const SelectionInstruction &insn);
+     void emitUntypedReadInstruction(const SelectionInstruction &insn);
+     void emitUntypedWriteInstruction(const SelectionInstruction &insn);
+     void emitByteGatherInstruction(const SelectionInstruction &insn);
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_defs.hpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_defs.hpp	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_defs.hpp	2013-06-19 21:04:46.030666393 +0200
+@@ -215,6 +215,7 @@
+ #define GEN_TYPE_VF  5 /* packed float vector, immediates only? */
+ #define GEN_TYPE_HF  6
+ #define GEN_TYPE_V   6 /* packed int vector, immediates only, uword dest only */
++#define GEN_TYPE_DF  6
+ #define GEN_TYPE_F   7
+ 
+ #define GEN_ARF_NULL                  0x00
+@@ -303,6 +304,7 @@
+ #define GEN_BYTE_SCATTER_BYTE   0
+ #define GEN_BYTE_SCATTER_WORD   1
+ #define GEN_BYTE_SCATTER_DWORD  2
++#define GEN_BYTE_SCATTER_QWORD  3
+ 
+ #define GEN_SAMPLER_RETURN_FORMAT_FLOAT32     0
+ #define GEN_SAMPLER_RETURN_FORMAT_UINT32      2
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_encoder.cpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_encoder.cpp	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_encoder.cpp	2013-06-19 21:04:46.030666393 +0200
+@@ -355,6 +355,64 @@
+     0
+   };
+ 
++  void GenEncoder::READ_FLOAT64(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
++    int w = curr.execWidth;
++    GenRegister r = GenRegister::retype(GenRegister::suboffset(src, w*2), GEN_TYPE_UD);
++    GenRegister hdr = GenRegister::h2(r);
++    GenRegister imm4 = GenRegister::immud(4);
++    push();
++      curr.execWidth = 8;
++      MOV(hdr,                            GenRegister::ud8grf(src.nr, 0));
++      ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
++      if (w == 16) {
++        MOV(GenRegister::offset(hdr, 1),    GenRegister::ud8grf(src.nr, 4));
++        ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
++      }
++    pop();
++    UNTYPED_READ(dst, hdr, bti, 1);
++    push();
++      curr.execWidth = 8;
++      MOV(hdr, w == 16 ? GenRegister::ud8grf(src.nr+1, 0) : GenRegister::retype(GenRegister::offset(src, 0, 16), GEN_TYPE_UD));
++      ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
++      if (w == 16) {
++        MOV(GenRegister::offset(hdr, 1),    GenRegister::ud8grf(src.nr + 1, 4));
++        ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
++      }
++    pop();
++    UNTYPED_READ(GenRegister::offset(dst, w / 8), hdr, bti, 1);
++  }
++
++  void GenEncoder::WRITE_FLOAT64(GenRegister msg, uint32_t bti, uint32_t elemNum) {
++    int w = curr.execWidth;
++    GenRegister r = GenRegister::retype(GenRegister::suboffset(msg, w*3), GEN_TYPE_UD);
++    r.type = GEN_TYPE_UD;
++    GenRegister hdr = GenRegister::h2(r);
++    GenRegister data = GenRegister::offset(r, w / 8);
++    GenRegister imm4 = GenRegister::immud(4);
++    push();
++      curr.execWidth = 8;
++      MOV(hdr,                            GenRegister::ud8grf(msg.nr, 0));
++      ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
++      if (w == 16) {
++        MOV(GenRegister::offset(hdr, 1),    GenRegister::ud8grf(msg.nr, 4));
++        ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
++      }
++    pop();
++    MOV(data, GenRegister::ud16grf(msg.nr + w / 8, 0));
++    UNTYPED_WRITE(hdr, bti, 1);
++    push();
++      curr.execWidth = 8;
++      MOV(hdr, w == 16 ? GenRegister::ud8grf(msg.nr+1, 0) : GenRegister::retype(GenRegister::offset(msg, 0, 16), GEN_TYPE_UD));
++      ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
++      if (w == 16) {
++        MOV(GenRegister::offset(hdr, 1),    GenRegister::ud8grf(msg.nr+1, 4));
++        ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
++      }
++    pop();
++    MOV(data, GenRegister::ud16grf(msg.nr + w / 4, 0));
++    UNTYPED_WRITE(hdr, bti, 1);
++  }
++
+   void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
+     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+     assert(elemNum >= 1 || elemNum <= 4);
+@@ -467,7 +525,19 @@
+   }
+ 
+   INLINE void alu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src) {
+-     if (needToSplitAlu1(p, dst, src) == false) {
++     if (dst.isdf() && src.isdf()) {
++       int w = p->curr.execWidth;
++       p->push();
++       GenInstruction *insn = p->next(opcode);
++       p->setHeader(insn);
++       p->setDst(insn, dst);
++       p->setSrc0(insn, src);
++       insn = p->next(opcode);
++       p->setHeader(insn);
++       p->setDst(insn, GenRegister::suboffset(dst, w / 2));
++       p->setSrc0(insn, GenRegister::suboffset(src, w / 2));
++       p->pop();
++     } else if (needToSplitAlu1(p, dst, src) == false) {
+        GenInstruction *insn = p->next(opcode);
+        p->setHeader(insn);
+        p->setDst(insn, dst);
+@@ -499,7 +569,21 @@
+                    GenRegister src0,
+                    GenRegister src1)
+   {
+-    if (needToSplitAlu2(p, dst, src0, src1) == false) {
++    if (dst.isdf() && src0.isdf() && src1.isdf()) {
++       int w = p->curr.execWidth;
++       p->push();
++       GenInstruction *insn = p->next(opcode);
++       p->setHeader(insn);
++       p->setDst(insn, dst);
++       p->setSrc0(insn, src0);
++       p->setSrc1(insn, src1);
++       insn = p->next(opcode);
++       p->setHeader(insn);
++       p->setDst(insn, GenRegister::suboffset(dst, w / 2));
++       p->setSrc0(insn, GenRegister::suboffset(src0, w / 2));
++       p->setSrc1(insn, GenRegister::suboffset(src1, w / 2));
++       p->pop();
++    } else if (needToSplitAlu2(p, dst, src0, src1) == false) {
+        GenInstruction *insn = p->next(opcode);
+        p->setHeader(insn);
+        p->setDst(insn, dst);
+@@ -620,6 +704,58 @@
+     alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
+   }
+ 
++  void GenEncoder::LOAD_DF_IMM(GenRegister dest, GenRegister src0) {
++    union { double d; unsigned u[2]; } u;
++    u.d = src0.value.df;
++    GenRegister r = GenRegister::retype(dest, GEN_TYPE_UD);
++    push();
++    curr.execWidth = 1;
++    MOV(r, GenRegister::immud(u.u[1]));
++    MOV(GenRegister::suboffset(r, 1), GenRegister::immud(u.u[0]));
++    pop();
++    r.type = GEN_TYPE_DF;
++    r.vstride = GEN_VERTICAL_STRIDE_0;
++    r.width = GEN_WIDTH_1;
++    r.hstride = GEN_HORIZONTAL_STRIDE_0;
++    push();
++    MOV(dest, r);
++    pop();
++  }
++
++  void GenEncoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister r) {
++    int w = curr.execWidth;
++    if (src0.isdf()) {
++      push();
++      curr.execWidth = 16;
++      MOV(dest, src0);
++      if (w == 16)
++        MOV(GenRegister::QnPhysical(dest, w / 4), GenRegister::QnPhysical(src0, w / 4));
++      pop();
++    } else {
++      GenRegister r0 = GenRegister::h2(r);
++      push();
++      curr.execWidth = 8;
++      MOV(r0, src0);
++      MOV(GenRegister::suboffset(r0, 8), GenRegister::suboffset(src0, 4));
++      pop();
++      push();
++      curr.execWidth = 16;
++      MOV(dest, r);
++      pop();
++      if (w == 16) {
++        push();
++        curr.execWidth = 8;
++        MOV(r0, GenRegister::suboffset(src0, 8));
++        MOV(GenRegister::suboffset(r0, 8), GenRegister::suboffset(src0, 12));
++        pop();
++        push();
++        curr.execWidth = 16;
++        MOV(GenRegister::suboffset(dest, 8), r);
++        pop();
++      }
++    }
++  }
++
+   ALU1(MOV)
+   ALU1(RNDZ)
+   ALU1(RNDE)
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_encoder.hpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_encoder.hpp	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_encoder.hpp	2013-06-19 21:04:46.030666393 +0200
+@@ -113,6 +113,8 @@
+     ALU2(LINE)
+     ALU2(PLN)
+     ALU3(MAD)
++    ALU1(LOAD_DF_IMM);
++    ALU2(MOV_DF);
+ #undef ALU1
+ #undef ALU2
+ #undef ALU3
+@@ -132,6 +134,10 @@
+     void NOP(void);
+     /*! Wait instruction (used for the barrier) */
+     void WAIT(void);
++    /*! Read 64-bits float arrays */
++    void READ_FLOAT64(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
++    /*! Write 64-bits float arrays */
++    void WRITE_FLOAT64(GenRegister src, uint32_t bti, uint32_t elemNum);
+     /*! Untyped read (upto 4 channels) */
+     void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+     /*! Untyped write (upto 4 channels) */
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_insn_gen7_schedule_info.hxx	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_gen7_schedule_info.hxx	2013-06-19 21:04:46.030666393 +0200
+@@ -12,6 +12,8 @@
+ DECL_GEN7_SCHEDULE(Math,            20,        4,        2)
+ DECL_GEN7_SCHEDULE(Barrier,         80,        1,        1)
+ DECL_GEN7_SCHEDULE(Fence,           80,        1,        1)
++DECL_GEN7_SCHEDULE(ReadFloat64,     80,        1,        1)
++DECL_GEN7_SCHEDULE(WriteFloat64,    80,        1,        1)
+ DECL_GEN7_SCHEDULE(UntypedRead,     80,        1,        1)
+ DECL_GEN7_SCHEDULE(UntypedWrite,    80,        1,        1)
+ DECL_GEN7_SCHEDULE(ByteGather,      80,        1,        1)
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_selection.cpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_insn_selection.cpp	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_selection.cpp	2013-06-19 21:04:46.034666393 +0200
+@@ -129,6 +129,7 @@
+       case TYPE_S32: return GEN_TYPE_D;
+       case TYPE_U32: return GEN_TYPE_UD;
+       case TYPE_FLOAT: return GEN_TYPE_F;
++      case TYPE_DOUBLE: return GEN_TYPE_DF;
+       default: NOT_SUPPORTED; return GEN_TYPE_F;
+     }
+   }
+@@ -166,11 +167,13 @@
+ 
+   bool SelectionInstruction::isRead(void) const {
+     return this->opcode == SEL_OP_UNTYPED_READ ||
++           this->opcode == SEL_OP_READ_FLOAT64 ||
+            this->opcode == SEL_OP_BYTE_GATHER;
+   }
+ 
+   bool SelectionInstruction::isWrite(void) const {
+     return this->opcode == SEL_OP_UNTYPED_WRITE ||
++           this->opcode == SEL_OP_WRITE_FLOAT64 ||
+            this->opcode == SEL_OP_BYTE_SCATTER;
+   }
+ 
+@@ -406,6 +409,8 @@
+ #define ALU3(OP) \
+   INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
+     ALU1(MOV)
++    ALU2(MOV_DF)
++    ALU1(LOAD_DF_IMM)
+     ALU1(RNDZ)
+     ALU1(RNDE)
+     ALU2(SEL)
+@@ -449,6 +454,10 @@
+     void NOP(void);
+     /*! Wait instruction (used for the barrier) */
+     void WAIT(void);
++    /*! Read 64 bits float array */
++    void READ_FLOAT64(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
++    /*! Write 64 bits float array */
++    void WRITE_FLOAT64(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t bti);
+     /*! Untyped read (up to 4 elements) */
+     void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
+     /*! Untyped write (up to 4 elements) */
+@@ -610,20 +619,23 @@
+ 
+   ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) {
+     SelectionBlock *block = insn->parent;
+-    const uint32_t simdWidth = ctx.getSimdWidth();
++    uint32_t simdWidth = ctx.getSimdWidth();
+     ir::Register tmp;
++    ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family;
++    int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F;
++    GenRegister gr;
+ 
+     // This will append the temporary register in the instruction block
+     this->block = block;
+-    tmp = this->reg(ir::FAMILY_DWORD);
++    tmp = this->reg(f);
+ 
+     // Generate the MOV instruction and replace the register in the instruction
+     SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
+-    mov->dst(0) = GenRegister::retype(insn->dst(regID), GEN_TYPE_F);
++    mov->dst(0) = GenRegister::retype(insn->dst(regID), genType);
+     mov->state = GenInstructionState(simdWidth);
+-    insn->dst(regID) = mov->src(0) = GenRegister::fxgrf(simdWidth, tmp);
++    gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : GenRegister::fxgrf(simdWidth, tmp);
++    insn->dst(regID) = mov->src(0) = gr;
+     insn->append(*mov);
+-
+     return tmp;
+   }
+ 
+@@ -657,6 +669,7 @@
+       case FAMILY_WORD: SEL_REG(uw16grf, uw8grf, uw1grf); break;
+       case FAMILY_BYTE: SEL_REG(ub16grf, ub8grf, ub1grf); break;
+       case FAMILY_DWORD: SEL_REG(f16grf, f8grf, f1grf); break;
++      case FAMILY_QWORD: SEL_REG(df16grf, df8grf, df1grf); break;
+       default: NOT_SUPPORTED;
+     }
+     GBE_ASSERT(false);
+@@ -719,6 +732,33 @@
+   void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
+   void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
+ 
++  void Selection::Opaque::READ_FLOAT64(Reg addr,
++                                       const GenRegister *dst,
++                                       uint32_t elemNum,
++                                       uint32_t bti)
++  {
++    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ_FLOAT64, elemNum, 1);
++    SelectionVector *srcVector = this->appendVector();
++    SelectionVector *dstVector = this->appendVector();
++
++    // Regular instruction to encode
++    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
++      insn->dst(elemID) = dst[elemID];
++    insn->src(0) = addr;
++    insn->extra.function = bti;
++    insn->extra.elem = elemNum;
++
++    // Sends require contiguous allocation
++    dstVector->regNum = elemNum;
++    dstVector->isSrc = 0;
++    dstVector->reg = &insn->dst(0);
++
++    // Source cannot be scalar (yet)
++    srcVector->regNum = 1;
++    srcVector->isSrc = 1;
++    srcVector->reg = &insn->src(0);
++  }
++
+   void Selection::Opaque::UNTYPED_READ(Reg addr,
+                                        const GenRegister *dst,
+                                        uint32_t elemNum,
+@@ -746,6 +786,27 @@
+     srcVector->reg = &insn->src(0);
+   }
+ 
++  void Selection::Opaque::WRITE_FLOAT64(Reg addr,
++                                        const GenRegister *src,
++                                        uint32_t elemNum,
++                                        uint32_t bti)
++  {
++    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE_FLOAT64, 0, elemNum+1);
++    SelectionVector *vector = this->appendVector();
++
++    // Regular instruction to encode
++    insn->src(0) = addr;
++    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
++      insn->src(elemID+1) = src[elemID];
++    insn->extra.function = bti;
++    insn->extra.elem = elemNum;
++
++    // Sends require contiguous allocation for the sources
++    vector->regNum = elemNum+1;
++    vector->reg = &insn->src(0);
++    vector->isSrc = 1;
++  }
++
+   void Selection::Opaque::UNTYPED_WRITE(Reg addr,
+                                         const GenRegister *src,
+                                         uint32_t elemNum,
+@@ -1092,6 +1153,15 @@
+   // Implementation of all patterns
+   ///////////////////////////////////////////////////////////////////////////
+ 
++  bool canGetRegisterFromImmediate(const ir::Instruction &insn) {
++    using namespace ir;
++    const auto &childInsn = cast<LoadImmInstruction>(insn);
++    const auto &imm = childInsn.getImmediate();
++    if(imm.type != TYPE_DOUBLE)
++      return true;
++    return false;
++  }
++
+   GenRegister getRegisterFromImmediate(ir::Immediate imm)
+   {
+     using namespace ir;
+@@ -1103,6 +1173,7 @@
+       case TYPE_S16: return  GenRegister::immw(imm.data.s16);
+       case TYPE_U8:  return GenRegister::immuw(imm.data.u8);
+       case TYPE_S8:  return GenRegister::immw(imm.data.s8);
++      case TYPE_DOUBLE: return GenRegister::immdf(imm.data.f64);
+       default: NOT_SUPPORTED; return GenRegister::immuw(0);
+     }
+   }
+@@ -1146,7 +1217,13 @@
+       const GenRegister src = sel.selReg(insn.getSrc(0));
+       switch (opcode) {
+         case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break;
+-        case ir::OP_MOV: sel.MOV(dst, src); break;
++        case ir::OP_MOV:
++          if (dst.isdf()) {
++            ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
++            sel.MOV_DF(dst, src, sel.selReg(r));
++          } else
++            sel.MOV(dst, src);
++          break;
+         case ir::OP_RNDD: sel.RNDD(dst, src); break;
+         case ir::OP_RNDE: sel.RNDE(dst, src); break;
+         case ir::OP_RNDU: sel.RNDU(dst, src); break;
+@@ -1225,14 +1302,14 @@
+       SelectionDAG *dag1 = dag.child[1];
+ 
+       // Right source can always be an immediate
+-      if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
++      if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
+         const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
+         src0 = sel.selReg(insn.getSrc(0), type);
+         src1 = getRegisterFromImmediate(childInsn.getImmediate());
+         if (dag0) dag0->isRoot = 1;
+       }
+       // Left source cannot be immediate but it is OK if we can commute
+-      else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI) {
++      else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag0->insn)) {
+         const auto &childInsn = cast<LoadImmInstruction>(dag0->insn);
+         src0 = sel.selReg(insn.getSrc(1), type);
+         src1 = getRegisterFromImmediate(childInsn.getImmediate());
+@@ -1268,7 +1345,7 @@
+         case OP_SHR: sel.SHR(dst, src0, src1); break;
+         case OP_ASR: sel.ASR(dst, src0, src1); break;
+         case OP_MUL:
+-          if (type == TYPE_FLOAT)
++          if (type == TYPE_FLOAT || type == TYPE_DOUBLE)
+             sel.MUL(dst, src0, src1);
+           else if (type == TYPE_U32 || type == TYPE_S32) {
+             sel.pop();
+@@ -1599,6 +1676,7 @@
+         case TYPE_S16: sel.MOV(dst, GenRegister::immw(imm.data.s16)); break;
+         case TYPE_U8:  sel.MOV(dst, GenRegister::immuw(imm.data.u8)); break;
+         case TYPE_S8:  sel.MOV(dst, GenRegister::immw(imm.data.s8)); break;
++        case TYPE_DOUBLE: sel.LOAD_DF_IMM(dst, GenRegister::immdf(imm.data.f64)); break;
+         default: NOT_SUPPORTED;
+       }
+       sel.pop();
+@@ -1650,6 +1728,8 @@
+   INLINE uint32_t getByteScatterGatherSize(ir::Type type) {
+     using namespace ir;
+     switch (type) {
++      case TYPE_DOUBLE:
++        return GEN_BYTE_SCATTER_QWORD;
+       case TYPE_FLOAT:
+       case TYPE_U32:
+       case TYPE_S32:
+@@ -1681,6 +1761,22 @@
+       sel.UNTYPED_READ(addr, dst.data(), valueNum, bti);
+     }
+ 
++    /*! Emit a 64-bit float load: destinations are the loaded values retyped to
++     *  F, then one new QWORD register (two when the SIMD width is 16) */
++    void emitReadFloat64(Selection::Opaque &sel,
++                         const ir::LoadInstruction &insn,
++                         GenRegister addr, uint32_t bti) const
++    {
++      using namespace ir;
++      vector<GenRegister> regs;
++      for (uint32_t i = 0; i < insn.getValueNum(); ++i)
++        regs.push_back(GenRegister::retype(sel.selReg(insn.getValue(i)), GEN_TYPE_F));
++      const uint32_t extraNum = sel.ctx.getSimdWidth() == 16 ? 2 : 1;
++      for (uint32_t i = 0; i < extraNum; ++i)
++        regs.push_back(sel.selReg(sel.reg(FAMILY_QWORD)));
++      sel.READ_FLOAT64(addr, regs.data(), regs.size(), bti);
++    }
++
+     void emitByteGather(Selection::Opaque &sel,
+                         const ir::LoadInstruction &insn,
+                         const uint32_t elemSize,
+@@ -1732,6 +1828,8 @@
+       const uint32_t elemSize = getByteScatterGatherSize(type);
+       if (insn.getAddressSpace() == MEM_CONSTANT)
+         this->emitIndirectMove(sel, insn, address);
++      else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
++        this->emitReadFloat64(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
+       else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
+         this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
+       else {
+@@ -1762,6 +1860,25 @@
+       sel.UNTYPED_WRITE(addr, value.data(), valueNum, bti);
+     }
+ 
++    /*! Emit a 64-bit float store. The address register is retyped to F; the
++     *  sources handed to WRITE_FLOAT64 are the stored values retyped to F,
++     *  then one new QWORD register (two when the SIMD width is 16) */
++    void emitWriteFloat64(Selection::Opaque &sel,
++                          const ir::StoreInstruction &insn,
++                          uint32_t bti) const
++    {
++      using namespace ir;
++      const uint32_t addrID = ir::StoreInstruction::addressIndex;
++      GenRegister addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);
++      vector<GenRegister> srcs;
++      for (uint32_t i = 0; i < insn.getValueNum(); ++i)
++        srcs.push_back(GenRegister::retype(sel.selReg(insn.getValue(i)), GEN_TYPE_F));
++      const uint32_t extraNum = sel.ctx.getSimdWidth() == 16 ? 2 : 1;
++      for (uint32_t i = 0; i < extraNum; ++i)
++        srcs.push_back(sel.selReg(sel.reg(FAMILY_QWORD)));
++      sel.WRITE_FLOAT64(addr, srcs.data(), srcs.size(), bti);
++    }
++
+     void emitByteScatter(Selection::Opaque &sel,
+                          const ir::StoreInstruction &insn,
+                          const uint32_t elemSize,
+@@ -1791,7 +1908,9 @@
+       const uint32_t bti = space == MEM_LOCAL ? 0xfe : 0x01;
+       const Type type = insn.getValueType();
+       const uint32_t elemSize = getByteScatterGatherSize(type);
+-      if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
++      if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
++        this->emitWriteFloat64(sel, insn, bti);
++      else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
+         this->emitUntypedWrite(sel, insn, bti);
+       else {
+         const GenRegister address = sel.selReg(insn.getAddress());
+@@ -1839,7 +1958,7 @@
+       SelectionDAG *dag1 = dag.child[1];
+ 
+       // Right source can always be an immediate
+-      if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
++      if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
+         const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
+         src0 = sel.selReg(insn.getSrc(0), type);
+         src1 = getRegisterFromImmediate(childInsn.getImmediate());
+@@ -1873,7 +1992,7 @@
+       const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+ 
+       // We need two instructions to make the conversion
+-      if (dstFamily != FAMILY_DWORD && srcFamily == FAMILY_DWORD) {
++      if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {
+         GenRegister unpacked;
+         if (dstFamily == FAMILY_WORD) {
+           const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
+@@ -1886,6 +2005,9 @@
+         }
+         sel.MOV(unpacked, src);
+         sel.MOV(dst, unpacked);
++      } else if (dst.isdf()) {
++        ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
++        sel.MOV_DF(dst, src, sel.selReg(r));
+       } else
+         sel.MOV(dst, src);
+       return true;
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_selection.hxx
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_insn_selection.hxx	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_selection.hxx	2013-06-19 21:04:46.034666393 +0200
+@@ -1,5 +1,7 @@
+ DECL_SELECTION_IR(LABEL, LabelInstruction)
+ DECL_SELECTION_IR(MOV, UnaryInstruction)
++DECL_SELECTION_IR(MOV_DF, BinaryInstruction)
++DECL_SELECTION_IR(LOAD_DF_IMM, UnaryInstruction)
+ DECL_SELECTION_IR(NOT, UnaryInstruction)
+ DECL_SELECTION_IR(LZD, UnaryInstruction)
+ DECL_SELECTION_IR(RNDZ, UnaryInstruction)
+@@ -32,6 +34,8 @@
+ DECL_SELECTION_IR(FENCE, FenceInstruction)
+ DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
+ DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
++DECL_SELECTION_IR(READ_FLOAT64, ReadFloat64Instruction)
++DECL_SELECTION_IR(WRITE_FLOAT64, WriteFloat64Instruction)
+ DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
+ DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
+ DECL_SELECTION_IR(SAMPLE, SampleInstruction)
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_reg_allocation.cpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_reg_allocation.cpp	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_reg_allocation.cpp	2013-06-19 21:04:46.034666393 +0200
+@@ -458,7 +458,6 @@
+   }
+ 
+   bool GenRegAllocator::Opaque::allocateGRFs(Selection &selection) {
+-
+     // Perform the linear scan allocator
+     const uint32_t regNum = ctx.sel->getRegNum();
+     for (uint32_t startID = 0; startID < regNum; ++startID) {
+Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_register.hpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_register.hpp	2013-06-19 21:03:25.726669973 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_register.hpp	2013-06-19 21:04:46.034666393 +0200
+@@ -69,11 +69,12 @@
+   /*! Type size in bytes for each Gen type */
+   INLINE int typeSize(uint32_t type) {
+     switch(type) {
++      case GEN_TYPE_DF:
++        return 8;
+       case GEN_TYPE_UD:
+       case GEN_TYPE_D:
+       case GEN_TYPE_F:
+         return 4;
+-      case GEN_TYPE_HF:
+       case GEN_TYPE_UW:
+       case GEN_TYPE_W:
+         return 2;
+@@ -192,6 +193,7 @@
+ 
+     /*! For immediates or virtual register */
+     union {
++      double df;
+       float f;
+       int32_t d;
+       uint32_t ud;
+@@ -211,6 +213,31 @@
+     uint32_t quarter:1;      //!< To choose which part we want (Q1 / Q2)
+     uint32_t address_mode:1; //!< direct or indirect
+ 
++    /*! Return a copy of reg with nr and subnr added to its nr / subnr fields */
++    static INLINE GenRegister offset(GenRegister reg, int nr, int subnr = 0) {
++      reg.nr += nr;
++      reg.subnr += subnr;
++      return reg;
++    }
++
++    /*! Tell whether this register is of type GEN_TYPE_DF and lives in the
++     *  GEN_IMMEDIATE_VALUE file */
++    INLINE bool isimmdf(void) const {
++      return type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE;
++    }
++
++    /*! Tell whether this register is of type GEN_TYPE_DF and lives in the
++     *  GEN_GENERAL_REGISTER_FILE file */
++    INLINE bool isdf(void) const {
++      return type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE;
++    }
++
++    /*! Return a copy of reg whose hstride is GEN_HORIZONTAL_STRIDE_2 */
++    static INLINE GenRegister h2(GenRegister reg) {
++      reg.hstride = GEN_HORIZONTAL_STRIDE_2;
++      return reg;
++    }
++
+     static INLINE GenRegister QnVirtual(GenRegister reg, uint32_t quarter) {
+       GBE_ASSERT(reg.physical == 0);
+       if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
+@@ -293,6 +320,18 @@
+       return reg;
+     }
+ 
++    static INLINE GenRegister df16(uint32_t file, ir::Register reg) {
++      return retype(vec16(file, reg), GEN_TYPE_DF);
++    }
++
++    static INLINE GenRegister df8(uint32_t file, ir::Register reg) {
++      return retype(vec8(file, reg), GEN_TYPE_DF);
++    }
++
++    static INLINE GenRegister df1(uint32_t file, ir::Register reg) {
++      return retype(vec1(file, reg), GEN_TYPE_DF);
++    }
++
+     static INLINE GenRegister ud16(uint32_t file, ir::Register reg) {
+       return retype(vec16(file, reg), GEN_TYPE_UD);
+     }
+@@ -371,6 +410,12 @@
+                          GEN_HORIZONTAL_STRIDE_0);
+     }
+ 
++    static INLINE GenRegister immdf(double df) { // double counterpart of immf
++      GenRegister immediate = imm(GEN_TYPE_DF);
++      immediate.value.df = df; // payload goes in the double member of the value union
++      return immediate;
++    }
++
+     static INLINE GenRegister immf(float f) {
+       GenRegister immediate = imm(GEN_TYPE_F);
+       immediate.value.f = f;
+@@ -448,6 +493,18 @@
+       return vec16(GEN_GENERAL_REGISTER_FILE, reg);
+     }
+ 
++    static INLINE GenRegister df1grf(ir::Register reg) {
++      return df1(GEN_GENERAL_REGISTER_FILE, reg);
++    }
++
++    static INLINE GenRegister df8grf(ir::Register reg) {
++      return df8(GEN_GENERAL_REGISTER_FILE, reg);
++    }
++
++    static INLINE GenRegister df16grf(ir::Register reg) {
++      return df16(GEN_GENERAL_REGISTER_FILE, reg);
++    }
++
+     static INLINE GenRegister ud16grf(ir::Register reg) {
+       return ud16(GEN_GENERAL_REGISTER_FILE, reg);
+     }
+@@ -609,10 +666,26 @@
+     }
+ 
+     static INLINE GenRegister suboffset(GenRegister reg, uint32_t delta) {
+-      reg.subnr += delta * typeSize(reg.type);
++      if (reg.hstride != GEN_HORIZONTAL_STRIDE_0) { // registers with horizontal stride 0 are returned unchanged
++        reg.subnr += delta * typeSize(reg.type); // delta elements in bytes; NOTE(review): assumes subnr can hold the sum before the carry below - confirm field width
++        reg.nr += reg.subnr / 32; // carry every full 32 bytes of sub-register offset into the register number
++        reg.subnr %= 32; // keep only the remainder within the register
++      }
+       return reg;
+     }
+ 
++    static INLINE GenRegister df16(uint32_t file, uint32_t nr, uint32_t subnr) {
++      return retype(vec16(file, nr, subnr), GEN_TYPE_DF);
++    }
++
++    static INLINE GenRegister df8(uint32_t file, uint32_t nr, uint32_t subnr) {
++      return retype(vec8(file, nr, subnr), GEN_TYPE_DF);
++    }
++
++    static INLINE GenRegister df1(uint32_t file, uint32_t nr, uint32_t subnr) {
++      return retype(vec1(file, nr, subnr), GEN_TYPE_DF);
++    }
++
+     static INLINE GenRegister ud16(uint32_t file, uint32_t nr, uint32_t subnr) {
+       return retype(vec16(file, nr, subnr), GEN_TYPE_UD);
+     }
+@@ -685,6 +758,18 @@
+       return vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+     }
+ 
++    static INLINE GenRegister df16grf(uint32_t nr, uint32_t subnr) {
++      return df16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
++    }
++
++    static INLINE GenRegister df8grf(uint32_t nr, uint32_t subnr) {
++      return df8(GEN_GENERAL_REGISTER_FILE, nr, subnr);
++    }
++
++    static INLINE GenRegister df1grf(uint32_t nr, uint32_t subnr) {
++      return df1(GEN_GENERAL_REGISTER_FILE, nr, subnr);
++    }
++
+     static INLINE GenRegister ud16grf(uint32_t nr, uint32_t subnr) {
+       return ud16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+     }
+@@ -790,6 +875,7 @@
+         return SIMD1(values...); \
+       } \
+     }
++    DECL_REG_ENCODER(dfxgrf, df16grf, df8grf, df1grf);
+     DECL_REG_ENCODER(fxgrf, f16grf, f8grf, f1grf);
+     DECL_REG_ENCODER(uwxgrf, uw16grf, uw8grf, uw1grf);
+     DECL_REG_ENCODER(udxgrf, ud16grf, ud8grf, ud1grf);
diff --git a/debian/patches/0007-test-case-for-64-bit-float.patch b/debian/patches/0007-test-case-for-64-bit-float.patch
new file mode 100644
index 0000000..766b885
--- /dev/null
+++ b/debian/patches/0007-test-case-for-64-bit-float.patch
@@ -0,0 +1,159 @@
+From 54eac0d43d1db154d77070bcbf226880e014c30f Mon Sep 17 00:00:00 2001
+From: Homer Hsing <homer.xing at intel.com>
+Date: Wed, 19 Jun 2013 12:45:52 +0800
+Subject: [PATCH 07/12] test case for 64-bit float
+To: beignet at lists.freedesktop.org
+
+Signed-off-by: Homer Hsing <homer.xing at intel.com>
+---
+ kernels/compiler_double.cl   |    7 +++++++
+ kernels/compiler_double_2.cl |    7 +++++++
+ utests/CMakeLists.txt        |    2 ++
+ utests/compiler_double.cpp   |   46 +++++++++++++++++++++++++++++++++++++++++
+ utests/compiler_double_2.cpp |   47 ++++++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 109 insertions(+)
+ create mode 100644 kernels/compiler_double.cl
+ create mode 100644 kernels/compiler_double_2.cl
+ create mode 100644 utests/compiler_double.cpp
+ create mode 100644 utests/compiler_double_2.cpp
+
+Index: beignet-0.1+git20130619+42967d2/kernels/compiler_double.cl
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/kernels/compiler_double.cl	2013-06-19 21:04:47.534666326 +0200
+@@ -0,0 +1,7 @@
++#pragma OPENCL EXTENSION cl_khr_fp64 : enable
++kernel void compiler_double(global double *src, global double *dst) { // dst[i] = d * (src[i] + d), all in double
++  int i = get_global_id(0);
++  double d = 1.234567890123456789; // same literal as cpu() in utests/compiler_double.cpp
++  dst[i] = d * (src[i] + d);
++}
++
+Index: beignet-0.1+git20130619+42967d2/kernels/compiler_double_2.cl
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/kernels/compiler_double_2.cl	2013-06-19 21:04:47.534666326 +0200
+@@ -0,0 +1,7 @@
++#pragma OPENCL EXTENSION cl_khr_fp64 : enable
++kernel void compiler_double_2(global float *src, global double *dst) { // float input, double output
++  int i = get_global_id(0);
++  float d = 1.234567890123456789f;
++  dst[i] = d * (d + src[i]); // both operands are float; the result is converted to double by the store
++}
++
+Index: beignet-0.1+git20130619+42967d2/utests/CMakeLists.txt
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/utests/CMakeLists.txt	2013-06-19 21:04:43.270666516 +0200
++++ beignet-0.1+git20130619+42967d2/utests/CMakeLists.txt	2013-06-19 21:04:47.534666326 +0200
+@@ -27,6 +27,8 @@
+   compiler_copy_image.cpp
+   compiler_copy_image_3d.cpp
+   compiler_copy_buffer_row.cpp
++  compiler_double.cpp
++  compiler_double_2.cpp
+   compiler_fabs.cpp
+   compiler_fill_image.cpp
+   compiler_fill_image0.cpp
+Index: beignet-0.1+git20130619+42967d2/utests/compiler_double.cpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/utests/compiler_double.cpp	2013-06-19 21:04:47.534666326 +0200
+@@ -0,0 +1,46 @@
++#include <cmath>
++#include "utest_helper.hpp"
++
++// CPU reference for one work-item: dst = d * (src + d), computed in double
++static void cpu(int global_id, double *src, double *dst) {
++  const double d = 1.234567890123456789;
++  dst[global_id] = d * (src[global_id] + d);
++}
++
++void compiler_double(void) // runs the compiler_double kernel and compares it with cpu()
++{
++  const size_t n = 16;
++  double cpu_dst[n], cpu_src[n];
++
++  // Setup kernel and buffers
++  OCL_CREATE_KERNEL("compiler_double");
++  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL);
++  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL);
++  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // kernel argument 0: src
++  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); // kernel argument 1: dst
++  globals[0] = n;
++  locals[0] = 16;
++
++  // Run random tests (the loop currently makes a single pass)
++  for (uint32_t pass = 0; pass < 1; ++pass) {
++    OCL_MAP_BUFFER(0);
++    for (int32_t i = 0; i < (int32_t) n; ++i)
++      cpu_src[i] = ((double*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; // .1f * k - .75f with k in [0, 15], evaluated in float; same value for GPU and CPU
++    OCL_UNMAP_BUFFER(0);
++
++    // Run the kernel on GPU
++    OCL_NDRANGE(1);
++
++    // Run on CPU
++    for (int32_t i = 0; i < (int32_t) n; ++i)
++      cpu(i, cpu_src, cpu_dst);
++
++    // Compare
++    OCL_MAP_BUFFER(1);
++    for (int32_t i = 0; i < (int32_t) n; ++i)
++      OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); // absolute error bound
++    OCL_UNMAP_BUFFER(1);
++  }
++}
++
++MAKE_UTEST_FROM_FUNCTION(compiler_double);
+Index: beignet-0.1+git20130619+42967d2/utests/compiler_double_2.cpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130619+42967d2/utests/compiler_double_2.cpp	2013-06-19 21:04:47.534666326 +0200
+@@ -0,0 +1,47 @@
++#include <cmath>
++#include "utest_helper.hpp"
++
++// CPU reference for one work-item: d * (d + src) in float, stored as double
++static void cpu(int global_id, float *src, double *dst) {
++  const float d = 1.234567890123456789;
++  dst[global_id] = d * (d + src[global_id]);
++}
++
++void compiler_double_2(void) // runs the compiler_double_2 kernel and compares it with cpu()
++{
++  const size_t n = 16;
++  float cpu_src[n];
++  double cpu_dst[n];
++
++  // Setup kernel and buffers
++  OCL_CREATE_KERNEL("compiler_double_2");
++  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); // float input buffer
++  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); // double output buffer
++  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // kernel argument 0: src
++  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); // kernel argument 1: dst
++  globals[0] = n;
++  locals[0] = 16;
++
++  // Run random tests (the loop currently makes a single pass)
++  for (uint32_t pass = 0; pass < 1; ++pass) {
++    OCL_MAP_BUFFER(0);
++    for (int32_t i = 0; i < (int32_t) n; ++i)
++      cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; // .1f * k - .75f with k in [0, 15]; same value for GPU and CPU
++    OCL_UNMAP_BUFFER(0);
++
++    // Run the kernel on GPU
++    OCL_NDRANGE(1);
++
++    // Run on CPU
++    for (int32_t i = 0; i < (int32_t) n; ++i)
++      cpu(i, cpu_src, cpu_dst);
++
++    // Compare
++    OCL_MAP_BUFFER(1);
++    for (int32_t i = 0; i < (int32_t) n; ++i)
++      OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); // absolute error bound
++    OCL_UNMAP_BUFFER(1);
++  }
++}
++
++MAKE_UTEST_FROM_FUNCTION(compiler_double_2);
diff --git a/debian/patches/0008-Readd-OpenCL-1.2-definitions-required-for-ICD.patch b/debian/patches/0008-Readd-OpenCL-1.2-definitions-required-for-ICD.patch
new file mode 100644
index 0000000..2cb9ac2
--- /dev/null
+++ b/debian/patches/0008-Readd-OpenCL-1.2-definitions-required-for-ICD.patch
@@ -0,0 +1,95 @@
+From 4d8c1ce21b4ac760c994401e30294b782a9492a9 Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Wed, 19 Jun 2013 11:30:36 +0200
+Subject: [PATCH 08/12] Readd OpenCL 1.2 definitions required for ICD
+To: beignet at lists.freedesktop.org
+
+The definition for the ICD dispatch table requires a few additional
+definitions from OpenCL 1.2.
+---
+ include/CL/cl.h          |   15 +++++++++++++++
+ include/CL/cl_platform.h |    2 ++
+ src/cl_mem.h             |   12 ------------
+ 3 files changed, 17 insertions(+), 12 deletions(-)
+
+diff --git a/include/CL/cl.h b/include/CL/cl.h
+index 4355e74..a7f25d1 100644
+--- a/include/CL/cl.h
++++ b/include/CL/cl.h
+@@ -67,6 +67,7 @@ typedef cl_uint             cl_channel_type;
+ typedef cl_bitfield         cl_mem_flags;
+ typedef cl_uint             cl_mem_object_type;
+ typedef cl_uint             cl_mem_info;
++typedef cl_bitfield         cl_mem_migration_flags;
+ typedef cl_uint             cl_image_info;
+ typedef cl_uint             cl_buffer_create_type;
+ typedef cl_uint             cl_addressing_mode;
+@@ -75,8 +76,10 @@ typedef cl_uint             cl_sampler_info;
+ typedef cl_bitfield         cl_map_flags;
+ typedef cl_uint             cl_program_info;
+ typedef cl_uint             cl_program_build_info;
++typedef intptr_t            cl_device_partition_property;
+ typedef cl_int              cl_build_status;
+ typedef cl_uint             cl_kernel_info;
++typedef cl_uint             cl_kernel_arg_info;
+ typedef cl_uint             cl_kernel_work_group_info;
+ typedef cl_uint             cl_event_info;
+ typedef cl_uint             cl_command_type;
+@@ -87,6 +90,18 @@ typedef struct _cl_image_format {
+     cl_channel_type         image_channel_data_type;
+ } cl_image_format;
+ 
++typedef struct _cl_image_desc {
++    cl_mem_object_type      image_type;
++    size_t                  image_width;
++    size_t                  image_height;
++    size_t                  image_depth;
++    size_t                  image_array_size;
++    size_t                  image_row_pitch;
++    size_t                  image_slice_pitch;
++    cl_uint                 num_mip_levels;
++    cl_uint                 num_samples;
++    cl_mem                  buffer;
++} cl_image_desc;
+ 
+ typedef struct _cl_buffer_region {
+     size_t                  origin;
+diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h
+index 043b048..9a2f17a 100644
+--- a/include/CL/cl_platform.h
++++ b/include/CL/cl_platform.h
+@@ -58,6 +58,8 @@ extern "C" {
+     #define CL_EXT_SUFFIX__VERSION_1_0
+     #define CL_API_SUFFIX__VERSION_1_1
+     #define CL_EXT_SUFFIX__VERSION_1_1
++    #define CL_API_SUFFIX__VERSION_1_2
++    #define CL_EXT_SUFFIX__VERSION_1_2
+     #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #endif
+ 
+diff --git a/src/cl_mem.h b/src/cl_mem.h
+index 33ad174..66815fe 100644
+--- a/src/cl_mem.h
++++ b/src/cl_mem.h
+@@ -29,18 +29,6 @@
+ #define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
+ #define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
+ #define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
+-typedef struct _cl_image_desc {
+-    cl_mem_object_type      image_type;
+-    size_t                  image_width;
+-    size_t                  image_height;
+-    size_t                  image_depth;
+-    size_t                  image_array_size;
+-    size_t                  image_row_pitch;
+-    size_t                  image_slice_pitch;
+-    cl_uint                 num_mip_levels;
+-    cl_uint                 num_samples;
+-    cl_mem                  buffer;
+-} cl_image_desc;
+ #endif
+ 
+ typedef enum cl_image_tiling {
+-- 
+1.7.10.4
+
diff --git a/debian/patches/0009-Enable-cl_khr_fp64-extension-for-OpenCL-stdlib-heade.patch b/debian/patches/0009-Enable-cl_khr_fp64-extension-for-OpenCL-stdlib-heade.patch
new file mode 100644
index 0000000..cd81790
--- /dev/null
+++ b/debian/patches/0009-Enable-cl_khr_fp64-extension-for-OpenCL-stdlib-heade.patch
@@ -0,0 +1,33 @@
+From 80907affc6d24f3580dd59f75d4203df491846ae Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Wed, 19 Jun 2013 11:42:40 +0200
+Subject: [PATCH 09/12] Enable cl_khr_fp64 extension for OpenCL stdlib header
+To: beignet at lists.freedesktop.org
+
+This allows the stdlib header to define overloads for doubles.
+---
+ backend/src/ocl_stdlib.h |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:40.602666635 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:48.974666262 +0200
+@@ -53,6 +53,8 @@
+ #define private __private
+ #endif
+ 
++#pragma OPENCL EXTENSION cl_khr_fp64 : enable
++
+ /////////////////////////////////////////////////////////////////////////////
+ // OpenCL built-in vector data types
+ /////////////////////////////////////////////////////////////////////////////
+@@ -4430,6 +4432,8 @@
+ DECL_IMAGE(uint4, ui)
+ DECL_IMAGE(float4, f)
+ 
++#pragma OPENCL EXTENSION cl_khr_fp64 : disable
++
+ #undef DECL_IMAGE
+ #undef DECL_READ_IMAGE
+ #undef DECL_READ_IMAGE_NOSAMPLER
diff --git a/debian/patches/0010-Define-double-vector-types.patch b/debian/patches/0010-Define-double-vector-types.patch
new file mode 100644
index 0000000..c042fb8
--- /dev/null
+++ b/debian/patches/0010-Define-double-vector-types.patch
@@ -0,0 +1,23 @@
+From e8d0c6882addfc71dbc9cf3f973d86c24f73c133 Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Wed, 19 Jun 2013 11:44:22 +0200
+Subject: [PATCH 10/12] Define double vector types
+To: beignet at lists.freedesktop.org
+
+Add the definition for the "doubleN" vector types
+---
+ backend/src/ocl_stdlib.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:48.974666262 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:50.314666202 +0200
+@@ -72,6 +72,7 @@
+ DEF(long);
+ DEF(ulong);
+ DEF(float);
++DEF(double);
+ #undef DEF
+ /////////////////////////////////////////////////////////////////////////////
+ // OpenCL other built-in data types
diff --git a/debian/patches/0011-Enable-generation-of-convert_-and-as_-functions-for-.patch b/debian/patches/0011-Enable-generation-of-convert_-and-as_-functions-for-.patch
new file mode 100644
index 0000000..26c641b
--- /dev/null
+++ b/debian/patches/0011-Enable-generation-of-convert_-and-as_-functions-for-.patch
@@ -0,0 +1,1430 @@
+From 1b5cfb96810aec74acd09bd01530b59de5155b28 Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Wed, 19 Jun 2013 11:45:15 +0200
+Subject: [PATCH 11/12] Enable generation of convert_ and as_ functions for
+ double
+To: beignet at lists.freedesktop.org
+
+---
+ backend/src/genconfig.sh |    2 +-
+ backend/src/ocl_stdlib.h |  774 ++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 775 insertions(+), 1 deletion(-)
+
+Index: beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/genconfig.sh	2013-06-19 21:04:40.598666635 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh	2013-06-19 21:04:51.674666141 +0200
+@@ -2,7 +2,7 @@
+ # This is to be sourced by the generation scripts
+ 
+ # Supported base types and their lengths
+-TYPES="long:8 ulong:8 int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
++TYPES="long:8 ulong:8 int:4 uint:4 short:2 ushort:2 char:1 uchar:1 double:8 float:4"
+ 
+ # Supported vector lengths
+ VECTOR_LENGTHS="1 2 3 4 8 16"
+Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:50.314666202 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:51.678666141 +0200
+@@ -497,6 +497,7 @@
+   ushort4 _ushort4;
+   char8 _char8;
+   uchar8 _uchar8;
++  double _double;
+   float2 _float2;
+ };
+ 
+@@ -542,6 +543,12 @@
+   return u._uchar8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._double;
++}
++
+ INLINE OVERLOADABLE float2 as_float2(long v) {
+   union _type_cast_8_b u;
+   u._long = v;
+@@ -590,6 +597,12 @@
+   return u._uchar8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._double;
++}
++
+ INLINE OVERLOADABLE float2 as_float2(ulong v) {
+   union _type_cast_8_b u;
+   u._ulong = v;
+@@ -638,6 +651,12 @@
+   return u._uchar8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._double;
++}
++
+ INLINE OVERLOADABLE float2 as_float2(int2 v) {
+   union _type_cast_8_b u;
+   u._int2 = v;
+@@ -686,6 +705,12 @@
+   return u._uchar8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._double;
++}
++
+ INLINE OVERLOADABLE float2 as_float2(uint2 v) {
+   union _type_cast_8_b u;
+   u._uint2 = v;
+@@ -734,6 +759,12 @@
+   return u._uchar8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._double;
++}
++
+ INLINE OVERLOADABLE float2 as_float2(short4 v) {
+   union _type_cast_8_b u;
+   u._short4 = v;
+@@ -782,6 +813,12 @@
+   return u._uchar8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._double;
++}
++
+ INLINE OVERLOADABLE float2 as_float2(ushort4 v) {
+   union _type_cast_8_b u;
+   u._ushort4 = v;
+@@ -830,6 +867,12 @@
+   return u._uchar8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._double;
++}
++
+ INLINE OVERLOADABLE float2 as_float2(char8 v) {
+   union _type_cast_8_b u;
+   u._char8 = v;
+@@ -878,12 +921,72 @@
+   return u._char8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._double;
++}
++
+ INLINE OVERLOADABLE float2 as_float2(uchar8 v) {
+   union _type_cast_8_b u;
+   u._uchar8 = v;
+   return u._float2;
+ }
+ 
++INLINE OVERLOADABLE long as_long(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE ulong as_ulong(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._ulong;
++}
++
++INLINE OVERLOADABLE int2 as_int2(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE uint2 as_uint2(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._uchar8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(double v) {
++  union _type_cast_8_b u;
++  u._double = v;
++  return u._float2;
++}
++
+ INLINE OVERLOADABLE long as_long(float2 v) {
+   union _type_cast_8_b u;
+   u._float2 = v;
+@@ -932,6 +1035,12 @@
+   return u._uchar8;
+ }
+ 
++INLINE OVERLOADABLE double as_double(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._double;
++}
++
+ union _type_cast_12_b {
+   int3 _int3;
+   uint3 _uint3;
+@@ -983,6 +1092,7 @@
+   ushort8 _ushort8;
+   char16 _char16;
+   uchar16 _uchar16;
++  double2 _double2;
+   float4 _float4;
+ };
+ 
+@@ -1028,6 +1138,12 @@
+   return u._uchar16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._double2;
++}
++
+ INLINE OVERLOADABLE float4 as_float4(long2 v) {
+   union _type_cast_16_b u;
+   u._long2 = v;
+@@ -1076,6 +1192,12 @@
+   return u._uchar16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._double2;
++}
++
+ INLINE OVERLOADABLE float4 as_float4(ulong2 v) {
+   union _type_cast_16_b u;
+   u._ulong2 = v;
+@@ -1124,6 +1246,12 @@
+   return u._uchar16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._double2;
++}
++
+ INLINE OVERLOADABLE float4 as_float4(int4 v) {
+   union _type_cast_16_b u;
+   u._int4 = v;
+@@ -1172,6 +1300,12 @@
+   return u._uchar16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._double2;
++}
++
+ INLINE OVERLOADABLE float4 as_float4(uint4 v) {
+   union _type_cast_16_b u;
+   u._uint4 = v;
+@@ -1220,6 +1354,12 @@
+   return u._uchar16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._double2;
++}
++
+ INLINE OVERLOADABLE float4 as_float4(short8 v) {
+   union _type_cast_16_b u;
+   u._short8 = v;
+@@ -1268,6 +1408,12 @@
+   return u._uchar16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._double2;
++}
++
+ INLINE OVERLOADABLE float4 as_float4(ushort8 v) {
+   union _type_cast_16_b u;
+   u._ushort8 = v;
+@@ -1316,6 +1462,12 @@
+   return u._uchar16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._double2;
++}
++
+ INLINE OVERLOADABLE float4 as_float4(char16 v) {
+   union _type_cast_16_b u;
+   u._char16 = v;
+@@ -1364,12 +1516,72 @@
+   return u._char16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._double2;
++}
++
+ INLINE OVERLOADABLE float4 as_float4(uchar16 v) {
+   union _type_cast_16_b u;
+   u._uchar16 = v;
+   return u._float4;
+ }
+ 
++INLINE OVERLOADABLE long2 as_long2(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE ulong2 as_ulong2(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._ulong2;
++}
++
++INLINE OVERLOADABLE int4 as_int4(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE uint4 as_uint4(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._uchar16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(double2 v) {
++  union _type_cast_16_b u;
++  u._double2 = v;
++  return u._float4;
++}
++
+ INLINE OVERLOADABLE long2 as_long2(float4 v) {
+   union _type_cast_16_b u;
+   u._float4 = v;
+@@ -1418,9 +1630,16 @@
+   return u._uchar16;
+ }
+ 
++INLINE OVERLOADABLE double2 as_double2(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._double2;
++}
++
+ union _type_cast_24_b {
+   long3 _long3;
+   ulong3 _ulong3;
++  double3 _double3;
+ };
+ 
+ INLINE OVERLOADABLE ulong3 as_ulong3(long3 v) {
+@@ -1429,12 +1648,36 @@
+   return u._ulong3;
+ }
+ 
++INLINE OVERLOADABLE double3 as_double3(long3 v) {
++  union _type_cast_24_b u;
++  u._long3 = v;
++  return u._double3;
++}
++
+ INLINE OVERLOADABLE long3 as_long3(ulong3 v) {
+   union _type_cast_24_b u;
+   u._ulong3 = v;
+   return u._long3;
+ }
+ 
++INLINE OVERLOADABLE double3 as_double3(ulong3 v) {
++  union _type_cast_24_b u;
++  u._ulong3 = v;
++  return u._double3;
++}
++
++INLINE OVERLOADABLE long3 as_long3(double3 v) {
++  union _type_cast_24_b u;
++  u._double3 = v;
++  return u._long3;
++}
++
++INLINE OVERLOADABLE ulong3 as_ulong3(double3 v) {
++  union _type_cast_24_b u;
++  u._double3 = v;
++  return u._ulong3;
++}
++
+ union _type_cast_32_b {
+   long4 _long4;
+   ulong4 _ulong4;
+@@ -1442,6 +1685,7 @@
+   uint8 _uint8;
+   short16 _short16;
+   ushort16 _ushort16;
++  double4 _double4;
+   float8 _float8;
+ };
+ 
+@@ -1475,6 +1719,12 @@
+   return u._ushort16;
+ }
+ 
++INLINE OVERLOADABLE double4 as_double4(long4 v) {
++  union _type_cast_32_b u;
++  u._long4 = v;
++  return u._double4;
++}
++
+ INLINE OVERLOADABLE float8 as_float8(long4 v) {
+   union _type_cast_32_b u;
+   u._long4 = v;
+@@ -1511,6 +1761,12 @@
+   return u._ushort16;
+ }
+ 
++INLINE OVERLOADABLE double4 as_double4(ulong4 v) {
++  union _type_cast_32_b u;
++  u._ulong4 = v;
++  return u._double4;
++}
++
+ INLINE OVERLOADABLE float8 as_float8(ulong4 v) {
+   union _type_cast_32_b u;
+   u._ulong4 = v;
+@@ -1547,6 +1803,12 @@
+   return u._ushort16;
+ }
+ 
++INLINE OVERLOADABLE double4 as_double4(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._double4;
++}
++
+ INLINE OVERLOADABLE float8 as_float8(int8 v) {
+   union _type_cast_32_b u;
+   u._int8 = v;
+@@ -1583,6 +1845,12 @@
+   return u._ushort16;
+ }
+ 
++INLINE OVERLOADABLE double4 as_double4(uint8 v) {
++  union _type_cast_32_b u;
++  u._uint8 = v;
++  return u._double4;
++}
++
+ INLINE OVERLOADABLE float8 as_float8(uint8 v) {
+   union _type_cast_32_b u;
+   u._uint8 = v;
+@@ -1619,6 +1887,12 @@
+   return u._ushort16;
+ }
+ 
++INLINE OVERLOADABLE double4 as_double4(short16 v) {
++  union _type_cast_32_b u;
++  u._short16 = v;
++  return u._double4;
++}
++
+ INLINE OVERLOADABLE float8 as_float8(short16 v) {
+   union _type_cast_32_b u;
+   u._short16 = v;
+@@ -1655,12 +1929,60 @@
+   return u._short16;
+ }
+ 
++INLINE OVERLOADABLE double4 as_double4(ushort16 v) {
++  union _type_cast_32_b u;
++  u._ushort16 = v;
++  return u._double4;
++}
++
+ INLINE OVERLOADABLE float8 as_float8(ushort16 v) {
+   union _type_cast_32_b u;
+   u._ushort16 = v;
+   return u._float8;
+ }
+ 
++INLINE OVERLOADABLE long4 as_long4(double4 v) {
++  union _type_cast_32_b u;
++  u._double4 = v;
++  return u._long4;
++}
++
++INLINE OVERLOADABLE ulong4 as_ulong4(double4 v) {
++  union _type_cast_32_b u;
++  u._double4 = v;
++  return u._ulong4;
++}
++
++INLINE OVERLOADABLE int8 as_int8(double4 v) {
++  union _type_cast_32_b u;
++  u._double4 = v;
++  return u._int8;
++}
++
++INLINE OVERLOADABLE uint8 as_uint8(double4 v) {
++  union _type_cast_32_b u;
++  u._double4 = v;
++  return u._uint8;
++}
++
++INLINE OVERLOADABLE short16 as_short16(double4 v) {
++  union _type_cast_32_b u;
++  u._double4 = v;
++  return u._short16;
++}
++
++INLINE OVERLOADABLE ushort16 as_ushort16(double4 v) {
++  union _type_cast_32_b u;
++  u._double4 = v;
++  return u._ushort16;
++}
++
++INLINE OVERLOADABLE float8 as_float8(double4 v) {
++  union _type_cast_32_b u;
++  u._double4 = v;
++  return u._float8;
++}
++
+ INLINE OVERLOADABLE long4 as_long4(float8 v) {
+   union _type_cast_32_b u;
+   u._float8 = v;
+@@ -1697,11 +2019,18 @@
+   return u._ushort16;
+ }
+ 
++INLINE OVERLOADABLE double4 as_double4(float8 v) {
++  union _type_cast_32_b u;
++  u._float8 = v;
++  return u._double4;
++}
++
+ union _type_cast_64_b {
+   long8 _long8;
+   ulong8 _ulong8;
+   int16 _int16;
+   uint16 _uint16;
++  double8 _double8;
+   float16 _float16;
+ };
+ 
+@@ -1723,6 +2052,12 @@
+   return u._uint16;
+ }
+ 
++INLINE OVERLOADABLE double8 as_double8(long8 v) {
++  union _type_cast_64_b u;
++  u._long8 = v;
++  return u._double8;
++}
++
+ INLINE OVERLOADABLE float16 as_float16(long8 v) {
+   union _type_cast_64_b u;
+   u._long8 = v;
+@@ -1747,6 +2082,12 @@
+   return u._uint16;
+ }
+ 
++INLINE OVERLOADABLE double8 as_double8(ulong8 v) {
++  union _type_cast_64_b u;
++  u._ulong8 = v;
++  return u._double8;
++}
++
+ INLINE OVERLOADABLE float16 as_float16(ulong8 v) {
+   union _type_cast_64_b u;
+   u._ulong8 = v;
+@@ -1771,6 +2112,12 @@
+   return u._uint16;
+ }
+ 
++INLINE OVERLOADABLE double8 as_double8(int16 v) {
++  union _type_cast_64_b u;
++  u._int16 = v;
++  return u._double8;
++}
++
+ INLINE OVERLOADABLE float16 as_float16(int16 v) {
+   union _type_cast_64_b u;
+   u._int16 = v;
+@@ -1795,12 +2142,48 @@
+   return u._int16;
+ }
+ 
++INLINE OVERLOADABLE double8 as_double8(uint16 v) {
++  union _type_cast_64_b u;
++  u._uint16 = v;
++  return u._double8;
++}
++
+ INLINE OVERLOADABLE float16 as_float16(uint16 v) {
+   union _type_cast_64_b u;
+   u._uint16 = v;
+   return u._float16;
+ }
+ 
++INLINE OVERLOADABLE long8 as_long8(double8 v) {
++  union _type_cast_64_b u;
++  u._double8 = v;
++  return u._long8;
++}
++
++INLINE OVERLOADABLE ulong8 as_ulong8(double8 v) {
++  union _type_cast_64_b u;
++  u._double8 = v;
++  return u._ulong8;
++}
++
++INLINE OVERLOADABLE int16 as_int16(double8 v) {
++  union _type_cast_64_b u;
++  u._double8 = v;
++  return u._int16;
++}
++
++INLINE OVERLOADABLE uint16 as_uint16(double8 v) {
++  union _type_cast_64_b u;
++  u._double8 = v;
++  return u._uint16;
++}
++
++INLINE OVERLOADABLE float16 as_float16(double8 v) {
++  union _type_cast_64_b u;
++  u._double8 = v;
++  return u._float16;
++}
++
+ INLINE OVERLOADABLE long8 as_long8(float16 v) {
+   union _type_cast_64_b u;
+   u._float16 = v;
+@@ -1825,9 +2208,16 @@
+   return u._uint16;
+ }
+ 
++INLINE OVERLOADABLE double8 as_double8(float16 v) {
++  union _type_cast_64_b u;
++  u._float16 = v;
++  return u._double8;
++}
++
+ union _type_cast_128_b {
+   long16 _long16;
+   ulong16 _ulong16;
++  double16 _double16;
+ };
+ 
+ INLINE OVERLOADABLE ulong16 as_ulong16(long16 v) {
+@@ -1836,12 +2226,36 @@
+   return u._ulong16;
+ }
+ 
++INLINE OVERLOADABLE double16 as_double16(long16 v) {
++  union _type_cast_128_b u;
++  u._long16 = v;
++  return u._double16;
++}
++
+ INLINE OVERLOADABLE long16 as_long16(ulong16 v) {
+   union _type_cast_128_b u;
+   u._ulong16 = v;
+   return u._long16;
+ }
+ 
++INLINE OVERLOADABLE double16 as_double16(ulong16 v) {
++  union _type_cast_128_b u;
++  u._ulong16 = v;
++  return u._double16;
++}
++
++INLINE OVERLOADABLE long16 as_long16(double16 v) {
++  union _type_cast_128_b u;
++  u._double16 = v;
++  return u._long16;
++}
++
++INLINE OVERLOADABLE ulong16 as_ulong16(double16 v) {
++  union _type_cast_128_b u;
++  u._double16 = v;
++  return u._ulong16;
++}
++
+ // ##END_AS##
+ 
+ // ##BEGIN_CONVERT##
+@@ -1873,6 +2287,10 @@
+   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(long2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE float2 convert_float2(long2 v) {
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+@@ -1905,6 +2323,10 @@
+   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(ulong2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE float2 convert_float2(ulong2 v) {
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+@@ -1937,6 +2359,10 @@
+   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(int2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE float2 convert_float2(int2 v) {
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+@@ -1969,6 +2395,10 @@
+   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(uint2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE float2 convert_float2(uint2 v) {
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+@@ -2001,6 +2431,10 @@
+   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(short2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE float2 convert_float2(short2 v) {
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+@@ -2033,6 +2467,10 @@
+   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(ushort2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE float2 convert_float2(ushort2 v) {
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+@@ -2065,6 +2503,10 @@
+   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(char2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE float2 convert_float2(char2 v) {
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+@@ -2097,10 +2539,50 @@
+   return (char2)((char)(v.s0), (char)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(uchar2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE float2 convert_float2(uchar2 v) {
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE long2 convert_long2(double2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(double2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(double2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE uint2 convert_uint2(double2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(double2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(double2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(double2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(double2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(double2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
+ INLINE OVERLOADABLE long2 convert_long2(float2 v) {
+   return (long2)((long)(v.s0), (long)(v.s1));
+ }
+@@ -2133,6 +2615,10 @@
+   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE double2 convert_double2(float2 v) {
++  return (double2)((double)(v.s0), (double)(v.s1));
++}
++
+ INLINE OVERLOADABLE ulong3 convert_ulong3(long3 v) {
+   return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+ }
+@@ -2161,6 +2647,10 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(long3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE float3 convert_float3(long3 v) {
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+@@ -2193,6 +2683,10 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(ulong3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE float3 convert_float3(ulong3 v) {
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+@@ -2225,6 +2719,10 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(int3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE float3 convert_float3(int3 v) {
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+@@ -2257,6 +2755,10 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(uint3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE float3 convert_float3(uint3 v) {
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+@@ -2289,6 +2791,10 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(short3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE float3 convert_float3(short3 v) {
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+@@ -2321,6 +2827,10 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(ushort3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE float3 convert_float3(ushort3 v) {
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+@@ -2353,6 +2863,10 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(char3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE float3 convert_float3(char3 v) {
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+@@ -2385,10 +2899,50 @@
+   return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(uchar3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE float3 convert_float3(uchar3 v) {
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE long3 convert_long3(double3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++}
++
++INLINE OVERLOADABLE ulong3 convert_ulong3(double3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(double3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(double3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(double3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(double3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(double3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(double3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++}
++
++INLINE OVERLOADABLE float3 convert_float3(double3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++}
++
+ INLINE OVERLOADABLE long3 convert_long3(float3 v) {
+   return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+ }
+@@ -2421,6 +2975,10 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE double3 convert_double3(float3 v) {
++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
++}
++
+ INLINE OVERLOADABLE ulong4 convert_ulong4(long4 v) {
+   return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+ }
+@@ -2449,6 +3007,10 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(long4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE float4 convert_float4(long4 v) {
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+@@ -2481,6 +3043,10 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(ulong4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE float4 convert_float4(ulong4 v) {
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+@@ -2513,6 +3079,10 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(int4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE float4 convert_float4(int4 v) {
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+@@ -2545,6 +3115,10 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(uint4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE float4 convert_float4(uint4 v) {
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+@@ -2577,6 +3151,10 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(short4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE float4 convert_float4(short4 v) {
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+@@ -2609,6 +3187,10 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(ushort4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE float4 convert_float4(ushort4 v) {
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+@@ -2641,6 +3223,10 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(char4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE float4 convert_float4(char4 v) {
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+@@ -2673,10 +3259,50 @@
+   return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(uchar4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE float4 convert_float4(uchar4 v) {
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE long4 convert_long4(double4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE ulong4 convert_ulong4(double4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(double4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(double4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(double4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(double4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(double4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(double4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(double4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
+ INLINE OVERLOADABLE long4 convert_long4(float4 v) {
+   return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+ }
+@@ -2709,6 +3335,10 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE double4 convert_double4(float4 v) {
++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
++}
++
+ INLINE OVERLOADABLE ulong8 convert_ulong8(long8 v) {
+   return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+ }
+@@ -2737,6 +3367,10 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(long8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE float8 convert_float8(long8 v) {
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+@@ -2769,6 +3403,10 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(ulong8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE float8 convert_float8(ulong8 v) {
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+@@ -2801,6 +3439,10 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(int8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE float8 convert_float8(int8 v) {
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+@@ -2833,6 +3475,10 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(uint8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE float8 convert_float8(uint8 v) {
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+@@ -2865,6 +3511,10 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(short8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE float8 convert_float8(short8 v) {
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+@@ -2897,6 +3547,10 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(ushort8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE float8 convert_float8(ushort8 v) {
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+@@ -2929,6 +3583,10 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(char8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE float8 convert_float8(char8 v) {
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+@@ -2961,10 +3619,50 @@
+   return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(uchar8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE float8 convert_float8(uchar8 v) {
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE long8 convert_long8(double8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE ulong8 convert_ulong8(double8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(double8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(double8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(double8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(double8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(double8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(double8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(double8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
+ INLINE OVERLOADABLE long8 convert_long8(float8 v) {
+   return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+ }
+@@ -2997,6 +3695,10 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE double8 convert_double8(float8 v) {
++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
++}
++
+ INLINE OVERLOADABLE ulong16 convert_ulong16(long16 v) {
+   return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+ }
+@@ -3025,6 +3727,10 @@
+   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(long16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ INLINE OVERLOADABLE float16 convert_float16(long16 v) {
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+@@ -3057,6 +3763,10 @@
+   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(ulong16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ INLINE OVERLOADABLE float16 convert_float16(ulong16 v) {
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+@@ -3089,6 +3799,10 @@
+   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(int16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ INLINE OVERLOADABLE float16 convert_float16(int16 v) {
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+@@ -3121,6 +3835,10 @@
+   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(uint16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ INLINE OVERLOADABLE float16 convert_float16(uint16 v) {
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+@@ -3153,6 +3871,10 @@
+   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(short16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ INLINE OVERLOADABLE float16 convert_float16(short16 v) {
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+@@ -3185,6 +3907,10 @@
+   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(ushort16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ INLINE OVERLOADABLE float16 convert_float16(ushort16 v) {
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+@@ -3217,6 +3943,10 @@
+   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(char16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ INLINE OVERLOADABLE float16 convert_float16(char16 v) {
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+@@ -3249,10 +3979,50 @@
+   return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(uchar16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ INLINE OVERLOADABLE float16 convert_float16(uchar16 v) {
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE long16 convert_long16(double16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE ulong16 convert_ulong16(double16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(double16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(double16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(double16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(double16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(double16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(double16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(double16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
+ INLINE OVERLOADABLE long16 convert_long16(float16 v) {
+   return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+ }
+@@ -3285,6 +4055,10 @@
+   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE double16 convert_double16(float16 v) {
++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
++}
++
+ // ##END_CONVERT##
+ 
+ /////////////////////////////////////////////////////////////////////////////
diff --git a/debian/patches/0012-GBE-Fixed-one-bug-in-scalarize-pass.patch b/debian/patches/0012-GBE-Fixed-one-bug-in-scalarize-pass.patch
new file mode 100644
index 0000000..d0c8f6c
--- /dev/null
+++ b/debian/patches/0012-GBE-Fixed-one-bug-in-scalarize-pass.patch
@@ -0,0 +1,40 @@
+From 4c2f4a53d5c9a9eca0b8d55586fa1d8f070faddf Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at linux.intel.com>
+Date: Wed, 19 Jun 2013 18:36:30 +0800
+Subject: [PATCH 12/12] GBE: Fixed one bug in scalarize pass
+To: beignet at lists.freedesktop.org
+
+I hit a random segfault in void Scalarize::dce() when I integrated
+an OpenCL kernel into Chromium's GPU process. After discussing it with
+Yang Rong, I found a bug in this function. It uses two loops
+to erase the dead instructions, but it doesn't set the pointer to
+NULL in the first loop after it has erased the instruction. Thus
+in the second loop, when it calls (*i)->getParent(), (*i) may
+already have been deleted, so it may refer to a freed region and cause
+a segfault.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ backend/src/llvm/llvm_scalarize.cpp |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+Index: beignet-0.1+git20130619+42967d2/backend/src/llvm/llvm_scalarize.cpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/llvm/llvm_scalarize.cpp	2013-06-19 21:03:23.570670069 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/llvm/llvm_scalarize.cpp	2013-06-19 21:04:53.078666079 +0200
+@@ -825,11 +825,13 @@
+     //two passes delete for some phinode
+     for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
+       (*i)->dropAllReferences();
+-      if((*i)->use_empty())
++      if((*i)->use_empty()) {
+         (*i)->eraseFromParent();
++        (*i) = NULL;
++      }
+     }
+     for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
+-      if((*i)->getParent())
++      if((*i) && (*i)->getParent())
+         (*i)->eraseFromParent();
+     }
+     deadList.clear();
diff --git a/debian/patches/debug b/debian/patches/debug
index d8e8997..b0f8843 100644
--- a/debian/patches/debug
+++ b/debian/patches/debug
@@ -2,10 +2,10 @@ Description: Enhance debug output
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-05-21
 
-Index: beignet-0.1+git20130521+a7ea35c/src/cl_utils.h
+Index: beignet-0.1+git20130619+42967d2/src/cl_utils.h
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/src/cl_utils.h	2013-05-21 10:39:13.823946702 +0200
-+++ beignet-0.1+git20130521+a7ea35c/src/cl_utils.h	2013-05-21 10:39:20.751946393 +0200
+--- beignet-0.1+git20130619+42967d2.orig/src/cl_utils.h	2013-06-19 21:04:25.066667328 +0200
++++ beignet-0.1+git20130619+42967d2/src/cl_utils.h	2013-06-19 21:04:28.066667194 +0200
 @@ -80,6 +80,7 @@
  
  #define FATAL(...)                                          \
diff --git a/debian/patches/deprecated-in-utest b/debian/patches/deprecated-in-utest
new file mode 100644
index 0000000..152a2cc
--- /dev/null
+++ b/debian/patches/deprecated-in-utest
@@ -0,0 +1,17 @@
+Description: Utest requires deprecated function names
+Author: Simon Richter <sjr at debian.org>
+Last-Update: 2013-06-19
+
+Index: beignet-0.1+git20130619+42967d2/utests/utest_helper.hpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/utests/utest_helper.hpp	2013-06-19 21:04:23.714667388 +0200
++++ beignet-0.1+git20130619+42967d2/utests/utest_helper.hpp	2013-06-19 21:04:35.066666882 +0200
+@@ -25,6 +25,8 @@
+ #ifndef __UTEST_HELPER_HPP__
+ #define __UTEST_HELPER_HPP__
+ 
++#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
++
+ #include "CL/cl.h"
+ #include "CL/cl_intel.h"
+ #include "utest.hpp"
diff --git a/debian/patches/flags b/debian/patches/flags
index c90c1d4..ac207f3 100644
--- a/debian/patches/flags
+++ b/debian/patches/flags
@@ -2,10 +2,10 @@ Description: Debian compliant compiler flags handling
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-05-21
 
-Index: beignet-0.1+git20130521+a7ea35c/CMakeLists.txt
+Index: beignet-0.1+git20130619+42967d2/CMakeLists.txt
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/CMakeLists.txt	2013-05-21 10:40:02.635944526 +0200
-+++ beignet-0.1+git20130521+a7ea35c/CMakeLists.txt	2013-05-21 10:40:37.351942978 +0200
+--- beignet-0.1+git20130619+42967d2.orig/CMakeLists.txt	2013-06-19 21:04:24.770667341 +0200
++++ beignet-0.1+git20130619+42967d2/CMakeLists.txt	2013-06-19 21:04:31.958667020 +0200
 @@ -18,7 +18,6 @@
  
  INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
@@ -14,10 +14,10 @@ Index: beignet-0.1+git20130521+a7ea35c/CMakeLists.txt
  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/CMake/")
  SET(EMULATE_IVB false CACHE BOOL "To emulate IVB")
  SET(EMULATE_SNB false CACHE BOOL "To emulate SNB")
-Index: beignet-0.1+git20130521+a7ea35c/backend/CMakeLists.txt
+Index: beignet-0.1+git20130619+42967d2/backend/CMakeLists.txt
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/backend/CMakeLists.txt	2013-05-21 10:40:03.103944505 +0200
-+++ beignet-0.1+git20130521+a7ea35c/backend/CMakeLists.txt	2013-05-21 10:40:37.351942978 +0200
+--- beignet-0.1+git20130619+42967d2.orig/backend/CMakeLists.txt	2013-06-19 21:04:24.770667341 +0200
++++ beignet-0.1+git20130619+42967d2/backend/CMakeLists.txt	2013-06-19 21:04:31.958667020 +0200
 @@ -45,39 +45,39 @@
  if (COMPILER STREQUAL "GCC")
    set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -funroll-loops -Wstrict-aliasing=2 -fstrict-aliasing -msse2 -msse3 -mssse3 -msse4.1 -fPIC -Wall")
diff --git a/debian/patches/khronos b/debian/patches/khronos
index 8f4f6ac..0b418f1 100644
--- a/debian/patches/khronos
+++ b/debian/patches/khronos
@@ -2,13 +2,13 @@ Description: Use Khronos Group headers
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-05-21
 
-Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_ext.h
+Index: beignet-0.1+git20130619+42967d2/include/CL/cl_ext.h
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl_ext.h	2013-05-21 10:38:37.207948335 +0200
-+++ beignet-0.1+git20130521+a7ea35c/include/CL/cl_ext.h	2013-05-21 10:41:03.323941820 +0200
-@@ -1,251 +1 @@
+--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_ext.h	2013-06-19 21:04:24.122667370 +0200
++++ beignet-0.1+git20130619+42967d2/include/CL/cl_ext.h	2013-06-19 21:04:33.466666953 +0200
+@@ -1,213 +1 @@
 -/*******************************************************************************
-- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+- * Copyright (c) 2008-2010 The Khronos Group Inc.
 - *
 - * Permission is hereby granted, free of charge, to any person obtaining a
 - * copy of this software and/or associated documentation files (the
@@ -49,6 +49,9 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_ext.h
 -	#include <CL/cl.h>
 -#endif
 -
+-/* cl_khr_fp64 extension - no extension #define since it has no functions  */
+-#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
+-
 -/* cl_khr_fp16 extension - no extension #define since it has no functions  */
 -#define CL_DEVICE_HALF_FP_CONFIG                    0x1033
 -
@@ -124,48 +127,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_ext.h
 -    cl_uint *        /* num_platforms */);
 -
 -
--/* Extension: cl_khr_image2D_buffer
-- *
-- * This extension allows a 2D image to be created from a cl_mem buffer without a copy.
-- * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
-- * Both the sampler and sampler-less read_image built-in functions are supported for 2D images
-- * and 2D images created from a buffer.  Similarly, the write_image built-ins are also supported
-- * for 2D images created from a buffer.
-- *
-- * When the 2D image from buffer is created, the client must specify the width,
-- * height, image format (i.e. channel order and channel data type) and optionally the row pitch
-- *
-- * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
-- * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
-- */
--    
--/*************************************
-- * cl_khr_initalize_memory extension *
-- *************************************/
--    
--#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x200E
--    
--    
--/**************************************
-- * cl_khr_terminate_context extension *
-- **************************************/
--    
--#define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x200F
--#define CL_CONTEXT_TERMINATE_KHR                    0x2010
--
--#define cl_khr_terminate_context 1
--extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
--
--typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
--    
--    
--/*
-- * Extension: cl_khr_spir
-- *
-- * This extension adds support to create an OpenCL program object from a 
-- * Standard Portable Intermediate Representation (SPIR) instance
-- */
--
 -/******************************************
 -* cl_nv_device_attribute_query extension *
 -******************************************/
@@ -184,6 +145,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_ext.h
 -*********************************/
 -#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
 -
+-
 -#ifdef CL_VERSION_1_1
 -   /***********************************
 -    * cl_ext_device_fission extension *
@@ -259,13 +221,13 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_ext.h
 -
 -#endif /* __CL_EXT_H */
 +#include_next <CL/cl_ext.h>
-Index: beignet-0.1+git20130521+a7ea35c/include/CL/opencl.h
+Index: beignet-0.1+git20130619+42967d2/include/CL/opencl.h
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/include/CL/opencl.h	2013-05-21 10:38:37.207948335 +0200
-+++ beignet-0.1+git20130521+a7ea35c/include/CL/opencl.h	2013-05-21 10:41:03.323941820 +0200
+--- beignet-0.1+git20130619+42967d2.orig/include/CL/opencl.h	2013-06-19 21:04:24.122667370 +0200
++++ beignet-0.1+git20130619+42967d2/include/CL/opencl.h	2013-06-19 21:04:33.466666953 +0200
 @@ -1,54 +1 @@
 -/*******************************************************************************
-- * Copyright (c) 2008-2012 The Khronos Group Inc.
+- * Copyright (c) 2008-2010 The Khronos Group Inc.
 - *
 - * Permission is hereby granted, free of charge, to any person obtaining a
 - * copy of this software and/or associated documentation files (the
@@ -319,13 +281,13 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/opencl.h
 -#endif  /* __OPENCL_H   */
 -
 +#include_next <CL/opencl.h>
-Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_d3d10.h
+Index: beignet-0.1+git20130619+42967d2/include/CL/cl_d3d10.h
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl_d3d10.h	2013-05-21 10:38:37.207948335 +0200
-+++ beignet-0.1+git20130521+a7ea35c/include/CL/cl_d3d10.h	2013-05-21 10:41:03.323941820 +0200
+--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_d3d10.h	2013-06-19 21:04:24.122667370 +0200
++++ beignet-0.1+git20130619+42967d2/include/CL/cl_d3d10.h	2013-06-19 21:04:33.470666953 +0200
 @@ -1,126 +1 @@
 -/**********************************************************************************
-- * Copyright (c) 2008-2012 The Khronos Group Inc.
+- * Copyright (c) 2008-2010 The Khronos Group Inc.
 - *
 - * Permission is hereby granted, free of charge, to any person obtaining a
 - * copy of this software and/or associated documentation files (the
@@ -439,7 +401,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_d3d10.h
 -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
 -    cl_command_queue command_queue,
 -    cl_uint          num_objects,
--    const cl_mem *   mem_objects,
+-    cl_mem *         mem_objects,
 -    cl_uint          num_events_in_wait_list,
 -    const cl_event * event_wait_list,
 -    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
@@ -451,13 +413,13 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_d3d10.h
 -#endif  // __OPENCL_CL_D3D10_H
 -
 +#include_next <CL/cl_d3d10.h>
-Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
+Index: beignet-0.1+git20130619+42967d2/include/CL/cl.h
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl.h	2013-05-21 10:38:37.207948335 +0200
-+++ beignet-0.1+git20130521+a7ea35c/include/CL/cl.h	2013-05-21 10:41:03.327941820 +0200
-@@ -1,1214 +1 @@
+--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl.h	2013-06-19 21:04:24.122667370 +0200
++++ beignet-0.1+git20130619+42967d2/include/CL/cl.h	2013-06-19 21:04:33.474666953 +0200
+@@ -1,998 +1 @@
 -/*******************************************************************************
-- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+- * Copyright (c) 2008-2010 The Khronos Group Inc.
 - *
 - * Permission is hereby granted, free of charge, to any person obtaining a
 - * copy of this software and/or associated documentation files (the
@@ -479,6 +441,8 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 - ******************************************************************************/
 -
+-/* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */
+-
 -#ifndef __OPENCL_CL_H
 -#define __OPENCL_CL_H
 -
@@ -514,10 +478,8 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -typedef cl_uint             cl_device_local_mem_type;
 -typedef cl_bitfield         cl_device_exec_capabilities;
 -typedef cl_bitfield         cl_command_queue_properties;
--typedef intptr_t            cl_device_partition_property;
--typedef cl_bitfield         cl_device_affinity_domain;
 -
--typedef intptr_t            cl_context_properties;
+-typedef intptr_t			cl_context_properties;
 -typedef cl_uint             cl_context_info;
 -typedef cl_uint             cl_command_queue_info;
 -typedef cl_uint             cl_channel_order;
@@ -525,7 +487,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -typedef cl_bitfield         cl_mem_flags;
 -typedef cl_uint             cl_mem_object_type;
 -typedef cl_uint             cl_mem_info;
--typedef cl_bitfield         cl_mem_migration_flags;
 -typedef cl_uint             cl_image_info;
 -typedef cl_uint             cl_buffer_create_type;
 -typedef cl_uint             cl_addressing_mode;
@@ -534,43 +495,24 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -typedef cl_bitfield         cl_map_flags;
 -typedef cl_uint             cl_program_info;
 -typedef cl_uint             cl_program_build_info;
--typedef cl_uint             cl_program_binary_type;
 -typedef cl_int              cl_build_status;
 -typedef cl_uint             cl_kernel_info;
--typedef cl_uint             cl_kernel_arg_info;
--typedef cl_uint             cl_kernel_arg_address_qualifier;
--typedef cl_uint             cl_kernel_arg_access_qualifier;
--typedef cl_bitfield         cl_kernel_arg_type_qualifier;
 -typedef cl_uint             cl_kernel_work_group_info;
 -typedef cl_uint             cl_event_info;
 -typedef cl_uint             cl_command_type;
 -typedef cl_uint             cl_profiling_info;
 -
--
 -typedef struct _cl_image_format {
 -    cl_channel_order        image_channel_order;
 -    cl_channel_type         image_channel_data_type;
 -} cl_image_format;
 -
--typedef struct _cl_image_desc {
--    cl_mem_object_type      image_type;
--    size_t                  image_width;
--    size_t                  image_height;
--    size_t                  image_depth;
--    size_t                  image_array_size;
--    size_t                  image_row_pitch;
--    size_t                  image_slice_pitch;
--    cl_uint                 num_mip_levels;
--    cl_uint                 num_samples;
--    cl_mem                  buffer;
--} cl_image_desc;
 -
 -typedef struct _cl_buffer_region {
 -    size_t                  origin;
 -    size_t                  size;
 -} cl_buffer_region;
 -
--
 -/******************************************************************************/
 -
 -/* Error Codes */
@@ -589,11 +531,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_MAP_FAILURE                              -12
 -#define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
 -#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
--#define CL_COMPILE_PROGRAM_FAILURE                  -15
--#define CL_LINKER_NOT_AVAILABLE                     -16
--#define CL_LINK_PROGRAM_FAILURE                     -17
--#define CL_DEVICE_PARTITION_FAILED                  -18
--#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
 -
 -#define CL_INVALID_VALUE                            -30
 -#define CL_INVALID_DEVICE_TYPE                      -31
@@ -630,21 +567,14 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_INVALID_MIP_LEVEL                        -62
 -#define CL_INVALID_GLOBAL_WORK_SIZE                 -63
 -#define CL_INVALID_PROPERTY                         -64
--#define CL_INVALID_IMAGE_DESCRIPTOR                 -65
--#define CL_INVALID_COMPILER_OPTIONS                 -66
--#define CL_INVALID_LINKER_OPTIONS                   -67
--#define CL_INVALID_DEVICE_PARTITION_COUNT           -68
 -
 -/* OpenCL Version */
 -#define CL_VERSION_1_0                              1
 -#define CL_VERSION_1_1                              1
--#define CL_VERSION_1_2                              1
 -
 -/* cl_bool */
 -#define CL_FALSE                                    0
 -#define CL_TRUE                                     1
--#define CL_BLOCKING                                 CL_TRUE
--#define CL_NON_BLOCKING                             CL_FALSE
 -
 -/* cl_platform_info */
 -#define CL_PLATFORM_PROFILE                         0x0900
@@ -658,7 +588,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_DEVICE_TYPE_CPU                          (1 << 1)
 -#define CL_DEVICE_TYPE_GPU                          (1 << 2)
 -#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
--#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
 -#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
 -
 -/* cl_device_info */
@@ -712,7 +641,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_DEVICE_VERSION                           0x102F
 -#define CL_DEVICE_EXTENSIONS                        0x1030
 -#define CL_DEVICE_PLATFORM                          0x1031
--#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
+-/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
 -/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
 -#define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
@@ -724,20 +653,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
 -#define CL_DEVICE_OPENCL_C_VERSION                  0x103D
--#define CL_DEVICE_LINKER_AVAILABLE                  0x103E
--#define CL_DEVICE_BUILT_IN_KERNELS                  0x103F
--#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE             0x1040
--#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE              0x1041
--#define CL_DEVICE_PARENT_DEVICE                     0x1042
--#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES         0x1043
--#define CL_DEVICE_PARTITION_PROPERTIES              0x1044
--#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN         0x1045
--#define CL_DEVICE_PARTITION_TYPE                    0x1046
--#define CL_DEVICE_REFERENCE_COUNT                   0x1047
--#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC       0x1048
--#define CL_DEVICE_PRINTF_BUFFER_SIZE                0x1049
--#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT             0x104A
--#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT      0x104B
 -
 -/* cl_device_fp_config - bitfield */
 -#define CL_FP_DENORM                                (1 << 0)
@@ -747,7 +662,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_FP_ROUND_TO_INF                          (1 << 4)
 -#define CL_FP_FMA                                   (1 << 5)
 -#define CL_FP_SOFT_FLOAT                            (1 << 6)
--#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
 -
 -/* cl_device_mem_cache_type */
 -#define CL_NONE                                     0x0
@@ -772,23 +686,8 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_CONTEXT_PROPERTIES                       0x1082
 -#define CL_CONTEXT_NUM_DEVICES                      0x1083
 -
--/* cl_context_properties */
+-/* cl_context_info + cl_context_properties */
 -#define CL_CONTEXT_PLATFORM                         0x1084
--#define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
--    
--/* cl_device_partition_property */
--#define CL_DEVICE_PARTITION_EQUALLY                 0x1086
--#define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
--#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END      0x0
--#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN      0x1088
--    
--/* cl_device_affinity_domain */
--#define CL_DEVICE_AFFINITY_DOMAIN_NUMA                     (1 << 0)
--#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE                 (1 << 1)
--#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE                 (1 << 2)
--#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE                 (1 << 3)
--#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE                 (1 << 4)
--#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE       (1 << 5)
 -
 -/* cl_command_queue_info */
 -#define CL_QUEUE_CONTEXT                            0x1090
@@ -803,14 +702,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_MEM_USE_HOST_PTR                         (1 << 3)
 -#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
 -#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
--// reserved                                         (1 << 6)    
--#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
--#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
--#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
--
--/* cl_mem_migration_flags - bitfield */
--#define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
--#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED     (1 << 1)
 -
 -/* cl_channel_order */
 -#define CL_R                                        0x10B0
@@ -826,8 +717,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_Rx                                       0x10BA
 -#define CL_RGx                                      0x10BB
 -#define CL_RGBx                                     0x10BC
--#define CL_DEPTH                                    0x10BD
--#define CL_DEPTH_STENCIL                            0x10BE
 -
 -/* cl_channel_type */
 -#define CL_SNORM_INT8                               0x10D0
@@ -845,16 +734,11 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_UNSIGNED_INT32                           0x10DC
 -#define CL_HALF_FLOAT                               0x10DD
 -#define CL_FLOAT                                    0x10DE
--#define CL_UNORM_INT24                              0x10DF
 -
 -/* cl_mem_object_type */
 -#define CL_MEM_OBJECT_BUFFER                        0x10F0
 -#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
 -#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
--#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
--#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
--#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
--#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
 -
 -/* cl_mem_info */
 -#define CL_MEM_TYPE                                 0x1100
@@ -875,10 +759,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_IMAGE_WIDTH                              0x1114
 -#define CL_IMAGE_HEIGHT                             0x1115
 -#define CL_IMAGE_DEPTH                              0x1116
--#define CL_IMAGE_ARRAY_SIZE                         0x1117
--#define CL_IMAGE_BUFFER                             0x1118
--#define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
--#define CL_IMAGE_NUM_SAMPLES                        0x111A
 -
 -/* cl_addressing_mode */
 -#define CL_ADDRESS_NONE                             0x1130
@@ -901,7 +781,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -/* cl_map_flags - bitfield */
 -#define CL_MAP_READ                                 (1 << 0)
 -#define CL_MAP_WRITE                                (1 << 1)
--#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
 -
 -/* cl_program_info */
 -#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
@@ -911,20 +790,11 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_PROGRAM_SOURCE                           0x1164
 -#define CL_PROGRAM_BINARY_SIZES                     0x1165
 -#define CL_PROGRAM_BINARIES                         0x1166
--#define CL_PROGRAM_NUM_KERNELS                      0x1167
--#define CL_PROGRAM_KERNEL_NAMES                     0x1168
 -
 -/* cl_program_build_info */
 -#define CL_PROGRAM_BUILD_STATUS                     0x1181
 -#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
 -#define CL_PROGRAM_BUILD_LOG                        0x1183
--#define CL_PROGRAM_BINARY_TYPE                      0x1184
--    
--/* cl_program_binary_type */
--#define CL_PROGRAM_BINARY_TYPE_NONE                 0x0
--#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT      0x1
--#define CL_PROGRAM_BINARY_TYPE_LIBRARY              0x2
--#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE           0x4
 -
 -/* cl_build_status */
 -#define CL_BUILD_SUCCESS                            0
@@ -938,32 +808,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_KERNEL_REFERENCE_COUNT                   0x1192
 -#define CL_KERNEL_CONTEXT                           0x1193
 -#define CL_KERNEL_PROGRAM                           0x1194
--#define CL_KERNEL_ATTRIBUTES                        0x1195
--
--/* cl_kernel_arg_info */
--#define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
--#define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
--#define CL_KERNEL_ARG_TYPE_NAME                     0x1198
--#define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
--#define CL_KERNEL_ARG_NAME                          0x119A
--
--/* cl_kernel_arg_address_qualifier */
--#define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
--#define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
--#define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
--#define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
--
--/* cl_kernel_arg_access_qualifier */
--#define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
--#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
--#define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
--#define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
--    
--/* cl_kernel_arg_type_qualifer */
--#define CL_KERNEL_ARG_TYPE_NONE                     0
--#define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
--#define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
--#define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
 -
 -/* cl_kernel_work_group_info */
 -#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
@@ -971,7 +815,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
 -#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
 -#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
--#define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
 -
 -/* cl_event_info  */
 -#define CL_EVENT_COMMAND_QUEUE                      0x11D0
@@ -1002,17 +845,13 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
 -#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
 -#define CL_COMMAND_USER                             0x1204
--#define CL_COMMAND_BARRIER                          0x1205
--#define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
--#define CL_COMMAND_FILL_BUFFER                      0x1207
--#define CL_COMMAND_FILL_IMAGE                       0x1208
 -
 -/* command execution status */
 -#define CL_COMPLETE                                 0x0
 -#define CL_RUNNING                                  0x1
 -#define CL_SUBMITTED                                0x2
 -#define CL_QUEUED                                   0x3
--
+-  
 -/* cl_buffer_create_type  */
 -#define CL_BUFFER_CREATE_TYPE_REGION                0x1220
 -
@@ -1051,35 +890,22 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                size_t          /* param_value_size */, 
 -                void *          /* param_value */,
 -                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
--    
--extern CL_API_ENTRY cl_int CL_API_CALL
--clCreateSubDevices(cl_device_id                         /* in_device */,
--                   const cl_device_partition_property * /* properties */,
--                   cl_uint                              /* num_devices */,
--                   cl_device_id *                       /* out_devices */,
--                   cl_uint *                            /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
 -
--extern CL_API_ENTRY cl_int CL_API_CALL
--clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
--    
--extern CL_API_ENTRY cl_int CL_API_CALL
--clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
--    
 -/* Context APIs  */
 -extern CL_API_ENTRY cl_context CL_API_CALL
 -clCreateContext(const cl_context_properties * /* properties */,
--                cl_uint                 /* num_devices */,
--                const cl_device_id *    /* devices */,
+-                cl_uint                       /* num_devices */,
+-                const cl_device_id *          /* devices */,
 -                void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
--                void *                  /* user_data */,
--                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+-                void *                        /* user_data */,
+-                cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_context CL_API_CALL
 -clCreateContextFromType(const cl_context_properties * /* properties */,
--                        cl_device_type          /* device_type */,
+-                        cl_device_type                /* device_type */,
 -                        void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
--                        void *                  /* user_data */,
--                        cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+-                        void *                        /* user_data */,
+-                        cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
@@ -1114,6 +940,25 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                      void *                /* param_value */,
 -                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 -
+-#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+-#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
+-/* 
+- *  WARNING:
+- *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
+- *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
+- *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
+- *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
+- *
+- *  Software developers previously relying on this API are instructed to set the command queue 
+- *  properties when creating the queue, instead. 
+- */
+-extern CL_API_ENTRY cl_int CL_API_CALL
+-clSetCommandQueueProperty(cl_command_queue              /* command_queue */,
+-                          cl_command_queue_properties   /* properties */, 
+-                          cl_bool                        /* enable */,
+-                          cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
+-#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
+-
 -/* Memory Object APIs */
 -extern CL_API_ENTRY cl_mem CL_API_CALL
 -clCreateBuffer(cl_context   /* context */,
@@ -1130,12 +975,26 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                  cl_int *                 /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
 -
 -extern CL_API_ENTRY cl_mem CL_API_CALL
--clCreateImage(cl_context              /* context */,
--              cl_mem_flags            /* flags */,
--              const cl_image_format * /* image_format */,
--              const cl_image_desc *   /* image_desc */, 
--              void *                  /* host_ptr */,
--              cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+-clCreateImage2D(cl_context              /* context */,
+-                cl_mem_flags            /* flags */,
+-                const cl_image_format * /* image_format */,
+-                size_t                  /* image_width */,
+-                size_t                  /* image_height */,
+-                size_t                  /* image_row_pitch */, 
+-                void *                  /* host_ptr */,
+-                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+-                        
+-extern CL_API_ENTRY cl_mem CL_API_CALL
+-clCreateImage3D(cl_context              /* context */,
+-                cl_mem_flags            /* flags */,
+-                const cl_image_format * /* image_format */,
+-                size_t                  /* image_width */, 
+-                size_t                  /* image_height */,
+-                size_t                  /* image_depth */, 
+-                size_t                  /* image_row_pitch */, 
+-                size_t                  /* image_slice_pitch */, 
+-                void *                  /* host_ptr */,
+-                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 -                        
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
@@ -1170,7 +1029,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                                    void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
 -                                    void * /*user_data */ )             CL_API_SUFFIX__VERSION_1_1;  
 -
--/* Sampler APIs */
+-/* Sampler APIs  */
 -extern CL_API_ENTRY cl_sampler CL_API_CALL
 -clCreateSampler(cl_context          /* context */,
 -                cl_bool             /* normalized_coords */, 
@@ -1208,13 +1067,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                          cl_int *                       /* binary_status */,
 -                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 -
--extern CL_API_ENTRY cl_program CL_API_CALL
--clCreateProgramWithBuiltInKernels(cl_context            /* context */,
--                                  cl_uint               /* num_devices */,
--                                  const cl_device_id *  /* device_list */,
--                                  const char *          /* kernel_names */,
--                                  cl_int *              /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
--
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
 -
@@ -1230,30 +1082,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
--clCompileProgram(cl_program           /* program */,
--                 cl_uint              /* num_devices */,
--                 const cl_device_id * /* device_list */,
--                 const char *         /* options */, 
--                 cl_uint              /* num_input_headers */,
--                 const cl_program *   /* input_headers */,
--                 const char **        /* header_include_names */,
--                 void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
--                 void *               /* user_data */) CL_API_SUFFIX__VERSION_1_2;
--
--extern CL_API_ENTRY cl_program CL_API_CALL
--clLinkProgram(cl_context           /* context */,
--              cl_uint              /* num_devices */,
--              const cl_device_id * /* device_list */,
--              const char *         /* options */, 
--              cl_uint              /* num_input_programs */,
--              const cl_program *   /* input_programs */,
--              void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
--              void *               /* user_data */,
--              cl_int *             /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
--
--
--extern CL_API_ENTRY cl_int CL_API_CALL
--clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
+-clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clGetProgramInfo(cl_program         /* program */,
@@ -1302,14 +1131,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
--clGetKernelArgInfo(cl_kernel       /* kernel */,
--                   cl_uint         /* arg_indx */,
--                   cl_kernel_arg_info  /* param_name */,
--                   size_t          /* param_value_size */,
--                   void *          /* param_value */,
--                   size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
--
--extern CL_API_ENTRY cl_int CL_API_CALL
 -clGetKernelWorkGroupInfo(cl_kernel                  /* kernel */,
 -                         cl_device_id               /* device */,
 -                         cl_kernel_work_group_info  /* param_name */,
@@ -1317,7 +1138,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                         void *                     /* param_value */,
 -                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 -
--/* Event Object APIs */
+-/* Event Object APIs  */
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clWaitForEvents(cl_uint             /* num_events */,
 -                const cl_event *    /* event_list */) CL_API_SUFFIX__VERSION_1_0;
@@ -1349,7 +1170,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                    void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
 -                    void *      /* user_data */) CL_API_SUFFIX__VERSION_1_1;
 -
--/* Profiling APIs */
+-/* Profiling APIs  */
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clGetEventProfilingInfo(cl_event            /* event */,
 -                        cl_profiling_info   /* param_name */,
@@ -1370,7 +1191,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                    cl_mem              /* buffer */,
 -                    cl_bool             /* blocking_read */,
 -                    size_t              /* offset */,
--                    size_t              /* size */, 
+-                    size_t              /* cb */, 
 -                    void *              /* ptr */,
 -                    cl_uint             /* num_events_in_wait_list */,
 -                    const cl_event *    /* event_wait_list */,
@@ -1380,8 +1201,8 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -clEnqueueReadBufferRect(cl_command_queue    /* command_queue */,
 -                        cl_mem              /* buffer */,
 -                        cl_bool             /* blocking_read */,
--                        const size_t *      /* buffer_offset */,
--                        const size_t *      /* host_offset */, 
+-                        const size_t *      /* buffer_origin */,
+-                        const size_t *      /* host_origin */, 
 -                        const size_t *      /* region */,
 -                        size_t              /* buffer_row_pitch */,
 -                        size_t              /* buffer_slice_pitch */,
@@ -1397,7 +1218,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                     cl_mem             /* buffer */, 
 -                     cl_bool            /* blocking_write */, 
 -                     size_t             /* offset */, 
--                     size_t             /* size */, 
+-                     size_t             /* cb */, 
 -                     const void *       /* ptr */, 
 -                     cl_uint            /* num_events_in_wait_list */, 
 -                     const cl_event *   /* event_wait_list */, 
@@ -1407,8 +1228,8 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -clEnqueueWriteBufferRect(cl_command_queue    /* command_queue */,
 -                         cl_mem              /* buffer */,
 -                         cl_bool             /* blocking_write */,
--                         const size_t *      /* buffer_offset */,
--                         const size_t *      /* host_offset */, 
+-                         const size_t *      /* buffer_origin */,
+-                         const size_t *      /* host_origin */, 
 -                         const size_t *      /* region */,
 -                         size_t              /* buffer_row_pitch */,
 -                         size_t              /* buffer_slice_pitch */,
@@ -1420,23 +1241,12 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                         cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
 -                            
 -extern CL_API_ENTRY cl_int CL_API_CALL
--clEnqueueFillBuffer(cl_command_queue   /* command_queue */,
--                    cl_mem             /* buffer */, 
--                    const void *       /* pattern */, 
--                    size_t             /* pattern_size */, 
--                    size_t             /* offset */, 
--                    size_t             /* size */, 
--                    cl_uint            /* num_events_in_wait_list */, 
--                    const cl_event *   /* event_wait_list */, 
--                    cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
--                            
--extern CL_API_ENTRY cl_int CL_API_CALL
 -clEnqueueCopyBuffer(cl_command_queue    /* command_queue */, 
 -                    cl_mem              /* src_buffer */,
 -                    cl_mem              /* dst_buffer */, 
 -                    size_t              /* src_offset */,
 -                    size_t              /* dst_offset */,
--                    size_t              /* size */, 
+-                    size_t              /* cb */, 
 -                    cl_uint             /* num_events_in_wait_list */,
 -                    const cl_event *    /* event_wait_list */,
 -                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
@@ -1483,16 +1293,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
--clEnqueueFillImage(cl_command_queue   /* command_queue */,
--                   cl_mem             /* image */, 
--                   const void *       /* fill_color */, 
--                   const size_t *     /* origin[3] */, 
--                   const size_t *     /* region[3] */, 
--                   cl_uint            /* num_events_in_wait_list */, 
--                   const cl_event *   /* event_wait_list */, 
--                   cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
--                            
--extern CL_API_ENTRY cl_int CL_API_CALL
 -clEnqueueCopyImage(cl_command_queue     /* command_queue */,
 -                   cl_mem               /* src_image */,
 -                   cl_mem               /* dst_image */, 
@@ -1531,7 +1331,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                   cl_bool          /* blocking_map */, 
 -                   cl_map_flags     /* map_flags */,
 -                   size_t           /* offset */,
--                   size_t           /* size */,
+-                   size_t           /* cb */,
 -                   cl_uint          /* num_events_in_wait_list */,
 -                   const cl_event * /* event_wait_list */,
 -                   cl_event *       /* event */,
@@ -1560,15 +1360,6 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                        cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
--clEnqueueMigrateMemObjects(cl_command_queue       /* command_queue */,
--                           cl_uint                /* num_mem_objects */,
--                           const cl_mem *         /* mem_objects */,
--                           cl_mem_migration_flags /* flags */,
--                           cl_uint                /* num_events_in_wait_list */,
--                           const cl_event *       /* event_wait_list */,
--                           cl_event *             /* event */) CL_API_SUFFIX__VERSION_1_2;
--
--extern CL_API_ENTRY cl_int CL_API_CALL
 -clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
 -                       cl_kernel        /* kernel */,
 -                       cl_uint          /* work_dim */,
@@ -1588,7 +1379,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clEnqueueNativeKernel(cl_command_queue  /* command_queue */,
--					  void (CL_CALLBACK * /*user_func*/)(void *), 
+-					  void (CL_CALLBACK *user_func)(void *), 
 -                      void *            /* args */,
 -                      size_t            /* cb_args */, 
 -                      cl_uint           /* num_mem_objects */,
@@ -1599,17 +1390,16 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -                      cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
--clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
--                            cl_uint           /* num_events_in_wait_list */,
--                            const cl_event *  /* event_wait_list */,
--                            cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
+-clEnqueueMarker(cl_command_queue    /* command_queue */,
+-                cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_int CL_API_CALL
--clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
--                             cl_uint           /* num_events_in_wait_list */,
--                             const cl_event *  /* event_wait_list */,
--                             cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
+-clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
+-                       cl_uint          /* num_events */,
+-                       const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
 -
+-extern CL_API_ENTRY cl_int CL_API_CALL
+-clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 -
 -/* Extension function access
 - *
@@ -1618,51 +1408,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 - * check to make sure the address is not NULL, before using or 
 - * calling the returned function address.
 - */
--extern CL_API_ENTRY void * CL_API_CALL 
--clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
--                                         const char *   /* func_name */) CL_API_SUFFIX__VERSION_1_2;
--    
--
--// Deprecated OpenCL 1.1 APIs
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
--clCreateImage2D(cl_context              /* context */,
--                cl_mem_flags            /* flags */,
--                const cl_image_format * /* image_format */,
--                size_t                  /* image_width */,
--                size_t                  /* image_height */,
--                size_t                  /* image_row_pitch */, 
--                void *                  /* host_ptr */,
--                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
--    
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
--clCreateImage3D(cl_context              /* context */,
--                cl_mem_flags            /* flags */,
--                const cl_image_format * /* image_format */,
--                size_t                  /* image_width */, 
--                size_t                  /* image_height */,
--                size_t                  /* image_depth */, 
--                size_t                  /* image_row_pitch */, 
--                size_t                  /* image_slice_pitch */, 
--                void *                  /* host_ptr */,
--                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
--    
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
--clEnqueueMarker(cl_command_queue    /* command_queue */,
--                cl_event *          /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
--    
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
--clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
--                        cl_uint          /* num_events */,
--                        const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
--    
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
--clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
--
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
--clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
--    
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
--clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+-extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0;
 -
 -#ifdef __cplusplus
 -}
@@ -1671,13 +1417,13 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
 -#endif  /* __OPENCL_CL_H */
 -
 +#include_next <CL/cl.h>
-Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_platform.h
+Index: beignet-0.1+git20130619+42967d2/include/CL/cl_platform.h
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl_platform.h	2013-05-21 10:38:37.207948335 +0200
-+++ beignet-0.1+git20130521+a7ea35c/include/CL/cl_platform.h	2013-05-21 10:41:03.327941820 +0200
-@@ -1,1254 +1 @@
+--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_platform.h	2013-06-19 21:04:24.122667370 +0200
++++ beignet-0.1+git20130619+42967d2/include/CL/cl_platform.h	2013-06-19 21:04:33.478666953 +0200
+@@ -1,1198 +1 @@
 -/**********************************************************************************
-- * Copyright (c) 2008-2012 The Khronos Group Inc.
+- * Copyright (c) 2008-2010 The Khronos Group Inc.
 - *
 - * Permission is hereby granted, free of charge, to any person obtaining a
 - * copy of this software and/or associated documentation files (the
@@ -1724,75 +1470,19 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_platform.h
 -#endif
 -
 -#ifdef __APPLE__
--    #define CL_EXTENSION_WEAK_LINK       __attribute__((weak_import))
--    #define CL_API_SUFFIX__VERSION_1_0                  AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
--    #define CL_EXT_SUFFIX__VERSION_1_0                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
--    #define CL_API_SUFFIX__VERSION_1_1                  AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
--    #define GCL_API_SUFFIX__VERSION_1_1                 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
--    #define CL_EXT_SUFFIX__VERSION_1_1                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
--    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED       CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
--    
--    #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
--        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
--        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
--        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
--        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
--        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
--    #else
--        #warning  This path should never happen outside of internal operating system development.  AvailabilityMacros do not function correctly here!
--        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
--        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
--        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
--        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
--    #endif
+-    #define CL_EXTENSION_WEAK_LINK                  __attribute__((weak_import))       
+-    #define CL_API_SUFFIX__VERSION_1_0              AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+-    #define CL_EXT_SUFFIX__VERSION_1_0              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+-    #define CL_API_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
+-    #define CL_EXT_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
+-    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
 -#else
--    #define CL_EXTENSION_WEAK_LINK  
+-    #define CL_EXTENSION_WEAK_LINK                         
 -    #define CL_API_SUFFIX__VERSION_1_0
 -    #define CL_EXT_SUFFIX__VERSION_1_0
 -    #define CL_API_SUFFIX__VERSION_1_1
 -    #define CL_EXT_SUFFIX__VERSION_1_1
--    #define CL_API_SUFFIX__VERSION_1_2
--    #define CL_EXT_SUFFIX__VERSION_1_2
--    
--    #ifdef __GNUC__
--        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
--            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
--            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
--        #else
--            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
--            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
--        #endif
--    
--        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
--            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED    
--            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
--        #else
--            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
--            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
--        #endif
--    #elif _WIN32
--        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
--            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED    
--            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
--        #else
--            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED 
--            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)     
--        #endif
--    
--        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
--            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
--            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
--        #else
--            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED 
--            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)     
--        #endif
--    #else
--        #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
--        #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
--    
--        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
--        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
--    #endif
+-    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
 -#endif
 -
 -#if (defined (_WIN32) && defined(_MSC_VER))
@@ -1986,7 +1676,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_platform.h
 -
 -#include <stddef.h>
 -
--/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
+-/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */
 -typedef unsigned int cl_GLuint;
 -typedef int          cl_GLint;
 -typedef unsigned int cl_GLenum;
@@ -2904,13 +2594,13 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_platform.h
 -/* Macro to facilitate debugging 
 - * Usage:
 - *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. 
-- *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO \"
+- *   The first line ends with:   CL_PROGRAM_STRING_BEGIN \"
 - *   Each line thereafter of OpenCL C source must end with: \n\
 - *   The last line ends in ";
 - *
 - *   Example:
 - *
-- *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
+- *   const char *my_program = CL_PROGRAM_STRING_BEGIN "\
 - *   kernel void foo( int a, float * b )             \n\
 - *   {                                               \n\
 - *      // my comment                                \n\
@@ -2931,13 +2621,13 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_platform.h
 -
 -#endif  /* __CL_PLATFORM_H  */
 +#include_next <CL/cl_platform.h>
-Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h
+Index: beignet-0.1+git20130619+42967d2/include/CL/cl_gl.h
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl_gl.h	2013-05-21 10:38:37.207948335 +0200
-+++ beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h	2013-05-21 10:41:03.327941820 +0200
-@@ -1,161 +1 @@
+--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_gl.h	2013-06-19 21:04:24.122667370 +0200
++++ beignet-0.1+git20130619+42967d2/include/CL/cl_gl.h	2013-06-19 21:04:33.478666953 +0200
+@@ -1,155 +1 @@
 -/**********************************************************************************
-- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+- * Copyright (c) 2008-2010 The Khronos Group Inc.
 - *
 - * Permission is hereby granted, free of charge, to any person obtaining a
 - * copy of this software and/or associated documentation files (the
@@ -2959,11 +2649,20 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h
 - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 - **********************************************************************************/
 -
+-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+-
+-/*
+- * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have
+- * OpenGL dependencies. The application is responsible for #including
+- * OpenGL or OpenGL ES headers before #including cl_gl.h.
+- */
+-
 -#ifndef __OPENCL_CL_GL_H
 -#define __OPENCL_CL_GL_H
 -
 -#ifdef __APPLE__
 -#include <OpenCL/cl.h>
+-#include <OpenGL/CGLDevice.h>
 -#else
 -#include <CL/cl.h>
 -#endif	
@@ -2977,20 +2676,15 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h
 -typedef cl_uint     cl_gl_platform_info;
 -typedef struct __GLsync *cl_GLsync;
 -
--/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken           */
--#define CL_GL_OBJECT_BUFFER                     0x2000
--#define CL_GL_OBJECT_TEXTURE2D                  0x2001
--#define CL_GL_OBJECT_TEXTURE3D                  0x2002
--#define CL_GL_OBJECT_RENDERBUFFER               0x2003
--#define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
--#define CL_GL_OBJECT_TEXTURE1D                  0x200F
--#define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
--#define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
--
--/* cl_gl_texture_info           */
--#define CL_GL_TEXTURE_TARGET                    0x2004
--#define CL_GL_MIPMAP_LEVEL                      0x2005
+-/* cl_gl_object_type */
+-#define CL_GL_OBJECT_BUFFER             0x2000
+-#define CL_GL_OBJECT_TEXTURE2D          0x2001
+-#define CL_GL_OBJECT_TEXTURE3D          0x2002
+-#define CL_GL_OBJECT_RENDERBUFFER       0x2003
 -
+-/* cl_gl_texture_info */
+-#define CL_GL_TEXTURE_TARGET            0x2004
+-#define CL_GL_MIPMAP_LEVEL              0x2005
 -
 -extern CL_API_ENTRY cl_mem CL_API_CALL
 -clCreateFromGLBuffer(cl_context     /* context */,
@@ -2999,13 +2693,21 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h
 -                     int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 -
 -extern CL_API_ENTRY cl_mem CL_API_CALL
--clCreateFromGLTexture(cl_context      /* context */,
--                      cl_mem_flags    /* flags */,
--                      cl_GLenum       /* target */,
--                      cl_GLint        /* miplevel */,
--                      cl_GLuint       /* texture */,
--                      cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
--    
+-clCreateFromGLTexture2D(cl_context      /* context */,
+-                        cl_mem_flags    /* flags */,
+-                        cl_GLenum       /* target */,
+-                        cl_GLint        /* miplevel */,
+-                        cl_GLuint       /* texture */,
+-                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+-
+-extern CL_API_ENTRY cl_mem CL_API_CALL
+-clCreateFromGLTexture3D(cl_context      /* context */,
+-                        cl_mem_flags    /* flags */,
+-                        cl_GLenum       /* target */,
+-                        cl_GLint        /* miplevel */,
+-                        cl_GLuint       /* texture */,
+-                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+-
 -extern CL_API_ENTRY cl_mem CL_API_CALL
 -clCreateFromGLRenderbuffer(cl_context   /* context */,
 -                           cl_mem_flags /* flags */,
@@ -3015,7 +2717,7 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clGetGLObjectInfo(cl_mem                /* memobj */,
 -                  cl_gl_object_type *   /* gl_object_type */,
--                  cl_GLuint *           /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
+-                  cl_GLuint *              /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
 -                  
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clGetGLTextureInfo(cl_mem               /* memobj */,
@@ -3040,51 +2742,33 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h
 -                          const cl_event *      /* event_wait_list */,
 -                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
 -
--
--// Deprecated OpenCL 1.1 APIs
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
--clCreateFromGLTexture2D(cl_context      /* context */,
--                        cl_mem_flags    /* flags */,
--                        cl_GLenum       /* target */,
--                        cl_GLint        /* miplevel */,
--                        cl_GLuint       /* texture */,
--                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
--    
--extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
--clCreateFromGLTexture3D(cl_context      /* context */,
--                        cl_mem_flags    /* flags */,
--                        cl_GLenum       /* target */,
--                        cl_GLint        /* miplevel */,
--                        cl_GLuint       /* texture */,
--                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
--    
 -/* cl_khr_gl_sharing extension  */
--    
+-
 -#define cl_khr_gl_sharing 1
--    
+-
 -typedef cl_uint     cl_gl_context_info;
--    
+-
 -/* Additional Error Codes  */
 -#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
--    
+-
 -/* cl_gl_context_info  */
 -#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
 -#define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
--    
+-
 -/* Additional cl_context_properties  */
 -#define CL_GL_CONTEXT_KHR                       0x2008
 -#define CL_EGL_DISPLAY_KHR                      0x2009
 -#define CL_GLX_DISPLAY_KHR                      0x200A
 -#define CL_WGL_HDC_KHR                          0x200B
 -#define CL_CGL_SHAREGROUP_KHR                   0x200C
--    
+-
 -extern CL_API_ENTRY cl_int CL_API_CALL
 -clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
 -                      cl_gl_context_info            /* param_name */,
 -                      size_t                        /* param_value_size */,
 -                      void *                        /* param_value */,
 -                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
--    
+-
 -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
 -    const cl_context_properties * properties,
 -    cl_gl_context_info            param_name,
@@ -3096,5 +2780,4097 @@ Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h
 -}
 -#endif
 -
--#endif  /* __OPENCL_CL_GL_H */
+-#endif  /* __OPENCL_CL_GL_H  */
 +#include_next <CL/cl_gl.h>
+Index: beignet-0.1+git20130619+42967d2/include/CL/cl_gl_ext.h
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_gl_ext.h	2013-06-19 21:04:24.122667370 +0200
++++ beignet-0.1+git20130619+42967d2/include/CL/cl_gl_ext.h	2013-06-19 21:04:33.478666953 +0200
+@@ -1,69 +1 @@
+-/**********************************************************************************
+- * Copyright (c) 2008-2010 The Khronos Group Inc.
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a
+- * copy of this software and/or associated documentation files (the
+- * "Materials"), to deal in the Materials without restriction, including
+- * without limitation the rights to use, copy, modify, merge, publish,
+- * distribute, sublicense, and/or sell copies of the Materials, and to
+- * permit persons to whom the Materials are furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included
+- * in all copies or substantial portions of the Materials.
+- *
+- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+- **********************************************************************************/
+-
+-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+-
+-/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have           */
+-/* OpenGL dependencies.                                                         */
+-
+-#ifndef __OPENCL_CL_GL_EXT_H
+-#define __OPENCL_CL_GL_EXT_H
+-
+-#ifdef __cplusplus
+-extern "C" {
+-#endif
+-
+-#ifdef __APPLE__
+-    #include <OpenCL/cl_gl.h>
+-#else
+-    #include <CL/cl_gl.h>
+-#endif
+-
+-/*
+- * For each extension, follow this template
+- * /* cl_VEN_extname extension  */
+-/* #define cl_VEN_extname 1
+- * ... define new types, if any
+- * ... define new tokens, if any
+- * ... define new APIs, if any
+- *
+- *  If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
+- *  This allows us to avoid having to decide whether to include GL headers or GLES here.
+- */
+-
+-/* 
+- *  cl_khr_gl_event  extension
+- *  See section 9.9 in the OpenCL 1.1 spec for more information
+- */
+-#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
+-
+-extern CL_API_ENTRY cl_event CL_API_CALL
+-clCreateEventFromGLsyncKHR(cl_context           /* context */,
+-                           cl_GLsync            /* cl_GLsync */,
+-                           cl_int *             /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
+-
+-#ifdef __cplusplus
+-}
+-#endif
+-
+-#endif	/* __OPENCL_CL_GL_EXT_H  */
++#include_next <CL/cl_gl_ext.h>
+Index: beignet-0.1+git20130619+42967d2/include/CL/cl.hpp
+===================================================================
+--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl.hpp	2013-06-19 21:04:24.122667370 +0200
++++ beignet-0.1+git20130619+42967d2/include/CL/cl.hpp	2013-06-19 21:04:33.482666952 +0200
+@@ -1,4011 +1 @@
+-/*******************************************************************************
+- * Copyright (c) 2008-2010 The Khronos Group Inc.
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a
+- * copy of this software and/or associated documentation files (the
+- * "Materials"), to deal in the Materials without restriction, including
+- * without limitation the rights to use, copy, modify, merge, publish,
+- * distribute, sublicense, and/or sell copies of the Materials, and to
+- * permit persons to whom the Materials are furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included
+- * in all copies or substantial portions of the Materials.
+- *
+- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+- ******************************************************************************/
+-
+-/*! \file
+- *
+- *   \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33)    
+- *   \author Benedict R. Gaster and Laurent Morichetti
+- *   
+- *   Additions and fixes from Brian Cole, March 3rd 2010.
+- *   
+- *   \version 1.1
+- *   \date June 2010
+- *
+- *   Optional extension support
+- *
+- *         cl
+- *         cl_ext_device_fission
+- *				#define USE_CL_DEVICE_FISSION
+- */
+-
+-/*! \mainpage
+- * \section intro Introduction
+- * For many large applications C++ is the language of choice and so it seems
+- * reasonable to define C++ bindings for OpenCL.
+- *
+- *
+- * The interface is contained with a single C++ header file \em cl.hpp and all
+- * definitions are contained within the namespace \em cl. There is no additional
+- * requirement to include \em cl.h and to use either the C++ or original C
+- * bindings it is enough to simply include \em cl.hpp.
+- *
+- * The bindings themselves are lightweight and correspond closely to the
+- * underlying C API. Using the C++ bindings introduces no additional execution
+- * overhead.
+- *
+- * For detail documentation on the bindings see:
+- *
+- * The OpenCL C++ Wrapper API 1.1 (revision 04)
+- *  http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf
+- *
+- * \section example Example
+- *
+- * The following example shows a general use case for the C++
+- * bindings, including support for the optional exception feature and
+- * also the supplied vector and string classes, see following sections for
+- * decriptions of these features.
+- *
+- * \code
+- * #define __CL_ENABLE_EXCEPTIONS
+- * 
+- * #if defined(__APPLE__) || defined(__MACOSX)
+- * #include <OpenCL/cl.hpp>
+- * #else
+- * #include <CL/cl.hpp>
+- * #endif
+- * #include <cstdio>
+- * #include <cstdlib>
+- * #include <iostream>
+- * 
+- *  const char * helloStr  = "__kernel void "
+- *                           "hello(void) "
+- *                           "{ "
+- *                           "  "
+- *                           "} ";
+- * 
+- *  int
+- *  main(void)
+- *  {
+- *     cl_int err = CL_SUCCESS;
+- *     try {
+- *
+- *       std::vector<cl::Platform> platforms;
+- *       cl::Platform::get(&platforms);
+- *       if (platforms.size() == 0) {
+- *           std::cout << "Platform size 0\n";
+- *           return -1;
+- *       }
+- *
+- *       cl_context_properties properties[] = 
+- *          { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};
+- *       cl::Context context(CL_DEVICE_TYPE_CPU, properties); 
+- * 
+- *       std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
+- * 
+- *       cl::Program::Sources source(1,
+- *           std::make_pair(helloStr,strlen(helloStr)));
+- *       cl::Program program_ = cl::Program(context, source);
+- *       program_.build(devices);
+- * 
+- *       cl::Kernel kernel(program_, "hello", &err);
+- * 
+- *       cl::Event event;
+- *       cl::CommandQueue queue(context, devices[0], 0, &err);
+- *       queue.enqueueNDRangeKernel(
+- *           kernel, 
+- *           cl::NullRange, 
+- *           cl::NDRange(4,4),
+- *           cl::NullRange,
+- *           NULL,
+- *           &event); 
+- * 
+- *       event.wait();
+- *     }
+- *     catch (cl::Error err) {
+- *        std::cerr 
+- *           << "ERROR: "
+- *           << err.what()
+- *           << "("
+- *           << err.err()
+- *           << ")"
+- *           << std::endl;
+- *     }
+- * 
+- *    return EXIT_SUCCESS;
+- *  }
+- * 
+- * \endcode
+- *
+- */
+-#ifndef CL_HPP_
+-#define CL_HPP_
+-
+-#ifdef _WIN32
+-#include <windows.h>
+-#include <malloc.h>
+-#if defined(USE_DX_INTEROP)
+-#include <CL/cl_d3d10.h>
+-#endif
+-#endif // _WIN32
+-
+-// 
+-#if defined(USE_CL_DEVICE_FISSION)
+-#include <CL/cl_ext.h>
+-#endif
+-
+-#if defined(__APPLE__) || defined(__MACOSX)
+-#include <OpenGL/OpenGL.h>
+-#include <OpenCL/opencl.h>
+-#else
+-#include <GL/gl.h>
+-#include <CL/opencl.h>
+-#endif // !__APPLE__
+-
+-#if !defined(CL_CALLBACK)
+-#define CL_CALLBACK
+-#endif //CL_CALLBACK
+-
+-#include <utility>
+-
+-#if !defined(__NO_STD_VECTOR)
+-#include <vector>
+-#endif
+-
+-#if !defined(__NO_STD_STRING)
+-#include <string>
+-#endif 
+-
+-#if defined(linux) || defined(__APPLE__) || defined(__MACOSX)
+-# include <alloca.h>
+-#endif // linux
+-
+-#include <cstring>
+-
+-/*! \namespace cl
+- *
+- * \brief The OpenCL C++ bindings are defined within this namespace.
+- *
+- */
+-namespace cl {
+-
+-#define __INIT_CL_EXT_FCN_PTR(name) \
+-    if(!pfn_##name) { \
+-        pfn_##name = (PFN_##name) \
+-            clGetExtensionFunctionAddress(#name); \
+-        if(!pfn_##name) { \
+-        } \
+-    }
+-
+-class Program;
+-class Device;
+-class Context;
+-class CommandQueue;
+-class Memory;
+-
+-#if defined(__CL_ENABLE_EXCEPTIONS)
+-#include <exception>
+-/*! \class Error
+- * \brief Exception class
+- */
+-class Error : public std::exception
+-{
+-private:
+-    cl_int err_;
+-    const char * errStr_;
+-public:
+-    /*! Create a new CL error exception for a given error code
+-     *  and corresponding message.
+-     */
+-    Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr)
+-    {}
+-
+-    ~Error() throw() {}
+-
+-    /*! \brief Get error string associated with exception
+-     *
+-     * \return A memory pointer to the error message string.
+-     */
+-    virtual const char * what() const throw ()
+-    {
+-        if (errStr_ == NULL) {
+-            return "empty";
+-        }
+-        else {
+-            return errStr_;
+-        }
+-    }
+-
+-    /*! \brief Get error code associated with exception
+-     *
+-     *  \return The error code.
+-     */
+-    const cl_int err(void) const { return err_; }
+-};
+-
+-#define __ERR_STR(x) #x
+-#else
+-#define __ERR_STR(x) NULL
+-#endif // __CL_ENABLE_EXCEPTIONS
+-
+-//! \cond DOXYGEN_DETAIL
+-#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
+-#define __GET_DEVICE_INFO_ERR               __ERR_STR(clgetDeviceInfo)
+-#define __GET_PLATFORM_INFO_ERR             __ERR_STR(clGetPlatformInfo)
+-#define __GET_DEVICE_IDS_ERR                __ERR_STR(clGetDeviceIDs)
+-#define __GET_PLATFORM_IDS_ERR              __ERR_STR(clGetPlatformIDs)
+-#define __GET_CONTEXT_INFO_ERR              __ERR_STR(clGetContextInfo)
+-#define __GET_EVENT_INFO_ERR                __ERR_STR(clGetEventInfo)
+-#define __GET_EVENT_PROFILE_INFO_ERR        __ERR_STR(clGetEventProfileInfo)
+-#define __GET_MEM_OBJECT_INFO_ERR           __ERR_STR(clGetMemObjectInfo)
+-#define __GET_IMAGE_INFO_ERR                __ERR_STR(clGetImageInfo)
+-#define __GET_SAMPLER_INFO_ERR              __ERR_STR(clGetSamplerInfo)
+-#define __GET_KERNEL_INFO_ERR               __ERR_STR(clGetKernelInfo)
+-#define __GET_KERNEL_WORK_GROUP_INFO_ERR    __ERR_STR(clGetKernelWorkGroupInfo)
+-#define __GET_PROGRAM_INFO_ERR              __ERR_STR(clGetProgramInfo)
+-#define __GET_PROGRAM_BUILD_INFO_ERR        __ERR_STR(clGetProgramBuildInfo)
+-#define __GET_COMMAND_QUEUE_INFO_ERR        __ERR_STR(clGetCommandQueueInfo)
+-
+-#define __CREATE_CONTEXT_FROM_TYPE_ERR      __ERR_STR(clCreateContextFromType)
+-#define __GET_SUPPORTED_IMAGE_FORMATS_ERR   __ERR_STR(clGetSupportedImageFormats)
+-
+-#define __CREATE_BUFFER_ERR                 __ERR_STR(clCreateBuffer)
+-#define __CREATE_SUBBUFFER_ERR              __ERR_STR(clCreateSubBuffer)
+-#define __CREATE_GL_BUFFER_ERR              __ERR_STR(clCreateFromGLBuffer)
+-#define __GET_GL_OBJECT_INFO_ERR            __ERR_STR(clGetGLObjectInfo)
+-#define __CREATE_IMAGE2D_ERR                __ERR_STR(clCreateImage2D)
+-#define __CREATE_IMAGE3D_ERR                __ERR_STR(clCreateImage3D)
+-#define __CREATE_SAMPLER_ERR                __ERR_STR(clCreateSampler)
+-#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback)
+-
+-#define __CREATE_USER_EVENT_ERR             __ERR_STR(clCreateUserEvent)
+-#define __SET_USER_EVENT_STATUS_ERR         __ERR_STR(clSetUserEventStatus)
+-#define __SET_EVENT_CALLBACK_ERR            __ERR_STR(clSetEventCallback)
+-#define __WAIT_FOR_EVENTS_ERR               __ERR_STR(clWaitForEvents)
+-
+-#define __CREATE_KERNEL_ERR                 __ERR_STR(clCreateKernel)
+-#define __SET_KERNEL_ARGS_ERR               __ERR_STR(clSetKernelArg)
+-#define __CREATE_PROGRAM_WITH_SOURCE_ERR    __ERR_STR(clCreateProgramWithSource)
+-#define __CREATE_PROGRAM_WITH_BINARY_ERR    __ERR_STR(clCreateProgramWithBinary)
+-#define __BUILD_PROGRAM_ERR                 __ERR_STR(clBuildProgram)
+-#define __CREATE_KERNELS_IN_PROGRAM_ERR     __ERR_STR(clCreateKernelsInProgram)
+-
+-#define __CREATE_COMMAND_QUEUE_ERR          __ERR_STR(clCreateCommandQueue)
+-#define __SET_COMMAND_QUEUE_PROPERTY_ERR    __ERR_STR(clSetCommandQueueProperty)
+-#define __ENQUEUE_READ_BUFFER_ERR           __ERR_STR(clEnqueueReadBuffer)
+-#define __ENQUEUE_READ_BUFFER_RECT_ERR      __ERR_STR(clEnqueueReadBufferRect)
+-#define __ENQUEUE_WRITE_BUFFER_ERR          __ERR_STR(clEnqueueWriteBuffer)
+-#define __ENQUEUE_WRITE_BUFFER_RECT_ERR     __ERR_STR(clEnqueueWriteBufferRect)
+-#define __ENQEUE_COPY_BUFFER_ERR            __ERR_STR(clEnqueueCopyBuffer)
+-#define __ENQEUE_COPY_BUFFER_RECT_ERR       __ERR_STR(clEnqueueCopyBufferRect)
+-#define __ENQUEUE_READ_IMAGE_ERR            __ERR_STR(clEnqueueReadImage)
+-#define __ENQUEUE_WRITE_IMAGE_ERR           __ERR_STR(clEnqueueWriteImage)
+-#define __ENQUEUE_COPY_IMAGE_ERR            __ERR_STR(clEnqueueCopyImage)
+-#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR  __ERR_STR(clEnqueueCopyImageToBuffer)
+-#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR  __ERR_STR(clEnqueueCopyBufferToImage)
+-#define __ENQUEUE_MAP_BUFFER_ERR            __ERR_STR(clEnqueueMapBuffer)
+-#define __ENQUEUE_MAP_IMAGE_ERR             __ERR_STR(clEnqueueMapImage)
+-#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR      __ERR_STR(clEnqueueUnMapMemObject)
+-#define __ENQUEUE_NDRANGE_KERNEL_ERR        __ERR_STR(clEnqueueNDRangeKernel)
+-#define __ENQUEUE_TASK_ERR                  __ERR_STR(clEnqueueTask)
+-#define __ENQUEUE_NATIVE_KERNEL             __ERR_STR(clEnqueueNativeKernel)
+-#define __ENQUEUE_MARKER_ERR                __ERR_STR(clEnqueueMarker)
+-#define __ENQUEUE_WAIT_FOR_EVENTS_ERR       __ERR_STR(clEnqueueWaitForEvents)
+-#define __ENQUEUE_BARRIER_ERR               __ERR_STR(clEnqueueBarrier)
+-
+-#define __ENQUEUE_ACQUIRE_GL_ERR            __ERR_STR(clEnqueueAcquireGLObjects)
+-#define __ENQUEUE_RELEASE_GL_ERR            __ERR_STR(clEnqueueReleaseGLObjects)
+-
+-#define __UNLOAD_COMPILER_ERR               __ERR_STR(clUnloadCompiler)
+-
+-#define __FLUSH_ERR                         __ERR_STR(clFlush)
+-#define __FINISH_ERR                        __ERR_STR(clFinish)
+-
+-#define __CREATE_SUB_DEVICES                __ERR_STR(clCreateSubDevicesEXT)
+-#endif // __CL_USER_OVERRIDE_ERROR_STRINGS
+-//! \endcond
+-
+-/*! \class string
+- * \brief Simple string class, that provides a limited subset of std::string
+- * functionality but avoids many of the issues that come with that class.
+- */
+-class string
+-{
+-private:
+-    ::size_t size_;
+-    char * str_;
+-public:
+-    string(void) : size_(0), str_(NULL)
+-    {
+-    }
+-
+-    string(char * str, ::size_t size) :
+-        size_(size),
+-        str_(NULL)
+-    {
+-        str_ = new char[size_+1];
+-        if (str_ != NULL) {
+-            memcpy(str_, str, size_  * sizeof(char));
+-            str_[size_] = '\0';
+-        }
+-        else {
+-            size_ = 0;
+-        }
+-    }
+-
+-    string(char * str) :
+-        str_(NULL)
+-    {
+-        size_= ::strlen(str);
+-        str_ = new char[size_ + 1];
+-        if (str_ != NULL) {
+-            memcpy(str_, str, (size_ + 1) * sizeof(char));
+-        }
+-        else {
+-            size_ = 0;
+-        }
+-    }
+-
+-    string& operator=(const string& rhs)
+-    {
+-        if (this == &rhs) {
+-            return *this;
+-        }
+-
+-        if (rhs.size_ == 0 || rhs.str_ == NULL) {
+-            size_ = 0;
+-            str_  = NULL;
+-        } 
+-        else {
+-            size_ = rhs.size_;
+-            str_ = new char[size_ + 1];
+-            if (str_ != NULL) {
+-                memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char));
+-            }
+-            else {
+-                size_ = 0;
+-            }
+-        }
+-
+-        return *this;
+-    }
+-
+-    string(const string& rhs)
+-    {
+-        *this = rhs;
+-    }
+-
+-    ~string()
+-    {
+-        if (str_ != NULL) {
+-            delete[] str_;
+-        }
+-    }
+-
+-    ::size_t size(void) const   { return size_; }
+-    ::size_t length(void) const { return size(); }
+-
+-    const char * c_str(void) const { return (str_) ? str_ : "";}
+-};
+-
+-#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING)
+-#include <string>
+-typedef std::string STRING_CLASS;
+-#elif !defined(__USE_DEV_STRING) 
+-typedef cl::string STRING_CLASS;
+-#endif
+-
+-#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
+-#include <vector>
+-#define VECTOR_CLASS std::vector
+-#elif !defined(__USE_DEV_VECTOR) 
+-#define VECTOR_CLASS cl::vector 
+-#endif
+-
+-#if !defined(__MAX_DEFAULT_VECTOR_SIZE)
+-#define __MAX_DEFAULT_VECTOR_SIZE 10
+-#endif
+-
+-/*! \class vector
+- * \brief Fixed sized vector implementation that mirroring 
+- * std::vector functionality.
+- */
+-template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>
+-class vector
+-{
+-private:
+-    T data_[N];
+-    unsigned int size_;
+-    bool empty_;
+-public:
+-    vector() : 
+-        size_(-1),
+-        empty_(true)
+-    {}
+-
+-    ~vector() {}
+-
+-    unsigned int size(void) const
+-    {
+-        return size_ + 1;
+-    }
+-
+-    void clear()
+-    {
+-        size_ = -1;
+-        empty_ = true;
+-    }
+-
+-    void push_back (const T& x)
+-    { 
+-        if (size() < N) {
+-            size_++;  
+-            data_[size_] = x;
+-            empty_ = false;
+-        }
+-    }
+-
+-    void pop_back(void)
+-    {
+-        if (!empty_) {
+-            data_[size_].~T();
+-            size_--;
+-            if (size_ == -1) {
+-                empty_ = true;
+-            }
+-        }
+-    }
+-  
+-    vector(const vector<T, N>& vec) : 
+-        size_(vec.size_),
+-        empty_(vec.empty_)
+-    {
+-        if (!empty_) {
+-            memcpy(&data_[0], &vec.data_[0], size() * sizeof(T));
+-        }
+-    } 
+-
+-    vector(unsigned int size, const T& val = T()) :
+-        size_(-1),
+-        empty_(true)
+-    {
+-        for (unsigned int i = 0; i < size; i++) {
+-            push_back(val);
+-        }
+-    }
+-
+-    vector<T, N>& operator=(const vector<T, N>& rhs)
+-    {
+-        if (this == &rhs) {
+-            return *this;
+-        }
+-
+-        size_  = rhs.size_;
+-        empty_ = rhs.empty_;
+-
+-        if (!empty_) {	
+-            memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T));
+-        }
+-    
+-        return *this;
+-    }
+-
+-    bool operator==(vector<T,N> &vec)
+-    {
+-        if (empty_ && vec.empty_) {
+-            return true;
+-        }
+-
+-        if (size() != vec.size()) {
+-            return false;
+-        }
+-
+-        return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false;
+-    }
+-  
+-    operator T* ()             { return data_; }
+-    operator const T* () const { return data_; }
+-   
+-    bool empty (void) const
+-    {
+-        return empty_;
+-    }
+-  
+-    unsigned int max_size (void) const
+-    {
+-        return N;
+-    }
+-
+-    unsigned int capacity () const
+-    {
+-        return sizeof(T) * N;
+-    }
+-
+-    T& operator[](int index)
+-    {
+-        return data_[index];
+-    }
+-  
+-    T operator[](int index) const
+-    {
+-        return data_[index];
+-    }
+-  
+-    template<class I>
+-    void assign(I start, I end)
+-    {
+-        clear();   
+-        while(start < end) {
+-            push_back(*start);
+-            start++;
+-        }
+-    }
+-
+-    /*! \class iterator
+-     * \brief Iterator class for vectors
+-     */
+-    class iterator
+-    {
+-    private:
+-        vector<T,N> vec_;
+-        int index_;
+-        bool initialized_;
+-    public:
+-        iterator(void) : 
+-            index_(-1),
+-            initialized_(false)
+-        {
+-            index_ = -1;
+-            initialized_ = false;
+-        }
+-
+-        ~iterator(void) {}
+-
+-        static iterator begin(vector<T,N> &vec)
+-        {
+-            iterator i;
+-
+-            if (!vec.empty()) {
+-                i.index_ = 0;
+-            }
+-
+-            i.vec_ = vec;
+-            i.initialized_ = true;
+-            return i;
+-        }
+-
+-        static iterator end(vector<T,N> &vec)
+-        {
+-            iterator i;
+-
+-            if (!vec.empty()) {
+-                i.index_ = vec.size();
+-            }
+-            i.vec_ = vec;
+-            i.initialized_ = true;
+-            return i;
+-        }
+-    
+-        bool operator==(iterator i)
+-        {
+-            return ((vec_ == i.vec_) && 
+-                    (index_ == i.index_) && 
+-                    (initialized_ == i.initialized_));
+-        }
+-
+-        bool operator!=(iterator i)
+-        {
+-            return (!(*this==i));
+-        }
+-
+-        void operator++()
+-        {
+-            index_++;
+-        }
+-
+-        void operator++(int x)
+-        {
+-            index_ += x;
+-        }
+-
+-        void operator--()
+-        {
+-            index_--;
+-        }
+-
+-        void operator--(int x)
+-        {
+-            index_ -= x;
+-        }
+-
+-        T operator *()
+-        {
+-            return vec_[index_];
+-        }
+-    };
+-
+-    iterator begin(void)
+-    {
+-        return iterator::begin(*this);
+-    }
+-
+-    iterator end(void)
+-    {
+-        return iterator::end(*this);
+-    }
+-
+-    T& front(void)
+-    {
+-        return data_[0];
+-    }
+-
+-    T& back(void)
+-    {
+-        return data_[size_];
+-    }
+-
+-    const T& front(void) const
+-    {
+-        return data_[0];
+-    }
+-
+-    const T& back(void) const
+-    {
+-        return data_[size_];
+-    }
+-};  
+-    
+-/*!
+- * \brief size_t class used to interface between C++ and
+- * OpenCL C calls that require arrays of size_t values, who's
+- * size is known statically.
+- */
+-template <int N>
+-struct size_t : public cl::vector< ::size_t, N> { };
+-
+-namespace detail {
+-
+-// GetInfo help struct
+-template <typename Functor, typename T>
+-struct GetInfoHelper
+-{
+-    static cl_int
+-    get(Functor f, cl_uint name, T* param)
+-    {
+-        return f(name, sizeof(T), param, NULL);
+-    }
+-};
+-
+-// Specialized GetInfoHelper for VECTOR_CLASS params
+-template <typename Func, typename T>
+-struct GetInfoHelper<Func, VECTOR_CLASS<T> >
+-{
+-    static cl_int get(Func f, cl_uint name, VECTOR_CLASS<T>* param)
+-    {
+-        ::size_t required;
+-        cl_int err = f(name, 0, NULL, &required);
+-        if (err != CL_SUCCESS) {
+-            return err;
+-        }
+-
+-        T* value = (T*) alloca(required);
+-        err = f(name, required, value, NULL);
+-        if (err != CL_SUCCESS) {
+-            return err;
+-        }
+-
+-        param->assign(&value[0], &value[required/sizeof(T)]);
+-        return CL_SUCCESS;
+-    }
+-};
+-
+-// Specialized for getInfo<CL_PROGRAM_BINARIES>
+-template <typename Func>
+-struct GetInfoHelper<Func, VECTOR_CLASS<char *> >
+-{
+-    static cl_int
+-    get(Func f, cl_uint name, VECTOR_CLASS<char *>* param)
+-    {
+-      cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL);
+-      if (err != CL_SUCCESS) {
+-        return err;
+-      }
+-      
+-      return CL_SUCCESS;
+-    }
+-};
+-
+-// Specialized GetInfoHelper for STRING_CLASS params
+-template <typename Func>
+-struct GetInfoHelper<Func, STRING_CLASS>
+-{
+-    static cl_int get(Func f, cl_uint name, STRING_CLASS* param)
+-    {
+-        ::size_t required;
+-        cl_int err = f(name, 0, NULL, &required);
+-        if (err != CL_SUCCESS) {
+-            return err;
+-        }
+-
+-        char* value = (char*) alloca(required);
+-        err = f(name, required, value, NULL);
+-        if (err != CL_SUCCESS) {
+-            return err;
+-        }
+-
+-        *param = value;
+-        return CL_SUCCESS;
+-    }
+-};
+-
+-#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \
+-namespace detail { \
+-template <typename Func> \
+-struct GetInfoHelper<Func, CPP_TYPE> \
+-{ \
+-    static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \
+-    { \
+-      cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \
+-      if (err != CL_SUCCESS) { \
+-        return err; \
+-      } \
+-      \
+-      return ReferenceHandler<CPP_TYPE::cl_type>::retain((*param)()); \
+-    } \
+-}; \
+-} 
+-
+-
+-#define __PARAM_NAME_INFO_1_0(F) \
+-    F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \
+-    F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \
+-    F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \
+-    F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \
+-    F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \
+-    \
+-    F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \
+-    F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \
+-    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \
+-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \
+-    F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \
+-    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \
+-    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \
+-    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \
+-    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \
+-    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \
+-    F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \
+-    F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \
+-    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \
+-    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\
+-    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \
+-    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \
+-    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \
+-    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \
+-    F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \
+-    F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \
+-    F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \
+-    F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \
+-    F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \
+-    F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \
+-    F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \
+-    F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \
+-    F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \
+-    F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \
+-    F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \
+-    F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \
+-    F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \
+-    F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \
+-    F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \
+-    \
+-    F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \
+-    F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS<Device>) \
+-    F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS<cl_context_properties>) \
+-    \
+-    F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \
+-    F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \
+-    F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \
+-    F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \
+-    \
+-    F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \
+-    F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \
+-    F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \
+-    F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \
+-    \
+-    F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \
+-    F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \
+-    F(cl_mem_info, CL_MEM_SIZE, ::size_t) \
+-    F(cl_mem_info, CL_MEM_HOST_PTR, void*) \
+-    F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \
+-    F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \
+-    F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \
+-    \
+-    F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \
+-    F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \
+-    F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \
+-    F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \
+-    F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \
+-    F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \
+-    F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \
+-    \
+-    F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \
+-    F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \
+-    F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \
+-    F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \
+-    F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \
+-    \
+-    F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \
+-    F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \
+-    F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \
+-    F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<cl_device_id>) \
+-    F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \
+-    F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \
+-    F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS<char *>) \
+-    \
+-    F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \
+-    F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \
+-    F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \
+-    \
+-    F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \
+-    F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \
+-    F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \
+-    F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \
+-    F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \
+-    \
+-    F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \
+-    F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \
+-    F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \
+-    \
+-    F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \
+-    F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \
+-    F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \
+-    F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties)
+-
+-#if defined(CL_VERSION_1_1)
+-#define __PARAM_NAME_INFO_1_1(F) \
+-    F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\
+-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \
+-    F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \
+-    F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \
+-    F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \
+-    \
+-    F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \
+-    F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \
+-    \
+-    F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \
+-    F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \
+-    \
+-    F(cl_event_info, CL_EVENT_CONTEXT, cl::Context)
+-#endif // CL_VERSION_1_1
+-
+-#if defined(USE_CL_DEVICE_FISSION)
+-#define __PARAM_NAME_DEVICE_FISSION(F) \
+-    F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \
+-	F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+-	F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+-	F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \
+-	F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS<cl_device_partition_property_ext>)
+-#endif // USE_CL_DEVICE_FISSION
+-
+-template <typename enum_type, cl_int Name>
+-struct param_traits {};
+-
+-#define __DECLARE_PARAM_TRAITS(token, param_name, T) \
+-struct token;                                        \
+-template<>                                           \
+-struct param_traits<detail:: token,param_name>       \
+-{                                                    \
+-    enum { value = param_name };                     \
+-    typedef T param_type;                            \
+-};
+-
+-__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS);
+-#if defined(CL_VERSION_1_1)
+-__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS);
+-#endif // CL_VERSION_1_1
+-
+-#if defined(USE_CL_DEVICE_FISSION)
+-__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS);
+-#endif // USE_CL_DEVICE_FISSION
+-
+-#undef __DECLARE_PARAM_TRAITS
+-
+-// Convenience functions
+-
+-template <typename Func, typename T>
+-inline cl_int
+-getInfo(Func f, cl_uint name, T* param)
+-{
+-    return GetInfoHelper<Func, T>::get(f, name, param);
+-}
+-
+-template <typename Func, typename Arg0>
+-struct GetInfoFunctor0
+-{
+-    Func f_; const Arg0& arg0_;
+-    cl_int operator ()(
+-        cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
+-    { return f_(arg0_, param, size, value, size_ret); }
+-};
+-
+-template <typename Func, typename Arg0, typename Arg1>
+-struct GetInfoFunctor1
+-{
+-    Func f_; const Arg0& arg0_; const Arg1& arg1_;
+-    cl_int operator ()(
+-        cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
+-    { return f_(arg0_, arg1_, param, size, value, size_ret); }
+-};
+-
+-template <typename Func, typename Arg0, typename T>
+-inline cl_int
+-getInfo(Func f, const Arg0& arg0, cl_uint name, T* param)
+-{
+-    GetInfoFunctor0<Func, Arg0> f0 = { f, arg0 };
+-    return GetInfoHelper<GetInfoFunctor0<Func, Arg0>, T>
+-        ::get(f0, name, param);
+-}
+-
+-template <typename Func, typename Arg0, typename Arg1, typename T>
+-inline cl_int
+-getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param)
+-{
+-    GetInfoFunctor1<Func, Arg0, Arg1> f0 = { f, arg0, arg1 };
+-    return GetInfoHelper<GetInfoFunctor1<Func, Arg0, Arg1>, T>
+-        ::get(f0, name, param);
+-}
+-
+-template<typename T>
+-struct ReferenceHandler
+-{ };
+-
+-template <>
+-struct ReferenceHandler<cl_device_id>
+-{
+-    // cl_device_id does not have retain().
+-    static cl_int retain(cl_device_id)
+-    { return CL_INVALID_DEVICE; }
+-    // cl_device_id does not have release().
+-    static cl_int release(cl_device_id)
+-    { return CL_INVALID_DEVICE; }
+-};
+-
+-template <>
+-struct ReferenceHandler<cl_platform_id>
+-{
+-    // cl_platform_id does not have retain().
+-    static cl_int retain(cl_platform_id)
+-    { return CL_INVALID_PLATFORM; }
+-    // cl_platform_id does not have release().
+-    static cl_int release(cl_platform_id)
+-    { return CL_INVALID_PLATFORM; }
+-};
+-
+-template <>
+-struct ReferenceHandler<cl_context>
+-{
+-    static cl_int retain(cl_context context)
+-    { return ::clRetainContext(context); }
+-    static cl_int release(cl_context context)
+-    { return ::clReleaseContext(context); }
+-};
+-
+-template <>
+-struct ReferenceHandler<cl_command_queue>
+-{
+-    static cl_int retain(cl_command_queue queue)
+-    { return ::clRetainCommandQueue(queue); }
+-    static cl_int release(cl_command_queue queue)
+-    { return ::clReleaseCommandQueue(queue); }
+-};
+-
+-template <>
+-struct ReferenceHandler<cl_mem>
+-{
+-    static cl_int retain(cl_mem memory)
+-    { return ::clRetainMemObject(memory); }
+-    static cl_int release(cl_mem memory)
+-    { return ::clReleaseMemObject(memory); }
+-};
+-
+-template <>
+-struct ReferenceHandler<cl_sampler>
+-{
+-    static cl_int retain(cl_sampler sampler)
+-    { return ::clRetainSampler(sampler); }
+-    static cl_int release(cl_sampler sampler)
+-    { return ::clReleaseSampler(sampler); }
+-};
+-
+-template <>
+-struct ReferenceHandler<cl_program>
+-{
+-    static cl_int retain(cl_program program)
+-    { return ::clRetainProgram(program); }
+-    static cl_int release(cl_program program)
+-    { return ::clReleaseProgram(program); }
+-};
+-
+-template <>
+-struct ReferenceHandler<cl_kernel>
+-{
+-    static cl_int retain(cl_kernel kernel)
+-    { return ::clRetainKernel(kernel); }
+-    static cl_int release(cl_kernel kernel)
+-    { return ::clReleaseKernel(kernel); }
+-};
+-
+-template <>
+-struct ReferenceHandler<cl_event>
+-{
+-    static cl_int retain(cl_event event)
+-    { return ::clRetainEvent(event); }
+-    static cl_int release(cl_event event)
+-    { return ::clReleaseEvent(event); }
+-};
+-
+-template <typename T>
+-class Wrapper
+-{
+-public:
+-    typedef T cl_type;
+-
+-protected:
+-    cl_type object_;
+-
+-public:
+-    Wrapper() : object_(NULL) { }
+-
+-    ~Wrapper()
+-    {
+-        if (object_ != NULL) { release(); }
+-    }
+-
+-    Wrapper(const Wrapper<cl_type>& rhs)
+-    {
+-        object_ = rhs.object_;
+-        if (object_ != NULL) { retain(); }
+-    }
+-
+-    Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
+-    {
+-        if (object_ != NULL) { release(); }
+-        object_ = rhs.object_;
+-        if (object_ != NULL) { retain(); }
+-        return *this;
+-    }
+-
+-    cl_type operator ()() const { return object_; }
+-
+-    cl_type& operator ()() { return object_; }
+-
+-protected:
+-
+-    cl_int retain() const
+-    {
+-        return ReferenceHandler<cl_type>::retain(object_);
+-    }
+-
+-    cl_int release() const
+-    {
+-        return ReferenceHandler<cl_type>::release(object_);
+-    }
+-};
+-
+-#if defined(__CL_ENABLE_EXCEPTIONS)
+-static inline cl_int errHandler (
+-    cl_int err,
+-    const char * errStr = NULL) throw(Error)
+-{
+-    if (err != CL_SUCCESS) {
+-        throw Error(err, errStr);
+-    }
+-    return err;
+-}
+-#else
+-static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
+-{
+-    return err;
+-}
+-#endif // __CL_ENABLE_EXCEPTIONS
+-
+-} // namespace detail
+-//! \endcond
+-
+-/*! \stuct ImageFormat
+- * \brief ImageFormat interface fro cl_image_format.
+- */
+-struct ImageFormat : public cl_image_format
+-{
+-    ImageFormat(){}
+-
+-    ImageFormat(cl_channel_order order, cl_channel_type type)
+-    {
+-        image_channel_order = order;
+-        image_channel_data_type = type;
+-    }
+-
+-    ImageFormat& operator = (const ImageFormat& rhs)
+-    {
+-        if (this != &rhs) {
+-            this->image_channel_data_type = rhs.image_channel_data_type;
+-            this->image_channel_order     = rhs.image_channel_order;
+-        }
+-        return *this;
+-    }
+-};
+-
+-/*! \class Device
+- * \brief Device interface for cl_device_id.
+- */
+-class Device : public detail::Wrapper<cl_device_id>
+-{
+-public:
+-    Device(cl_device_id device) { object_ = device; }
+-
+-    Device() : detail::Wrapper<cl_type>() { }
+-
+-    Device(const Device& device) : detail::Wrapper<cl_type>(device) { }
+-
+-    Device& operator = (const Device& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    template <typename T>
+-    cl_int getInfo(cl_device_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetDeviceInfo, object_, name, param),
+-            __GET_DEVICE_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_device_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_device_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-#if defined(USE_CL_DEVICE_FISSION)
+-	cl_int createSubDevices(
+-		const cl_device_partition_property_ext * properties,
+-		VECTOR_CLASS<Device>* devices)
+-	{
+-		typedef CL_API_ENTRY cl_int 
+-			( CL_API_CALL * PFN_clCreateSubDevicesEXT)(
+-				cl_device_id /*in_device*/,
+-                const cl_device_partition_property_ext * /* properties */,
+-                cl_uint /*num_entries*/,
+-                cl_device_id * /*out_devices*/,
+-                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+-
+-		static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL;
+-		__INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT);
+-
+-		cl_uint n = 0;
+-        cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __CREATE_SUB_DEVICES);
+-        }
+-
+-        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+-        err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __CREATE_SUB_DEVICES);
+-        }
+-
+-        devices->assign(&ids[0], &ids[n]);
+-        return CL_SUCCESS;
+- 	}
+-#endif
+-};
+-
+-/*! \class Platform
+- *  \brief Platform interface.
+- */
+-class Platform : public detail::Wrapper<cl_platform_id>
+-{
+-public:
+-    static const Platform null();
+-
+-    Platform(cl_platform_id platform) { object_ = platform; }
+-
+-    Platform() : detail::Wrapper<cl_type>()  { }
+-
+-    Platform(const Platform& platform) : detail::Wrapper<cl_type>(platform) { }
+-
+-    Platform& operator = (const Platform& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetPlatformInfo, object_, name, param),
+-            __GET_PLATFORM_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_platform_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_platform_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    cl_int getDevices(
+-        cl_device_type type,
+-        VECTOR_CLASS<Device>* devices) const
+-    {
+-        cl_uint n = 0;
+-        cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+-        }
+-
+-        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+-        err = ::clGetDeviceIDs(object_, type, n, ids, NULL);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+-        }
+-
+-        devices->assign(&ids[0], &ids[n]);
+-        return CL_SUCCESS;
+-    }
+-
+-#if defined(USE_DX_INTEROP)
+-   /*! \brief Get the list of available D3D10 devices.
+-     *
+-     *  \param d3d_device_source.
+-     *
+-     *  \param d3d_object.
+-     *
+-     *  \param d3d_device_set.
+-     *
+-     *  \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device
+-     *  values returned in devices can be used to identify a specific OpenCL
+-     *  device. If \a devices argument is NULL, this argument is ignored.
+-     *
+-     *  \return One of the following values:
+-     *    - CL_SUCCESS if the function is executed successfully.
+-     *
+-     *  The application can query specific capabilities of the OpenCL device(s)
+-     *  returned by cl::getDevices. This can be used by the application to
+-     *  determine which device(s) to use.
+-     *
+-     * \note In the case that exceptions are enabled and a return value
+-     * other than CL_SUCCESS is generated, then cl::Error exception is
+-     * generated.
+-     */
+-    cl_int getDevices(
+-        cl_d3d10_device_source_khr d3d_device_source,
+-        void *                     d3d_object,
+-        cl_d3d10_device_set_khr    d3d_device_set,
+-        VECTOR_CLASS<Device>* devices) const
+-    {
+-        typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)(
+-            cl_platform_id platform, 
+-            cl_d3d10_device_source_khr d3d_device_source, 
+-            void * d3d_object,
+-            cl_d3d10_device_set_khr d3d_device_set,
+-            cl_uint num_entries,
+-            cl_device_id * devices,
+-            cl_uint* num_devices);
+-
+-        static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL;
+-        __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR);
+-
+-        cl_uint n = 0;
+-        cl_int err = pfn_clGetDeviceIDsFromD3D10KHR(
+-            object_, 
+-            d3d_device_source, 
+-            d3d_object,
+-            d3d_device_set, 
+-            0, 
+-            NULL, 
+-            &n);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+-        }
+-
+-        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+-        err = pfn_clGetDeviceIDsFromD3D10KHR(
+-            object_, 
+-            d3d_device_source, 
+-            d3d_object,
+-            d3d_device_set,
+-            n, 
+-            ids, 
+-            NULL);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+-        }
+-
+-        devices->assign(&ids[0], &ids[n]);
+-        return CL_SUCCESS;
+-    }
+-#endif
+-
+-    static cl_int get(
+-        VECTOR_CLASS<Platform>* platforms)
+-    {
+-        cl_uint n = 0;
+-        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+-        }
+-
+-        cl_platform_id* ids = (cl_platform_id*) alloca(
+-            n * sizeof(cl_platform_id));
+-        err = ::clGetPlatformIDs(n, ids, NULL);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+-        }
+-
+-        platforms->assign(&ids[0], &ids[n]);
+-        return CL_SUCCESS;
+-    }
+-};
+-
+-static inline cl_int
+-UnloadCompiler()
+-{
+-    return ::clUnloadCompiler();
+-}
+-
+-class Context : public detail::Wrapper<cl_context>
+-{
+-public:
+-    Context(
+-        const VECTOR_CLASS<Device>& devices,
+-        cl_context_properties* properties = NULL,
+-        void (CL_CALLBACK * notifyFptr)(
+-            const char *,
+-            const void *,
+-            ::size_t,
+-            void *) = NULL,
+-        void* data = NULL,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateContext(
+-            properties, (cl_uint) devices.size(),
+-            (cl_device_id*) &devices.front(),
+-            notifyFptr, data, &error);
+-
+-        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Context(
+-        cl_device_type type,
+-        cl_context_properties* properties = NULL,
+-        void (CL_CALLBACK * notifyFptr)(
+-            const char *,
+-            const void *,
+-            ::size_t,
+-            void *) = NULL,
+-        void* data = NULL,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateContextFromType(
+-            properties, type, notifyFptr, data, &error);
+-
+-        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Context() : detail::Wrapper<cl_type>() { }
+-
+-    Context(const Context& context) : detail::Wrapper<cl_type>(context) { }
+-
+-    Context& operator = (const Context& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    template <typename T>
+-    cl_int getInfo(cl_context_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetContextInfo, object_, name, param),
+-            __GET_CONTEXT_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_context_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_context_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    cl_int getSupportedImageFormats(
+-        cl_mem_flags flags,
+-        cl_mem_object_type type,
+-        VECTOR_CLASS<ImageFormat>* formats) const
+-    {
+-        cl_uint numEntries;
+-        cl_int err = ::clGetSupportedImageFormats(
+-           object_, 
+-           flags,
+-           type, 
+-           0, 
+-           NULL, 
+-           &numEntries);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
+-        }
+-
+-        ImageFormat* value = (ImageFormat*)
+-            alloca(numEntries * sizeof(ImageFormat));
+-        err = ::clGetSupportedImageFormats(
+-            object_, 
+-            flags, 
+-            type, 
+-            numEntries,
+-            (cl_image_format*) value, 
+-            NULL);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
+-        }
+-
+-        formats->assign(&value[0], &value[numEntries]);
+-        return CL_SUCCESS;
+-    }
+-};
+-
+-__GET_INFO_HELPER_WITH_RETAIN(cl::Context)
+-
+-/*! \class Event
+- * \brief Event interface for cl_event.
+- */
+-class Event : public detail::Wrapper<cl_event>
+-{
+-public:
+-    Event() : detail::Wrapper<cl_type>() { }
+-
+-    Event(const Event& event) : detail::Wrapper<cl_type>(event) { }
+-
+-    Event& operator = (const Event& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    template <typename T>
+-    cl_int getInfo(cl_event_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetEventInfo, object_, name, param),
+-            __GET_EVENT_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_event_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_event_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    template <typename T>
+-    cl_int getProfilingInfo(cl_profiling_info name, T* param) const
+-    {
+-        return detail::errHandler(detail::getInfo(
+-            &::clGetEventProfilingInfo, object_, name, param),
+-            __GET_EVENT_PROFILE_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_profiling_info, name>::param_type
+-    getProfilingInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_profiling_info, name>::param_type param;
+-        cl_int result = getProfilingInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    cl_int wait() const
+-    {
+-        return detail::errHandler(
+-            ::clWaitForEvents(1, &object_),
+-            __WAIT_FOR_EVENTS_ERR);
+-    }
+-
+-#if defined(CL_VERSION_1_1)
+-    cl_int setCallback(
+-        cl_int type,
+-        void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *),		
+-        void * user_data = NULL)
+-    {
+-        return detail::errHandler(
+-            ::clSetEventCallback(
+-                object_,
+-                type,
+-                pfn_notify,
+-                user_data), 
+-            __SET_EVENT_CALLBACK_ERR);
+-    }
+-#endif
+-
+-    static cl_int
+-    waitForEvents(const VECTOR_CLASS<Event>& events)
+-    {
+-        return detail::errHandler(
+-            ::clWaitForEvents(
+-                (cl_uint) events.size(), (cl_event*)&events.front()),
+-            __WAIT_FOR_EVENTS_ERR);
+-    }
+-};
+-
+-__GET_INFO_HELPER_WITH_RETAIN(cl::Event)
+-
+-#if defined(CL_VERSION_1_1)
+-/*! \class UserEvent
+- * \brief User event interface for cl_event.
+- */
+-class UserEvent : public Event
+-{
+-public:
+-    UserEvent(
+-        const Context& context,
+-        cl_int * err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateUserEvent(
+-            context(),
+-            &error);
+-
+-        detail::errHandler(error, __CREATE_USER_EVENT_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    UserEvent() : Event() { }
+-
+-    UserEvent(const UserEvent& event) : Event(event) { }
+-
+-    UserEvent& operator = (const UserEvent& rhs)
+-    {
+-        if (this != &rhs) {
+-            Event::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    cl_int setStatus(cl_int status)
+-    {
+-        return detail::errHandler(
+-            ::clSetUserEventStatus(object_,status), 
+-            __SET_USER_EVENT_STATUS_ERR);
+-    }
+-};
+-#endif
+-
+-inline static cl_int
+-WaitForEvents(const VECTOR_CLASS<Event>& events)
+-{
+-    return detail::errHandler(
+-        ::clWaitForEvents(
+-            (cl_uint) events.size(), (cl_event*)&events.front()),
+-        __WAIT_FOR_EVENTS_ERR);
+-}
+-
+-/*! \class Memory
+- * \brief Memory interface for cl_mem.
+- */
+-class Memory : public detail::Wrapper<cl_mem>
+-{
+-public:
+-    Memory() : detail::Wrapper<cl_type>() { }
+-
+-    Memory(const Memory& memory) : detail::Wrapper<cl_type>(memory) { }
+-
+-    Memory& operator = (const Memory& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    template <typename T>
+-    cl_int getInfo(cl_mem_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetMemObjectInfo, object_, name, param),
+-            __GET_MEM_OBJECT_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_mem_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_mem_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-#if defined(CL_VERSION_1_1)
+-    cl_int setDestructorCallback(
+-        void (CL_CALLBACK * pfn_notify)(cl_mem, void *),		
+-        void * user_data = NULL)
+-    {
+-        return detail::errHandler(
+-            ::clSetMemObjectDestructorCallback(
+-                object_,
+-                pfn_notify,
+-                user_data), 
+-            __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR);
+-    }
+-#endif
+-
+-};
+-
+-__GET_INFO_HELPER_WITH_RETAIN(cl::Memory)
+-
+-/*! \class Buffer
+- * \brief Memory buffer interface.
+- */
+-class Buffer : public Memory
+-{
+-public:
+-    Buffer(
+-        const Context& context,
+-        cl_mem_flags flags,
+-        ::size_t size,
+-        void* host_ptr = NULL,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);
+-
+-        detail::errHandler(error, __CREATE_BUFFER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Buffer() : Memory() { }
+-
+-    Buffer(const Buffer& buffer) : Memory(buffer) { }
+-
+-    Buffer& operator = (const Buffer& rhs)
+-    {
+-        if (this != &rhs) {
+-            Memory::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-#if defined(CL_VERSION_1_1)
+-    Buffer createSubBuffer(
+-        cl_mem_flags flags,
+-        cl_buffer_create_type buffer_create_type,
+-        const void * buffer_create_info,
+-        cl_int * err = NULL)
+-    {
+-        Buffer result;
+-        cl_int error;
+-        result.object_ = ::clCreateSubBuffer(
+-            object_, 
+-            flags, 
+-            buffer_create_type, 
+-            buffer_create_info, 
+-            &error);
+-
+-        detail::errHandler(error, __CREATE_SUBBUFFER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-
+-        return result;
+-	}		
+-#endif
+-};
+-
+-#if defined (USE_DX_INTEROP)
+-class BufferD3D10 : public Buffer
+-{
+-public:
+-    typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)(
+-    cl_context context, cl_mem_flags flags, ID3D10Buffer*  buffer,
+-    cl_int* errcode_ret);
+-
+-    BufferD3D10(
+-        const Context& context,
+-        cl_mem_flags flags,
+-        ID3D10Buffer* bufobj,
+-        cl_int * err = NULL)
+-    {
+-        static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
+-        __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);
+-
+-        cl_int error;
+-        object_ = pfn_clCreateFromD3D10BufferKHR(
+-            context(),
+-            flags,
+-            bufobj,
+-            &error);
+-
+-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    BufferD3D10() : Buffer() { }
+-
+-    BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }
+-
+-    BufferD3D10& operator = (const BufferD3D10& rhs)
+-    {
+-        if (this != &rhs) {
+-            Buffer::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-};
+-#endif
+-
+-/*! \class BufferGL
+- * \brief Memory buffer interface for GL interop.
+- */
+-class BufferGL : public Buffer
+-{
+-public:
+-    BufferGL(
+-        const Context& context,
+-        cl_mem_flags flags,
+-        GLuint bufobj,
+-        cl_int * err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateFromGLBuffer(
+-            context(),
+-            flags,
+-            bufobj,
+-            &error);
+-
+-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    BufferGL() : Buffer() { }
+-
+-    BufferGL(const BufferGL& buffer) : Buffer(buffer) { }
+-
+-    BufferGL& operator = (const BufferGL& rhs)
+-    {
+-        if (this != &rhs) {
+-            Buffer::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    cl_int getObjectInfo(
+-        cl_gl_object_type *type,
+-        GLuint * gl_object_name)
+-    {
+-        return detail::errHandler(
+-            ::clGetGLObjectInfo(object_,type,gl_object_name),
+-            __GET_GL_OBJECT_INFO_ERR);
+-    }
+-};
+-
+-/*! \class BufferRenderGL
+- * \brief Memory buffer interface for GL interop with renderbuffer.
+- */
+-class BufferRenderGL : public Buffer
+-{
+-public:
+-    BufferRenderGL(
+-        const Context& context,
+-        cl_mem_flags flags,
+-        GLuint bufobj,
+-        cl_int * err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateFromGLRenderbuffer(
+-            context(),
+-            flags,
+-            bufobj,
+-            &error);
+-
+-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    BufferRenderGL() : Buffer() { }
+-
+-    BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { }
+-
+-    BufferRenderGL& operator = (const BufferRenderGL& rhs)
+-    {
+-        if (this != &rhs) {
+-            Buffer::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    cl_int getObjectInfo(
+-        cl_gl_object_type *type,
+-        GLuint * gl_object_name)
+-    {
+-        return detail::errHandler(
+-            ::clGetGLObjectInfo(object_,type,gl_object_name),
+-            __GET_GL_OBJECT_INFO_ERR);
+-    }
+-};
+-
+-/*! \class Image
+- * \brief Base class  interface for all images.
+- */
+-class Image : public Memory
+-{
+-protected:
+-    Image() : Memory() { }
+-
+-    Image(const Image& image) : Memory(image) { }
+-
+-    Image& operator = (const Image& rhs)
+-    {
+-        if (this != &rhs) {
+-            Memory::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-public:
+-    template <typename T>
+-    cl_int getImageInfo(cl_image_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetImageInfo, object_, name, param),
+-            __GET_IMAGE_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_image_info, name>::param_type
+-    getImageInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_image_info, name>::param_type param;
+-        cl_int result = getImageInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-};
+-
+-/*! \class Image2D
+- * \brief Image interface for 2D images.
+- */
+-class Image2D : public Image
+-{
+-public:
+-    Image2D(
+-        const Context& context,
+-        cl_mem_flags flags,
+-        ImageFormat format,
+-        ::size_t width,
+-        ::size_t height,
+-        ::size_t row_pitch = 0,
+-        void* host_ptr = NULL,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateImage2D(
+-            context(), flags,&format, width, height, row_pitch, host_ptr, &error);
+-
+-        detail::errHandler(error, __CREATE_IMAGE2D_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Image2D() { }
+-
+-    Image2D(const Image2D& image2D) : Image(image2D) { }
+-
+-    Image2D& operator = (const Image2D& rhs)
+-    {
+-        if (this != &rhs) {
+-            Image::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-};
+-
+-/*! \class Image2DGL
+- * \brief 2D image interface for GL interop.
+- */
+-class Image2DGL : public Image2D
+-{
+-public:
+-    Image2DGL(
+-        const Context& context,
+-        cl_mem_flags flags,
+-        GLenum target,
+-        GLint  miplevel,
+-        GLuint texobj,
+-        cl_int * err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateFromGLTexture2D(
+-            context(),
+-            flags,
+-            target,
+-            miplevel,
+-            texobj,
+-            &error);
+-
+-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Image2DGL() : Image2D() { }
+-
+-    Image2DGL(const Image2DGL& image) : Image2D(image) { }
+-
+-    Image2DGL& operator = (const Image2DGL& rhs)
+-    {
+-        if (this != &rhs) {
+-            Image2D::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-};
+-
+-/*! \class Image3D
+- * \brief Image interface for 3D images.
+- */
+-class Image3D : public Image
+-{
+-public:
+-    Image3D(
+-        const Context& context,
+-        cl_mem_flags flags,
+-        ImageFormat format,
+-        ::size_t width,
+-        ::size_t height,
+-        ::size_t depth,
+-        ::size_t row_pitch = 0,
+-        ::size_t slice_pitch = 0,
+-        void* host_ptr = NULL,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateImage3D(
+-            context(), flags, &format, width, height, depth, row_pitch,
+-            slice_pitch, host_ptr, &error);
+-
+-        detail::errHandler(error, __CREATE_IMAGE3D_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Image3D() { }
+-
+-    Image3D(const Image3D& image3D) : Image(image3D) { }
+-
+-    Image3D& operator = (const Image3D& rhs)
+-    {
+-        if (this != &rhs) {
+-            Image::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-};
+-
+-/*! \class Image2DGL
+- * \brief 2D image interface for GL interop.
+- */
+-class Image3DGL : public Image3D
+-{
+-public:
+-    Image3DGL(
+-        const Context& context,
+-        cl_mem_flags flags,
+-        GLenum target,
+-        GLint  miplevel,
+-        GLuint texobj,
+-        cl_int * err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateFromGLTexture3D(
+-            context(),
+-            flags,
+-            target,
+-            miplevel,
+-            texobj,
+-            &error);
+-
+-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Image3DGL() : Image3D() { }
+-
+-    Image3DGL(const Image3DGL& image) : Image3D(image) { }
+-
+-    Image3DGL& operator = (const Image3DGL& rhs)
+-    {
+-        if (this != &rhs) {
+-            Image3D::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-};
+-
+-/*! \class Sampler
+- * \brief Sampler interface for cl_sampler.
+- */
+-class Sampler : public detail::Wrapper<cl_sampler>
+-{
+-public:
+-    Sampler() { }
+-
+-    Sampler(
+-        const Context& context,
+-        cl_bool normalized_coords,
+-        cl_addressing_mode addressing_mode,
+-        cl_filter_mode filter_mode,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateSampler(
+-            context(), 
+-            normalized_coords,
+-            addressing_mode,
+-            filter_mode,
+-            &error);
+-
+-        detail::errHandler(error, __CREATE_SAMPLER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
+-
+-    Sampler& operator = (const Sampler& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    template <typename T>
+-    cl_int getInfo(cl_sampler_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetSamplerInfo, object_, name, param),
+-            __GET_SAMPLER_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_sampler_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_sampler_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-};
+-
+-__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler)
+-
+-class Program;
+-class CommandQueue;
+-class Kernel;
+-
+-/*! \class NDRange
+- * \brief NDRange interface
+- */
+-class NDRange
+-{
+-private:
+-    size_t<3> sizes_;
+-    cl_uint dimensions_;
+-
+-public:
+-    NDRange()
+-        : dimensions_(0)
+-    { }
+-
+-    NDRange(::size_t size0)
+-        : dimensions_(1)
+-    {
+-        sizes_.push_back(size0);
+-    }
+-
+-    NDRange(::size_t size0, ::size_t size1)
+-        : dimensions_(2)
+-    {
+-        sizes_.push_back(size0);
+-        sizes_.push_back(size1);
+-    }
+-
+-    NDRange(::size_t size0, ::size_t size1, ::size_t size2)
+-        : dimensions_(3)
+-    {
+-        sizes_.push_back(size0);
+-        sizes_.push_back(size1);
+-        sizes_.push_back(size2);
+-    }
+-
+-    operator const ::size_t*() const { return (const ::size_t*) sizes_; }
+-    ::size_t dimensions() const { return dimensions_; }
+-};
+-
+-static const NDRange NullRange;
+-
+-/*!
+- * \struct LocalSpaceArg
+- * \brief Local address raper for use with Kernel::setArg
+- */
+-struct LocalSpaceArg
+-{
+-    ::size_t size_;
+-};
+-
+-namespace detail {
+-
+-template <typename T>
+-struct KernelArgumentHandler
+-{
+-    static ::size_t size(const T&) { return sizeof(T); }
+-    static T* ptr(T& value) { return &value; }
+-};
+-
+-template <>
+-struct KernelArgumentHandler<LocalSpaceArg>
+-{
+-    static ::size_t size(const LocalSpaceArg& value) { return value.size_; }
+-    static void* ptr(LocalSpaceArg&) { return NULL; }
+-};
+-
+-} 
+-//! \endcond
+-
+-inline LocalSpaceArg
+-__local(::size_t size)
+-{
+-    LocalSpaceArg ret = { size };
+-    return ret;
+-}
+-
+-class KernelFunctor;
+-
+-/*! \class Kernel
+- * \brief Kernel interface that implements cl_kernel
+- */
+-class Kernel : public detail::Wrapper<cl_kernel>
+-{
+-public:
+-    inline Kernel(const Program& program, const char* name, cl_int* err = NULL);
+-
+-    Kernel() { }
+-
+-    Kernel(const Kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
+-
+-    Kernel& operator = (const Kernel& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    template <typename T>
+-    cl_int getInfo(cl_kernel_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetKernelInfo, object_, name, param),
+-            __GET_KERNEL_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_kernel_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_kernel_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    template <typename T>
+-    cl_int getWorkGroupInfo(
+-        const Device& device, cl_kernel_work_group_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(
+-                &::clGetKernelWorkGroupInfo, object_, device(), name, param),
+-                __GET_KERNEL_WORK_GROUP_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_kernel_work_group_info, name>::param_type
+-        getWorkGroupInfo(const Device& device, cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-        detail::cl_kernel_work_group_info, name>::param_type param;
+-        cl_int result = getWorkGroupInfo(device, name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    template <typename T>
+-    cl_int setArg(cl_uint index, T value)
+-    {
+-        return detail::errHandler(
+-            ::clSetKernelArg(
+-                object_,
+-                index,
+-                detail::KernelArgumentHandler<T>::size(value),
+-                detail::KernelArgumentHandler<T>::ptr(value)),
+-            __SET_KERNEL_ARGS_ERR);
+-    }
+-
+-    cl_int setArg(cl_uint index, ::size_t size, void* argPtr)
+-    {
+-        return detail::errHandler(
+-            ::clSetKernelArg(object_, index, size, argPtr),
+-            __SET_KERNEL_ARGS_ERR);
+-    }
+-
+-    KernelFunctor bind(
+-        const CommandQueue& queue,
+-        const NDRange& offset,
+-        const NDRange& global,
+-        const NDRange& local);
+-
+-    KernelFunctor bind(
+-        const CommandQueue& queue,
+-        const NDRange& global,
+-        const NDRange& local);
+-};
+-
+-__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel)
+-
+-/*! \class Program
+- * \brief Program interface that implements cl_program.
+- */
+-class Program : public detail::Wrapper<cl_program>
+-{
+-public:
+-    typedef VECTOR_CLASS<std::pair<const void*, ::size_t> > Binaries;
+-    typedef VECTOR_CLASS<std::pair<const char*, ::size_t> > Sources;
+-
+-    Program(
+-        const Context& context,
+-        const Sources& sources,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-
+-        const ::size_t n = (::size_t)sources.size();
+-        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
+-        const char** strings = (const char**) alloca(n * sizeof(const char*));
+-
+-        for (::size_t i = 0; i < n; ++i) {
+-            strings[i] = sources[(int)i].first;
+-            lengths[i] = sources[(int)i].second;
+-        }
+-
+-        object_ = ::clCreateProgramWithSource(
+-            context(), (cl_uint)n, strings, lengths, &error);
+-
+-        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Program(
+-        const Context& context,
+-        const VECTOR_CLASS<Device>& devices,
+-        const Binaries& binaries,
+-        VECTOR_CLASS<cl_int>* binaryStatus = NULL,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-        const ::size_t n = binaries.size();
+-        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
+-        const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*));
+-
+-        for (::size_t i = 0; i < n; ++i) {
+-            images[i] = (const unsigned char*)binaries[(int)i].first;
+-            lengths[i] = binaries[(int)i].second;
+-        }
+-
+-        object_ = ::clCreateProgramWithBinary(
+-            context(), (cl_uint) devices.size(),
+-            (cl_device_id*)&devices.front(),
+-            lengths, images, binaryStatus != NULL
+-               ? (cl_int*) &binaryStatus->front()
+-               : NULL, &error);
+-
+-        detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    Program() { }
+-
+-    Program(const Program& program) : detail::Wrapper<cl_type>(program) { }
+-
+-    Program& operator = (const Program& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    cl_int build(
+-        const VECTOR_CLASS<Device>& devices,
+-        const char* options = NULL,
+-        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+-        void* data = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clBuildProgram(
+-                object_,
+-                (cl_uint)
+-                devices.size(),
+-                (cl_device_id*)&devices.front(),
+-                options,
+-                notifyFptr,
+-                data),
+-                __BUILD_PROGRAM_ERR);
+-    }
+-
+-    template <typename T>
+-    cl_int getInfo(cl_program_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(&::clGetProgramInfo, object_, name, param),
+-            __GET_PROGRAM_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_program_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_program_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    template <typename T>
+-    cl_int getBuildInfo(
+-        const Device& device, cl_program_build_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(
+-                &::clGetProgramBuildInfo, object_, device(), name, param),
+-                __GET_PROGRAM_BUILD_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_program_build_info, name>::param_type
+-    getBuildInfo(const Device& device, cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_program_build_info, name>::param_type param;
+-        cl_int result = getBuildInfo(device, name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    cl_int createKernels(VECTOR_CLASS<Kernel>* kernels)
+-    {
+-        cl_uint numKernels;
+-        cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
+-        }
+-
+-        Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel));
+-        err = ::clCreateKernelsInProgram(
+-            object_, numKernels, (cl_kernel*) value, NULL);
+-        if (err != CL_SUCCESS) {
+-            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
+-        }
+-
+-        kernels->assign(&value[0], &value[numKernels]);
+-        return CL_SUCCESS;
+-    }
+-};
+-
+-__GET_INFO_HELPER_WITH_RETAIN(cl::Program)
+-
+-inline Kernel::Kernel(const Program& program, const char* name, cl_int* err)
+-{
+-    cl_int error;
+-
+-    object_ = ::clCreateKernel(program(), name, &error);
+-    detail::errHandler(error, __CREATE_KERNEL_ERR);
+-
+-    if (err != NULL) {
+-        *err = error;
+-    }
+-
+-}
+-
+-/*! \class CommandQueue
+- * \brief CommandQueue interface for cl_command_queue.
+- */
+-class CommandQueue : public detail::Wrapper<cl_command_queue>
+-{
+-public:
+-    CommandQueue(
+-        const Context& context,
+-        const Device& device,
+-        cl_command_queue_properties properties = 0,
+-        cl_int* err = NULL)
+-    {
+-        cl_int error;
+-        object_ = ::clCreateCommandQueue(
+-            context(), device(), properties, &error);
+-
+-        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-    }
+-
+-    CommandQueue() { }
+-
+-    CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
+-
+-    CommandQueue& operator = (const CommandQueue& rhs)
+-    {
+-        if (this != &rhs) {
+-            detail::Wrapper<cl_type>::operator=(rhs);
+-        }
+-        return *this;
+-    }
+-
+-    template <typename T>
+-    cl_int getInfo(cl_command_queue_info name, T* param) const
+-    {
+-        return detail::errHandler(
+-            detail::getInfo(
+-                &::clGetCommandQueueInfo, object_, name, param),
+-                __GET_COMMAND_QUEUE_INFO_ERR);
+-    }
+-
+-    template <cl_int name> typename
+-    detail::param_traits<detail::cl_command_queue_info, name>::param_type
+-    getInfo(cl_int* err = NULL) const
+-    {
+-        typename detail::param_traits<
+-            detail::cl_command_queue_info, name>::param_type param;
+-        cl_int result = getInfo(name, &param);
+-        if (err != NULL) {
+-            *err = result;
+-        }
+-        return param;
+-    }
+-
+-    cl_int enqueueReadBuffer(
+-        const Buffer& buffer,
+-        cl_bool blocking,
+-        ::size_t offset,
+-        ::size_t size,
+-        void* ptr,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueReadBuffer(
+-                object_, buffer(), blocking, offset, size,
+-                ptr,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_READ_BUFFER_ERR);
+-    }
+-
+-    cl_int enqueueWriteBuffer(
+-        const Buffer& buffer,
+-        cl_bool blocking,
+-        ::size_t offset,
+-        ::size_t size,
+-        const void* ptr,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueWriteBuffer(
+-                object_, buffer(), blocking, offset, size,
+-                ptr,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-                __ENQUEUE_WRITE_BUFFER_ERR);
+-    }
+-
+-    cl_int enqueueCopyBuffer(
+-        const Buffer& src,
+-        const Buffer& dst,
+-        ::size_t src_offset,
+-        ::size_t dst_offset,
+-        ::size_t size,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueCopyBuffer(
+-                object_, src(), dst(), src_offset, dst_offset, size,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQEUE_COPY_BUFFER_ERR);
+-    }
+-
+-#if defined(CL_VERSION_1_1)
+-    cl_int enqueueReadBufferRect(
+-        const Buffer& buffer,
+-        cl_bool blocking,
+-        const size_t<3>& buffer_offset,
+-        const size_t<3>& host_offset,
+-        const size_t<3>& region,
+-        ::size_t buffer_row_pitch,
+-        ::size_t buffer_slice_pitch,
+-        ::size_t host_row_pitch,
+-        ::size_t host_slice_pitch,
+-        void *ptr,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueReadBufferRect(
+-                object_, 
+-                buffer(), 
+-                blocking, 
+-                (const ::size_t *)buffer_offset,
+-                (const ::size_t *)host_offset,
+-                (const ::size_t *)region,
+-                buffer_row_pitch,
+-                buffer_slice_pitch,
+-                host_row_pitch,
+-                host_slice_pitch,
+-                ptr,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-                __ENQUEUE_READ_BUFFER_RECT_ERR);
+-    }
+-
+-
+-    cl_int enqueueWriteBufferRect(
+-        const Buffer& buffer,
+-        cl_bool blocking,
+-        const size_t<3>& buffer_offset,
+-        const size_t<3>& host_offset,
+-        const size_t<3>& region,
+-        ::size_t buffer_row_pitch,
+-        ::size_t buffer_slice_pitch,
+-        ::size_t host_row_pitch,
+-        ::size_t host_slice_pitch,
+-        void *ptr,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueWriteBufferRect(
+-                object_, 
+-                buffer(), 
+-                blocking, 
+-                (const ::size_t *)buffer_offset,
+-                (const ::size_t *)host_offset,
+-                (const ::size_t *)region,
+-                buffer_row_pitch,
+-                buffer_slice_pitch,
+-                host_row_pitch,
+-                host_slice_pitch,
+-                ptr,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-                __ENQUEUE_WRITE_BUFFER_RECT_ERR);
+-    }
+-
+-    cl_int enqueueCopyBufferRect(
+-        const Buffer& src,
+-        const Buffer& dst,
+-        const size_t<3>& src_origin,
+-        const size_t<3>& dst_origin,
+-        const size_t<3>& region,
+-        ::size_t src_row_pitch,
+-        ::size_t src_slice_pitch,
+-        ::size_t dst_row_pitch,
+-        ::size_t dst_slice_pitch,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueCopyBufferRect(
+-                object_, 
+-                src(), 
+-                dst(), 
+-                (const ::size_t *)src_origin, 
+-                (const ::size_t *)dst_origin, 
+-                (const ::size_t *)region,
+-                src_row_pitch,
+-                src_slice_pitch,
+-                dst_row_pitch,
+-                dst_slice_pitch,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQEUE_COPY_BUFFER_RECT_ERR);
+-    }
+-#endif
+-
+-    cl_int enqueueReadImage(
+-        const Image& image,
+-        cl_bool blocking,
+-        const size_t<3>& origin,
+-        const size_t<3>& region,
+-        ::size_t row_pitch,
+-        ::size_t slice_pitch,
+-        void* ptr,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueReadImage(
+-                object_, image(), blocking, (const ::size_t *) origin,
+-                (const ::size_t *) region, row_pitch, slice_pitch, ptr,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_READ_IMAGE_ERR);
+-    }
+-
+-    cl_int enqueueWriteImage(
+-        const Image& image,
+-        cl_bool blocking,
+-        const size_t<3>& origin,
+-        const size_t<3>& region,
+-        ::size_t row_pitch,
+-        ::size_t slice_pitch,
+-        void* ptr,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueWriteImage(
+-                object_, image(), blocking, (const ::size_t *) origin,
+-                (const ::size_t *) region, row_pitch, slice_pitch, ptr,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_WRITE_IMAGE_ERR);
+-    }
+-
+-    cl_int enqueueCopyImage(
+-        const Image& src,
+-        const Image& dst,
+-        const size_t<3>& src_origin,
+-        const size_t<3>& dst_origin,
+-        const size_t<3>& region,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueCopyImage(
+-                object_, src(), dst(), (const ::size_t *) src_origin,
+-                (const ::size_t *)dst_origin, (const ::size_t *) region,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_COPY_IMAGE_ERR);
+-    }
+-
+-    cl_int enqueueCopyImageToBuffer(
+-        const Image& src,
+-        const Buffer& dst,
+-        const size_t<3>& src_origin,
+-        const size_t<3>& region,
+-        ::size_t dst_offset,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueCopyImageToBuffer(
+-                object_, src(), dst(), (const ::size_t *) src_origin,
+-                (const ::size_t *) region, dst_offset,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR);
+-    }
+-
+-    cl_int enqueueCopyBufferToImage(
+-        const Buffer& src,
+-        const Image& dst,
+-        ::size_t src_offset,
+-        const size_t<3>& dst_origin,
+-        const size_t<3>& region,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueCopyBufferToImage(
+-                object_, src(), dst(), src_offset,
+-                (const ::size_t *) dst_origin, (const ::size_t *) region,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR);
+-    }
+-
+-    void* enqueueMapBuffer(
+-        const Buffer& buffer,
+-        cl_bool blocking,
+-        cl_map_flags flags,
+-        ::size_t offset,
+-        ::size_t size,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL,
+-        cl_int* err = NULL) const
+-    {
+-        cl_int error;
+-        void * result = ::clEnqueueMapBuffer(
+-            object_, buffer(), blocking, flags, offset, size,
+-            (events != NULL) ? (cl_uint) events->size() : 0,
+-            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-            (cl_event*) event,
+-            &error);
+-
+-        detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+-        if (err != NULL) {
+-            *err = error;
+-        }
+-        return result;
+-    }
+-
+-    void* enqueueMapImage(
+-        const Image& buffer,
+-        cl_bool blocking,
+-        cl_map_flags flags,
+-        const size_t<3>& origin,
+-        const size_t<3>& region,
+-        ::size_t * row_pitch,
+-        ::size_t * slice_pitch,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL,
+-        cl_int* err = NULL) const
+-    {
+-        cl_int error;
+-        void * result = ::clEnqueueMapImage(
+-            object_, buffer(), blocking, flags,
+-            (const ::size_t *) origin, (const ::size_t *) region,
+-            row_pitch, slice_pitch,
+-            (events != NULL) ? (cl_uint) events->size() : 0,
+-            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-            (cl_event*) event,
+-            &error);
+-
+-        detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR);
+-        if (err != NULL) {
+-              *err = error;
+-        }
+-        return result;
+-    }
+-
+-    cl_int enqueueUnmapMemObject(
+-        const Memory& memory,
+-        void* mapped_ptr,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueUnmapMemObject(
+-                object_, memory(), mapped_ptr,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+-    }
+-
+-    cl_int enqueueNDRangeKernel(
+-        const Kernel& kernel,
+-        const NDRange& offset,
+-        const NDRange& global,
+-        const NDRange& local,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueNDRangeKernel(
+-                object_, kernel(), (cl_uint) global.dimensions(),
+-                offset.dimensions() != 0 ? (const ::size_t*) offset : NULL,
+-                (const ::size_t*) global,
+-                local.dimensions() != 0 ? (const ::size_t*) local : NULL,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_NDRANGE_KERNEL_ERR);
+-    }
+-
+-    cl_int enqueueTask(
+-        const Kernel& kernel,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueTask(
+-                object_, kernel(),
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_TASK_ERR);
+-    }
+-
+-    cl_int enqueueNativeKernel(
+-        void (*userFptr)(void *),
+-        std::pair<void*, ::size_t> args,
+-        const VECTOR_CLASS<Memory>* mem_objects = NULL,
+-        const VECTOR_CLASS<const void*>* mem_locs = NULL,
+-        const VECTOR_CLASS<Event>* events = NULL,
+-        Event* event = NULL) const
+-    {
+-        cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) 
+-            ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem))
+-            : NULL;
+-
+-        if (mems != NULL) {
+-            for (unsigned int i = 0; i < mem_objects->size(); i++) {
+-                mems[i] = ((*mem_objects)[i])();
+-            }
+-        }
+-
+-        return detail::errHandler(
+-            ::clEnqueueNativeKernel(
+-                object_, userFptr, args.first, args.second,
+-                (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+-                mems,
+-                (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_NATIVE_KERNEL);
+-    }
+-
+-    cl_int enqueueMarker(Event* event = NULL) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueMarker(object_, (cl_event*) event),
+-            __ENQUEUE_MARKER_ERR);
+-    }
+-
+-    cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueWaitForEvents(
+-                object_,
+-                (cl_uint) events.size(),
+-                (const cl_event*) &events.front()),
+-            __ENQUEUE_WAIT_FOR_EVENTS_ERR);
+-    }
+-
+-    cl_int enqueueAcquireGLObjects(
+-         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+-         const VECTOR_CLASS<Event>* events = NULL,
+-         Event* event = NULL) const
+-     {
+-         return detail::errHandler(
+-             ::clEnqueueAcquireGLObjects(
+-                 object_,
+-                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+-                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+-                 (events != NULL) ? (cl_uint) events->size() : 0,
+-                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                 (cl_event*) event),
+-             __ENQUEUE_ACQUIRE_GL_ERR);
+-     }
+-
+-    cl_int enqueueReleaseGLObjects(
+-         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+-         const VECTOR_CLASS<Event>* events = NULL,
+-         Event* event = NULL) const
+-     {
+-         return detail::errHandler(
+-             ::clEnqueueReleaseGLObjects(
+-                 object_,
+-                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+-                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+-                 (events != NULL) ? (cl_uint) events->size() : 0,
+-                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+-                 (cl_event*) event),
+-             __ENQUEUE_RELEASE_GL_ERR);
+-     }
+-
+-#if defined (USE_DX_INTEROP)
+-typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)(
+-    cl_command_queue command_queue, cl_uint num_objects,
+-    const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
+-    const cl_event* event_wait_list, cl_event* event);
+-typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
+-    cl_command_queue command_queue, cl_uint num_objects,
+-    const cl_mem* mem_objects,  cl_uint num_events_in_wait_list,
+-    const cl_event* event_wait_list, cl_event* event);
+-
+-    cl_int enqueueAcquireD3D10Objects(
+-         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+-         const VECTOR_CLASS<Event>* events = NULL,
+-         Event* event = NULL) const
+-     {
+-         static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
+-         __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
+-		
+-         return detail::errHandler(
+-             pfn_clEnqueueAcquireD3D10ObjectsKHR(
+-                 object_,
+-                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+-                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+-                 (events != NULL) ? (cl_uint) events->size() : 0,
+-                 (events != NULL) ? (cl_event*) &events->front() : NULL,
+-                 (cl_event*) event),
+-             __ENQUEUE_ACQUIRE_GL_ERR);
+-     }
+-
+-    cl_int enqueueReleaseD3D10Objects(
+-         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+-         const VECTOR_CLASS<Event>* events = NULL,
+-         Event* event = NULL) const
+-    {
+-        static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
+-        __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);
+-
+-        return detail::errHandler(
+-            pfn_clEnqueueReleaseD3D10ObjectsKHR(
+-                object_,
+-                (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+-                (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+-                (events != NULL) ? (cl_uint) events->size() : 0,
+-                (events != NULL) ? (cl_event*) &events->front() : NULL,
+-                (cl_event*) event),
+-            __ENQUEUE_RELEASE_GL_ERR);
+-    }
+-#endif
+-
+-    cl_int enqueueBarrier() const
+-    {
+-        return detail::errHandler(
+-            ::clEnqueueBarrier(object_),
+-            __ENQUEUE_BARRIER_ERR);
+-    }
+-
+-    cl_int flush() const
+-    {
+-        return detail::errHandler(::clFlush(object_), __FLUSH_ERR);
+-    }
+-
+-    cl_int finish() const
+-    {
+-        return detail::errHandler(::clFinish(object_), __FINISH_ERR);
+-    }
+-};
+-
+-__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue)
+-
+-/*! \class KernelFunctor
+- * \brief Kernel functor interface
+- *
+- * \note Currently only functors of zero to ten arguments are supported. It
+- * is straightforward to add more and a more general solution, similar to
+- * Boost.Lambda could be followed if required in the future.
+- */
+-class KernelFunctor
+-{
+-private:
+-    Kernel kernel_;
+-    CommandQueue queue_;
+-    NDRange offset_;
+-    NDRange global_;
+-    NDRange local_;
+-
+-    cl_int err_;
+-public:
+-    KernelFunctor() { }
+-
+-    KernelFunctor(
+-        const Kernel& kernel,
+-        const CommandQueue& queue,
+-        const NDRange& offset,
+-        const NDRange& global,
+-        const NDRange& local) :
+-            kernel_(kernel),
+-            queue_(queue),
+-            offset_(offset),
+-            global_(global),
+-            local_(local),
+-            err_(CL_SUCCESS)
+-    {}
+-
+-    KernelFunctor& operator=(const KernelFunctor& rhs);
+-
+-    KernelFunctor(const KernelFunctor& rhs);
+-
+-    cl_int getError() { return err_; }
+-
+-    inline Event operator()(const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<typename A1>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<class A1, class A2>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<class A1, class A2, class A3>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<class A1, class A2, class A3, class A4>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<class A1, class A2, class A3, class A4, class A5>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<class A1, class A2, class A3, class A4, class A5, class A6>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<class A1, class A2, class A3, class A4,
+-             class A5, class A6, class A7>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6, 
+-        const A7& a7,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<class A1, class A2, class A3, class A4, class A5,
+-             class A6, class A7, class A8>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6, 
+-        const A7& a7, 
+-        const A8& a8,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-
+-    template<class A1, class A2, class A3, class A4, class A5,
+-             class A6, class A7, class A8, class A9>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6, 
+-        const A7& a7, 
+-        const A8& a8, 
+-        const A9& a9,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-    
+-    template<class A1, class A2, class A3, class A4, class A5,
+-             class A6, class A7, class A8, class A9, class A10>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6,
+-        const A7& a7, 
+-        const A8& a8, 
+-        const A9& a9, 
+-        const A10& a10,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-    
+-    template<class A1, class A2, class A3, class A4, class A5,
+-             class A6, class A7, class A8, class A9, class A10,
+-             class A11>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6,
+-        const A7& a7, 
+-        const A8& a8, 
+-        const A9& a9, 
+-        const A10& a10, 
+-        const A11& a11,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-    
+-    template<class A1, class A2, class A3, class A4, class A5,
+-             class A6, class A7, class A8, class A9, class A10,
+-             class A11, class A12>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6,
+-        const A7& a7, 
+-        const A8& a8, 
+-        const A9& a9, 
+-        const A10& a10, 
+-        const A11& a11, 
+-        const A12& a12,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-    
+-    template<class A1, class A2, class A3, class A4, class A5,
+-             class A6, class A7, class A8, class A9, class A10,
+-             class A11, class A12, class A13>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6,
+-        const A7& a7, 
+-        const A8& a8, 
+-        const A9& a9, 
+-        const A10& a10, 
+-        const A11& a11, 
+-        const A12& a12, 
+-        const A13& a13,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-    
+-    template<class A1, class A2, class A3, class A4, class A5,
+-             class A6, class A7, class A8, class A9, class A10,
+-             class A11, class A12, class A13, class A14>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6,
+-        const A7& a7, 
+-        const A8& a8, 
+-        const A9& a9, 
+-        const A10& a10, 
+-        const A11& a11,
+-        const A12& a12, 
+-        const A13& a13, 
+-        const A14& a14,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-    
+-    template<class A1, class A2, class A3, class A4, class A5,
+-             class A6, class A7, class A8, class A9, class A10,
+-             class A11, class A12, class A13, class A14, class A15>
+-    inline Event operator()(
+-        const A1& a1, 
+-        const A2& a2, 
+-        const A3& a3, 
+-        const A4& a4, 
+-        const A5& a5, 
+-        const A6& a6,
+-        const A7& a7, 
+-        const A8& a8, 
+-        const A9& a9, 
+-        const A10& a10, 
+-        const A11& a11,
+-        const A12& a12, 
+-        const A13& a13, 
+-        const A14& a14, 
+-        const A15& a15,
+-        const VECTOR_CLASS<Event>* events = NULL);
+-};
+-
+-inline KernelFunctor Kernel::bind(
+-    const CommandQueue& queue,
+-    const NDRange& offset,
+-    const NDRange& global,
+-    const NDRange& local)
+-{
+-    return KernelFunctor(*this,queue,offset,global,local);
+-}
+-
+-inline KernelFunctor Kernel::bind(
+-    const CommandQueue& queue,
+-    const NDRange& global,
+-    const NDRange& local)
+-{
+-    return KernelFunctor(*this,queue,NullRange,global,local);
+-}
+-
+-inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs)
+-{
+-    if (this == &rhs) {
+-        return *this;
+-    }
+-    
+-    kernel_ = rhs.kernel_;
+-    queue_  = rhs.queue_;
+-    offset_ = rhs.offset_;
+-    global_ = rhs.global_;
+-    local_  = rhs.local_;
+-    
+-    return *this;
+-}
+-
+-inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) :
+-    kernel_(rhs.kernel_),
+-    queue_(rhs.queue_),
+-    offset_(rhs.offset_),
+-    global_(rhs.global_),
+-    local_(rhs.local_)
+-{
+-}
+-
+-Event KernelFunctor::operator()(const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2, typename A3>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2, typename A3, typename A4>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2, typename A3, typename A4, typename A5>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2, typename A3, typename A4, typename A5,
+-         typename A6>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5, 
+-    const A6& a6,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2, typename A3, typename A4,
+-         typename A5, typename A6, typename A7>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5, 
+-    const A6& a6, 
+-    const A7& a7,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2, typename A3, typename A4, typename A5,
+-         typename A6, typename A7, typename A8>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5, 
+-    const A6& a6, 
+-    const A7& a7, 
+-    const A8& a8,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-    kernel_.setArg(7,a8);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2, typename A3, typename A4, typename A5,
+-         typename A6, typename A7, typename A8, typename A9>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5,
+-    const A6& a6, 
+-    const A7& a7, 
+-    const A8& a8, 
+-    const A9& a9,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-    kernel_.setArg(7,a8);
+-    kernel_.setArg(8,a9);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<typename A1, typename A2, typename A3, typename A4, typename A5,
+-         typename A6, typename A7, typename A8, typename A9, typename A10>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5, 
+-    const A6& a6,
+-    const A7& a7, 
+-    const A8& a8, 
+-    const A9& a9, 
+-    const A10& a10,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-    kernel_.setArg(7,a8);
+-    kernel_.setArg(8,a9);
+-    kernel_.setArg(9,a10);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<class A1, class A2, class A3, class A4, class A5,
+-         class A6, class A7, class A8, class A9, class A10,
+-         class A11>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5, 
+-    const A6& a6,
+-    const A7& a7, 
+-    const A8& a8, 
+-    const A9& a9, 
+-    const A10& a10, 
+-    const A11& a11,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-    kernel_.setArg(7,a8);
+-    kernel_.setArg(8,a9);
+-    kernel_.setArg(9,a10);
+-    kernel_.setArg(10,a11);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<class A1, class A2, class A3, class A4, class A5,
+-         class A6, class A7, class A8, class A9, class A10,
+-         class A11, class A12>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5, 
+-    const A6& a6,
+-    const A7& a7, 
+-    const A8& a8, 
+-    const A9& a9, 
+-    const A10& a10, 
+-    const A11& a11, 
+-    const A12& a12,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-    kernel_.setArg(7,a8);
+-    kernel_.setArg(8,a9);
+-    kernel_.setArg(9,a10);
+-    kernel_.setArg(10,a11);
+-    kernel_.setArg(11,a12);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<class A1, class A2, class A3, class A4, class A5,
+-         class A6, class A7, class A8, class A9, class A10,
+-         class A11, class A12, class A13>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5, 
+-    const A6& a6,
+-    const A7& a7, 
+-    const A8& a8, 
+-    const A9& a9, 
+-    const A10& a10, 
+-    const A11& a11, 
+-    const A12& a12, 
+-    const A13& a13,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-    
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-    kernel_.setArg(7,a8);
+-    kernel_.setArg(8,a9);
+-    kernel_.setArg(9,a10);
+-    kernel_.setArg(10,a11);
+-    kernel_.setArg(11,a12);
+-    kernel_.setArg(12,a13);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<class A1, class A2, class A3, class A4, class A5,
+-         class A6, class A7, class A8, class A9, class A10,
+-         class A11, class A12, class A13, class A14>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5, 
+-    const A6& a6,
+-    const A7& a7, 
+-    const A8& a8, 
+-    const A9& a9, 
+-    const A10& a10, 
+-    const A11& a11,
+-    const A12& a12, 
+-    const A13& a13, 
+-    const A14& a14,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-    
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-    kernel_.setArg(7,a8);
+-    kernel_.setArg(8,a9);
+-    kernel_.setArg(9,a10);
+-    kernel_.setArg(10,a11);
+-    kernel_.setArg(11,a12);
+-    kernel_.setArg(12,a13);
+-    kernel_.setArg(13,a14);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-template<class A1, class A2, class A3, class A4, class A5,
+-         class A6, class A7, class A8, class A9, class A10,
+-         class A11, class A12, class A13, class A14, class A15>
+-Event KernelFunctor::operator()(
+-    const A1& a1, 
+-    const A2& a2, 
+-    const A3& a3, 
+-    const A4& a4, 
+-    const A5& a5,
+-    const A6& a6, 
+-    const A7& a7, 
+-    const A8& a8, 
+-    const A9& a9, 
+-    const A10& a10, 
+-    const A11& a11,
+-    const A12& a12, 
+-    const A13& a13, 
+-    const A14& a14, 
+-    const A15& a15,
+-    const VECTOR_CLASS<Event>* events)
+-{
+-    Event event;
+-    
+-    kernel_.setArg(0,a1);
+-    kernel_.setArg(1,a2);
+-    kernel_.setArg(2,a3);
+-    kernel_.setArg(3,a4);
+-    kernel_.setArg(4,a5);
+-    kernel_.setArg(5,a6);
+-    kernel_.setArg(6,a7);
+-    kernel_.setArg(7,a8);
+-    kernel_.setArg(8,a9);
+-    kernel_.setArg(9,a10);
+-    kernel_.setArg(10,a11);
+-    kernel_.setArg(11,a12);
+-    kernel_.setArg(12,a13);
+-    kernel_.setArg(13,a14);
+-    kernel_.setArg(14,a15);
+-
+-    err_ = queue_.enqueueNDRangeKernel(
+-        kernel_,
+-        offset_,
+-        global_,
+-        local_,
+-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+-        &event);
+-
+-    return event;
+-}
+-
+-#undef __ERR_STR
+-#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
+-#undef __GET_DEVICE_INFO_ERR
+-#undef __GET_PLATFORM_INFO_ERR
+-#undef __GET_DEVICE_IDS_ERR
+-#undef __GET_CONTEXT_INFO_ERR
+-#undef __GET_EVENT_INFO_ERR
+-#undef __GET_EVENT_PROFILE_INFO_ERR
+-#undef __GET_MEM_OBJECT_INFO_ERR
+-#undef __GET_IMAGE_INFO_ERR
+-#undef __GET_SAMPLER_INFO_ERR
+-#undef __GET_KERNEL_INFO_ERR
+-#undef __GET_KERNEL_WORK_GROUP_INFO_ERR
+-#undef __GET_PROGRAM_INFO_ERR
+-#undef __GET_PROGRAM_BUILD_INFO_ERR
+-#undef __GET_COMMAND_QUEUE_INFO_ERR
+-
+-#undef __CREATE_CONTEXT_FROM_TYPE_ERR
+-#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR
+-
+-#undef __CREATE_BUFFER_ERR
+-#undef __CREATE_SUBBUFFER_ERR
+-#undef __CREATE_IMAGE2D_ERR
+-#undef __CREATE_IMAGE3D_ERR
+-#undef __CREATE_SAMPLER_ERR
+-#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR
+-
+-#undef __CREATE_USER_EVENT_ERR
+-#undef __SET_USER_EVENT_STATUS_ERR
+-#undef __SET_EVENT_CALLBACK_ERR
+-
+-#undef __WAIT_FOR_EVENTS_ERR
+-
+-#undef __CREATE_KERNEL_ERR
+-#undef __SET_KERNEL_ARGS_ERR
+-#undef __CREATE_PROGRAM_WITH_SOURCE_ERR
+-#undef __CREATE_PROGRAM_WITH_BINARY_ERR
+-#undef __BUILD_PROGRAM_ERR
+-#undef __CREATE_KERNELS_IN_PROGRAM_ERR
+-
+-#undef __CREATE_COMMAND_QUEUE_ERR
+-#undef __SET_COMMAND_QUEUE_PROPERTY_ERR
+-#undef __ENQUEUE_READ_BUFFER_ERR
+-#undef __ENQUEUE_WRITE_BUFFER_ERR
+-#undef __ENQUEUE_READ_BUFFER_RECT_ERR
+-#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR
+-#undef __ENQEUE_COPY_BUFFER_ERR
+-#undef __ENQEUE_COPY_BUFFER_RECT_ERR
+-#undef __ENQUEUE_READ_IMAGE_ERR
+-#undef __ENQUEUE_WRITE_IMAGE_ERR
+-#undef __ENQUEUE_COPY_IMAGE_ERR
+-#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR
+-#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR
+-#undef __ENQUEUE_MAP_BUFFER_ERR
+-#undef __ENQUEUE_MAP_IMAGE_ERR
+-#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR
+-#undef __ENQUEUE_NDRANGE_KERNEL_ERR
+-#undef __ENQUEUE_TASK_ERR
+-#undef __ENQUEUE_NATIVE_KERNEL
+-
+-#undef __UNLOAD_COMPILER_ERR
+-#endif //__CL_USER_OVERRIDE_ERROR_STRINGS
+-
+-#undef __GET_INFO_HELPER_WITH_RETAIN
+-
+-// Extensions
+-#undef __INIT_CL_EXT_FCN_PTR
+-#undef __CREATE_SUB_DEVICES
+-
+-#if defined(USE_CL_DEVICE_FISSION)
+-#undef __PARAM_NAME_DEVICE_FISSION
+-#endif // USE_CL_DEVICE_FISSION
+-
+-} // namespace cl
+-
+-#endif // CL_HPP_
++#include_next <CL/cl.hpp>
diff --git a/debian/patches/private b/debian/patches/private
index 81f26a0..99da0da 100644
--- a/debian/patches/private
+++ b/debian/patches/private
@@ -2,11 +2,11 @@ Description: Install as private library
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-05-21
 
-Index: beignet-0.1+git20130521+a7ea35c/backend/src/CMakeLists.txt
+Index: beignet-0.1+git20130619+42967d2/backend/src/CMakeLists.txt
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/backend/src/CMakeLists.txt	2013-05-21 10:38:35.571948408 +0200
-+++ beignet-0.1+git20130521+a7ea35c/backend/src/CMakeLists.txt	2013-05-21 10:44:32.000000000 +0200
-@@ -120,6 +120,6 @@
+--- beignet-0.1+git20130619+42967d2.orig/backend/src/CMakeLists.txt	2013-06-19 21:04:23.346667404 +0200
++++ beignet-0.1+git20130619+42967d2/backend/src/CMakeLists.txt	2013-06-19 21:04:36.470666819 +0200
+@@ -121,6 +121,6 @@
                        ${CMAKE_THREAD_LIBS_INIT}
                        ${CMAKE_DL_LIBS})
  
@@ -14,10 +14,10 @@ Index: beignet-0.1+git20130521+a7ea35c/backend/src/CMakeLists.txt
 +install (TARGETS gbe LIBRARY DESTINATION lib/beignet)
  install (FILES backend/program.h DESTINATION include/gen)
  
-Index: beignet-0.1+git20130521+a7ea35c/src/CMakeLists.txt
+Index: beignet-0.1+git20130619+42967d2/src/CMakeLists.txt
 ===================================================================
---- beignet-0.1+git20130521+a7ea35c.orig/src/CMakeLists.txt	2013-05-21 10:38:35.571948408 +0200
-+++ beignet-0.1+git20130521+a7ea35c/src/CMakeLists.txt	2013-05-21 10:45:20.603930350 +0200
+--- beignet-0.1+git20130619+42967d2.orig/src/CMakeLists.txt	2013-06-19 21:04:23.346667404 +0200
++++ beignet-0.1+git20130619+42967d2/src/CMakeLists.txt	2013-06-19 21:04:36.470666819 +0200
 @@ -47,6 +47,8 @@
  
  SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-Bsymbolic")
diff --git a/debian/patches/series b/debian/patches/series
index 9971719..3b9d91f 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,8 +1,16 @@
 debug
 flags
 khronos
+deprecated-in-utest
 private
 0001-Generate-all-supported-as_-functions.patch
 0002-Define-all-convert_-functions.patch
 0003-Add-long-and-ulong-types-to-generated-functions.patch
 0004-Add-vector-argument-test-case.patch
+0005-Fix-several-typos-in-unit-test.patch
+0006-Support-64-bit-float.patch
+0007-test-case-for-64-bit-float.patch
+0009-Enable-cl_khr_fp64-extension-for-OpenCL-stdlib-heade.patch
+0010-Define-double-vector-types.patch
+0011-Enable-generation-of-convert_-and-as_-functions-for-.patch
+0012-GBE-Fixed-one-bug-in-scalarize-pass.patch
diff --git a/debian/source/include-binaries b/debian/source/include-binaries
new file mode 100644
index 0000000..3481d43
--- /dev/null
+++ b/debian/source/include-binaries
@@ -0,0 +1,152 @@
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/context.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen/gen_mesa_disasm.c.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_context.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_encoder.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_insn_scheduling.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_insn_selection.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_program.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_reg_allocation.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/program.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/constant.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/context.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/function.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/image.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/instruction.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/liveness.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/lowering.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/profile.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/register.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/sampler.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/type.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/unit.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/value.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/llvm/llvm_gen_backend.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/llvm/llvm_passes.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/llvm/llvm_scalarize.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/llvm/llvm_to_gen.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ocl_common_defines_str.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ocl_stdlib_str.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/alloc.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/assert.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/cvar.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/intrusive_list.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/mutex.cpp.o
+obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/platform.cpp.o
+obj-x86_64-linux-gnu/backend/src/libgbe.so
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_alloc.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_api.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_command_queue.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_command_queue_gen7.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_context.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_device_id.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_driver.cpp.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_driver_defs.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_event.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_extensions.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_image.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_kernel.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_khr_icd.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_mem.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_platform_id.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_program.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_sampler.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/intel/intel_batchbuffer.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/intel/intel_driver.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/intel/intel_gpgpu.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/x11/dricommon.c.o
+obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/x11/va_dri2.c.o
+obj-x86_64-linux-gnu/src/libcl.so
+obj-x86_64-linux-gnu/utests/CMakeFiles/flat_address_space.dir/runtime_flat_address_space.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utest_run.dir/utest_run.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/buildin_work_dim.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/builtin_global_size.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/cl_create_kernel.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_argument_structure.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_arith_shift_right.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array0.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array1.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array2.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array3.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_box_blur.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_box_blur_float.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_box_blur_image.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_byte_scatter.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_ceil.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_cl_finish.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_convert_uchar_sat.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_buffer.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_buffer_row.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_image.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_image1.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_image_3d.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_displacement_map_element.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_double.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_double_2.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fabs.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fill_image.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fill_image0.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fill_image_3d.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fill_image_3d_2.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_argument.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_argument0.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_argument1.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_argument2.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_constant.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_constant0.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_constant1.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_get_image_info.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_global_constant.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_global_constant_2.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_global_memory_barrier.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_group_size.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_if_else.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_insert_to_constant.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_insn_selection_masked_min_max.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_insn_selection_max.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_insn_selection_min.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_integer_division.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_integer_remainder.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_local_memory_barrier.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_local_memory_barrier_wg64.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_local_memory_two_ptr.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_lower_return0.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_lower_return1.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_lower_return2.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_mandelbrot.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_mandelbrot_alternate.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_math.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_movforphi_undef.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_multiple_kernels.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_saturate.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_saturate_sub.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_shader_toy.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_shift_right.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_short_scatter.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_sub_bytes.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_sub_shorts.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_switch.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint16_copy.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint2_copy.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint3_copy.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint3_unaligned_copy.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint8_copy.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_unstructured_branch0.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_unstructured_branch1.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_unstructured_branch2.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_unstructured_branch3.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_vector_load_store.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_volatile.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_write_only.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_write_only_bytes.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_write_only_shorts.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/runtime_createcontext.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/runtime_null_kernel_arg.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest_assert.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest_error.c.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest_file_map.cpp.o
+obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest_helper.cpp.o
+obj-x86_64-linux-gnu/utests/flat_address_space
+obj-x86_64-linux-gnu/utests/libutests.so
+obj-x86_64-linux-gnu/utests/utest_run

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git



More information about the Pkg-opencl-devel mailing list