[Pkg-opencl-devel] [beignet] 30/66: Imported Debian patch 0.1+git20130619+42967d2-1

Andreas Beckmann anbe at moszumanska.debian.org
Fri Oct 31 07:27:05 UTC 2014


This is an automated email from the git hooks/post-receive script.

anbe pushed a commit to branch master
in repository beignet.

commit eacdf2b36a1c785a9478c6d80c46aa05b74ad4fa
Merge: 5fa177b b7629fb
Author: Simon Richter <sjr at debian.org>
Date:   Wed Jun 19 20:48:03 2013 +0200

    Imported Debian patch 0.1+git20130619+42967d2-1

 backend/src/backend/gen_context.cpp                |    13 +-
 backend/src/backend/gen_context.hpp                |     1 +
 backend/src/backend/gen_defs.hpp                   |    14 +
 backend/src/backend/gen_encoder.cpp                |     9 +
 backend/src/backend/gen_encoder.hpp                |     2 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |     1 +
 backend/src/backend/gen_insn_scheduling.cpp        |    20 +-
 backend/src/backend/gen_insn_selection.cpp         |    19 +-
 backend/src/backend/gen_insn_selection.hxx         |     1 +
 backend/src/ocl_stdlib.h                           |    29 +-
 debian/changelog                                   |     8 +
 debian/control                                     |     7 +-
 ...0001-Generate-all-supported-as_-functions.patch |    68 +-
 .../0002-Define-all-convert_-functions.patch       |    43 +-
 ...ng-and-ulong-types-to-generated-functions.patch |   141 +-
 .../0004-Add-vector-argument-test-case.patch       |    35 +-
 .../0005-Fix-several-typos-in-unit-test.patch      |    58 +
 debian/patches/0006-Support-64-bit-float.patch     |   842 ++
 .../patches/0007-test-case-for-64-bit-float.patch  |   159 +
 ...d-OpenCL-1.2-definitions-required-for-ICD.patch |    95 +
 ...hr_fp64-extension-for-OpenCL-stdlib-heade.patch |    33 +
 .../patches/0010-Define-double-vector-types.patch  |    23 +
 ...ration-of-convert_-and-as_-functions-for-.patch |  1430 +++
 .../0012-GBE-Fixed-one-bug-in-scalarize-pass.patch |    40 +
 debian/patches/debug                               |     6 +-
 debian/patches/deprecated-in-utest                 |    17 +
 debian/patches/flags                               |    12 +-
 debian/patches/khronos                             |  4724 ++++++++-
 debian/patches/private                             |    14 +-
 debian/patches/series                              |     8 +
 debian/source/include-binaries                     |   152 +
 include/CL/cl.h                                    |   364 +-
 include/CL/cl.hpp                                  | 10360 ++-----------------
 include/CL/cl_d3d10.h                              |     4 +-
 include/CL/cl_d3d11.h                              |   126 -
 include/CL/cl_d3d9.h                               |    98 -
 include/CL/cl_dx9_media_sharing.h                  |   127 -
 include/CL/cl_ext.h                                |    48 +-
 include/CL/cl_gl.h                                 |    90 +-
 include/CL/cl_gl_ext.h                             |     4 +-
 include/CL/cl_platform.h                           |    80 +-
 include/CL/opencl.h                                |     2 +-
 kernels/builtin_global_size.cl                     |     3 +
 kernels/compiler_function_constant0.cl             |     2 +-
 kernels/compiler_local_memory_barrier_2.cl         |     7 +
 src/cl_api.c                                       |   109 +-
 src/cl_device_id.c                                 |     4 +
 src/cl_mem.h                                       |    19 +
 src/cl_mem_gl.c                                    |     4 +
 src/cl_program.c                                   |    70 +-
 src/cl_program.h                                   |     4 +-
 src/intel/intel_gpgpu.c                            |     7 +
 utests/CMakeLists.txt                              |     4 +-
 utests/builtin_global_size.cpp                     |   108 +
 utests/compiler_box_blur_image.cpp                 |    11 +-
 utests/compiler_copy_image.cpp                     |    10 +-
 utests/compiler_copy_image1.cpp                    |    18 +-
 utests/compiler_copy_image_3d.cpp                  |    11 +-
 utests/compiler_fill_gl_image.cpp                  |     2 +-
 utests/compiler_fill_image.cpp                     |     7 +-
 utests/compiler_fill_image0.cpp                    |     7 +-
 utests/compiler_fill_image_3d.cpp                  |     9 +-
 utests/compiler_fill_image_3d_2.cpp                |     9 +-
 utests/compiler_function_constant0.cpp             |     6 +-
 utests/compiler_get_image_info.cpp                 |    12 +-
 utests/compiler_local_memory_barrier_2.cpp         |    29 +
 utests/compiler_movforphi_undef.cpp                |    10 +-
 utests/utest_helper.hpp                            |    12 +
 68 files changed, 8943 insertions(+), 10878 deletions(-)

diff --cc debian/changelog
index 37da78a,0000000..bc2b5ab
mode 100644,000000..100644
--- a/debian/changelog
+++ b/debian/changelog
@@@ -1,113 -1,0 +1,121 @@@
++beignet (0.1+git20130619+42967d2-1) unstable; urgency=low
++
++  * New upstream release
++  * Build against Mesa 9
++  * Enable GL sharing extension
++
++ -- Simon Richter <sjr at debian.org>  Wed, 19 Jun 2013 20:48:03 +0200
++
 +beignet (0.1+git20130614+89b5e40-2) unstable; urgency=low
 +
 +  * Add Ubuntu support
 +  * Upload to unstable to get an ICD capable package there
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 14 Jun 2013 17:40:45 +0200
 +
 +beignet (0.1+git20130614+89b5e40-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 14 Jun 2013 15:22:18 +0200
 +
 +beignet (0.1+git20130521+a7ea35c-1) experimental; urgency=low
 +
 +  * Rename binary package
 +
 + -- Simon Richter <sjr at debian.org>  Tue, 21 May 2013 10:48:39 +0200
 +
 +beignet (0.1+git20130521+a7ea35c-1~prerename) experimental; urgency=low
 +
 +  * New upstream release
 +  * Move libraries to /usr/lib/beignet (should not be used directly)
 +
 + -- Simon Richter <sjr at debian.org>  Tue, 21 May 2013 09:17:45 +0200
 +
 +beignet (0.1+git20130514+19e9c58-1) experimental; urgency=low
 +
 +  * New upstream release
 +  * Added a number of tentative patches
 +
 + -- Simon Richter <sjr at debian.org>  Tue, 14 May 2013 20:04:29 +0200
 +
 +beignet (0.1+git20130502+63e60ed-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Mon, 06 May 2013 06:30:32 +0200
 +
 +beignet (0.1+git20130426+0c8f6fe-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 26 Apr 2013 14:42:21 +0200
 +
 +beignet (0.1+git20130422+003fac5-2) experimental; urgency=low
 +
 +  * Add patch for select()
 +  * Add patch for fmin() / fmax()
 +
 + -- Simon Richter <sjr at debian.org>  Mon, 22 Apr 2013 18:26:01 +0200
 +
 +beignet (0.1+git20130422+003fac5-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Mon, 22 Apr 2013 15:10:54 +0200
 +
 +beignet (0.1+git20130419+9c11c18-1) experimental; urgency=low
 +
 +  * Add more functionality patches
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 19 Apr 2013 14:14:39 +0200
 +
 +beignet (0.1+git20130418+0546d2e-2) experimental; urgency=low
 +
 +  * Add functionality patches
 +  * Use clang 3.0 command line syntax
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 19 Apr 2013 09:53:23 +0200
 +
 +beignet (0.1+git20130418+0546d2e-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Thu, 18 Apr 2013 11:51:37 +0200
 +
 +beignet (0.1-1) unstable; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Tue, 16 Apr 2013 17:16:18 +0200
 +
 +beignet (0.0.0+git2013.04.11+e6b503e-1) unstable; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Mon, 15 Apr 2013 18:22:45 +0200
 +
 +beignet (0.0.0+git2013.04.01+d1b234c-4) unstable; urgency=low
 +
 +  * Build fix for kfreebsd-*
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 12 Apr 2013 11:22:36 +0200
 +
 +beignet (0.0.0+git2013.04.01+d1b234c-3) unstable; urgency=low
 +
 +  * Adjust Build-Depends, Architecture list
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 12 Apr 2013 10:32:36 +0200
 +
 +beignet (0.0.0+git2013.04.01+d1b234c-2) unstable; urgency=low
 +
 +  * Add patch to support size queries in device info
 +
 + -- Simon Richter <sjr at debian.org>  Thu, 11 Apr 2013 14:00:59 +0200
 +
 +beignet (0.0.0+git2013.04.01+d1b234c-1) unstable; urgency=low
 +
 +  * Initial release.
 +
 + -- Simon Richter <sjr at debian.org>  Tue, 09 Apr 2013 17:14:00 +0200
diff --cc debian/control
index a426429,0000000..ca0d315
mode 100644,000000..100644
--- a/debian/control
+++ b/debian/control
@@@ -1,42 -1,0 +1,41 @@@
 +Source: beignet
 +Priority: extra
 +Maintainer: Simon Richter <sjr at debian.org>
 +Build-Depends: debhelper (>= 9), cmake, pkg-config,
 + ocl-icd-dev, ocl-icd-opencl-dev,
 + libdrm-dev, libxfixes-dev, libxext-dev,
 + llvm-dev (>= 1:3.2),
 + libclang-dev (>= 1:3.2) | libclang-dev (>= 3.2),
 + libclang-dev (>= 1:3.2) | libclang-dev (<< 1:0),
-  libclang-dev (>= 1:3.2) | libgl1-mesa-dev (>= 9),
-  libclang-dev (>= 1:3.2) | libegl1-mesa-dev (>= 9),
-  libclang-dev (>= 1:3.2) | libgbm-dev (>= 9)
- Build-Conflicts: libegl1-mesa-dev (<< 9), libgbm-dev (<< 9)
++ libgl1-mesa-dev (>= 9),
++ libegl1-mesa-dev (>= 9),
++ libgbm-dev (>= 9)
 +Standards-Version: 3.9.4
 +Section: libs
 +Homepage: http://cgit.freedesktop.org/beignet/
 +
 +Package: beignet-dev
 +Section: libdevel
 +Architecture: i386 amd64 kfreebsd-i386 kfreebsd-amd64
 +Depends: beignet (= ${binary:Version}), ${misc:Depends}
 +Description: Intel OpenCL library
 + OpenCL (Open Computing Language) is a multivendor open standard for
 + general-purpose parallel programming of heterogeneous systems that include
 + CPUs, GPUs and other processors.
 + .
 + This package contains the development files for directly linking against
 + the Intel implementation.
 +
 +Package: beignet
 +Section: libs
 +Architecture: i386 amd64 kfreebsd-i386 kfreebsd-amd64
 +Depends: ${shlibs:Depends}, ${misc:Depends}
 +Conflicts: beignet0.0.1
 +Replaces: beignet0.0.1
 +Provides: opencl-icd
 +Description: Intel OpenCL library
 + OpenCL (Open Computing Language) is a multivendor open standard for
 + general-purpose parallel programming of heterogeneous systems that include
 + CPUs, GPUs and other processors.
 + .
 + This package contains the shared library for the Intel implementation.
diff --cc debian/patches/0001-Generate-all-supported-as_-functions.patch
index 45276c2,0000000..d2a8b75
mode 100644,000000..100644
--- a/debian/patches/0001-Generate-all-supported-as_-functions.patch
+++ b/debian/patches/0001-Generate-all-supported-as_-functions.patch
@@@ -1,1307 -1,0 +1,1301 @@@
- From a1926ba22c15aee973d651d700fdc7b94cd8bf4d Mon Sep 17 00:00:00 2001
++From d276ed9d54e7026a777c80048a91e8dd078c4319 Mon Sep 17 00:00:00 2001
 +From: Simon Richter <Simon.Richter at hogyros.de>
 +Date: Mon, 13 May 2013 22:43:34 +0200
- Subject: [PATCH 1/4] Generate all supported as_* functions
++Subject: [PATCH 01/12] Generate all supported as_* functions
 +To: beignet at lists.freedesktop.org
 +
 +This adds support for all reinterpreting type conversions currently
 +possible.
 +
 +The conversion functions can be updated by invoking the
 +update_as.sh script.
 +---
 + backend/src/gen_as.sh    |   83 ++++
 + backend/src/genconfig.sh |   11 +
 + backend/src/ocl_stdlib.h | 1137 +++++++++++++++++++++++++++++++++++++++++++++-
 + backend/src/update.sh    |    2 +
 + backend/src/update_as.sh |   11 +
 + 5 files changed, 1237 insertions(+), 7 deletions(-)
 + create mode 100755 backend/src/gen_as.sh
 + create mode 100644 backend/src/genconfig.sh
 + create mode 100755 backend/src/update.sh
 + create mode 100755 backend/src/update_as.sh
 +
- diff --git a/backend/src/gen_as.sh b/backend/src/gen_as.sh
- new file mode 100755
- index 0000000..76fedf8
- --- /dev/null
- +++ b/backend/src/gen_as.sh
++Index: beignet-0.1+git20130619+42967d2/backend/src/gen_as.sh
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/backend/src/gen_as.sh	2013-06-19 21:04:37.838666758 +0200
 +@@ -0,0 +1,83 @@
 ++#! /bin/sh -e
 ++
 ++. ./genconfig.sh
 ++
 ++# Generate list of union sizes
 ++for type in $TYPES; do
 ++        size=`IFS=:; set -- dummy $type; echo $3`
 ++        for vector_length in $VECTOR_LENGTHS; do
 ++                union_sizes="$union_sizes `expr $vector_length \* $size`"
 ++        done
 ++done
 ++union_sizes="`echo $union_sizes | tr ' ' '\n' | sort -n | uniq`"
 ++
 ++# For each union size
 ++for union_size in $union_sizes; do
 ++
 ++        # Define an union that contains all vector types that have the same size as the union
 ++        unionname="union _type_cast_${union_size}_b"
 ++        echo "$unionname {"
 ++        for type in $TYPES; do
 ++                basetype=`IFS=:; set -- dummy $type; echo $2`
 ++                basesize=`IFS=:; set -- dummy $type; echo $3`
 ++                for vector_length in $VECTOR_LENGTHS; do
 ++                        vector_size_in_union="`expr $vector_length \* $basesize`"
 ++                        if test $union_size -ne $vector_size_in_union; then
 ++                                continue
 ++                        fi
 ++                        if test $vector_length -eq 1; then
 ++                                vectortype=$basetype
 ++                        else
 ++                                vectortype=$basetype$vector_length
 ++                        fi
 ++                        echo "  $vectortype _$vectortype;"
 ++                done
 ++                
 ++        done
 ++        echo "};"
 ++        echo
 ++
 ++        # For each tuple of vector types that has the same size as the current union size,
 ++        # define an as_* function that converts types without changing binary representation.
 ++        for ftype in $TYPES; do
 ++                fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
 ++                fbasesize=`IFS=:; set -- dummy $ftype; echo $3`
 ++                for fvector_length in $VECTOR_LENGTHS; do
 ++                        fvector_size_in_union="`expr $fvector_length \* $fbasesize`"
 ++                        if test $union_size -ne $fvector_size_in_union; then
 ++                                continue
 ++                        fi
 ++                        if test $fvector_length -eq 1; then
 ++                                fvectortype=$fbasetype
 ++                        else
 ++                                fvectortype=$fbasetype$fvector_length
 ++                        fi
 ++                        for ttype in $TYPES; do
 ++                                tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
 ++                                tbasesize=`IFS=:; set -- dummy $ttype; echo $3`
 ++                                if test $fbasetype = $tbasetype; then
 ++                                        continue
 ++                                fi
 ++                                for tvector_length in $VECTOR_LENGTHS; do
 ++                                        tvector_size_in_union="`expr $tvector_length \* $tbasesize`"
 ++                                        if test $union_size -ne $tvector_size_in_union; then
 ++                                                continue
 ++                                        fi
 ++                                        if test $tvector_length -eq 1; then
 ++                                                tvectortype=$tbasetype
 ++                                        else
 ++                                                tvectortype=$tbasetype$tvector_length
 ++                                        fi
 ++                                        echo "INLINE OVERLOADABLE $tvectortype as_$tvectortype($fvectortype v) {"
 ++                                        echo "  $unionname u;"
 ++                                        echo "  u._$fvectortype = v;"
 ++                                        echo "  return u._$tvectortype;"
 ++                                        echo "}"
 ++                                        echo
 ++                                done
 ++                        done
 ++                done
 ++                
 ++        done
 ++
 ++done
- diff --git a/backend/src/genconfig.sh b/backend/src/genconfig.sh
- new file mode 100644
- index 0000000..60edafd
- --- /dev/null
- +++ b/backend/src/genconfig.sh
++Index: beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh	2013-06-19 21:04:37.838666758 +0200
 +@@ -0,0 +1,11 @@
 ++#! /bin/false
 ++# This is to be sourced by the generation scripts
 ++
 ++# Supported base types and their lengths
 ++TYPES="int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
 ++
 ++# Supported vector lengths
 ++VECTOR_LENGTHS="1 2 3 4 8 16"
 ++
 ++## No user serviceable parts below here
 ++
- diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
- index 46b81e1..d00de44 100644
- --- a/backend/src/ocl_stdlib.h
- +++ b/backend/src/ocl_stdlib.h
- @@ -91,15 +91,1138 @@ typedef size_t __event_t;
++Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:22.050667462 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:37.838666758 +0200
++@@ -91,15 +91,1138 @@
 + /////////////////////////////////////////////////////////////////////////////
 + // OpenCL conversions & type casting
 + /////////////////////////////////////////////////////////////////////////////
 +-union type_cast_4_b {
 +-  float f;
 +-  uchar4 u4;
++-};
++-uchar4 INLINE_OVERLOADABLE as_uchar4(float f) {
++-    union type_cast_4_b u;
++-    u.f = f;
++-    return u.u4;
 ++
 ++// ##BEGIN_AS##
 ++union _type_cast_1_b {
 ++  char _char;
 ++  uchar _uchar;
 ++};
 ++
 ++INLINE OVERLOADABLE uchar as_uchar(char v) {
 ++  union _type_cast_1_b u;
 ++  u._char = v;
 ++  return u._uchar;
 ++}
 ++
 ++INLINE OVERLOADABLE char as_char(uchar v) {
 ++  union _type_cast_1_b u;
 ++  u._uchar = v;
 ++  return u._char;
 ++}
 ++
 ++union _type_cast_2_b {
 ++  short _short;
 ++  ushort _ushort;
 ++  char2 _char2;
 ++  uchar2 _uchar2;
 ++};
 ++
 ++INLINE OVERLOADABLE ushort as_ushort(short v) {
 ++  union _type_cast_2_b u;
 ++  u._short = v;
 ++  return u._ushort;
 ++}
 ++
 ++INLINE OVERLOADABLE char2 as_char2(short v) {
 ++  union _type_cast_2_b u;
 ++  u._short = v;
 ++  return u._char2;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 as_uchar2(short v) {
 ++  union _type_cast_2_b u;
 ++  u._short = v;
 ++  return u._uchar2;
 ++}
 ++
 ++INLINE OVERLOADABLE short as_short(ushort v) {
 ++  union _type_cast_2_b u;
 ++  u._ushort = v;
 ++  return u._short;
 ++}
 ++
 ++INLINE OVERLOADABLE char2 as_char2(ushort v) {
 ++  union _type_cast_2_b u;
 ++  u._ushort = v;
 ++  return u._char2;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 as_uchar2(ushort v) {
 ++  union _type_cast_2_b u;
 ++  u._ushort = v;
 ++  return u._uchar2;
 ++}
 ++
 ++INLINE OVERLOADABLE short as_short(char2 v) {
 ++  union _type_cast_2_b u;
 ++  u._char2 = v;
 ++  return u._short;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort as_ushort(char2 v) {
 ++  union _type_cast_2_b u;
 ++  u._char2 = v;
 ++  return u._ushort;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 as_uchar2(char2 v) {
 ++  union _type_cast_2_b u;
 ++  u._char2 = v;
 ++  return u._uchar2;
 ++}
 ++
 ++INLINE OVERLOADABLE short as_short(uchar2 v) {
 ++  union _type_cast_2_b u;
 ++  u._uchar2 = v;
 ++  return u._short;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort as_ushort(uchar2 v) {
 ++  union _type_cast_2_b u;
 ++  u._uchar2 = v;
 ++  return u._ushort;
 ++}
 ++
 ++INLINE OVERLOADABLE char2 as_char2(uchar2 v) {
 ++  union _type_cast_2_b u;
 ++  u._uchar2 = v;
 ++  return u._char2;
 ++}
 ++
 ++union _type_cast_3_b {
 ++  char3 _char3;
 ++  uchar3 _uchar3;
 ++};
 ++
 ++INLINE OVERLOADABLE uchar3 as_uchar3(char3 v) {
 ++  union _type_cast_3_b u;
 ++  u._char3 = v;
 ++  return u._uchar3;
 ++}
 ++
 ++INLINE OVERLOADABLE char3 as_char3(uchar3 v) {
 ++  union _type_cast_3_b u;
 ++  u._uchar3 = v;
 ++  return u._char3;
 ++}
 ++
 ++union _type_cast_4_b {
 ++  int _int;
 ++  uint _uint;
 ++  short2 _short2;
 ++  ushort2 _ushort2;
 ++  char4 _char4;
 ++  uchar4 _uchar4;
 ++  float _float;
 ++};
 ++
 ++INLINE OVERLOADABLE uint as_uint(int v) {
 ++  union _type_cast_4_b u;
 ++  u._int = v;
 ++  return u._uint;
 ++}
 ++
 ++INLINE OVERLOADABLE short2 as_short2(int v) {
 ++  union _type_cast_4_b u;
 ++  u._int = v;
 ++  return u._short2;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 as_ushort2(int v) {
 ++  union _type_cast_4_b u;
 ++  u._int = v;
 ++  return u._ushort2;
 ++}
 ++
 ++INLINE OVERLOADABLE char4 as_char4(int v) {
 ++  union _type_cast_4_b u;
 ++  u._int = v;
 ++  return u._char4;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 as_uchar4(int v) {
 ++  union _type_cast_4_b u;
 ++  u._int = v;
 ++  return u._uchar4;
 ++}
 ++
 ++INLINE OVERLOADABLE float as_float(int v) {
 ++  union _type_cast_4_b u;
 ++  u._int = v;
 ++  return u._float;
 ++}
 ++
 ++INLINE OVERLOADABLE int as_int(uint v) {
 ++  union _type_cast_4_b u;
 ++  u._uint = v;
 ++  return u._int;
 ++}
 ++
 ++INLINE OVERLOADABLE short2 as_short2(uint v) {
 ++  union _type_cast_4_b u;
 ++  u._uint = v;
 ++  return u._short2;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 as_ushort2(uint v) {
 ++  union _type_cast_4_b u;
 ++  u._uint = v;
 ++  return u._ushort2;
 ++}
 ++
 ++INLINE OVERLOADABLE char4 as_char4(uint v) {
 ++  union _type_cast_4_b u;
 ++  u._uint = v;
 ++  return u._char4;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 as_uchar4(uint v) {
 ++  union _type_cast_4_b u;
 ++  u._uint = v;
 ++  return u._uchar4;
 ++}
 ++
 ++INLINE OVERLOADABLE float as_float(uint v) {
 ++  union _type_cast_4_b u;
 ++  u._uint = v;
 ++  return u._float;
 ++}
 ++
 ++INLINE OVERLOADABLE int as_int(short2 v) {
 ++  union _type_cast_4_b u;
 ++  u._short2 = v;
 ++  return u._int;
 ++}
 ++
 ++INLINE OVERLOADABLE uint as_uint(short2 v) {
 ++  union _type_cast_4_b u;
 ++  u._short2 = v;
 ++  return u._uint;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 as_ushort2(short2 v) {
 ++  union _type_cast_4_b u;
 ++  u._short2 = v;
 ++  return u._ushort2;
 ++}
 ++
 ++INLINE OVERLOADABLE char4 as_char4(short2 v) {
 ++  union _type_cast_4_b u;
 ++  u._short2 = v;
 ++  return u._char4;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 as_uchar4(short2 v) {
 ++  union _type_cast_4_b u;
 ++  u._short2 = v;
 ++  return u._uchar4;
 ++}
 ++
 ++INLINE OVERLOADABLE float as_float(short2 v) {
 ++  union _type_cast_4_b u;
 ++  u._short2 = v;
 ++  return u._float;
 ++}
 ++
 ++INLINE OVERLOADABLE int as_int(ushort2 v) {
 ++  union _type_cast_4_b u;
 ++  u._ushort2 = v;
 ++  return u._int;
 ++}
 ++
 ++INLINE OVERLOADABLE uint as_uint(ushort2 v) {
 ++  union _type_cast_4_b u;
 ++  u._ushort2 = v;
 ++  return u._uint;
 ++}
 ++
 ++INLINE OVERLOADABLE short2 as_short2(ushort2 v) {
 ++  union _type_cast_4_b u;
 ++  u._ushort2 = v;
 ++  return u._short2;
 ++}
 ++
 ++INLINE OVERLOADABLE char4 as_char4(ushort2 v) {
 ++  union _type_cast_4_b u;
 ++  u._ushort2 = v;
 ++  return u._char4;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 as_uchar4(ushort2 v) {
 ++  union _type_cast_4_b u;
 ++  u._ushort2 = v;
 ++  return u._uchar4;
 ++}
 ++
 ++INLINE OVERLOADABLE float as_float(ushort2 v) {
 ++  union _type_cast_4_b u;
 ++  u._ushort2 = v;
 ++  return u._float;
 ++}
 ++
 ++INLINE OVERLOADABLE int as_int(char4 v) {
 ++  union _type_cast_4_b u;
 ++  u._char4 = v;
 ++  return u._int;
 ++}
 ++
 ++INLINE OVERLOADABLE uint as_uint(char4 v) {
 ++  union _type_cast_4_b u;
 ++  u._char4 = v;
 ++  return u._uint;
 ++}
 ++
 ++INLINE OVERLOADABLE short2 as_short2(char4 v) {
 ++  union _type_cast_4_b u;
 ++  u._char4 = v;
 ++  return u._short2;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 as_ushort2(char4 v) {
 ++  union _type_cast_4_b u;
 ++  u._char4 = v;
 ++  return u._ushort2;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 as_uchar4(char4 v) {
 ++  union _type_cast_4_b u;
 ++  u._char4 = v;
 ++  return u._uchar4;
 ++}
 ++
 ++INLINE OVERLOADABLE float as_float(char4 v) {
 ++  union _type_cast_4_b u;
 ++  u._char4 = v;
 ++  return u._float;
 ++}
 ++
 ++INLINE OVERLOADABLE int as_int(uchar4 v) {
 ++  union _type_cast_4_b u;
 ++  u._uchar4 = v;
 ++  return u._int;
 ++}
 ++
 ++INLINE OVERLOADABLE uint as_uint(uchar4 v) {
 ++  union _type_cast_4_b u;
 ++  u._uchar4 = v;
 ++  return u._uint;
 ++}
 ++
 ++INLINE OVERLOADABLE short2 as_short2(uchar4 v) {
 ++  union _type_cast_4_b u;
 ++  u._uchar4 = v;
 ++  return u._short2;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 as_ushort2(uchar4 v) {
 ++  union _type_cast_4_b u;
 ++  u._uchar4 = v;
 ++  return u._ushort2;
 ++}
 ++
 ++INLINE OVERLOADABLE char4 as_char4(uchar4 v) {
 ++  union _type_cast_4_b u;
 ++  u._uchar4 = v;
 ++  return u._char4;
 ++}
 ++
 ++INLINE OVERLOADABLE float as_float(uchar4 v) {
 ++  union _type_cast_4_b u;
 ++  u._uchar4 = v;
 ++  return u._float;
 ++}
 ++
 ++INLINE OVERLOADABLE int as_int(float v) {
 ++  union _type_cast_4_b u;
 ++  u._float = v;
 ++  return u._int;
 ++}
 ++
 ++INLINE OVERLOADABLE uint as_uint(float v) {
 ++  union _type_cast_4_b u;
 ++  u._float = v;
 ++  return u._uint;
 ++}
 ++
 ++INLINE OVERLOADABLE short2 as_short2(float v) {
 ++  union _type_cast_4_b u;
 ++  u._float = v;
 ++  return u._short2;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 as_ushort2(float v) {
 ++  union _type_cast_4_b u;
 ++  u._float = v;
 ++  return u._ushort2;
 ++}
 ++
 ++INLINE OVERLOADABLE char4 as_char4(float v) {
 ++  union _type_cast_4_b u;
 ++  u._float = v;
 ++  return u._char4;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 as_uchar4(float v) {
 ++  union _type_cast_4_b u;
 ++  u._float = v;
 ++  return u._uchar4;
 ++}
 ++
 ++union _type_cast_6_b {
 ++  short3 _short3;
 ++  ushort3 _ushort3;
 ++};
 ++
 ++INLINE OVERLOADABLE ushort3 as_ushort3(short3 v) {
 ++  union _type_cast_6_b u;
 ++  u._short3 = v;
 ++  return u._ushort3;
 ++}
 ++
 ++INLINE OVERLOADABLE short3 as_short3(ushort3 v) {
 ++  union _type_cast_6_b u;
 ++  u._ushort3 = v;
 ++  return u._short3;
 ++}
 ++
 ++union _type_cast_8_b {
 ++  int2 _int2;
 ++  uint2 _uint2;
 ++  short4 _short4;
 ++  ushort4 _ushort4;
 ++  char8 _char8;
 ++  uchar8 _uchar8;
 ++  float2 _float2;
 ++};
 ++
 ++INLINE OVERLOADABLE uint2 as_uint2(int2 v) {
 ++  union _type_cast_8_b u;
 ++  u._int2 = v;
 ++  return u._uint2;
 ++}
 ++
 ++INLINE OVERLOADABLE short4 as_short4(int2 v) {
 ++  union _type_cast_8_b u;
 ++  u._int2 = v;
 ++  return u._short4;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 as_ushort4(int2 v) {
 ++  union _type_cast_8_b u;
 ++  u._int2 = v;
 ++  return u._ushort4;
 ++}
 ++
 ++INLINE OVERLOADABLE char8 as_char8(int2 v) {
 ++  union _type_cast_8_b u;
 ++  u._int2 = v;
 ++  return u._char8;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 as_uchar8(int2 v) {
 ++  union _type_cast_8_b u;
 ++  u._int2 = v;
 ++  return u._uchar8;
 ++}
 ++
 ++INLINE OVERLOADABLE float2 as_float2(int2 v) {
 ++  union _type_cast_8_b u;
 ++  u._int2 = v;
 ++  return u._float2;
 ++}
 ++
 ++INLINE OVERLOADABLE int2 as_int2(uint2 v) {
 ++  union _type_cast_8_b u;
 ++  u._uint2 = v;
 ++  return u._int2;
 ++}
 ++
 ++INLINE OVERLOADABLE short4 as_short4(uint2 v) {
 ++  union _type_cast_8_b u;
 ++  u._uint2 = v;
 ++  return u._short4;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 as_ushort4(uint2 v) {
 ++  union _type_cast_8_b u;
 ++  u._uint2 = v;
 ++  return u._ushort4;
 ++}
 ++
 ++INLINE OVERLOADABLE char8 as_char8(uint2 v) {
 ++  union _type_cast_8_b u;
 ++  u._uint2 = v;
 ++  return u._char8;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 as_uchar8(uint2 v) {
 ++  union _type_cast_8_b u;
 ++  u._uint2 = v;
 ++  return u._uchar8;
 ++}
 ++
 ++INLINE OVERLOADABLE float2 as_float2(uint2 v) {
 ++  union _type_cast_8_b u;
 ++  u._uint2 = v;
 ++  return u._float2;
 ++}
 ++
 ++INLINE OVERLOADABLE int2 as_int2(short4 v) {
 ++  union _type_cast_8_b u;
 ++  u._short4 = v;
 ++  return u._int2;
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 as_uint2(short4 v) {
 ++  union _type_cast_8_b u;
 ++  u._short4 = v;
 ++  return u._uint2;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 as_ushort4(short4 v) {
 ++  union _type_cast_8_b u;
 ++  u._short4 = v;
 ++  return u._ushort4;
 ++}
 ++
 ++INLINE OVERLOADABLE char8 as_char8(short4 v) {
 ++  union _type_cast_8_b u;
 ++  u._short4 = v;
 ++  return u._char8;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 as_uchar8(short4 v) {
 ++  union _type_cast_8_b u;
 ++  u._short4 = v;
 ++  return u._uchar8;
 ++}
 ++
 ++INLINE OVERLOADABLE float2 as_float2(short4 v) {
 ++  union _type_cast_8_b u;
 ++  u._short4 = v;
 ++  return u._float2;
 ++}
 ++
 ++INLINE OVERLOADABLE int2 as_int2(ushort4 v) {
 ++  union _type_cast_8_b u;
 ++  u._ushort4 = v;
 ++  return u._int2;
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 as_uint2(ushort4 v) {
 ++  union _type_cast_8_b u;
 ++  u._ushort4 = v;
 ++  return u._uint2;
 ++}
 ++
 ++INLINE OVERLOADABLE short4 as_short4(ushort4 v) {
 ++  union _type_cast_8_b u;
 ++  u._ushort4 = v;
 ++  return u._short4;
 ++}
 ++
 ++INLINE OVERLOADABLE char8 as_char8(ushort4 v) {
 ++  union _type_cast_8_b u;
 ++  u._ushort4 = v;
 ++  return u._char8;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 as_uchar8(ushort4 v) {
 ++  union _type_cast_8_b u;
 ++  u._ushort4 = v;
 ++  return u._uchar8;
 ++}
 ++
 ++INLINE OVERLOADABLE float2 as_float2(ushort4 v) {
 ++  union _type_cast_8_b u;
 ++  u._ushort4 = v;
 ++  return u._float2;
 ++}
 ++
 ++INLINE OVERLOADABLE int2 as_int2(char8 v) {
 ++  union _type_cast_8_b u;
 ++  u._char8 = v;
 ++  return u._int2;
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 as_uint2(char8 v) {
 ++  union _type_cast_8_b u;
 ++  u._char8 = v;
 ++  return u._uint2;
 ++}
 ++
 ++INLINE OVERLOADABLE short4 as_short4(char8 v) {
 ++  union _type_cast_8_b u;
 ++  u._char8 = v;
 ++  return u._short4;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 as_ushort4(char8 v) {
 ++  union _type_cast_8_b u;
 ++  u._char8 = v;
 ++  return u._ushort4;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 as_uchar8(char8 v) {
 ++  union _type_cast_8_b u;
 ++  u._char8 = v;
 ++  return u._uchar8;
 ++}
 ++
 ++INLINE OVERLOADABLE float2 as_float2(char8 v) {
 ++  union _type_cast_8_b u;
 ++  u._char8 = v;
 ++  return u._float2;
 ++}
 ++
 ++INLINE OVERLOADABLE int2 as_int2(uchar8 v) {
 ++  union _type_cast_8_b u;
 ++  u._uchar8 = v;
 ++  return u._int2;
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 as_uint2(uchar8 v) {
 ++  union _type_cast_8_b u;
 ++  u._uchar8 = v;
 ++  return u._uint2;
 ++}
 ++
 ++INLINE OVERLOADABLE short4 as_short4(uchar8 v) {
 ++  union _type_cast_8_b u;
 ++  u._uchar8 = v;
 ++  return u._short4;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 as_ushort4(uchar8 v) {
 ++  union _type_cast_8_b u;
 ++  u._uchar8 = v;
 ++  return u._ushort4;
 ++}
 ++
 ++INLINE OVERLOADABLE char8 as_char8(uchar8 v) {
 ++  union _type_cast_8_b u;
 ++  u._uchar8 = v;
 ++  return u._char8;
 ++}
 ++
 ++INLINE OVERLOADABLE float2 as_float2(uchar8 v) {
 ++  union _type_cast_8_b u;
 ++  u._uchar8 = v;
 ++  return u._float2;
 ++}
 ++
 ++INLINE OVERLOADABLE int2 as_int2(float2 v) {
 ++  union _type_cast_8_b u;
 ++  u._float2 = v;
 ++  return u._int2;
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 as_uint2(float2 v) {
 ++  union _type_cast_8_b u;
 ++  u._float2 = v;
 ++  return u._uint2;
 ++}
 ++
 ++INLINE OVERLOADABLE short4 as_short4(float2 v) {
 ++  union _type_cast_8_b u;
 ++  u._float2 = v;
 ++  return u._short4;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 as_ushort4(float2 v) {
 ++  union _type_cast_8_b u;
 ++  u._float2 = v;
 ++  return u._ushort4;
 ++}
 ++
 ++INLINE OVERLOADABLE char8 as_char8(float2 v) {
 ++  union _type_cast_8_b u;
 ++  u._float2 = v;
 ++  return u._char8;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 as_uchar8(float2 v) {
 ++  union _type_cast_8_b u;
 ++  u._float2 = v;
 ++  return u._uchar8;
 ++}
 ++
 ++union _type_cast_12_b {
 ++  int3 _int3;
 ++  uint3 _uint3;
 ++  float3 _float3;
 ++};
 ++
 ++INLINE OVERLOADABLE uint3 as_uint3(int3 v) {
 ++  union _type_cast_12_b u;
 ++  u._int3 = v;
 ++  return u._uint3;
 ++}
 ++
 ++INLINE OVERLOADABLE float3 as_float3(int3 v) {
 ++  union _type_cast_12_b u;
 ++  u._int3 = v;
 ++  return u._float3;
 ++}
 ++
 ++INLINE OVERLOADABLE int3 as_int3(uint3 v) {
 ++  union _type_cast_12_b u;
 ++  u._uint3 = v;
 ++  return u._int3;
 ++}
 ++
 ++INLINE OVERLOADABLE float3 as_float3(uint3 v) {
 ++  union _type_cast_12_b u;
 ++  u._uint3 = v;
 ++  return u._float3;
 ++}
 ++
 ++INLINE OVERLOADABLE int3 as_int3(float3 v) {
 ++  union _type_cast_12_b u;
 ++  u._float3 = v;
 ++  return u._int3;
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 as_uint3(float3 v) {
 ++  union _type_cast_12_b u;
 ++  u._float3 = v;
 ++  return u._uint3;
 ++}
 ++
 ++union _type_cast_16_b {
 ++  int4 _int4;
 ++  uint4 _uint4;
 ++  short8 _short8;
 ++  ushort8 _ushort8;
 ++  char16 _char16;
 ++  uchar16 _uchar16;
 ++  float4 _float4;
 ++};
 ++
 ++INLINE OVERLOADABLE uint4 as_uint4(int4 v) {
 ++  union _type_cast_16_b u;
 ++  u._int4 = v;
 ++  return u._uint4;
 ++}
 ++
 ++INLINE OVERLOADABLE short8 as_short8(int4 v) {
 ++  union _type_cast_16_b u;
 ++  u._int4 = v;
 ++  return u._short8;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 as_ushort8(int4 v) {
 ++  union _type_cast_16_b u;
 ++  u._int4 = v;
 ++  return u._ushort8;
 ++}
 ++
 ++INLINE OVERLOADABLE char16 as_char16(int4 v) {
 ++  union _type_cast_16_b u;
 ++  u._int4 = v;
 ++  return u._char16;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 as_uchar16(int4 v) {
 ++  union _type_cast_16_b u;
 ++  u._int4 = v;
 ++  return u._uchar16;
 ++}
 ++
 ++INLINE OVERLOADABLE float4 as_float4(int4 v) {
 ++  union _type_cast_16_b u;
 ++  u._int4 = v;
 ++  return u._float4;
 ++}
 ++
 ++INLINE OVERLOADABLE int4 as_int4(uint4 v) {
 ++  union _type_cast_16_b u;
 ++  u._uint4 = v;
 ++  return u._int4;
 ++}
 ++
 ++INLINE OVERLOADABLE short8 as_short8(uint4 v) {
 ++  union _type_cast_16_b u;
 ++  u._uint4 = v;
 ++  return u._short8;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 as_ushort8(uint4 v) {
 ++  union _type_cast_16_b u;
 ++  u._uint4 = v;
 ++  return u._ushort8;
 ++}
 ++
 ++INLINE OVERLOADABLE char16 as_char16(uint4 v) {
 ++  union _type_cast_16_b u;
 ++  u._uint4 = v;
 ++  return u._char16;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 as_uchar16(uint4 v) {
 ++  union _type_cast_16_b u;
 ++  u._uint4 = v;
 ++  return u._uchar16;
 ++}
 ++
 ++INLINE OVERLOADABLE float4 as_float4(uint4 v) {
 ++  union _type_cast_16_b u;
 ++  u._uint4 = v;
 ++  return u._float4;
 ++}
 ++
 ++INLINE OVERLOADABLE int4 as_int4(short8 v) {
 ++  union _type_cast_16_b u;
 ++  u._short8 = v;
 ++  return u._int4;
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 as_uint4(short8 v) {
 ++  union _type_cast_16_b u;
 ++  u._short8 = v;
 ++  return u._uint4;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 as_ushort8(short8 v) {
 ++  union _type_cast_16_b u;
 ++  u._short8 = v;
 ++  return u._ushort8;
 ++}
 ++
 ++INLINE OVERLOADABLE char16 as_char16(short8 v) {
 ++  union _type_cast_16_b u;
 ++  u._short8 = v;
 ++  return u._char16;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 as_uchar16(short8 v) {
 ++  union _type_cast_16_b u;
 ++  u._short8 = v;
 ++  return u._uchar16;
 ++}
 ++
 ++INLINE OVERLOADABLE float4 as_float4(short8 v) {
 ++  union _type_cast_16_b u;
 ++  u._short8 = v;
 ++  return u._float4;
 ++}
 ++
 ++INLINE OVERLOADABLE int4 as_int4(ushort8 v) {
 ++  union _type_cast_16_b u;
 ++  u._ushort8 = v;
 ++  return u._int4;
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 as_uint4(ushort8 v) {
 ++  union _type_cast_16_b u;
 ++  u._ushort8 = v;
 ++  return u._uint4;
 ++}
 ++
 ++INLINE OVERLOADABLE short8 as_short8(ushort8 v) {
 ++  union _type_cast_16_b u;
 ++  u._ushort8 = v;
 ++  return u._short8;
 ++}
 ++
 ++INLINE OVERLOADABLE char16 as_char16(ushort8 v) {
 ++  union _type_cast_16_b u;
 ++  u._ushort8 = v;
 ++  return u._char16;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 as_uchar16(ushort8 v) {
 ++  union _type_cast_16_b u;
 ++  u._ushort8 = v;
 ++  return u._uchar16;
 ++}
 ++
 ++INLINE OVERLOADABLE float4 as_float4(ushort8 v) {
 ++  union _type_cast_16_b u;
 ++  u._ushort8 = v;
 ++  return u._float4;
 ++}
 ++
 ++INLINE OVERLOADABLE int4 as_int4(char16 v) {
 ++  union _type_cast_16_b u;
 ++  u._char16 = v;
 ++  return u._int4;
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 as_uint4(char16 v) {
 ++  union _type_cast_16_b u;
 ++  u._char16 = v;
 ++  return u._uint4;
 ++}
 ++
 ++INLINE OVERLOADABLE short8 as_short8(char16 v) {
 ++  union _type_cast_16_b u;
 ++  u._char16 = v;
 ++  return u._short8;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 as_ushort8(char16 v) {
 ++  union _type_cast_16_b u;
 ++  u._char16 = v;
 ++  return u._ushort8;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 as_uchar16(char16 v) {
 ++  union _type_cast_16_b u;
 ++  u._char16 = v;
 ++  return u._uchar16;
 ++}
 ++
 ++INLINE OVERLOADABLE float4 as_float4(char16 v) {
 ++  union _type_cast_16_b u;
 ++  u._char16 = v;
 ++  return u._float4;
 ++}
 ++
 ++INLINE OVERLOADABLE int4 as_int4(uchar16 v) {
 ++  union _type_cast_16_b u;
 ++  u._uchar16 = v;
 ++  return u._int4;
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 as_uint4(uchar16 v) {
 ++  union _type_cast_16_b u;
 ++  u._uchar16 = v;
 ++  return u._uint4;
 ++}
 ++
 ++INLINE OVERLOADABLE short8 as_short8(uchar16 v) {
 ++  union _type_cast_16_b u;
 ++  u._uchar16 = v;
 ++  return u._short8;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 as_ushort8(uchar16 v) {
 ++  union _type_cast_16_b u;
 ++  u._uchar16 = v;
 ++  return u._ushort8;
 ++}
 ++
 ++INLINE OVERLOADABLE char16 as_char16(uchar16 v) {
 ++  union _type_cast_16_b u;
 ++  u._uchar16 = v;
 ++  return u._char16;
 ++}
 ++
 ++INLINE OVERLOADABLE float4 as_float4(uchar16 v) {
 ++  union _type_cast_16_b u;
 ++  u._uchar16 = v;
 ++  return u._float4;
 ++}
 ++
 ++INLINE OVERLOADABLE int4 as_int4(float4 v) {
 ++  union _type_cast_16_b u;
 ++  u._float4 = v;
 ++  return u._int4;
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 as_uint4(float4 v) {
 ++  union _type_cast_16_b u;
 ++  u._float4 = v;
 ++  return u._uint4;
 ++}
 ++
 ++INLINE OVERLOADABLE short8 as_short8(float4 v) {
 ++  union _type_cast_16_b u;
 ++  u._float4 = v;
 ++  return u._short8;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 as_ushort8(float4 v) {
 ++  union _type_cast_16_b u;
 ++  u._float4 = v;
 ++  return u._ushort8;
 ++}
 ++
 ++INLINE OVERLOADABLE char16 as_char16(float4 v) {
 ++  union _type_cast_16_b u;
 ++  u._float4 = v;
 ++  return u._char16;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 as_uchar16(float4 v) {
 ++  union _type_cast_16_b u;
 ++  u._float4 = v;
 ++  return u._uchar16;
 ++}
 ++
 ++union _type_cast_32_b {
 ++  int8 _int8;
 ++  uint8 _uint8;
 ++  short16 _short16;
 ++  ushort16 _ushort16;
 ++  float8 _float8;
-  };
- -uchar4 INLINE_OVERLOADABLE as_uchar4(float f) {
- -    union type_cast_4_b u;
- -    u.f = f;
- -    return u.u4;
+++};
 ++
 ++INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._uint8;
 ++}
 ++
 ++INLINE OVERLOADABLE short16 as_short16(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._short16;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._ushort16;
 ++}
 ++
 ++INLINE OVERLOADABLE float8 as_float8(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._float8;
 ++}
 ++
 ++INLINE OVERLOADABLE int8 as_int8(uint8 v) {
 ++  union _type_cast_32_b u;
 ++  u._uint8 = v;
 ++  return u._int8;
-  }
+++}
 ++
 ++INLINE OVERLOADABLE short16 as_short16(uint8 v) {
 ++  union _type_cast_32_b u;
 ++  u._uint8 = v;
 ++  return u._short16;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 as_ushort16(uint8 v) {
 ++  union _type_cast_32_b u;
 ++  u._uint8 = v;
 ++  return u._ushort16;
 ++}
 ++
 ++INLINE OVERLOADABLE float8 as_float8(uint8 v) {
 ++  union _type_cast_32_b u;
 ++  u._uint8 = v;
 ++  return u._float8;
 ++}
 ++
 ++INLINE OVERLOADABLE int8 as_int8(short16 v) {
 ++  union _type_cast_32_b u;
 ++  u._short16 = v;
 ++  return u._int8;
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 as_uint8(short16 v) {
 ++  union _type_cast_32_b u;
 ++  u._short16 = v;
 ++  return u._uint8;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 as_ushort16(short16 v) {
 ++  union _type_cast_32_b u;
 ++  u._short16 = v;
 ++  return u._ushort16;
 ++}
 ++
 ++INLINE OVERLOADABLE float8 as_float8(short16 v) {
 ++  union _type_cast_32_b u;
 ++  u._short16 = v;
 ++  return u._float8;
 ++}
 ++
 ++INLINE OVERLOADABLE int8 as_int8(ushort16 v) {
 ++  union _type_cast_32_b u;
 ++  u._ushort16 = v;
 ++  return u._int8;
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 as_uint8(ushort16 v) {
 ++  union _type_cast_32_b u;
 ++  u._ushort16 = v;
 ++  return u._uint8;
 ++}
 ++
 ++INLINE OVERLOADABLE short16 as_short16(ushort16 v) {
 ++  union _type_cast_32_b u;
 ++  u._ushort16 = v;
 ++  return u._short16;
 ++}
 ++
 ++INLINE OVERLOADABLE float8 as_float8(ushort16 v) {
 ++  union _type_cast_32_b u;
 ++  u._ushort16 = v;
 ++  return u._float8;
 ++}
 ++
 ++INLINE OVERLOADABLE int8 as_int8(float8 v) {
 ++  union _type_cast_32_b u;
 ++  u._float8 = v;
 ++  return u._int8;
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 as_uint8(float8 v) {
 ++  union _type_cast_32_b u;
 ++  u._float8 = v;
 ++  return u._uint8;
 ++}
 ++
 ++INLINE OVERLOADABLE short16 as_short16(float8 v) {
 ++  union _type_cast_32_b u;
 ++  u._float8 = v;
 ++  return u._short16;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 as_ushort16(float8 v) {
 ++  union _type_cast_32_b u;
 ++  u._float8 = v;
 ++  return u._ushort16;
 ++}
 ++
 ++union _type_cast_64_b {
 ++  int16 _int16;
 ++  uint16 _uint16;
 ++  float16 _float16;
 ++};
 ++
 ++INLINE OVERLOADABLE uint16 as_uint16(int16 v) {
 ++  union _type_cast_64_b u;
 ++  u._int16 = v;
 ++  return u._uint16;
 ++}
 ++
 ++INLINE OVERLOADABLE float16 as_float16(int16 v) {
 ++  union _type_cast_64_b u;
 ++  u._int16 = v;
 ++  return u._float16;
 ++}
 ++
 ++INLINE OVERLOADABLE int16 as_int16(uint16 v) {
 ++  union _type_cast_64_b u;
 ++  u._uint16 = v;
 ++  return u._int16;
 ++}
 ++
 ++INLINE OVERLOADABLE float16 as_float16(uint16 v) {
 ++  union _type_cast_64_b u;
 ++  u._uint16 = v;
 ++  return u._float16;
 ++}
 ++
 ++INLINE OVERLOADABLE int16 as_int16(float16 v) {
 ++  union _type_cast_64_b u;
 ++  u._float16 = v;
 ++  return u._int16;
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 as_uint16(float16 v) {
 ++  union _type_cast_64_b u;
 ++  u._float16 = v;
 ++  return u._uint16;
- +}
++ }
 ++
 ++// ##END_AS##
 ++
 + #define DEF(type, n, type2) type##n INLINE_OVERLOADABLE convert_##type##n(type2##n d) { \
 +     return (type##n)((type)(d.s0), (type)(d.s1), (type)(d.s2), (type)(d.s3)); \
 +  }
- diff --git a/backend/src/update.sh b/backend/src/update.sh
- new file mode 100755
- index 0000000..4f9af8c
- --- /dev/null
- +++ b/backend/src/update.sh
++Index: beignet-0.1+git20130619+42967d2/backend/src/update.sh
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/backend/src/update.sh	2013-06-19 21:04:37.838666758 +0200
 +@@ -0,0 +1,2 @@
 ++#! /bin/sh -e
 ++./update_as.sh
- diff --git a/backend/src/update_as.sh b/backend/src/update_as.sh
- new file mode 100755
- index 0000000..54b4191
- --- /dev/null
- +++ b/backend/src/update_as.sh
++Index: beignet-0.1+git20130619+42967d2/backend/src/update_as.sh
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/backend/src/update_as.sh	2013-06-19 21:04:37.838666758 +0200
 +@@ -0,0 +1,11 @@
 ++#! /bin/sh -e
 ++
 ++STDLIB_HEADER=ocl_stdlib.h
 ++
 ++exec >$STDLIB_HEADER.tmp
 ++sed -n -e '1,/##BEGIN_AS##/p' $STDLIB_HEADER
 ++./gen_as.sh
 ++sed -n -e '/##END_AS##/,$p' $STDLIB_HEADER
 ++exec >&2
 ++
 ++mv $STDLIB_HEADER.tmp $STDLIB_HEADER
- -- 
- 1.7.10.4
- 
diff --cc debian/patches/0002-Define-all-convert_-functions.patch
index a0e5c20,0000000..9db6b85
mode 100644,000000..100644
--- a/debian/patches/0002-Define-all-convert_-functions.patch
+++ b/debian/patches/0002-Define-all-convert_-functions.patch
@@@ -1,980 -1,0 +1,975 @@@
- From 1900bf07f138edbf956e01618c304a7b10c59a9b Mon Sep 17 00:00:00 2001
++From 05cfdc1a307a209295263322121b63c5d244d613 Mon Sep 17 00:00:00 2001
 +From: Simon Richter <Simon.Richter at hogyros.de>
 +Date: Tue, 14 May 2013 17:04:56 +0200
- Subject: [PATCH 2/4] Define all convert_* functions.
++Subject: [PATCH 02/12] Define all convert_* functions.
 +To: beignet at lists.freedesktop.org
 +
 +These functions convert between vectors of the same length by casting each
 +member in turn.
 +---
 + backend/src/gen_convert.sh    |   52 +++
 + backend/src/ocl_stdlib.h      |  866 +++++++++++++++++++++++++++++++++++++++--
 + backend/src/update.sh         |    1 +
 + backend/src/update_convert.sh |   11 +
 + 4 files changed, 907 insertions(+), 23 deletions(-)
 + create mode 100755 backend/src/gen_convert.sh
 + create mode 100755 backend/src/update_convert.sh
 +
- diff --git a/backend/src/gen_convert.sh b/backend/src/gen_convert.sh
- new file mode 100755
- index 0000000..74fc73c
- --- /dev/null
- +++ b/backend/src/gen_convert.sh
++Index: beignet-0.1+git20130619+42967d2/backend/src/gen_convert.sh
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/backend/src/gen_convert.sh	2013-06-19 21:04:39.250666695 +0200
 +@@ -0,0 +1,52 @@
 ++#! /bin/sh -e
 ++
 ++. ./genconfig.sh
 ++
 ++# For all vector lengths and types, generate conversion functions
 ++for vector_length in $VECTOR_LENGTHS; do
 ++        if test $vector_length -eq 1; then
 ++                continue;
 ++        fi
 ++        for ftype in $TYPES; do
 ++                fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
 ++                for ttype in $TYPES; do
 ++                        tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
 ++                        if test $fbasetype = $tbasetype; then
 ++                                continue
 ++                        fi
 ++                        fvectortype=$fbasetype$vector_length
 ++                        tvectortype=$tbasetype$vector_length
 ++                        construct="($tbasetype)(v.s0)"
 ++                        if test $vector_length -gt 1; then
 ++                                construct="$construct, ($tbasetype)(v.s1)"
 ++                        fi
 ++                        if test $vector_length -gt 2; then
 ++                                construct="$construct, ($tbasetype)(v.s2)"
 ++                        fi
 ++                        if test $vector_length -gt 3; then
 ++                                construct="$construct, ($tbasetype)(v.s3)"
 ++                        fi
 ++                        if test $vector_length -gt 4; then
 ++                                construct="$construct, ($tbasetype)(v.s4)"
 ++                                construct="$construct, ($tbasetype)(v.s5)"
 ++                                construct="$construct, ($tbasetype)(v.s6)"
 ++                                construct="$construct, ($tbasetype)(v.s7)"
 ++                        fi
 ++                        if test $vector_length -gt 8; then
 ++                                construct="$construct, ($tbasetype)(v.s8)"
 ++                                construct="$construct, ($tbasetype)(v.s9)"
 ++                                construct="$construct, ($tbasetype)(v.sA)"
 ++                                construct="$construct, ($tbasetype)(v.sB)"
 ++                                construct="$construct, ($tbasetype)(v.sC)"
 ++                                construct="$construct, ($tbasetype)(v.sD)"
 ++                                construct="$construct, ($tbasetype)(v.sE)"
 ++                                construct="$construct, ($tbasetype)(v.sF)"
 ++                        fi
 ++                        
 ++                        echo "INLINE OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v) {"
 ++                        echo "  return ($tvectortype)($construct);"
 ++                        echo "}"
 ++                        echo
 ++                done
 ++        done
 ++done
- diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
- index d00de44..913917f 100644
- --- a/backend/src/ocl_stdlib.h
- +++ b/backend/src/ocl_stdlib.h
- @@ -1223,29 +1223,849 @@ INLINE OVERLOADABLE uint16 as_uint16(float16 v) {
++Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:37.838666758 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:39.250666695 +0200
++@@ -1223,29 +1223,849 @@
 + 
 + // ##END_AS##
 + 
 +-#define DEF(type, n, type2) type##n INLINE_OVERLOADABLE convert_##type##n(type2##n d) { \
 +-    return (type##n)((type)(d.s0), (type)(d.s1), (type)(d.s2), (type)(d.s3)); \
 +- }
 +-#define DEF2(type) DEF(type, 4, char); \
 +-                   DEF(type, 4, uchar); \
 +-                   DEF(type, 4, short); \
 +-                   DEF(type, 4, ushort); \
 +-                   DEF(type, 4, int); \
 +-                   DEF(type, 4, uint); \
 +-                   DEF(type, 4, long); \
 +-                   DEF(type, 4, ulong); \
 +-                   DEF(type, 4, float);
 +-DEF2(char);
 +-DEF2(uchar);
 +-DEF2(short);
 +-DEF2(ushort);
 +-DEF2(int);
 +-DEF2(uint);
 +-DEF2(long);
 +-DEF2(ulong);
 +-DEF2(float);
 +-#undef DEF2
 +-#undef DEF
 ++// ##BEGIN_CONVERT##
 ++INLINE OVERLOADABLE uint2 convert_uint2(int2 v) {
 ++  return (uint2)((uint)(v.s0), (uint)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE short2 convert_short2(int2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(int2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(int2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(int2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE float2 convert_float2(int2 v) {
 ++  return (float2)((float)(v.s0), (float)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE int2 convert_int2(uint2 v) {
 ++  return (int2)((int)(v.s0), (int)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE short2 convert_short2(uint2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(uint2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(uint2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(uint2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE float2 convert_float2(uint2 v) {
 ++  return (float2)((float)(v.s0), (float)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE int2 convert_int2(short2 v) {
 ++  return (int2)((int)(v.s0), (int)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 convert_uint2(short2 v) {
 ++  return (uint2)((uint)(v.s0), (uint)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(short2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(short2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(short2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE float2 convert_float2(short2 v) {
 ++  return (float2)((float)(v.s0), (float)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE int2 convert_int2(ushort2 v) {
 ++  return (int2)((int)(v.s0), (int)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 convert_uint2(ushort2 v) {
 ++  return (uint2)((uint)(v.s0), (uint)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE short2 convert_short2(ushort2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(ushort2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(ushort2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE float2 convert_float2(ushort2 v) {
 ++  return (float2)((float)(v.s0), (float)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE int2 convert_int2(char2 v) {
 ++  return (int2)((int)(v.s0), (int)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 convert_uint2(char2 v) {
 ++  return (uint2)((uint)(v.s0), (uint)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE short2 convert_short2(char2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(char2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(char2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE float2 convert_float2(char2 v) {
 ++  return (float2)((float)(v.s0), (float)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE int2 convert_int2(uchar2 v) {
 ++  return (int2)((int)(v.s0), (int)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 convert_uint2(uchar2 v) {
 ++  return (uint2)((uint)(v.s0), (uint)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE short2 convert_short2(uchar2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(uchar2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(uchar2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE float2 convert_float2(uchar2 v) {
 ++  return (float2)((float)(v.s0), (float)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE int2 convert_int2(float2 v) {
 ++  return (int2)((int)(v.s0), (int)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 convert_uint2(float2 v) {
 ++  return (uint2)((uint)(v.s0), (uint)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE short2 convert_short2(float2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(float2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 convert_uint3(int3 v) {
 ++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE short3 convert_short3(int3 v) {
 ++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort3 convert_ushort3(int3 v) {
 ++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE char3 convert_char3(int3 v) {
 ++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar3 convert_uchar3(int3 v) {
 ++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE float3 convert_float3(int3 v) {
 ++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE int3 convert_int3(uint3 v) {
 ++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE short3 convert_short3(uint3 v) {
 ++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort3 convert_ushort3(uint3 v) {
 ++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE char3 convert_char3(uint3 v) {
 ++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar3 convert_uchar3(uint3 v) {
 ++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE float3 convert_float3(uint3 v) {
 ++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE int3 convert_int3(short3 v) {
 ++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 convert_uint3(short3 v) {
 ++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort3 convert_ushort3(short3 v) {
 ++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE char3 convert_char3(short3 v) {
 ++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar3 convert_uchar3(short3 v) {
 ++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE float3 convert_float3(short3 v) {
 ++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE int3 convert_int3(ushort3 v) {
 ++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 convert_uint3(ushort3 v) {
 ++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE short3 convert_short3(ushort3 v) {
 ++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE char3 convert_char3(ushort3 v) {
 ++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar3 convert_uchar3(ushort3 v) {
 ++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE float3 convert_float3(ushort3 v) {
 ++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE int3 convert_int3(char3 v) {
 ++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 convert_uint3(char3 v) {
 ++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE short3 convert_short3(char3 v) {
 ++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort3 convert_ushort3(char3 v) {
 ++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar3 convert_uchar3(char3 v) {
 ++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE float3 convert_float3(char3 v) {
 ++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE int3 convert_int3(uchar3 v) {
 ++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 convert_uint3(uchar3 v) {
 ++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE short3 convert_short3(uchar3 v) {
 ++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort3 convert_ushort3(uchar3 v) {
 ++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE char3 convert_char3(uchar3 v) {
 ++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE float3 convert_float3(uchar3 v) {
 ++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE int3 convert_int3(float3 v) {
 ++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 convert_uint3(float3 v) {
 ++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE short3 convert_short3(float3 v) {
 ++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort3 convert_ushort3(float3 v) {
 ++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE char3 convert_char3(float3 v) {
 ++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar3 convert_uchar3(float3 v) {
 ++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 convert_uint4(int4 v) {
 ++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE short4 convert_short4(int4 v) {
 ++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 convert_ushort4(int4 v) {
 ++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE char4 convert_char4(int4 v) {
 ++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 convert_uchar4(int4 v) {
 ++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE float4 convert_float4(int4 v) {
 ++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE int4 convert_int4(uint4 v) {
 ++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE short4 convert_short4(uint4 v) {
 ++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 convert_ushort4(uint4 v) {
 ++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE char4 convert_char4(uint4 v) {
 ++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 convert_uchar4(uint4 v) {
 ++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE float4 convert_float4(uint4 v) {
 ++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE int4 convert_int4(short4 v) {
 ++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 convert_uint4(short4 v) {
 ++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 convert_ushort4(short4 v) {
 ++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE char4 convert_char4(short4 v) {
 ++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 convert_uchar4(short4 v) {
 ++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE float4 convert_float4(short4 v) {
 ++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE int4 convert_int4(ushort4 v) {
 ++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 convert_uint4(ushort4 v) {
 ++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE short4 convert_short4(ushort4 v) {
 ++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE char4 convert_char4(ushort4 v) {
 ++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 convert_uchar4(ushort4 v) {
 ++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE float4 convert_float4(ushort4 v) {
 ++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE int4 convert_int4(char4 v) {
 ++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 convert_uint4(char4 v) {
 ++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE short4 convert_short4(char4 v) {
 ++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 convert_ushort4(char4 v) {
 ++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 convert_uchar4(char4 v) {
 ++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE float4 convert_float4(char4 v) {
 ++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE int4 convert_int4(uchar4 v) {
 ++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 convert_uint4(uchar4 v) {
 ++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE short4 convert_short4(uchar4 v) {
 ++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 convert_ushort4(uchar4 v) {
 ++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE char4 convert_char4(uchar4 v) {
 ++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE float4 convert_float4(uchar4 v) {
 ++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE int4 convert_int4(float4 v) {
 ++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 convert_uint4(float4 v) {
 ++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE short4 convert_short4(float4 v) {
 ++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 convert_ushort4(float4 v) {
 ++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE char4 convert_char4(float4 v) {
 ++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 convert_uchar4(float4 v) {
 ++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 convert_uint8(int8 v) {
 ++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE short8 convert_short8(int8 v) {
 ++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 convert_ushort8(int8 v) {
 ++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE char8 convert_char8(int8 v) {
 ++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 convert_uchar8(int8 v) {
 ++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE float8 convert_float8(int8 v) {
 ++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE int8 convert_int8(uint8 v) {
 ++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE short8 convert_short8(uint8 v) {
 ++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 convert_ushort8(uint8 v) {
 ++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE char8 convert_char8(uint8 v) {
 ++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 convert_uchar8(uint8 v) {
 ++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE float8 convert_float8(uint8 v) {
 ++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE int8 convert_int8(short8 v) {
 ++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 convert_uint8(short8 v) {
 ++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 convert_ushort8(short8 v) {
 ++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE char8 convert_char8(short8 v) {
 ++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 convert_uchar8(short8 v) {
 ++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE float8 convert_float8(short8 v) {
 ++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE int8 convert_int8(ushort8 v) {
 ++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 convert_uint8(ushort8 v) {
 ++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE short8 convert_short8(ushort8 v) {
 ++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE char8 convert_char8(ushort8 v) {
 ++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 convert_uchar8(ushort8 v) {
 ++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE float8 convert_float8(ushort8 v) {
 ++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE int8 convert_int8(char8 v) {
 ++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 convert_uint8(char8 v) {
 ++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE short8 convert_short8(char8 v) {
 ++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 convert_ushort8(char8 v) {
 ++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 convert_uchar8(char8 v) {
 ++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE float8 convert_float8(char8 v) {
 ++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE int8 convert_int8(uchar8 v) {
 ++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 convert_uint8(uchar8 v) {
 ++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE short8 convert_short8(uchar8 v) {
 ++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 convert_ushort8(uchar8 v) {
 ++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE char8 convert_char8(uchar8 v) {
 ++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE float8 convert_float8(uchar8 v) {
 ++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE int8 convert_int8(float8 v) {
 ++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 convert_uint8(float8 v) {
 ++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE short8 convert_short8(float8 v) {
 ++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 convert_ushort8(float8 v) {
 ++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE char8 convert_char8(float8 v) {
 ++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 convert_uchar8(float8 v) {
 ++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 convert_uint16(int16 v) {
 ++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE short16 convert_short16(int16 v) {
 ++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 convert_ushort16(int16 v) {
 ++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE char16 convert_char16(int16 v) {
 ++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 convert_uchar16(int16 v) {
 ++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE float16 convert_float16(int16 v) {
 ++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE int16 convert_int16(uint16 v) {
 ++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE short16 convert_short16(uint16 v) {
 ++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 convert_ushort16(uint16 v) {
 ++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE char16 convert_char16(uint16 v) {
 ++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 convert_uchar16(uint16 v) {
 ++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE float16 convert_float16(uint16 v) {
 ++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE int16 convert_int16(short16 v) {
 ++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 convert_uint16(short16 v) {
 ++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 convert_ushort16(short16 v) {
 ++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE char16 convert_char16(short16 v) {
 ++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 convert_uchar16(short16 v) {
 ++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE float16 convert_float16(short16 v) {
 ++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE int16 convert_int16(ushort16 v) {
 ++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 convert_uint16(ushort16 v) {
 ++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE short16 convert_short16(ushort16 v) {
 ++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE char16 convert_char16(ushort16 v) {
 ++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 convert_uchar16(ushort16 v) {
 ++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE float16 convert_float16(ushort16 v) {
 ++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE int16 convert_int16(char16 v) {
 ++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 convert_uint16(char16 v) {
 ++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE short16 convert_short16(char16 v) {
 ++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 convert_ushort16(char16 v) {
 ++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 convert_uchar16(char16 v) {
 ++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE float16 convert_float16(char16 v) {
 ++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE int16 convert_int16(uchar16 v) {
 ++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 convert_uint16(uchar16 v) {
 ++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE short16 convert_short16(uchar16 v) {
 ++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 convert_ushort16(uchar16 v) {
 ++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE char16 convert_char16(uchar16 v) {
 ++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE float16 convert_float16(uchar16 v) {
 ++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE int16 convert_int16(float16 v) {
 ++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 convert_uint16(float16 v) {
 ++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE short16 convert_short16(float16 v) {
 ++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 convert_ushort16(float16 v) {
 ++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE char16 convert_char16(float16 v) {
 ++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 convert_uchar16(float16 v) {
 ++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
 ++}
 ++
 ++// ##END_CONVERT##
 ++
 + /////////////////////////////////////////////////////////////////////////////
 + // OpenCL preprocessor directives & macros
 + /////////////////////////////////////////////////////////////////////////////
- diff --git a/backend/src/update.sh b/backend/src/update.sh
- index 4f9af8c..0e5f8c0 100755
- --- a/backend/src/update.sh
- +++ b/backend/src/update.sh
++Index: beignet-0.1+git20130619+42967d2/backend/src/update.sh
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/update.sh	2013-06-19 21:04:37.838666758 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/update.sh	2013-06-19 21:04:39.250666695 +0200
 +@@ -1,2 +1,3 @@
 + #! /bin/sh -e
 + ./update_as.sh
 ++./update_convert.sh
- diff --git a/backend/src/update_convert.sh b/backend/src/update_convert.sh
- new file mode 100755
- index 0000000..f1fcd36
- --- /dev/null
- +++ b/backend/src/update_convert.sh
++Index: beignet-0.1+git20130619+42967d2/backend/src/update_convert.sh
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/backend/src/update_convert.sh	2013-06-19 21:04:39.250666695 +0200
 +@@ -0,0 +1,11 @@
 ++#! /bin/sh -e
 ++
 ++STDLIB_HEADER=ocl_stdlib.h
 ++
 ++exec >$STDLIB_HEADER.tmp
 ++sed -n -e '1,/##BEGIN_CONVERT##/p' $STDLIB_HEADER
 ++./gen_convert.sh
 ++sed -n -e '/##END_CONVERT##/,$p' $STDLIB_HEADER
 ++exec >&2
 ++
 ++mv $STDLIB_HEADER.tmp $STDLIB_HEADER
- -- 
- 1.7.10.4
- 
diff --cc debian/patches/0003-Add-long-and-ulong-types-to-generated-functions.patch
index 7e7f81d,0000000..83358df
mode 100644,000000..100644
--- a/debian/patches/0003-Add-long-and-ulong-types-to-generated-functions.patch
+++ b/debian/patches/0003-Add-long-and-ulong-types-to-generated-functions.patch
@@@ -1,1725 -1,0 +1,1722 @@@
- From 44161ff1568479390464c0b0a282f5aeeb86915d Mon Sep 17 00:00:00 2001
++From f3c047e020d2e9f6d84cbcc10b2391d980572e3d Mon Sep 17 00:00:00 2001
 +From: Simon Richter <Simon.Richter at hogyros.de>
 +Date: Tue, 14 May 2013 17:04:57 +0200
- Subject: [PATCH 3/4] Add long and ulong types to generated functions.
++Subject: [PATCH 03/12] Add long and ulong types to generated functions.
 +To: beignet at lists.freedesktop.org
 +
 +This enables all generated functions for 64 bit integers.
 +---
 + backend/src/genconfig.sh |    2 +-
 + backend/src/ocl_stdlib.h | 1248 +++++++++++++++++++++++++++++++++++++++++++++-
 + 2 files changed, 1234 insertions(+), 16 deletions(-)
 +
- diff --git a/backend/src/genconfig.sh b/backend/src/genconfig.sh
- index 60edafd..a3ba3f9 100644
- --- a/backend/src/genconfig.sh
- +++ b/backend/src/genconfig.sh
++Index: beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/genconfig.sh	2013-06-19 21:04:37.838666758 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh	2013-06-19 21:04:40.598666635 +0200
 +@@ -2,7 +2,7 @@
 + # This is to be sourced by the generation scripts
 + 
 + # Supported base types and their lengths
 +-TYPES="int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
 ++TYPES="long:8 ulong:8 int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
 + 
 + # Supported vector lengths
 + VECTOR_LENGTHS="1 2 3 4 8 16"
- diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
- index 913917f..2f55184 100644
- --- a/backend/src/ocl_stdlib.h
- +++ b/backend/src/ocl_stdlib.h
- @@ -486,6 +486,8 @@ INLINE OVERLOADABLE short3 as_short3(ushort3 v) {
++Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:39.250666695 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:40.602666635 +0200
++@@ -486,6 +486,8 @@
 + }
 + 
 + union _type_cast_8_b {
 ++  long _long;
 ++  ulong _ulong;
 +   int2 _int2;
 +   uint2 _uint2;
 +   short4 _short4;
- @@ -495,6 +497,114 @@ union _type_cast_8_b {
++@@ -495,6 +497,114 @@
 +   float2 _float2;
 + };
 + 
 ++INLINE OVERLOADABLE ulong as_ulong(long v) {
 ++  union _type_cast_8_b u;
 ++  u._long = v;
 ++  return u._ulong;
 ++}
 ++
 ++INLINE OVERLOADABLE int2 as_int2(long v) {
 ++  union _type_cast_8_b u;
 ++  u._long = v;
 ++  return u._int2;
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 as_uint2(long v) {
 ++  union _type_cast_8_b u;
 ++  u._long = v;
 ++  return u._uint2;
 ++}
 ++
 ++INLINE OVERLOADABLE short4 as_short4(long v) {
 ++  union _type_cast_8_b u;
 ++  u._long = v;
 ++  return u._short4;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 as_ushort4(long v) {
 ++  union _type_cast_8_b u;
 ++  u._long = v;
 ++  return u._ushort4;
 ++}
 ++
 ++INLINE OVERLOADABLE char8 as_char8(long v) {
 ++  union _type_cast_8_b u;
 ++  u._long = v;
 ++  return u._char8;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 as_uchar8(long v) {
 ++  union _type_cast_8_b u;
 ++  u._long = v;
 ++  return u._uchar8;
 ++}
 ++
 ++INLINE OVERLOADABLE float2 as_float2(long v) {
 ++  union _type_cast_8_b u;
 ++  u._long = v;
 ++  return u._float2;
 ++}
 ++
 ++INLINE OVERLOADABLE long as_long(ulong v) {
 ++  union _type_cast_8_b u;
 ++  u._ulong = v;
 ++  return u._long;
 ++}
 ++
 ++INLINE OVERLOADABLE int2 as_int2(ulong v) {
 ++  union _type_cast_8_b u;
 ++  u._ulong = v;
 ++  return u._int2;
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 as_uint2(ulong v) {
 ++  union _type_cast_8_b u;
 ++  u._ulong = v;
 ++  return u._uint2;
 ++}
 ++
 ++INLINE OVERLOADABLE short4 as_short4(ulong v) {
 ++  union _type_cast_8_b u;
 ++  u._ulong = v;
 ++  return u._short4;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 as_ushort4(ulong v) {
 ++  union _type_cast_8_b u;
 ++  u._ulong = v;
 ++  return u._ushort4;
 ++}
 ++
 ++INLINE OVERLOADABLE char8 as_char8(ulong v) {
 ++  union _type_cast_8_b u;
 ++  u._ulong = v;
 ++  return u._char8;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 as_uchar8(ulong v) {
 ++  union _type_cast_8_b u;
 ++  u._ulong = v;
 ++  return u._uchar8;
 ++}
 ++
 ++INLINE OVERLOADABLE float2 as_float2(ulong v) {
 ++  union _type_cast_8_b u;
 ++  u._ulong = v;
 ++  return u._float2;
 ++}
 ++
 ++INLINE OVERLOADABLE long as_long(int2 v) {
 ++  union _type_cast_8_b u;
 ++  u._int2 = v;
 ++  return u._long;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong as_ulong(int2 v) {
 ++  union _type_cast_8_b u;
 ++  u._int2 = v;
 ++  return u._ulong;
 ++}
 ++
 + INLINE OVERLOADABLE uint2 as_uint2(int2 v) {
 +   union _type_cast_8_b u;
 +   u._int2 = v;
- @@ -531,6 +641,18 @@ INLINE OVERLOADABLE float2 as_float2(int2 v) {
++@@ -531,6 +641,18 @@
 +   return u._float2;
 + }
 + 
 ++INLINE OVERLOADABLE long as_long(uint2 v) {
 ++  union _type_cast_8_b u;
 ++  u._uint2 = v;
 ++  return u._long;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong as_ulong(uint2 v) {
 ++  union _type_cast_8_b u;
 ++  u._uint2 = v;
 ++  return u._ulong;
 ++}
 ++
 + INLINE OVERLOADABLE int2 as_int2(uint2 v) {
 +   union _type_cast_8_b u;
 +   u._uint2 = v;
- @@ -567,6 +689,18 @@ INLINE OVERLOADABLE float2 as_float2(uint2 v) {
++@@ -567,6 +689,18 @@
 +   return u._float2;
 + }
 + 
 ++INLINE OVERLOADABLE long as_long(short4 v) {
 ++  union _type_cast_8_b u;
 ++  u._short4 = v;
 ++  return u._long;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong as_ulong(short4 v) {
 ++  union _type_cast_8_b u;
 ++  u._short4 = v;
 ++  return u._ulong;
 ++}
 ++
 + INLINE OVERLOADABLE int2 as_int2(short4 v) {
 +   union _type_cast_8_b u;
 +   u._short4 = v;
- @@ -603,6 +737,18 @@ INLINE OVERLOADABLE float2 as_float2(short4 v) {
++@@ -603,6 +737,18 @@
 +   return u._float2;
 + }
 + 
 ++INLINE OVERLOADABLE long as_long(ushort4 v) {
 ++  union _type_cast_8_b u;
 ++  u._ushort4 = v;
 ++  return u._long;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong as_ulong(ushort4 v) {
 ++  union _type_cast_8_b u;
 ++  u._ushort4 = v;
 ++  return u._ulong;
 ++}
 ++
 + INLINE OVERLOADABLE int2 as_int2(ushort4 v) {
 +   union _type_cast_8_b u;
 +   u._ushort4 = v;
- @@ -639,6 +785,18 @@ INLINE OVERLOADABLE float2 as_float2(ushort4 v) {
++@@ -639,6 +785,18 @@
 +   return u._float2;
 + }
 + 
 ++INLINE OVERLOADABLE long as_long(char8 v) {
 ++  union _type_cast_8_b u;
 ++  u._char8 = v;
 ++  return u._long;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong as_ulong(char8 v) {
 ++  union _type_cast_8_b u;
 ++  u._char8 = v;
 ++  return u._ulong;
 ++}
 ++
 + INLINE OVERLOADABLE int2 as_int2(char8 v) {
 +   union _type_cast_8_b u;
 +   u._char8 = v;
- @@ -675,6 +833,18 @@ INLINE OVERLOADABLE float2 as_float2(char8 v) {
++@@ -675,6 +833,18 @@
 +   return u._float2;
 + }
 + 
 ++INLINE OVERLOADABLE long as_long(uchar8 v) {
 ++  union _type_cast_8_b u;
 ++  u._uchar8 = v;
 ++  return u._long;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong as_ulong(uchar8 v) {
 ++  union _type_cast_8_b u;
 ++  u._uchar8 = v;
 ++  return u._ulong;
 ++}
 ++
 + INLINE OVERLOADABLE int2 as_int2(uchar8 v) {
 +   union _type_cast_8_b u;
 +   u._uchar8 = v;
- @@ -711,6 +881,18 @@ INLINE OVERLOADABLE float2 as_float2(uchar8 v) {
++@@ -711,6 +881,18 @@
 +   return u._float2;
 + }
 + 
 ++INLINE OVERLOADABLE long as_long(float2 v) {
 ++  union _type_cast_8_b u;
 ++  u._float2 = v;
 ++  return u._long;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong as_ulong(float2 v) {
 ++  union _type_cast_8_b u;
 ++  u._float2 = v;
 ++  return u._ulong;
 ++}
 ++
 + INLINE OVERLOADABLE int2 as_int2(float2 v) {
 +   union _type_cast_8_b u;
 +   u._float2 = v;
- @@ -790,6 +972,8 @@ INLINE OVERLOADABLE uint3 as_uint3(float3 v) {
++@@ -790,6 +972,8 @@
 + }
 + 
 + union _type_cast_16_b {
 ++  long2 _long2;
 ++  ulong2 _ulong2;
 +   int4 _int4;
 +   uint4 _uint4;
 +   short8 _short8;
- @@ -799,6 +983,114 @@ union _type_cast_16_b {
++@@ -799,6 +983,114 @@
 +   float4 _float4;
 + };
 + 
 ++INLINE OVERLOADABLE ulong2 as_ulong2(long2 v) {
 ++  union _type_cast_16_b u;
 ++  u._long2 = v;
 ++  return u._ulong2;
 ++}
 ++
 ++INLINE OVERLOADABLE int4 as_int4(long2 v) {
 ++  union _type_cast_16_b u;
 ++  u._long2 = v;
 ++  return u._int4;
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 as_uint4(long2 v) {
 ++  union _type_cast_16_b u;
 ++  u._long2 = v;
 ++  return u._uint4;
 ++}
 ++
 ++INLINE OVERLOADABLE short8 as_short8(long2 v) {
 ++  union _type_cast_16_b u;
 ++  u._long2 = v;
 ++  return u._short8;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 as_ushort8(long2 v) {
 ++  union _type_cast_16_b u;
 ++  u._long2 = v;
 ++  return u._ushort8;
 ++}
 ++
 ++INLINE OVERLOADABLE char16 as_char16(long2 v) {
 ++  union _type_cast_16_b u;
 ++  u._long2 = v;
 ++  return u._char16;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 as_uchar16(long2 v) {
 ++  union _type_cast_16_b u;
 ++  u._long2 = v;
 ++  return u._uchar16;
 ++}
 ++
 ++INLINE OVERLOADABLE float4 as_float4(long2 v) {
 ++  union _type_cast_16_b u;
 ++  u._long2 = v;
 ++  return u._float4;
 ++}
 ++
 ++INLINE OVERLOADABLE long2 as_long2(ulong2 v) {
 ++  union _type_cast_16_b u;
 ++  u._ulong2 = v;
 ++  return u._long2;
 ++}
 ++
 ++INLINE OVERLOADABLE int4 as_int4(ulong2 v) {
 ++  union _type_cast_16_b u;
 ++  u._ulong2 = v;
 ++  return u._int4;
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 as_uint4(ulong2 v) {
 ++  union _type_cast_16_b u;
 ++  u._ulong2 = v;
 ++  return u._uint4;
 ++}
 ++
 ++INLINE OVERLOADABLE short8 as_short8(ulong2 v) {
 ++  union _type_cast_16_b u;
 ++  u._ulong2 = v;
 ++  return u._short8;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 as_ushort8(ulong2 v) {
 ++  union _type_cast_16_b u;
 ++  u._ulong2 = v;
 ++  return u._ushort8;
 ++}
 ++
 ++INLINE OVERLOADABLE char16 as_char16(ulong2 v) {
 ++  union _type_cast_16_b u;
 ++  u._ulong2 = v;
 ++  return u._char16;
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 as_uchar16(ulong2 v) {
 ++  union _type_cast_16_b u;
 ++  u._ulong2 = v;
 ++  return u._uchar16;
 ++}
 ++
 ++INLINE OVERLOADABLE float4 as_float4(ulong2 v) {
 ++  union _type_cast_16_b u;
 ++  u._ulong2 = v;
 ++  return u._float4;
 ++}
 ++
 ++INLINE OVERLOADABLE long2 as_long2(int4 v) {
 ++  union _type_cast_16_b u;
 ++  u._int4 = v;
 ++  return u._long2;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 as_ulong2(int4 v) {
 ++  union _type_cast_16_b u;
 ++  u._int4 = v;
 ++  return u._ulong2;
 ++}
 ++
 + INLINE OVERLOADABLE uint4 as_uint4(int4 v) {
 +   union _type_cast_16_b u;
 +   u._int4 = v;
- @@ -835,6 +1127,18 @@ INLINE OVERLOADABLE float4 as_float4(int4 v) {
++@@ -835,6 +1127,18 @@
 +   return u._float4;
 + }
 + 
 ++INLINE OVERLOADABLE long2 as_long2(uint4 v) {
 ++  union _type_cast_16_b u;
 ++  u._uint4 = v;
 ++  return u._long2;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 as_ulong2(uint4 v) {
 ++  union _type_cast_16_b u;
 ++  u._uint4 = v;
 ++  return u._ulong2;
 ++}
 ++
 + INLINE OVERLOADABLE int4 as_int4(uint4 v) {
 +   union _type_cast_16_b u;
 +   u._uint4 = v;
- @@ -871,6 +1175,18 @@ INLINE OVERLOADABLE float4 as_float4(uint4 v) {
++@@ -871,6 +1175,18 @@
 +   return u._float4;
 + }
 + 
 ++INLINE OVERLOADABLE long2 as_long2(short8 v) {
 ++  union _type_cast_16_b u;
 ++  u._short8 = v;
 ++  return u._long2;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 as_ulong2(short8 v) {
 ++  union _type_cast_16_b u;
 ++  u._short8 = v;
 ++  return u._ulong2;
 ++}
 ++
 + INLINE OVERLOADABLE int4 as_int4(short8 v) {
 +   union _type_cast_16_b u;
 +   u._short8 = v;
- @@ -907,6 +1223,18 @@ INLINE OVERLOADABLE float4 as_float4(short8 v) {
++@@ -907,6 +1223,18 @@
 +   return u._float4;
 + }
 + 
 ++INLINE OVERLOADABLE long2 as_long2(ushort8 v) {
 ++  union _type_cast_16_b u;
 ++  u._ushort8 = v;
 ++  return u._long2;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 as_ulong2(ushort8 v) {
 ++  union _type_cast_16_b u;
 ++  u._ushort8 = v;
 ++  return u._ulong2;
 ++}
 ++
 + INLINE OVERLOADABLE int4 as_int4(ushort8 v) {
 +   union _type_cast_16_b u;
 +   u._ushort8 = v;
- @@ -943,6 +1271,18 @@ INLINE OVERLOADABLE float4 as_float4(ushort8 v) {
++@@ -943,6 +1271,18 @@
 +   return u._float4;
 + }
 + 
 ++INLINE OVERLOADABLE long2 as_long2(char16 v) {
 ++  union _type_cast_16_b u;
 ++  u._char16 = v;
 ++  return u._long2;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 as_ulong2(char16 v) {
 ++  union _type_cast_16_b u;
 ++  u._char16 = v;
 ++  return u._ulong2;
 ++}
 ++
 + INLINE OVERLOADABLE int4 as_int4(char16 v) {
 +   union _type_cast_16_b u;
 +   u._char16 = v;
- @@ -979,6 +1319,18 @@ INLINE OVERLOADABLE float4 as_float4(char16 v) {
++@@ -979,6 +1319,18 @@
 +   return u._float4;
 + }
 + 
 ++INLINE OVERLOADABLE long2 as_long2(uchar16 v) {
 ++  union _type_cast_16_b u;
 ++  u._uchar16 = v;
 ++  return u._long2;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 as_ulong2(uchar16 v) {
 ++  union _type_cast_16_b u;
 ++  u._uchar16 = v;
 ++  return u._ulong2;
 ++}
 ++
 + INLINE OVERLOADABLE int4 as_int4(uchar16 v) {
 +   union _type_cast_16_b u;
 +   u._uchar16 = v;
- @@ -1015,6 +1367,18 @@ INLINE OVERLOADABLE float4 as_float4(uchar16 v) {
++@@ -1015,6 +1367,18 @@
 +   return u._float4;
 + }
 + 
 ++INLINE OVERLOADABLE long2 as_long2(float4 v) {
 ++  union _type_cast_16_b u;
 ++  u._float4 = v;
 ++  return u._long2;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 as_ulong2(float4 v) {
 ++  union _type_cast_16_b u;
 ++  u._float4 = v;
 ++  return u._ulong2;
 ++}
 ++
 + INLINE OVERLOADABLE int4 as_int4(float4 v) {
 +   union _type_cast_16_b u;
 +   u._float4 = v;
- @@ -1051,7 +1415,26 @@ INLINE OVERLOADABLE uchar16 as_uchar16(float4 v) {
++@@ -1051,7 +1415,26 @@
 +   return u._uchar16;
 + }
 + 
 ++union _type_cast_24_b {
 ++  long3 _long3;
 ++  ulong3 _ulong3;
 ++};
 ++
 ++INLINE OVERLOADABLE ulong3 as_ulong3(long3 v) {
 ++  union _type_cast_24_b u;
 ++  u._long3 = v;
 ++  return u._ulong3;
 ++}
 ++
 ++INLINE OVERLOADABLE long3 as_long3(ulong3 v) {
 ++  union _type_cast_24_b u;
 ++  u._ulong3 = v;
 ++  return u._long3;
 ++}
 ++
 + union _type_cast_32_b {
 ++  long4 _long4;
 ++  ulong4 _ulong4;
 +   int8 _int8;
 +   uint8 _uint8;
 +   short16 _short16;
- @@ -1059,30 +1442,126 @@ union _type_cast_32_b {
++@@ -1059,30 +1442,126 @@
 +   float8 _float8;
 + };
 + 
 +-INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
 ++INLINE OVERLOADABLE ulong4 as_ulong4(long4 v) {
 +   union _type_cast_32_b u;
 +-  u._int8 = v;
 ++  u._long4 = v;
 ++  return u._ulong4;
 ++}
 ++
 ++INLINE OVERLOADABLE int8 as_int8(long4 v) {
 ++  union _type_cast_32_b u;
 ++  u._long4 = v;
 ++  return u._int8;
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 as_uint8(long4 v) {
 ++  union _type_cast_32_b u;
 ++  u._long4 = v;
 +   return u._uint8;
 + }
 + 
 +-INLINE OVERLOADABLE short16 as_short16(int8 v) {
 ++INLINE OVERLOADABLE short16 as_short16(long4 v) {
 +   union _type_cast_32_b u;
 +-  u._int8 = v;
 ++  u._long4 = v;
 +   return u._short16;
 + }
 + 
 +-INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
 ++INLINE OVERLOADABLE ushort16 as_ushort16(long4 v) {
 +   union _type_cast_32_b u;
 +-  u._int8 = v;
 ++  u._long4 = v;
 +   return u._ushort16;
 + }
 + 
 +-INLINE OVERLOADABLE float8 as_float8(int8 v) {
 ++INLINE OVERLOADABLE float8 as_float8(long4 v) {
 ++  union _type_cast_32_b u;
 ++  u._long4 = v;
 ++  return u._float8;
 ++}
 ++
 ++INLINE OVERLOADABLE long4 as_long4(ulong4 v) {
 ++  union _type_cast_32_b u;
 ++  u._ulong4 = v;
 ++  return u._long4;
 ++}
 ++
 ++INLINE OVERLOADABLE int8 as_int8(ulong4 v) {
 ++  union _type_cast_32_b u;
 ++  u._ulong4 = v;
 ++  return u._int8;
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 as_uint8(ulong4 v) {
 ++  union _type_cast_32_b u;
 ++  u._ulong4 = v;
 ++  return u._uint8;
 ++}
 ++
 ++INLINE OVERLOADABLE short16 as_short16(ulong4 v) {
 ++  union _type_cast_32_b u;
 ++  u._ulong4 = v;
 ++  return u._short16;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 as_ushort16(ulong4 v) {
 ++  union _type_cast_32_b u;
 ++  u._ulong4 = v;
 ++  return u._ushort16;
 ++}
 ++
 ++INLINE OVERLOADABLE float8 as_float8(ulong4 v) {
 ++  union _type_cast_32_b u;
 ++  u._ulong4 = v;
 ++  return u._float8;
 ++}
 ++
 ++INLINE OVERLOADABLE long4 as_long4(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._long4;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 as_ulong4(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._ulong4;
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._uint8;
 ++}
 ++
 ++INLINE OVERLOADABLE short16 as_short16(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._short16;
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
 ++  union _type_cast_32_b u;
 ++  u._int8 = v;
 ++  return u._ushort16;
 ++}
 ++
 ++INLINE OVERLOADABLE float8 as_float8(int8 v) {
 +   union _type_cast_32_b u;
 +   u._int8 = v;
 +   return u._float8;
 + }
 + 
 ++INLINE OVERLOADABLE long4 as_long4(uint8 v) {
 ++  union _type_cast_32_b u;
 ++  u._uint8 = v;
 ++  return u._long4;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 as_ulong4(uint8 v) {
 ++  union _type_cast_32_b u;
 ++  u._uint8 = v;
 ++  return u._ulong4;
 ++}
 ++
 + INLINE OVERLOADABLE int8 as_int8(uint8 v) {
 +   union _type_cast_32_b u;
 +   u._uint8 = v;
- @@ -1107,6 +1586,18 @@ INLINE OVERLOADABLE float8 as_float8(uint8 v) {
++@@ -1107,6 +1586,18 @@
 +   return u._float8;
 + }
 + 
 ++INLINE OVERLOADABLE long4 as_long4(short16 v) {
 ++  union _type_cast_32_b u;
 ++  u._short16 = v;
 ++  return u._long4;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 as_ulong4(short16 v) {
 ++  union _type_cast_32_b u;
 ++  u._short16 = v;
 ++  return u._ulong4;
 ++}
 ++
 + INLINE OVERLOADABLE int8 as_int8(short16 v) {
 +   union _type_cast_32_b u;
 +   u._short16 = v;
- @@ -1131,6 +1622,18 @@ INLINE OVERLOADABLE float8 as_float8(short16 v) {
++@@ -1131,6 +1622,18 @@
 +   return u._float8;
 + }
 + 
 ++INLINE OVERLOADABLE long4 as_long4(ushort16 v) {
 ++  union _type_cast_32_b u;
 ++  u._ushort16 = v;
 ++  return u._long4;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 as_ulong4(ushort16 v) {
 ++  union _type_cast_32_b u;
 ++  u._ushort16 = v;
 ++  return u._ulong4;
 ++}
 ++
 + INLINE OVERLOADABLE int8 as_int8(ushort16 v) {
 +   union _type_cast_32_b u;
 +   u._ushort16 = v;
- @@ -1155,6 +1658,18 @@ INLINE OVERLOADABLE float8 as_float8(ushort16 v) {
++@@ -1155,6 +1658,18 @@
 +   return u._float8;
 + }
 + 
 ++INLINE OVERLOADABLE long4 as_long4(float8 v) {
 ++  union _type_cast_32_b u;
 ++  u._float8 = v;
 ++  return u._long4;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 as_ulong4(float8 v) {
 ++  union _type_cast_32_b u;
 ++  u._float8 = v;
 ++  return u._ulong4;
 ++}
 ++
 + INLINE OVERLOADABLE int8 as_int8(float8 v) {
 +   union _type_cast_32_b u;
 +   u._float8 = v;
- @@ -1180,11 +1695,73 @@ INLINE OVERLOADABLE ushort16 as_ushort16(float8 v) {
++@@ -1180,11 +1695,73 @@
 + }
 + 
 + union _type_cast_64_b {
 ++  long8 _long8;
 ++  ulong8 _ulong8;
 +   int16 _int16;
 +   uint16 _uint16;
 +   float16 _float16;
 + };
 + 
 ++INLINE OVERLOADABLE ulong8 as_ulong8(long8 v) {
 ++  union _type_cast_64_b u;
 ++  u._long8 = v;
 ++  return u._ulong8;
 ++}
 ++
 ++INLINE OVERLOADABLE int16 as_int16(long8 v) {
 ++  union _type_cast_64_b u;
 ++  u._long8 = v;
 ++  return u._int16;
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 as_uint16(long8 v) {
 ++  union _type_cast_64_b u;
 ++  u._long8 = v;
 ++  return u._uint16;
 ++}
 ++
 ++INLINE OVERLOADABLE float16 as_float16(long8 v) {
 ++  union _type_cast_64_b u;
 ++  u._long8 = v;
 ++  return u._float16;
 ++}
 ++
 ++INLINE OVERLOADABLE long8 as_long8(ulong8 v) {
 ++  union _type_cast_64_b u;
 ++  u._ulong8 = v;
 ++  return u._long8;
 ++}
 ++
 ++INLINE OVERLOADABLE int16 as_int16(ulong8 v) {
 ++  union _type_cast_64_b u;
 ++  u._ulong8 = v;
 ++  return u._int16;
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 as_uint16(ulong8 v) {
 ++  union _type_cast_64_b u;
 ++  u._ulong8 = v;
 ++  return u._uint16;
 ++}
 ++
 ++INLINE OVERLOADABLE float16 as_float16(ulong8 v) {
 ++  union _type_cast_64_b u;
 ++  u._ulong8 = v;
 ++  return u._float16;
 ++}
 ++
 ++INLINE OVERLOADABLE long8 as_long8(int16 v) {
 ++  union _type_cast_64_b u;
 ++  u._int16 = v;
 ++  return u._long8;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 as_ulong8(int16 v) {
 ++  union _type_cast_64_b u;
 ++  u._int16 = v;
 ++  return u._ulong8;
 ++}
 ++
 + INLINE OVERLOADABLE uint16 as_uint16(int16 v) {
 +   union _type_cast_64_b u;
 +   u._int16 = v;
- @@ -1197,6 +1774,18 @@ INLINE OVERLOADABLE float16 as_float16(int16 v) {
++@@ -1197,6 +1774,18 @@
 +   return u._float16;
 + }
 + 
 ++INLINE OVERLOADABLE long8 as_long8(uint16 v) {
 ++  union _type_cast_64_b u;
 ++  u._uint16 = v;
 ++  return u._long8;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 as_ulong8(uint16 v) {
 ++  union _type_cast_64_b u;
 ++  u._uint16 = v;
 ++  return u._ulong8;
 ++}
 ++
 + INLINE OVERLOADABLE int16 as_int16(uint16 v) {
 +   union _type_cast_64_b u;
 +   u._uint16 = v;
- @@ -1209,6 +1798,18 @@ INLINE OVERLOADABLE float16 as_float16(uint16 v) {
++@@ -1209,6 +1798,18 @@
 +   return u._float16;
 + }
 + 
 ++INLINE OVERLOADABLE long8 as_long8(float16 v) {
 ++  union _type_cast_64_b u;
 ++  u._float16 = v;
 ++  return u._long8;
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 as_ulong8(float16 v) {
 ++  union _type_cast_64_b u;
 ++  u._float16 = v;
 ++  return u._ulong8;
 ++}
 ++
 + INLINE OVERLOADABLE int16 as_int16(float16 v) {
 +   union _type_cast_64_b u;
 +   u._float16 = v;
- @@ -1221,9 +1822,98 @@ INLINE OVERLOADABLE uint16 as_uint16(float16 v) {
++@@ -1221,9 +1822,98 @@
 +   return u._uint16;
 + }
 + 
 ++union _type_cast_128_b {
 ++  long16 _long16;
 ++  ulong16 _ulong16;
 ++};
 ++
 ++INLINE OVERLOADABLE ulong16 as_ulong16(long16 v) {
 ++  union _type_cast_128_b u;
 ++  u._long16 = v;
 ++  return u._ulong16;
 ++}
 ++
 ++INLINE OVERLOADABLE long16 as_long16(ulong16 v) {
 ++  union _type_cast_128_b u;
 ++  u._ulong16 = v;
 ++  return u._long16;
 ++}
 ++
 + // ##END_AS##
 + 
 + // ##BEGIN_CONVERT##
 ++INLINE OVERLOADABLE ulong2 convert_ulong2(long2 v) {
 ++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE int2 convert_int2(long2 v) {
 ++  return (int2)((int)(v.s0), (int)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 convert_uint2(long2 v) {
 ++  return (uint2)((uint)(v.s0), (uint)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE short2 convert_short2(long2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(long2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(long2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(long2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE float2 convert_float2(long2 v) {
 ++  return (float2)((float)(v.s0), (float)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE long2 convert_long2(ulong2 v) {
 ++  return (long2)((long)(v.s0), (long)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE int2 convert_int2(ulong2 v) {
 ++  return (int2)((int)(v.s0), (int)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uint2 convert_uint2(ulong2 v) {
 ++  return (uint2)((uint)(v.s0), (uint)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE short2 convert_short2(ulong2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(ulong2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(ulong2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(ulong2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE float2 convert_float2(ulong2 v) {
 ++  return (float2)((float)(v.s0), (float)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE long2 convert_long2(int2 v) {
 ++  return (long2)((long)(v.s0), (long)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 convert_ulong2(int2 v) {
 ++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
 ++}
 ++
 + INLINE OVERLOADABLE uint2 convert_uint2(int2 v) {
 +   return (uint2)((uint)(v.s0), (uint)(v.s1));
 + }
- @@ -1248,6 +1938,14 @@ INLINE OVERLOADABLE float2 convert_float2(int2 v) {
++@@ -1248,6 +1938,14 @@
 +   return (float2)((float)(v.s0), (float)(v.s1));
 + }
 + 
 ++INLINE OVERLOADABLE long2 convert_long2(uint2 v) {
 ++  return (long2)((long)(v.s0), (long)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 convert_ulong2(uint2 v) {
 ++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
 ++}
 ++
 + INLINE OVERLOADABLE int2 convert_int2(uint2 v) {
 +   return (int2)((int)(v.s0), (int)(v.s1));
 + }
- @@ -1272,6 +1970,14 @@ INLINE OVERLOADABLE float2 convert_float2(uint2 v) {
++@@ -1272,6 +1970,14 @@
 +   return (float2)((float)(v.s0), (float)(v.s1));
 + }
 + 
 ++INLINE OVERLOADABLE long2 convert_long2(short2 v) {
 ++  return (long2)((long)(v.s0), (long)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 convert_ulong2(short2 v) {
 ++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
 ++}
 ++
 + INLINE OVERLOADABLE int2 convert_int2(short2 v) {
 +   return (int2)((int)(v.s0), (int)(v.s1));
 + }
- @@ -1296,6 +2002,14 @@ INLINE OVERLOADABLE float2 convert_float2(short2 v) {
++@@ -1296,6 +2002,14 @@
 +   return (float2)((float)(v.s0), (float)(v.s1));
 + }
 + 
 ++INLINE OVERLOADABLE long2 convert_long2(ushort2 v) {
 ++  return (long2)((long)(v.s0), (long)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 convert_ulong2(ushort2 v) {
 ++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
 ++}
 ++
 + INLINE OVERLOADABLE int2 convert_int2(ushort2 v) {
 +   return (int2)((int)(v.s0), (int)(v.s1));
 + }
- @@ -1320,6 +2034,14 @@ INLINE OVERLOADABLE float2 convert_float2(ushort2 v) {
++@@ -1320,6 +2034,14 @@
 +   return (float2)((float)(v.s0), (float)(v.s1));
 + }
 + 
 ++INLINE OVERLOADABLE long2 convert_long2(char2 v) {
 ++  return (long2)((long)(v.s0), (long)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 convert_ulong2(char2 v) {
 ++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
 ++}
 ++
 + INLINE OVERLOADABLE int2 convert_int2(char2 v) {
 +   return (int2)((int)(v.s0), (int)(v.s1));
 + }
- @@ -1344,6 +2066,14 @@ INLINE OVERLOADABLE float2 convert_float2(char2 v) {
++@@ -1344,6 +2066,14 @@
 +   return (float2)((float)(v.s0), (float)(v.s1));
 + }
 + 
 ++INLINE OVERLOADABLE long2 convert_long2(uchar2 v) {
 ++  return (long2)((long)(v.s0), (long)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 convert_ulong2(uchar2 v) {
 ++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
 ++}
 ++
 + INLINE OVERLOADABLE int2 convert_int2(uchar2 v) {
 +   return (int2)((int)(v.s0), (int)(v.s1));
 + }
- @@ -1368,6 +2098,14 @@ INLINE OVERLOADABLE float2 convert_float2(uchar2 v) {
++@@ -1368,6 +2098,14 @@
 +   return (float2)((float)(v.s0), (float)(v.s1));
 + }
 + 
 ++INLINE OVERLOADABLE long2 convert_long2(float2 v) {
 ++  return (long2)((long)(v.s0), (long)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong2 convert_ulong2(float2 v) {
 ++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
 ++}
 ++
 + INLINE OVERLOADABLE int2 convert_int2(float2 v) {
 +   return (int2)((int)(v.s0), (int)(v.s1));
 + }
- @@ -1376,20 +2114,92 @@ INLINE OVERLOADABLE uint2 convert_uint2(float2 v) {
++@@ -1376,20 +2114,92 @@
 +   return (uint2)((uint)(v.s0), (uint)(v.s1));
 + }
 + 
 +-INLINE OVERLOADABLE short2 convert_short2(float2 v) {
 +-  return (short2)((short)(v.s0), (short)(v.s1));
 ++INLINE OVERLOADABLE short2 convert_short2(float2 v) {
 ++  return (short2)((short)(v.s0), (short)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
 ++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE char2 convert_char2(float2 v) {
 ++  return (char2)((char)(v.s0), (char)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
 ++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong3 convert_ulong3(long3 v) {
 ++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE int3 convert_int3(long3 v) {
 ++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 convert_uint3(long3 v) {
 ++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE short3 convert_short3(long3 v) {
 ++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort3 convert_ushort3(long3 v) {
 ++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE char3 convert_char3(long3 v) {
 ++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar3 convert_uchar3(long3 v) {
 ++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE float3 convert_float3(long3 v) {
 ++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE long3 convert_long3(ulong3 v) {
 ++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE int3 convert_int3(ulong3 v) {
 ++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uint3 convert_uint3(ulong3 v) {
 ++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE short3 convert_short3(ulong3 v) {
 ++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort3 convert_ushort3(ulong3 v) {
 ++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE char3 convert_char3(ulong3 v) {
 ++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar3 convert_uchar3(ulong3 v) {
 ++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 + }
 + 
 +-INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
 +-  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
 ++INLINE OVERLOADABLE float3 convert_float3(ulong3 v) {
 ++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 + }
 + 
 +-INLINE OVERLOADABLE char2 convert_char2(float2 v) {
 +-  return (char2)((char)(v.s0), (char)(v.s1));
 ++INLINE OVERLOADABLE long3 convert_long3(int3 v) {
 ++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
 + }
 + 
 +-INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
 +-  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
 ++INLINE OVERLOADABLE ulong3 convert_ulong3(int3 v) {
 ++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
 + }
 + 
 + INLINE OVERLOADABLE uint3 convert_uint3(int3 v) {
- @@ -1416,6 +2226,14 @@ INLINE OVERLOADABLE float3 convert_float3(int3 v) {
++@@ -1416,6 +2226,14 @@
 +   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 + }
 + 
 ++INLINE OVERLOADABLE long3 convert_long3(uint3 v) {
 ++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong3 convert_ulong3(uint3 v) {
 ++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
 ++}
 ++
 + INLINE OVERLOADABLE int3 convert_int3(uint3 v) {
 +   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 + }
- @@ -1440,6 +2258,14 @@ INLINE OVERLOADABLE float3 convert_float3(uint3 v) {
++@@ -1440,6 +2258,14 @@
 +   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 + }
 + 
 ++INLINE OVERLOADABLE long3 convert_long3(short3 v) {
 ++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong3 convert_ulong3(short3 v) {
 ++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
 ++}
 ++
 + INLINE OVERLOADABLE int3 convert_int3(short3 v) {
 +   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 + }
- @@ -1464,6 +2290,14 @@ INLINE OVERLOADABLE float3 convert_float3(short3 v) {
++@@ -1464,6 +2290,14 @@
 +   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 + }
 + 
 ++INLINE OVERLOADABLE long3 convert_long3(ushort3 v) {
 ++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong3 convert_ulong3(ushort3 v) {
 ++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
 ++}
 ++
 + INLINE OVERLOADABLE int3 convert_int3(ushort3 v) {
 +   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 + }
- @@ -1488,6 +2322,14 @@ INLINE OVERLOADABLE float3 convert_float3(ushort3 v) {
++@@ -1488,6 +2322,14 @@
 +   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 + }
 + 
 ++INLINE OVERLOADABLE long3 convert_long3(char3 v) {
 ++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong3 convert_ulong3(char3 v) {
 ++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
 ++}
 ++
 + INLINE OVERLOADABLE int3 convert_int3(char3 v) {
 +   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 + }
- @@ -1512,6 +2354,14 @@ INLINE OVERLOADABLE float3 convert_float3(char3 v) {
++@@ -1512,6 +2354,14 @@
 +   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 + }
 + 
 ++INLINE OVERLOADABLE long3 convert_long3(uchar3 v) {
 ++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong3 convert_ulong3(uchar3 v) {
 ++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
 ++}
 ++
 + INLINE OVERLOADABLE int3 convert_int3(uchar3 v) {
 +   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 + }
- @@ -1536,6 +2386,14 @@ INLINE OVERLOADABLE float3 convert_float3(uchar3 v) {
++@@ -1536,6 +2386,14 @@
 +   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
 + }
 + 
 ++INLINE OVERLOADABLE long3 convert_long3(float3 v) {
 ++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong3 convert_ulong3(float3 v) {
 ++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
 ++}
 ++
 + INLINE OVERLOADABLE int3 convert_int3(float3 v) {
 +   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
 + }
- @@ -1560,6 +2418,78 @@ INLINE OVERLOADABLE uchar3 convert_uchar3(float3 v) {
++@@ -1560,6 +2418,78 @@
 +   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
 + }
 + 
 ++INLINE OVERLOADABLE ulong4 convert_ulong4(long4 v) {
 ++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE int4 convert_int4(long4 v) {
 ++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 convert_uint4(long4 v) {
 ++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE short4 convert_short4(long4 v) {
 ++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 convert_ushort4(long4 v) {
 ++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE char4 convert_char4(long4 v) {
 ++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 convert_uchar4(long4 v) {
 ++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE float4 convert_float4(long4 v) {
 ++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE long4 convert_long4(ulong4 v) {
 ++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE int4 convert_int4(ulong4 v) {
 ++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uint4 convert_uint4(ulong4 v) {
 ++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE short4 convert_short4(ulong4 v) {
 ++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort4 convert_ushort4(ulong4 v) {
 ++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE char4 convert_char4(ulong4 v) {
 ++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar4 convert_uchar4(ulong4 v) {
 ++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE float4 convert_float4(ulong4 v) {
 ++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE long4 convert_long4(int4 v) {
 ++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 convert_ulong4(int4 v) {
 ++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
 ++}
 ++
 + INLINE OVERLOADABLE uint4 convert_uint4(int4 v) {
 +   return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
 + }
- @@ -1584,6 +2514,14 @@ INLINE OVERLOADABLE float4 convert_float4(int4 v) {
++@@ -1584,6 +2514,14 @@
 +   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 + }
 + 
 ++INLINE OVERLOADABLE long4 convert_long4(uint4 v) {
 ++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 convert_ulong4(uint4 v) {
 ++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
 ++}
 ++
 + INLINE OVERLOADABLE int4 convert_int4(uint4 v) {
 +   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 + }
- @@ -1608,6 +2546,14 @@ INLINE OVERLOADABLE float4 convert_float4(uint4 v) {
++@@ -1608,6 +2546,14 @@
 +   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 + }
 + 
 ++INLINE OVERLOADABLE long4 convert_long4(short4 v) {
 ++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 convert_ulong4(short4 v) {
 ++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
 ++}
 ++
 + INLINE OVERLOADABLE int4 convert_int4(short4 v) {
 +   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 + }
- @@ -1632,6 +2578,14 @@ INLINE OVERLOADABLE float4 convert_float4(short4 v) {
++@@ -1632,6 +2578,14 @@
 +   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 + }
 + 
 ++INLINE OVERLOADABLE long4 convert_long4(ushort4 v) {
 ++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 convert_ulong4(ushort4 v) {
 ++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
 ++}
 ++
 + INLINE OVERLOADABLE int4 convert_int4(ushort4 v) {
 +   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 + }
- @@ -1656,6 +2610,14 @@ INLINE OVERLOADABLE float4 convert_float4(ushort4 v) {
++@@ -1656,6 +2610,14 @@
 +   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 + }
 + 
 ++INLINE OVERLOADABLE long4 convert_long4(char4 v) {
 ++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 convert_ulong4(char4 v) {
 ++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
 ++}
 ++
 + INLINE OVERLOADABLE int4 convert_int4(char4 v) {
 +   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 + }
- @@ -1680,6 +2642,14 @@ INLINE OVERLOADABLE float4 convert_float4(char4 v) {
++@@ -1680,6 +2642,14 @@
 +   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 + }
 + 
 ++INLINE OVERLOADABLE long4 convert_long4(uchar4 v) {
 ++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 convert_ulong4(uchar4 v) {
 ++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
 ++}
 ++
 + INLINE OVERLOADABLE int4 convert_int4(uchar4 v) {
 +   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 + }
- @@ -1704,6 +2674,14 @@ INLINE OVERLOADABLE float4 convert_float4(uchar4 v) {
++@@ -1704,6 +2674,14 @@
 +   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
 + }
 + 
 ++INLINE OVERLOADABLE long4 convert_long4(float4 v) {
 ++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong4 convert_ulong4(float4 v) {
 ++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
 ++}
 ++
 + INLINE OVERLOADABLE int4 convert_int4(float4 v) {
 +   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
 + }
- @@ -1728,6 +2706,78 @@ INLINE OVERLOADABLE uchar4 convert_uchar4(float4 v) {
++@@ -1728,6 +2706,78 @@
 +   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
 + }
 + 
 ++INLINE OVERLOADABLE ulong8 convert_ulong8(long8 v) {
 ++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE int8 convert_int8(long8 v) {
 ++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 convert_uint8(long8 v) {
 ++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE short8 convert_short8(long8 v) {
 ++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 convert_ushort8(long8 v) {
 ++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE char8 convert_char8(long8 v) {
 ++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 convert_uchar8(long8 v) {
 ++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE float8 convert_float8(long8 v) {
 ++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE long8 convert_long8(ulong8 v) {
 ++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE int8 convert_int8(ulong8 v) {
 ++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uint8 convert_uint8(ulong8 v) {
 ++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE short8 convert_short8(ulong8 v) {
 ++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort8 convert_ushort8(ulong8 v) {
 ++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE char8 convert_char8(ulong8 v) {
 ++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar8 convert_uchar8(ulong8 v) {
 ++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE float8 convert_float8(ulong8 v) {
 ++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE long8 convert_long8(int8 v) {
 ++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 convert_ulong8(int8 v) {
 ++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
 ++}
 ++
 + INLINE OVERLOADABLE uint8 convert_uint8(int8 v) {
 +   return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
 + }
- @@ -1752,6 +2802,14 @@ INLINE OVERLOADABLE float8 convert_float8(int8 v) {
++@@ -1752,6 +2802,14 @@
 +   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 + }
 + 
 ++INLINE OVERLOADABLE long8 convert_long8(uint8 v) {
 ++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 convert_ulong8(uint8 v) {
 ++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
 ++}
 ++
 + INLINE OVERLOADABLE int8 convert_int8(uint8 v) {
 +   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 + }
- @@ -1776,6 +2834,14 @@ INLINE OVERLOADABLE float8 convert_float8(uint8 v) {
++@@ -1776,6 +2834,14 @@
 +   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 + }
 + 
 ++INLINE OVERLOADABLE long8 convert_long8(short8 v) {
 ++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 convert_ulong8(short8 v) {
 ++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
 ++}
 ++
 + INLINE OVERLOADABLE int8 convert_int8(short8 v) {
 +   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 + }
- @@ -1800,6 +2866,14 @@ INLINE OVERLOADABLE float8 convert_float8(short8 v) {
++@@ -1800,6 +2866,14 @@
 +   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 + }
 + 
 ++INLINE OVERLOADABLE long8 convert_long8(ushort8 v) {
 ++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 convert_ulong8(ushort8 v) {
 ++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
 ++}
 ++
 + INLINE OVERLOADABLE int8 convert_int8(ushort8 v) {
 +   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 + }
- @@ -1824,6 +2898,14 @@ INLINE OVERLOADABLE float8 convert_float8(ushort8 v) {
++@@ -1824,6 +2898,14 @@
 +   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 + }
 + 
 ++INLINE OVERLOADABLE long8 convert_long8(char8 v) {
 ++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 convert_ulong8(char8 v) {
 ++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
 ++}
 ++
 + INLINE OVERLOADABLE int8 convert_int8(char8 v) {
 +   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 + }
- @@ -1848,6 +2930,14 @@ INLINE OVERLOADABLE float8 convert_float8(char8 v) {
++@@ -1848,6 +2930,14 @@
 +   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 + }
 + 
 ++INLINE OVERLOADABLE long8 convert_long8(uchar8 v) {
 ++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 convert_ulong8(uchar8 v) {
 ++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
 ++}
 ++
 + INLINE OVERLOADABLE int8 convert_int8(uchar8 v) {
 +   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 + }
- @@ -1872,6 +2962,14 @@ INLINE OVERLOADABLE float8 convert_float8(uchar8 v) {
++@@ -1872,6 +2962,14 @@
 +   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
 + }
 + 
 ++INLINE OVERLOADABLE long8 convert_long8(float8 v) {
 ++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong8 convert_ulong8(float8 v) {
 ++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
 ++}
 ++
 + INLINE OVERLOADABLE int8 convert_int8(float8 v) {
 +   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
 + }
- @@ -1896,6 +2994,78 @@ INLINE OVERLOADABLE uchar8 convert_uchar8(float8 v) {
++@@ -1896,6 +2994,78 @@
 +   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
 + }
 + 
 ++INLINE OVERLOADABLE ulong16 convert_ulong16(long16 v) {
 ++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE int16 convert_int16(long16 v) {
 ++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 convert_uint16(long16 v) {
 ++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE short16 convert_short16(long16 v) {
 ++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 convert_ushort16(long16 v) {
 ++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE char16 convert_char16(long16 v) {
 ++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 convert_uchar16(long16 v) {
 ++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE float16 convert_float16(long16 v) {
 ++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE long16 convert_long16(ulong16 v) {
 ++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE int16 convert_int16(ulong16 v) {
 ++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uint16 convert_uint16(ulong16 v) {
 ++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE short16 convert_short16(ulong16 v) {
 ++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ushort16 convert_ushort16(ulong16 v) {
 ++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE char16 convert_char16(ulong16 v) {
 ++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE uchar16 convert_uchar16(ulong16 v) {
 ++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE float16 convert_float16(ulong16 v) {
 ++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE long16 convert_long16(int16 v) {
 ++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong16 convert_ulong16(int16 v) {
 ++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
 ++}
 ++
 + INLINE OVERLOADABLE uint16 convert_uint16(int16 v) {
 +   return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
 + }
- @@ -1920,6 +3090,14 @@ INLINE OVERLOADABLE float16 convert_float16(int16 v) {
++@@ -1920,6 +3090,14 @@
 +   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 + }
 + 
 ++INLINE OVERLOADABLE long16 convert_long16(uint16 v) {
 ++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong16 convert_ulong16(uint16 v) {
 ++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
 ++}
 ++
 + INLINE OVERLOADABLE int16 convert_int16(uint16 v) {
 +   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 + }
- @@ -1944,6 +3122,14 @@ INLINE OVERLOADABLE float16 convert_float16(uint16 v) {
++@@ -1944,6 +3122,14 @@
 +   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 + }
 + 
 ++INLINE OVERLOADABLE long16 convert_long16(short16 v) {
 ++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong16 convert_ulong16(short16 v) {
 ++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
 ++}
 ++
 + INLINE OVERLOADABLE int16 convert_int16(short16 v) {
 +   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 + }
- @@ -1968,6 +3154,14 @@ INLINE OVERLOADABLE float16 convert_float16(short16 v) {
++@@ -1968,6 +3154,14 @@
 +   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 + }
 + 
 ++INLINE OVERLOADABLE long16 convert_long16(ushort16 v) {
 ++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong16 convert_ulong16(ushort16 v) {
 ++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
 ++}
 ++
 + INLINE OVERLOADABLE int16 convert_int16(ushort16 v) {
 +   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 + }
- @@ -1992,6 +3186,14 @@ INLINE OVERLOADABLE float16 convert_float16(ushort16 v) {
++@@ -1992,6 +3186,14 @@
 +   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 + }
 + 
 ++INLINE OVERLOADABLE long16 convert_long16(char16 v) {
 ++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong16 convert_ulong16(char16 v) {
 ++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
 ++}
 ++
 + INLINE OVERLOADABLE int16 convert_int16(char16 v) {
 +   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 + }
- @@ -2016,6 +3218,14 @@ INLINE OVERLOADABLE float16 convert_float16(char16 v) {
++@@ -2016,6 +3218,14 @@
 +   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 + }
 + 
 ++INLINE OVERLOADABLE long16 convert_long16(uchar16 v) {
 ++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong16 convert_ulong16(uchar16 v) {
 ++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
 ++}
 ++
 + INLINE OVERLOADABLE int16 convert_int16(uchar16 v) {
 +   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 + }
- @@ -2040,6 +3250,14 @@ INLINE OVERLOADABLE float16 convert_float16(uchar16 v) {
++@@ -2040,6 +3250,14 @@
 +   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
 + }
 + 
 ++INLINE OVERLOADABLE long16 convert_long16(float16 v) {
 ++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
 ++}
 ++
 ++INLINE OVERLOADABLE ulong16 convert_ulong16(float16 v) {
 ++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
 ++}
 ++
 + INLINE OVERLOADABLE int16 convert_int16(float16 v) {
 +   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
 + }
- -- 
- 1.7.10.4
- 
diff --cc debian/patches/0004-Add-vector-argument-test-case.patch
index cf43d98,0000000..ed1564d
mode 100644,000000..100644
--- a/debian/patches/0004-Add-vector-argument-test-case.patch
+++ b/debian/patches/0004-Add-vector-argument-test-case.patch
@@@ -1,74 -1,0 +1,69 @@@
- From e9f476243902f2f3989c880030b267c384d7c040 Mon Sep 17 00:00:00 2001
++From b5563b40490e799465a597dab817c9e603c24028 Mon Sep 17 00:00:00 2001
 +From: Yang Rong <rong.r.yang at intel.com>
 +Date: Thu, 16 May 2013 12:36:35 +0800
- Subject: [PATCH 4/4] Add vector argument test case.
++Subject: [PATCH 04/12] Add vector argument test case.
 +To: beignet at lists.freedesktop.org
 +
 +Signed-off-by: Yang Rong <rong.r.yang at intel.com>
 +---
 + kernels/compiler_function_argument2.cl |    6 ++++++
 + utests/CMakeLists.txt                  |    1 +
 + utests/compiler_function_argument2.cpp |   26 ++++++++++++++++++++++++++
 + 3 files changed, 33 insertions(+)
 + create mode 100644 kernels/compiler_function_argument2.cl
 + create mode 100644 utests/compiler_function_argument2.cpp
 +
- diff --git a/kernels/compiler_function_argument2.cl b/kernels/compiler_function_argument2.cl
- new file mode 100644
- index 0000000..0985dbd
- --- /dev/null
- +++ b/kernels/compiler_function_argument2.cl
++Index: beignet-0.1+git20130619+42967d2/kernels/compiler_function_argument2.cl
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/kernels/compiler_function_argument2.cl	2013-06-19 21:04:43.270666516 +0200
 +@@ -0,0 +1,6 @@
 ++__kernel void
 ++compiler_function_argument2(__global int *dst, int4 value)
 ++{
 ++  int id = (int)get_global_id(0);
 ++  dst[id] = value.w;
 ++}
- diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
- index e5c03ee..f0bbe46 100644
- --- a/utests/CMakeLists.txt
- +++ b/utests/CMakeLists.txt
- @@ -34,6 +34,7 @@ set (utests_sources
++Index: beignet-0.1+git20130619+42967d2/utests/CMakeLists.txt
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/utests/CMakeLists.txt	2013-06-19 21:03:26.278669949 +0200
+++++ beignet-0.1+git20130619+42967d2/utests/CMakeLists.txt	2013-06-19 21:04:43.270666516 +0200
++@@ -34,6 +34,7 @@
 +   compiler_fill_image_3d_2.cpp
 +   compiler_function_argument0.cpp
 +   compiler_function_argument1.cpp
 ++  compiler_function_argument2.cpp
 +   compiler_function_argument.cpp
 +   compiler_function_constant0.cpp
 +   compiler_function_constant1.cpp
- diff --git a/utests/compiler_function_argument2.cpp b/utests/compiler_function_argument2.cpp
- new file mode 100644
- index 0000000..1e398a9
- --- /dev/null
- +++ b/utests/compiler_function_argument2.cpp
++Index: beignet-0.1+git20130619+42967d2/utests/compiler_function_argument2.cpp
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/utests/compiler_function_argument2.cpp	2013-06-19 21:04:43.270666516 +0200
 +@@ -0,0 +1,26 @@
 ++#include "utest_helper.hpp"
 ++
 ++struct int4 {int x,y,z,w;};
 ++void compiler_function_argument2(void)
 ++{
 ++  const size_t n = 2048;
 ++  const int4 value = {31, 32, 33, 34};
 ++
 ++  // Setup kernel and buffers
 ++  OCL_CREATE_KERNEL("compiler_function_argument2");
 ++  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
 ++  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
 ++  OCL_SET_ARG(1, sizeof(int4), &value);
 ++
 ++  // Run the kernel
 ++  globals[0] = n;
 ++  locals[0] = 16;
 ++  OCL_NDRANGE(1);
 ++  OCL_MAP_BUFFER(0);
 ++
 ++  // Check results
 ++  for (uint32_t i = 0; i < n; ++i)
 ++    OCL_ASSERT(((int*)buf_data[0])[i] == value.w);
 ++}
 ++
 ++MAKE_UTEST_FROM_FUNCTION(compiler_function_argument2);
- -- 
- 1.7.10.4
- 
diff --cc debian/patches/0005-Fix-several-typos-in-unit-test.patch
index 0000000,0000000..ac23650
new file mode 100644
--- /dev/null
+++ b/debian/patches/0005-Fix-several-typos-in-unit-test.patch
@@@ -1,0 -1,0 +1,58 @@@
++From 050e16612260137274a71e3abe3bbcf607cc2f86 Mon Sep 17 00:00:00 2001
++From: Ruiling Song <ruiling.song at intel.com>
++Date: Wed, 19 Jun 2013 10:04:54 +0800
++Subject: [PATCH 05/12] Fix several typos in unit test.
++To: beignet at lists.freedesktop.org
++
++compiler_sub_bytes and compiler_sub_shorts
++
++Signed-off-by: Ruiling Song <ruiling.song at intel.com>
++---
++ utests/compiler_sub_bytes.cpp  |    4 ++--
++ utests/compiler_sub_shorts.cpp |    4 ++--
++ 2 files changed, 4 insertions(+), 4 deletions(-)
++
++Index: beignet-0.1+git20130619+42967d2/utests/compiler_sub_bytes.cpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/utests/compiler_sub_bytes.cpp	2013-06-19 21:03:25.994669961 +0200
+++++ beignet-0.1+git20130619+42967d2/utests/compiler_sub_bytes.cpp	2013-06-19 21:04:44.686666453 +0200
++@@ -11,7 +11,7 @@
++   for (uint32_t i = 0; i < n; ++i) ((int8_t*)buf_data[0])[i] = (int8_t) rand();
++   for (uint32_t i = 0; i < n; ++i) ((int8_t*)buf_data[1])[i] = (int8_t) rand();
++   OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(int8_t), buf_data[0]);
++-  OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int8_t), buf_data[0]);
+++  OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int8_t), buf_data[1]);
++   OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int8_t), NULL);
++ 
++   // Run the kernel
++@@ -25,7 +25,7 @@
++   // Check result
++   OCL_MAP_BUFFER(2);
++   for (uint32_t i = 0; i < n; ++i)
++-    OCL_ASSERT(((int8_t*)buf_data[2])[i] = ((int8_t*)buf_data[0])[i] - ((int8_t*)buf_data[1])[i]);
+++    OCL_ASSERT(((int8_t*)buf_data[2])[i] == (int8_t)(((int8_t*)buf_data[0])[i] - ((int8_t*)buf_data[1])[i]));
++   free(buf_data[0]);
++   free(buf_data[1]);
++   buf_data[0] = buf_data[1] = NULL;
++Index: beignet-0.1+git20130619+42967d2/utests/compiler_sub_shorts.cpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/utests/compiler_sub_shorts.cpp	2013-06-19 21:03:25.994669961 +0200
+++++ beignet-0.1+git20130619+42967d2/utests/compiler_sub_shorts.cpp	2013-06-19 21:04:44.686666453 +0200
++@@ -11,7 +11,7 @@
++   for (uint32_t i = 0; i < n; ++i) ((int16_t*)buf_data[0])[i] = (int16_t) rand();
++   for (uint32_t i = 0; i < n; ++i) ((int16_t*)buf_data[1])[i] = (int16_t) rand();
++   OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(int16_t), buf_data[0]);
++-  OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int16_t), buf_data[0]);
+++  OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(int16_t), buf_data[1]);
++   OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int16_t), NULL);
++ 
++   // Run the kernel
++@@ -25,7 +25,7 @@
++   // Check result
++   OCL_MAP_BUFFER(2);
++   for (uint32_t i = 0; i < n; ++i)
++-    OCL_ASSERT(((int16_t*)buf_data[2])[i] = ((int16_t*)buf_data[0])[i] - ((int16_t*)buf_data[1])[i]);
+++    OCL_ASSERT(((int16_t*)buf_data[2])[i] == (int16_t)(((int16_t*)buf_data[0])[i] - ((int16_t*)buf_data[1])[i]));
++   free(buf_data[0]);
++   free(buf_data[1]);
++   buf_data[0] = buf_data[1] = NULL;
diff --cc debian/patches/0006-Support-64-bit-float.patch
index 0000000,0000000..9ca7880
new file mode 100644
--- /dev/null
+++ b/debian/patches/0006-Support-64-bit-float.patch
@@@ -1,0 -1,0 +1,842 @@@
++From 3f27c4e6648ee4f98f27de6beaba713ee3c35985 Mon Sep 17 00:00:00 2001
++From: Homer Hsing <homer.xing at intel.com>
++Date: Wed, 19 Jun 2013 12:40:35 +0800
++Subject: [PATCH 06/12] Support 64-bit float
++To: beignet at lists.freedesktop.org
++
++support arithmetic, store, load, and 64-bit float immediate
++
++example:
++
++  kernel void f(global double *src, global double *dst) {
++    int i = get_global_id(0);
++    double d = 1.234567890123456789;
++    dst[i] = d * (src[i] + d);
++  }
++
++Signed-off-by: Homer Hsing <homer.xing at intel.com>
++---
++ backend/src/backend/gen_context.cpp                |   17 +++
++ backend/src/backend/gen_context.hpp                |    2 +
++ backend/src/backend/gen_defs.hpp                   |    2 +
++ backend/src/backend/gen_encoder.cpp                |  140 ++++++++++++++++++-
++ backend/src/backend/gen_encoder.hpp                |    6 +
++ .../src/backend/gen_insn_gen7_schedule_info.hxx    |    2 +
++ backend/src/backend/gen_insn_selection.cpp         |  146 ++++++++++++++++++--
++ backend/src/backend/gen_insn_selection.hxx         |    4 +
++ backend/src/backend/gen_reg_allocation.cpp         |    1 -
++ backend/src/backend/gen_register.hpp               |   90 +++++++++++-
++ 10 files changed, 393 insertions(+), 17 deletions(-)
++
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_context.cpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_context.cpp	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_context.cpp	2013-06-19 21:04:46.030666393 +0200
++@@ -139,6 +139,7 @@
++     const GenRegister src = ra->genReg(insn.src(0));
++     switch (insn.opcode) {
++       case SEL_OP_MOV: p->MOV(dst, src); break;
+++      case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src); break;
++       case SEL_OP_NOT: p->NOT(dst, src); break;
++       case SEL_OP_RNDD: p->RNDD(dst, src); break;
++       case SEL_OP_RNDU: p->RNDU(dst, src); break;
++@@ -153,6 +154,7 @@
++     const GenRegister src0 = ra->genReg(insn.src(0));
++     const GenRegister src1 = ra->genReg(insn.src(1));
++     switch (insn.opcode) {
+++      case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
++       case SEL_OP_SEL:  p->SEL(dst, src0, src1); break;
++       case SEL_OP_AND:  p->AND(dst, src0, src1); break;
++       case SEL_OP_OR:   p->OR (dst, src0, src1);  break;
++@@ -269,6 +271,14 @@
++     p->pop();
++   }
++ 
+++  void GenContext::emitReadFloat64Instruction(const SelectionInstruction &insn) {
+++    const GenRegister dst = ra->genReg(insn.dst(0));
+++    const GenRegister src = ra->genReg(insn.src(0));
+++    const uint32_t bti = insn.extra.function;
+++    const uint32_t elemNum = insn.extra.elem;
+++    p->READ_FLOAT64(dst, src, bti, elemNum);
+++  }
+++
++   void GenContext::emitUntypedReadInstruction(const SelectionInstruction &insn) {
++     const GenRegister dst = ra->genReg(insn.dst(0));
++     const GenRegister src = ra->genReg(insn.src(0));
++@@ -277,6 +287,13 @@
++     p->UNTYPED_READ(dst, src, bti, elemNum);
++   }
++ 
+++  void GenContext::emitWriteFloat64Instruction(const SelectionInstruction &insn) {
+++    const GenRegister src = ra->genReg(insn.src(0));
+++    const uint32_t bti = insn.extra.function;
+++    const uint32_t elemNum = insn.extra.elem;
+++    p->WRITE_FLOAT64(src, bti, elemNum);
+++  }
+++
++   void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
++     const GenRegister src = ra->genReg(insn.src(0));
++     const uint32_t bti = insn.extra.function;
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_context.hpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_context.hpp	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_context.hpp	2013-06-19 21:04:46.030666393 +0200
++@@ -87,6 +87,8 @@
++     void emitBarrierInstruction(const SelectionInstruction &insn);
++     void emitFenceInstruction(const SelectionInstruction &insn);
++     void emitMathInstruction(const SelectionInstruction &insn);
+++    void emitReadFloat64Instruction(const SelectionInstruction &insn);
+++    void emitWriteFloat64Instruction(const SelectionInstruction &insn);
++     void emitUntypedReadInstruction(const SelectionInstruction &insn);
++     void emitUntypedWriteInstruction(const SelectionInstruction &insn);
++     void emitByteGatherInstruction(const SelectionInstruction &insn);
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_defs.hpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_defs.hpp	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_defs.hpp	2013-06-19 21:04:46.030666393 +0200
++@@ -215,6 +215,7 @@
++ #define GEN_TYPE_VF  5 /* packed float vector, immediates only? */
++ #define GEN_TYPE_HF  6
++ #define GEN_TYPE_V   6 /* packed int vector, immediates only, uword dest only */
+++#define GEN_TYPE_DF  6
++ #define GEN_TYPE_F   7
++ 
++ #define GEN_ARF_NULL                  0x00
++@@ -303,6 +304,7 @@
++ #define GEN_BYTE_SCATTER_BYTE   0
++ #define GEN_BYTE_SCATTER_WORD   1
++ #define GEN_BYTE_SCATTER_DWORD  2
+++#define GEN_BYTE_SCATTER_QWORD  3
++ 
++ #define GEN_SAMPLER_RETURN_FORMAT_FLOAT32     0
++ #define GEN_SAMPLER_RETURN_FORMAT_UINT32      2
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_encoder.cpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_encoder.cpp	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_encoder.cpp	2013-06-19 21:04:46.030666393 +0200
++@@ -355,6 +355,64 @@
++     0
++   };
++ 
+++  void GenEncoder::READ_FLOAT64(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
+++    int w = curr.execWidth;
+++    GenRegister r = GenRegister::retype(GenRegister::suboffset(src, w*2), GEN_TYPE_UD);
+++    GenRegister hdr = GenRegister::h2(r);
+++    GenRegister imm4 = GenRegister::immud(4);
+++    push();
+++      curr.execWidth = 8;
+++      MOV(hdr,                            GenRegister::ud8grf(src.nr, 0));
+++      ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
+++      if (w == 16) {
+++        MOV(GenRegister::offset(hdr, 1),    GenRegister::ud8grf(src.nr, 4));
+++        ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
+++      }
+++    pop();
+++    UNTYPED_READ(dst, hdr, bti, 1);
+++    push();
+++      curr.execWidth = 8;
+++      MOV(hdr, w == 16 ? GenRegister::ud8grf(src.nr+1, 0) : GenRegister::retype(GenRegister::offset(src, 0, 16), GEN_TYPE_UD));
+++      ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
+++      if (w == 16) {
+++        MOV(GenRegister::offset(hdr, 1),    GenRegister::ud8grf(src.nr + 1, 4));
+++        ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
+++      }
+++    pop();
+++    UNTYPED_READ(GenRegister::offset(dst, w / 8), hdr, bti, 1);
+++  }
+++
+++  void GenEncoder::WRITE_FLOAT64(GenRegister msg, uint32_t bti, uint32_t elemNum) {
+++    int w = curr.execWidth;
+++    GenRegister r = GenRegister::retype(GenRegister::suboffset(msg, w*3), GEN_TYPE_UD);
+++    r.type = GEN_TYPE_UD;
+++    GenRegister hdr = GenRegister::h2(r);
+++    GenRegister data = GenRegister::offset(r, w / 8);
+++    GenRegister imm4 = GenRegister::immud(4);
+++    push();
+++      curr.execWidth = 8;
+++      MOV(hdr,                            GenRegister::ud8grf(msg.nr, 0));
+++      ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
+++      if (w == 16) {
+++        MOV(GenRegister::offset(hdr, 1),    GenRegister::ud8grf(msg.nr, 4));
+++        ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
+++      }
+++    pop();
+++    MOV(data, GenRegister::ud16grf(msg.nr + w / 8, 0));
+++    UNTYPED_WRITE(hdr, bti, 1);
+++    push();
+++      curr.execWidth = 8;
+++      MOV(hdr, w == 16 ? GenRegister::ud8grf(msg.nr+1, 0) : GenRegister::retype(GenRegister::offset(msg, 0, 16), GEN_TYPE_UD));
+++      ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
+++      if (w == 16) {
+++        MOV(GenRegister::offset(hdr, 1),    GenRegister::ud8grf(msg.nr+1, 4));
+++        ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
+++      }
+++    pop();
+++    MOV(data, GenRegister::ud16grf(msg.nr + w / 4, 0));
+++    UNTYPED_WRITE(hdr, bti, 1);
+++  }
+++
++   void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
++     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
++     assert(elemNum >= 1 || elemNum <= 4);
++@@ -467,7 +525,19 @@
++   }
++ 
++   INLINE void alu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src) {
++-     if (needToSplitAlu1(p, dst, src) == false) {
+++     if (dst.isdf() && src.isdf()) {
+++       int w = p->curr.execWidth;
+++       p->push();
+++       GenInstruction *insn = p->next(opcode);
+++       p->setHeader(insn);
+++       p->setDst(insn, dst);
+++       p->setSrc0(insn, src);
+++       insn = p->next(opcode);
+++       p->setHeader(insn);
+++       p->setDst(insn, GenRegister::suboffset(dst, w / 2));
+++       p->setSrc0(insn, GenRegister::suboffset(src, w / 2));
+++       p->pop();
+++     } else if (needToSplitAlu1(p, dst, src) == false) {
++        GenInstruction *insn = p->next(opcode);
++        p->setHeader(insn);
++        p->setDst(insn, dst);
++@@ -499,7 +569,21 @@
++                    GenRegister src0,
++                    GenRegister src1)
++   {
++-    if (needToSplitAlu2(p, dst, src0, src1) == false) {
+++    if (dst.isdf() && src0.isdf() && src1.isdf()) {
+++       int w = p->curr.execWidth;
+++       p->push();
+++       GenInstruction *insn = p->next(opcode);
+++       p->setHeader(insn);
+++       p->setDst(insn, dst);
+++       p->setSrc0(insn, src0);
+++       p->setSrc1(insn, src1);
+++       insn = p->next(opcode);
+++       p->setHeader(insn);
+++       p->setDst(insn, GenRegister::suboffset(dst, w / 2));
+++       p->setSrc0(insn, GenRegister::suboffset(src0, w / 2));
+++       p->setSrc1(insn, GenRegister::suboffset(src1, w / 2));
+++       p->pop();
+++    } else if (needToSplitAlu2(p, dst, src0, src1) == false) {
++        GenInstruction *insn = p->next(opcode);
++        p->setHeader(insn);
++        p->setDst(insn, dst);
++@@ -620,6 +704,58 @@
++     alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
++   }
++ 
+++  void GenEncoder::LOAD_DF_IMM(GenRegister dest, GenRegister src0) {
+++    union { double d; unsigned u[2]; } u;
+++    u.d = src0.value.df;
+++    GenRegister r = GenRegister::retype(dest, GEN_TYPE_UD);
+++    push();
+++    curr.execWidth = 1;
+++    MOV(r, GenRegister::immud(u.u[1]));
+++    MOV(GenRegister::suboffset(r, 1), GenRegister::immud(u.u[0]));
+++    pop();
+++    r.type = GEN_TYPE_DF;
+++    r.vstride = GEN_VERTICAL_STRIDE_0;
+++    r.width = GEN_WIDTH_1;
+++    r.hstride = GEN_HORIZONTAL_STRIDE_0;
+++    push();
+++    MOV(dest, r);
+++    pop();
+++  }
+++
+++  void GenEncoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister r) {
+++    int w = curr.execWidth;
+++    if (src0.isdf()) {
+++      push();
+++      curr.execWidth = 16;
+++      MOV(dest, src0);
+++      if (w == 16)
+++        MOV(GenRegister::QnPhysical(dest, w / 4), GenRegister::QnPhysical(src0, w / 4));
+++      pop();
+++    } else {
+++      GenRegister r0 = GenRegister::h2(r);
+++      push();
+++      curr.execWidth = 8;
+++      MOV(r0, src0);
+++      MOV(GenRegister::suboffset(r0, 8), GenRegister::suboffset(src0, 4));
+++      pop();
+++      push();
+++      curr.execWidth = 16;
+++      MOV(dest, r);
+++      pop();
+++      if (w == 16) {
+++        push();
+++        curr.execWidth = 8;
+++        MOV(r0, GenRegister::suboffset(src0, 8));
+++        MOV(GenRegister::suboffset(r0, 8), GenRegister::suboffset(src0, 12));
+++        pop();
+++        push();
+++        curr.execWidth = 16;
+++        MOV(GenRegister::suboffset(dest, 8), r);
+++        pop();
+++      }
+++    }
+++  }
+++
++   ALU1(MOV)
++   ALU1(RNDZ)
++   ALU1(RNDE)
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_encoder.hpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_encoder.hpp	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_encoder.hpp	2013-06-19 21:04:46.030666393 +0200
++@@ -113,6 +113,8 @@
++     ALU2(LINE)
++     ALU2(PLN)
++     ALU3(MAD)
+++    ALU1(LOAD_DF_IMM);
+++    ALU2(MOV_DF);
++ #undef ALU1
++ #undef ALU2
++ #undef ALU3
++@@ -132,6 +134,10 @@
++     void NOP(void);
++     /*! Wait instruction (used for the barrier) */
++     void WAIT(void);
+++    /*! Read 64-bit float arrays */
+++    void READ_FLOAT64(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+++    /*! Write 64-bit float arrays */
+++    void WRITE_FLOAT64(GenRegister src, uint32_t bti, uint32_t elemNum);
++     /*! Untyped read (upto 4 channels) */
++     void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
++     /*! Untyped write (upto 4 channels) */
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_gen7_schedule_info.hxx
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_insn_gen7_schedule_info.hxx	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_gen7_schedule_info.hxx	2013-06-19 21:04:46.030666393 +0200
++@@ -12,6 +12,8 @@
++ DECL_GEN7_SCHEDULE(Math,            20,        4,        2)
++ DECL_GEN7_SCHEDULE(Barrier,         80,        1,        1)
++ DECL_GEN7_SCHEDULE(Fence,           80,        1,        1)
+++DECL_GEN7_SCHEDULE(ReadFloat64,     80,        1,        1)
+++DECL_GEN7_SCHEDULE(WriteFloat64,    80,        1,        1)
++ DECL_GEN7_SCHEDULE(UntypedRead,     80,        1,        1)
++ DECL_GEN7_SCHEDULE(UntypedWrite,    80,        1,        1)
++ DECL_GEN7_SCHEDULE(ByteGather,      80,        1,        1)
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_selection.cpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_insn_selection.cpp	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_selection.cpp	2013-06-19 21:04:46.034666393 +0200
++@@ -129,6 +129,7 @@
++       case TYPE_S32: return GEN_TYPE_D;
++       case TYPE_U32: return GEN_TYPE_UD;
++       case TYPE_FLOAT: return GEN_TYPE_F;
+++      case TYPE_DOUBLE: return GEN_TYPE_DF;
++       default: NOT_SUPPORTED; return GEN_TYPE_F;
++     }
++   }
++@@ -166,11 +167,13 @@
++ 
++   bool SelectionInstruction::isRead(void) const {
++     return this->opcode == SEL_OP_UNTYPED_READ ||
+++           this->opcode == SEL_OP_READ_FLOAT64 ||
++            this->opcode == SEL_OP_BYTE_GATHER;
++   }
++ 
++   bool SelectionInstruction::isWrite(void) const {
++     return this->opcode == SEL_OP_UNTYPED_WRITE ||
+++           this->opcode == SEL_OP_WRITE_FLOAT64 ||
++            this->opcode == SEL_OP_BYTE_SCATTER;
++   }
++ 
++@@ -406,6 +409,8 @@
++ #define ALU3(OP) \
++   INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
++     ALU1(MOV)
+++    ALU2(MOV_DF)
+++    ALU1(LOAD_DF_IMM)
++     ALU1(RNDZ)
++     ALU1(RNDE)
++     ALU2(SEL)
++@@ -449,6 +454,10 @@
++     void NOP(void);
++     /*! Wait instruction (used for the barrier) */
++     void WAIT(void);
+++    /*! Read 64-bit float array */
+++    void READ_FLOAT64(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
+++    /*! Write 64-bit float array */
+++    void WRITE_FLOAT64(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t bti);
++     /*! Untyped read (up to 4 elements) */
++     void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
++     /*! Untyped write (up to 4 elements) */
++@@ -610,20 +619,23 @@
++ 
++   ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) {
++     SelectionBlock *block = insn->parent;
++-    const uint32_t simdWidth = ctx.getSimdWidth();
+++    uint32_t simdWidth = ctx.getSimdWidth();
++     ir::Register tmp;
+++    ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family;
+++    int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F;
+++    GenRegister gr;
++ 
++     // This will append the temporary register in the instruction block
++     this->block = block;
++-    tmp = this->reg(ir::FAMILY_DWORD);
+++    tmp = this->reg(f);
++ 
++     // Generate the MOV instruction and replace the register in the instruction
++     SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
++-    mov->dst(0) = GenRegister::retype(insn->dst(regID), GEN_TYPE_F);
+++    mov->dst(0) = GenRegister::retype(insn->dst(regID), genType);
++     mov->state = GenInstructionState(simdWidth);
++-    insn->dst(regID) = mov->src(0) = GenRegister::fxgrf(simdWidth, tmp);
+++    gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : GenRegister::fxgrf(simdWidth, tmp);
+++    insn->dst(regID) = mov->src(0) = gr;
++     insn->append(*mov);
++-
++     return tmp;
++   }
++ 
++@@ -657,6 +669,7 @@
++       case FAMILY_WORD: SEL_REG(uw16grf, uw8grf, uw1grf); break;
++       case FAMILY_BYTE: SEL_REG(ub16grf, ub8grf, ub1grf); break;
++       case FAMILY_DWORD: SEL_REG(f16grf, f8grf, f1grf); break;
+++      case FAMILY_QWORD: SEL_REG(df16grf, df8grf, df1grf); break;
++       default: NOT_SUPPORTED;
++     }
++     GBE_ASSERT(false);
++@@ -719,6 +732,33 @@
++   void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
++   void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
++ 
+++  void Selection::Opaque::READ_FLOAT64(Reg addr,
+++                                       const GenRegister *dst,
+++                                       uint32_t elemNum,
+++                                       uint32_t bti)
+++  {
+++    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ_FLOAT64, elemNum, 1);
+++    SelectionVector *srcVector = this->appendVector();
+++    SelectionVector *dstVector = this->appendVector();
+++
+++    // Regular instruction to encode
+++    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+++      insn->dst(elemID) = dst[elemID];
+++    insn->src(0) = addr;
+++    insn->extra.function = bti;
+++    insn->extra.elem = elemNum;
+++
+++    // Sends require contiguous allocation
+++    dstVector->regNum = elemNum;
+++    dstVector->isSrc = 0;
+++    dstVector->reg = &insn->dst(0);
+++
+++    // Source cannot be scalar (yet)
+++    srcVector->regNum = 1;
+++    srcVector->isSrc = 1;
+++    srcVector->reg = &insn->src(0);
+++  }
+++
++   void Selection::Opaque::UNTYPED_READ(Reg addr,
++                                        const GenRegister *dst,
++                                        uint32_t elemNum,
++@@ -746,6 +786,27 @@
++     srcVector->reg = &insn->src(0);
++   }
++ 
+++  void Selection::Opaque::WRITE_FLOAT64(Reg addr,
+++                                        const GenRegister *src,
+++                                        uint32_t elemNum,
+++                                        uint32_t bti)
+++  {
+++    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE_FLOAT64, 0, elemNum+1);
+++    SelectionVector *vector = this->appendVector();
+++
+++    // Regular instruction to encode
+++    insn->src(0) = addr;
+++    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+++      insn->src(elemID+1) = src[elemID];
+++    insn->extra.function = bti;
+++    insn->extra.elem = elemNum;
+++
+++    // Sends require contiguous allocation for the sources
+++    vector->regNum = elemNum+1;
+++    vector->reg = &insn->src(0);
+++    vector->isSrc = 1;
+++  }
+++
++   void Selection::Opaque::UNTYPED_WRITE(Reg addr,
++                                         const GenRegister *src,
++                                         uint32_t elemNum,
++@@ -1092,6 +1153,15 @@
++   // Implementation of all patterns
++   ///////////////////////////////////////////////////////////////////////////
++ 
+++  bool canGetRegisterFromImmediate(const ir::Instruction &insn) {
+++    using namespace ir;
+++    const auto &childInsn = cast<LoadImmInstruction>(insn);
+++    const auto &imm = childInsn.getImmediate();
+++    if(imm.type != TYPE_DOUBLE)
+++      return true;
+++    return false;
+++  }
+++
++   GenRegister getRegisterFromImmediate(ir::Immediate imm)
++   {
++     using namespace ir;
++@@ -1103,6 +1173,7 @@
++       case TYPE_S16: return  GenRegister::immw(imm.data.s16);
++       case TYPE_U8:  return GenRegister::immuw(imm.data.u8);
++       case TYPE_S8:  return GenRegister::immw(imm.data.s8);
+++      case TYPE_DOUBLE: return GenRegister::immdf(imm.data.f64);
++       default: NOT_SUPPORTED; return GenRegister::immuw(0);
++     }
++   }
++@@ -1146,7 +1217,13 @@
++       const GenRegister src = sel.selReg(insn.getSrc(0));
++       switch (opcode) {
++         case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break;
++-        case ir::OP_MOV: sel.MOV(dst, src); break;
+++        case ir::OP_MOV:
+++          if (dst.isdf()) {
+++            ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
+++            sel.MOV_DF(dst, src, sel.selReg(r));
+++          } else
+++            sel.MOV(dst, src);
+++          break;
++         case ir::OP_RNDD: sel.RNDD(dst, src); break;
++         case ir::OP_RNDE: sel.RNDE(dst, src); break;
++         case ir::OP_RNDU: sel.RNDU(dst, src); break;
++@@ -1225,14 +1302,14 @@
++       SelectionDAG *dag1 = dag.child[1];
++ 
++       // Right source can always be an immediate
++-      if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
+++      if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
++         const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
++         src0 = sel.selReg(insn.getSrc(0), type);
++         src1 = getRegisterFromImmediate(childInsn.getImmediate());
++         if (dag0) dag0->isRoot = 1;
++       }
++       // Left source cannot be immediate but it is OK if we can commute
++-      else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI) {
+++      else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag0->insn)) {
++         const auto &childInsn = cast<LoadImmInstruction>(dag0->insn);
++         src0 = sel.selReg(insn.getSrc(1), type);
++         src1 = getRegisterFromImmediate(childInsn.getImmediate());
++@@ -1268,7 +1345,7 @@
++         case OP_SHR: sel.SHR(dst, src0, src1); break;
++         case OP_ASR: sel.ASR(dst, src0, src1); break;
++         case OP_MUL:
++-          if (type == TYPE_FLOAT)
+++          if (type == TYPE_FLOAT || type == TYPE_DOUBLE)
++             sel.MUL(dst, src0, src1);
++           else if (type == TYPE_U32 || type == TYPE_S32) {
++             sel.pop();
++@@ -1599,6 +1676,7 @@
++         case TYPE_S16: sel.MOV(dst, GenRegister::immw(imm.data.s16)); break;
++         case TYPE_U8:  sel.MOV(dst, GenRegister::immuw(imm.data.u8)); break;
++         case TYPE_S8:  sel.MOV(dst, GenRegister::immw(imm.data.s8)); break;
+++        case TYPE_DOUBLE: sel.LOAD_DF_IMM(dst, GenRegister::immdf(imm.data.f64)); break;
++         default: NOT_SUPPORTED;
++       }
++       sel.pop();
++@@ -1650,6 +1728,8 @@
++   INLINE uint32_t getByteScatterGatherSize(ir::Type type) {
++     using namespace ir;
++     switch (type) {
+++      case TYPE_DOUBLE:
+++        return GEN_BYTE_SCATTER_QWORD;
++       case TYPE_FLOAT:
++       case TYPE_U32:
++       case TYPE_S32:
++@@ -1681,6 +1761,22 @@
++       sel.UNTYPED_READ(addr, dst.data(), valueNum, bti);
++     }
++ 
+++    void emitReadFloat64(Selection::Opaque &sel,
+++                         const ir::LoadInstruction &insn,
+++                         GenRegister addr,
+++                         uint32_t bti) const
+++    {
+++      using namespace ir;
+++      const uint32_t valueNum = insn.getValueNum();
+++      vector<GenRegister> dst(valueNum);
+++      for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
+++        dst[dstID] = GenRegister::retype(sel.selReg(insn.getValue(dstID)), GEN_TYPE_F);
+++      dst.push_back(sel.selReg(sel.reg(FAMILY_QWORD)));
+++      if (sel.ctx.getSimdWidth() == 16)
+++        dst.push_back(sel.selReg(sel.reg(FAMILY_QWORD)));
+++      sel.READ_FLOAT64(addr, dst.data(), dst.size(), bti);
+++    }
+++
++     void emitByteGather(Selection::Opaque &sel,
++                         const ir::LoadInstruction &insn,
++                         const uint32_t elemSize,
++@@ -1732,6 +1828,8 @@
++       const uint32_t elemSize = getByteScatterGatherSize(type);
++       if (insn.getAddressSpace() == MEM_CONSTANT)
++         this->emitIndirectMove(sel, insn, address);
+++      else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
+++        this->emitReadFloat64(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
++       else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
++         this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
++       else {
++@@ -1762,6 +1860,25 @@
++       sel.UNTYPED_WRITE(addr, value.data(), valueNum, bti);
++     }
++ 
+++    void emitWriteFloat64(Selection::Opaque &sel,
+++                          const ir::StoreInstruction &insn,
+++                          uint32_t bti) const
+++    {
+++      using namespace ir;
+++      const uint32_t valueNum = insn.getValueNum();
+++      const uint32_t addrID = ir::StoreInstruction::addressIndex;
+++      GenRegister addr;
+++      vector<GenRegister> value(valueNum);
+++
+++      addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);
+++      for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
+++        value[valueID] = GenRegister::retype(sel.selReg(insn.getValue(valueID)), GEN_TYPE_F);
+++      value.push_back(sel.selReg(sel.reg(FAMILY_QWORD)));
+++      if (sel.ctx.getSimdWidth() == 16)
+++        value.push_back(sel.selReg(sel.reg(FAMILY_QWORD)));
+++      sel.WRITE_FLOAT64(addr, value.data(), value.size(), bti);
+++    }
+++
++     void emitByteScatter(Selection::Opaque &sel,
++                          const ir::StoreInstruction &insn,
++                          const uint32_t elemSize,
++@@ -1791,7 +1908,9 @@
++       const uint32_t bti = space == MEM_LOCAL ? 0xfe : 0x01;
++       const Type type = insn.getValueType();
++       const uint32_t elemSize = getByteScatterGatherSize(type);
++-      if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
+++      if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
+++        this->emitWriteFloat64(sel, insn, bti);
+++      else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
++         this->emitUntypedWrite(sel, insn, bti);
++       else {
++         const GenRegister address = sel.selReg(insn.getAddress());
++@@ -1839,7 +1958,7 @@
++       SelectionDAG *dag1 = dag.child[1];
++ 
++       // Right source can always be an immediate
++-      if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
+++      if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
++         const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
++         src0 = sel.selReg(insn.getSrc(0), type);
++         src1 = getRegisterFromImmediate(childInsn.getImmediate());
++@@ -1873,7 +1992,7 @@
++       const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
++ 
++       // We need two instructions to make the conversion
++-      if (dstFamily != FAMILY_DWORD && srcFamily == FAMILY_DWORD) {
+++      if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {
++         GenRegister unpacked;
++         if (dstFamily == FAMILY_WORD) {
++           const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
++@@ -1886,6 +2005,9 @@
++         }
++         sel.MOV(unpacked, src);
++         sel.MOV(dst, unpacked);
+++      } else if (dst.isdf()) {
+++        ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
+++        sel.MOV_DF(dst, src, sel.selReg(r));
++       } else
++         sel.MOV(dst, src);
++       return true;
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_selection.hxx
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_insn_selection.hxx	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_insn_selection.hxx	2013-06-19 21:04:46.034666393 +0200
++@@ -1,5 +1,7 @@
++ DECL_SELECTION_IR(LABEL, LabelInstruction)
++ DECL_SELECTION_IR(MOV, UnaryInstruction)
+++DECL_SELECTION_IR(MOV_DF, BinaryInstruction)
+++DECL_SELECTION_IR(LOAD_DF_IMM, UnaryInstruction)
++ DECL_SELECTION_IR(NOT, UnaryInstruction)
++ DECL_SELECTION_IR(LZD, UnaryInstruction)
++ DECL_SELECTION_IR(RNDZ, UnaryInstruction)
++@@ -32,6 +34,8 @@
++ DECL_SELECTION_IR(FENCE, FenceInstruction)
++ DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
++ DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
+++DECL_SELECTION_IR(READ_FLOAT64, ReadFloat64Instruction)
+++DECL_SELECTION_IR(WRITE_FLOAT64, WriteFloat64Instruction)
++ DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
++ DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
++ DECL_SELECTION_IR(SAMPLE, SampleInstruction)
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_reg_allocation.cpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_reg_allocation.cpp	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_reg_allocation.cpp	2013-06-19 21:04:46.034666393 +0200
++@@ -458,7 +458,6 @@
++   }
++ 
++   bool GenRegAllocator::Opaque::allocateGRFs(Selection &selection) {
++-
++     // Perform the linear scan allocator
++     const uint32_t regNum = ctx.sel->getRegNum();
++     for (uint32_t startID = 0; startID < regNum; ++startID) {
++Index: beignet-0.1+git20130619+42967d2/backend/src/backend/gen_register.hpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/backend/gen_register.hpp	2013-06-19 21:03:25.726669973 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/backend/gen_register.hpp	2013-06-19 21:04:46.034666393 +0200
++@@ -69,11 +69,12 @@
++   /*! Type size in bytes for each Gen type */
++   INLINE int typeSize(uint32_t type) {
++     switch(type) {
+++      case GEN_TYPE_DF:
+++        return 8;
++       case GEN_TYPE_UD:
++       case GEN_TYPE_D:
++       case GEN_TYPE_F:
++         return 4;
++-      case GEN_TYPE_HF:
++       case GEN_TYPE_UW:
++       case GEN_TYPE_W:
++         return 2;
++@@ -192,6 +193,7 @@
++ 
++     /*! For immediates or virtual register */
++     union {
+++      double df;
++       float f;
++       int32_t d;
++       uint32_t ud;
++@@ -211,6 +213,31 @@
++     uint32_t quarter:1;      //!< To choose which part we want (Q1 / Q2)
++     uint32_t address_mode:1; //!< direct or indirect
++ 
+++    static INLINE GenRegister offset(GenRegister reg, int nr, int subnr = 0) {
+++      GenRegister r = reg;
+++      r.nr += nr;
+++      r.subnr += subnr;
+++      return r;
+++    }
+++
+++    INLINE bool isimmdf(void) const {
+++      if (type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE)
+++        return true;
+++      return false;
+++    }
+++
+++    INLINE bool isdf(void) const {
+++      if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
+++        return true;
+++      return false;
+++    }
+++
+++    static INLINE GenRegister h2(GenRegister reg) {
+++      GenRegister r = reg;
+++      r.hstride = GEN_HORIZONTAL_STRIDE_2;
+++      return r;
+++    }
+++
++     static INLINE GenRegister QnVirtual(GenRegister reg, uint32_t quarter) {
++       GBE_ASSERT(reg.physical == 0);
++       if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
++@@ -293,6 +320,18 @@
++       return reg;
++     }
++ 
+++    static INLINE GenRegister df16(uint32_t file, ir::Register reg) {
+++      return retype(vec16(file, reg), GEN_TYPE_DF);
+++    }
+++
+++    static INLINE GenRegister df8(uint32_t file, ir::Register reg) {
+++      return retype(vec8(file, reg), GEN_TYPE_DF);
+++    }
+++
+++    static INLINE GenRegister df1(uint32_t file, ir::Register reg) {
+++      return retype(vec1(file, reg), GEN_TYPE_DF);
+++    }
+++
++     static INLINE GenRegister ud16(uint32_t file, ir::Register reg) {
++       return retype(vec16(file, reg), GEN_TYPE_UD);
++     }
++@@ -371,6 +410,12 @@
++                          GEN_HORIZONTAL_STRIDE_0);
++     }
++ 
+++    static INLINE GenRegister immdf(double df) {
+++      GenRegister immediate = imm(GEN_TYPE_DF);
+++      immediate.value.df = df;
+++      return immediate;
+++    }
+++
++     static INLINE GenRegister immf(float f) {
++       GenRegister immediate = imm(GEN_TYPE_F);
++       immediate.value.f = f;
++@@ -448,6 +493,18 @@
++       return vec16(GEN_GENERAL_REGISTER_FILE, reg);
++     }
++ 
+++    static INLINE GenRegister df1grf(ir::Register reg) {
+++      return df1(GEN_GENERAL_REGISTER_FILE, reg);
+++    }
+++
+++    static INLINE GenRegister df8grf(ir::Register reg) {
+++      return df8(GEN_GENERAL_REGISTER_FILE, reg);
+++    }
+++
+++    static INLINE GenRegister df16grf(ir::Register reg) {
+++      return df16(GEN_GENERAL_REGISTER_FILE, reg);
+++    }
+++
++     static INLINE GenRegister ud16grf(ir::Register reg) {
++       return ud16(GEN_GENERAL_REGISTER_FILE, reg);
++     }
++@@ -609,10 +666,26 @@
++     }
++ 
++     static INLINE GenRegister suboffset(GenRegister reg, uint32_t delta) {
++-      reg.subnr += delta * typeSize(reg.type);
+++      if (reg.hstride != GEN_HORIZONTAL_STRIDE_0) {
+++        reg.subnr += delta * typeSize(reg.type);
+++        reg.nr += reg.subnr / 32;
+++        reg.subnr %= 32;
+++      }
++       return reg;
++     }
++ 
+++    static INLINE GenRegister df16(uint32_t file, uint32_t nr, uint32_t subnr) {
+++      return retype(vec16(file, nr, subnr), GEN_TYPE_DF);
+++    }
+++
+++    static INLINE GenRegister df8(uint32_t file, uint32_t nr, uint32_t subnr) {
+++      return retype(vec8(file, nr, subnr), GEN_TYPE_DF);
+++    }
+++
+++    static INLINE GenRegister df1(uint32_t file, uint32_t nr, uint32_t subnr) {
+++      return retype(vec1(file, nr, subnr), GEN_TYPE_DF);
+++    }
+++
++     static INLINE GenRegister ud16(uint32_t file, uint32_t nr, uint32_t subnr) {
++       return retype(vec16(file, nr, subnr), GEN_TYPE_UD);
++     }
++@@ -685,6 +758,18 @@
++       return vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
++     }
++ 
+++    static INLINE GenRegister df16grf(uint32_t nr, uint32_t subnr) {
+++      return df16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+++    }
+++
+++    static INLINE GenRegister df8grf(uint32_t nr, uint32_t subnr) {
+++      return df8(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+++    }
+++
+++    static INLINE GenRegister df1grf(uint32_t nr, uint32_t subnr) {
+++      return df1(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+++    }
+++
++     static INLINE GenRegister ud16grf(uint32_t nr, uint32_t subnr) {
++       return ud16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
++     }
++@@ -790,6 +875,7 @@
++         return SIMD1(values...); \
++       } \
++     }
+++    DECL_REG_ENCODER(dfxgrf, df16grf, df8grf, df1grf);
++     DECL_REG_ENCODER(fxgrf, f16grf, f8grf, f1grf);
++     DECL_REG_ENCODER(uwxgrf, uw16grf, uw8grf, uw1grf);
++     DECL_REG_ENCODER(udxgrf, ud16grf, ud8grf, ud1grf);
diff --cc debian/patches/0007-test-case-for-64-bit-float.patch
index 0000000,0000000..766b885
new file mode 100644
--- /dev/null
+++ b/debian/patches/0007-test-case-for-64-bit-float.patch
@@@ -1,0 -1,0 +1,159 @@@
++From 54eac0d43d1db154d77070bcbf226880e014c30f Mon Sep 17 00:00:00 2001
++From: Homer Hsing <homer.xing at intel.com>
++Date: Wed, 19 Jun 2013 12:45:52 +0800
++Subject: [PATCH 07/12] test case for 64-bit float
++To: beignet at lists.freedesktop.org
++
++Signed-off-by: Homer Hsing <homer.xing at intel.com>
++---
++ kernels/compiler_double.cl   |    7 +++++++
++ kernels/compiler_double_2.cl |    7 +++++++
++ utests/CMakeLists.txt        |    2 ++
++ utests/compiler_double.cpp   |   46 +++++++++++++++++++++++++++++++++++++++++
++ utests/compiler_double_2.cpp |   47 ++++++++++++++++++++++++++++++++++++++++++
++ 5 files changed, 109 insertions(+)
++ create mode 100644 kernels/compiler_double.cl
++ create mode 100644 kernels/compiler_double_2.cl
++ create mode 100644 utests/compiler_double.cpp
++ create mode 100644 utests/compiler_double_2.cpp
++
++Index: beignet-0.1+git20130619+42967d2/kernels/compiler_double.cl
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/kernels/compiler_double.cl	2013-06-19 21:04:47.534666326 +0200
++@@ -0,0 +1,7 @@
+++#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+++kernel void compiler_double(global double *src, global double *dst) {
+++  int i = get_global_id(0);
+++  double d = 1.234567890123456789;
+++  dst[i] = d * (src[i] + d);
+++}
+++
++Index: beignet-0.1+git20130619+42967d2/kernels/compiler_double_2.cl
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/kernels/compiler_double_2.cl	2013-06-19 21:04:47.534666326 +0200
++@@ -0,0 +1,7 @@
+++#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+++kernel void compiler_double_2(global float *src, global double *dst) {
+++  int i = get_global_id(0);
+++  float d = 1.234567890123456789f;
+++  dst[i] = d * (d + src[i]);
+++}
+++
++Index: beignet-0.1+git20130619+42967d2/utests/CMakeLists.txt
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/utests/CMakeLists.txt	2013-06-19 21:04:43.270666516 +0200
+++++ beignet-0.1+git20130619+42967d2/utests/CMakeLists.txt	2013-06-19 21:04:47.534666326 +0200
++@@ -27,6 +27,8 @@
++   compiler_copy_image.cpp
++   compiler_copy_image_3d.cpp
++   compiler_copy_buffer_row.cpp
+++  compiler_double.cpp
+++  compiler_double_2.cpp
++   compiler_fabs.cpp
++   compiler_fill_image.cpp
++   compiler_fill_image0.cpp
++Index: beignet-0.1+git20130619+42967d2/utests/compiler_double.cpp
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/utests/compiler_double.cpp	2013-06-19 21:04:47.534666326 +0200
++@@ -0,0 +1,46 @@
+++#include <cmath>
+++#include "utest_helper.hpp"
+++
+++static void cpu(int global_id, double *src, double *dst) {
+++  double f = src[global_id];
+++  double d = 1.234567890123456789;
+++  dst[global_id] = d * (f + d);
+++}
+++
+++void compiler_double(void)
+++{
+++  const size_t n = 16;
+++  double cpu_dst[n], cpu_src[n];
+++
+++  // Setup kernel and buffers
+++  OCL_CREATE_KERNEL("compiler_double");
+++  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL);
+++  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL);
+++  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+++  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+++  globals[0] = n;
+++  locals[0] = 16;
+++
+++  // Run random tests
+++  for (uint32_t pass = 0; pass < 1; ++pass) {
+++    OCL_MAP_BUFFER(0);
+++    for (int32_t i = 0; i < (int32_t) n; ++i)
+++      cpu_src[i] = ((double*)buf_data[0])[i] = .1f * (rand() & 15) - .75f;
+++    OCL_UNMAP_BUFFER(0);
+++
+++    // Run the kernel on GPU
+++    OCL_NDRANGE(1);
+++
+++    // Run on CPU
+++    for (int32_t i = 0; i < (int32_t) n; ++i)
+++      cpu(i, cpu_src, cpu_dst);
+++
+++    // Compare
+++    OCL_MAP_BUFFER(1);
+++    for (int32_t i = 0; i < (int32_t) n; ++i)
+++      OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4);
+++    OCL_UNMAP_BUFFER(1);
+++  }
+++}
+++
+++MAKE_UTEST_FROM_FUNCTION(compiler_double);
++Index: beignet-0.1+git20130619+42967d2/utests/compiler_double_2.cpp
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130619+42967d2/utests/compiler_double_2.cpp	2013-06-19 21:04:47.534666326 +0200
++@@ -0,0 +1,47 @@
+++#include <cmath>
+++#include "utest_helper.hpp"
+++
+++static void cpu(int global_id, float *src, double *dst) {
+++  float f = src[global_id];
+++  float d = 1.234567890123456789;
+++  dst[global_id] = d * (d + f);
+++}
+++
+++void compiler_double_2(void)
+++{
+++  const size_t n = 16;
+++  float cpu_src[n];
+++  double cpu_dst[n];
+++
+++  // Setup kernel and buffers
+++  OCL_CREATE_KERNEL("compiler_double_2");
+++  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+++  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL);
+++  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+++  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+++  globals[0] = n;
+++  locals[0] = 16;
+++
+++  // Run random tests
+++  for (uint32_t pass = 0; pass < 1; ++pass) {
+++    OCL_MAP_BUFFER(0);
+++    for (int32_t i = 0; i < (int32_t) n; ++i)
+++      cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f;
+++    OCL_UNMAP_BUFFER(0);
+++
+++    // Run the kernel on GPU
+++    OCL_NDRANGE(1);
+++
+++    // Run on CPU
+++    for (int32_t i = 0; i < (int32_t) n; ++i)
+++      cpu(i, cpu_src, cpu_dst);
+++
+++    // Compare
+++    OCL_MAP_BUFFER(1);
+++    for (int32_t i = 0; i < (int32_t) n; ++i)
+++      OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4);
+++    OCL_UNMAP_BUFFER(1);
+++  }
+++}
+++
+++MAKE_UTEST_FROM_FUNCTION(compiler_double_2);
diff --cc debian/patches/0008-Readd-OpenCL-1.2-definitions-required-for-ICD.patch
index 0000000,0000000..2cb9ac2
new file mode 100644
--- /dev/null
+++ b/debian/patches/0008-Readd-OpenCL-1.2-definitions-required-for-ICD.patch
@@@ -1,0 -1,0 +1,95 @@@
++From 4d8c1ce21b4ac760c994401e30294b782a9492a9 Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Wed, 19 Jun 2013 11:30:36 +0200
++Subject: [PATCH 08/12] Readd OpenCL 1.2 definitions required for ICD
++To: beignet at lists.freedesktop.org
++
++The definition for the ICD dispatch table requires a few additional
++definitions from OpenCL 1.2.
++---
++ include/CL/cl.h          |   15 +++++++++++++++
++ include/CL/cl_platform.h |    2 ++
++ src/cl_mem.h             |   12 ------------
++ 3 files changed, 17 insertions(+), 12 deletions(-)
++
++diff --git a/include/CL/cl.h b/include/CL/cl.h
++index 4355e74..a7f25d1 100644
++--- a/include/CL/cl.h
+++++ b/include/CL/cl.h
++@@ -67,6 +67,7 @@ typedef cl_uint             cl_channel_type;
++ typedef cl_bitfield         cl_mem_flags;
++ typedef cl_uint             cl_mem_object_type;
++ typedef cl_uint             cl_mem_info;
+++typedef cl_bitfield         cl_mem_migration_flags;
++ typedef cl_uint             cl_image_info;
++ typedef cl_uint             cl_buffer_create_type;
++ typedef cl_uint             cl_addressing_mode;
++@@ -75,8 +76,10 @@ typedef cl_uint             cl_sampler_info;
++ typedef cl_bitfield         cl_map_flags;
++ typedef cl_uint             cl_program_info;
++ typedef cl_uint             cl_program_build_info;
+++typedef intptr_t            cl_device_partition_property;
++ typedef cl_int              cl_build_status;
++ typedef cl_uint             cl_kernel_info;
+++typedef cl_uint             cl_kernel_arg_info;
++ typedef cl_uint             cl_kernel_work_group_info;
++ typedef cl_uint             cl_event_info;
++ typedef cl_uint             cl_command_type;
++@@ -87,6 +90,18 @@ typedef struct _cl_image_format {
++     cl_channel_type         image_channel_data_type;
++ } cl_image_format;
++ 
+++typedef struct _cl_image_desc {
+++    cl_mem_object_type      image_type;
+++    size_t                  image_width;
+++    size_t                  image_height;
+++    size_t                  image_depth;
+++    size_t                  image_array_size;
+++    size_t                  image_row_pitch;
+++    size_t                  image_slice_pitch;
+++    cl_uint                 num_mip_levels;
+++    cl_uint                 num_samples;
+++    cl_mem                  buffer;
+++} cl_image_desc;
++ 
++ typedef struct _cl_buffer_region {
++     size_t                  origin;
++diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h
++index 043b048..9a2f17a 100644
++--- a/include/CL/cl_platform.h
+++++ b/include/CL/cl_platform.h
++@@ -58,6 +58,8 @@ extern "C" {
++     #define CL_EXT_SUFFIX__VERSION_1_0
++     #define CL_API_SUFFIX__VERSION_1_1
++     #define CL_EXT_SUFFIX__VERSION_1_1
+++    #define CL_API_SUFFIX__VERSION_1_2
+++    #define CL_EXT_SUFFIX__VERSION_1_2
++     #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
++ #endif
++ 
++diff --git a/src/cl_mem.h b/src/cl_mem.h
++index 33ad174..66815fe 100644
++--- a/src/cl_mem.h
+++++ b/src/cl_mem.h
++@@ -29,18 +29,6 @@
++ #define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
++ #define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
++ #define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
++-typedef struct _cl_image_desc {
++-    cl_mem_object_type      image_type;
++-    size_t                  image_width;
++-    size_t                  image_height;
++-    size_t                  image_depth;
++-    size_t                  image_array_size;
++-    size_t                  image_row_pitch;
++-    size_t                  image_slice_pitch;
++-    cl_uint                 num_mip_levels;
++-    cl_uint                 num_samples;
++-    cl_mem                  buffer;
++-} cl_image_desc;
++ #endif
++ 
++ typedef enum cl_image_tiling {
++-- 
++1.7.10.4
++
diff --cc debian/patches/0009-Enable-cl_khr_fp64-extension-for-OpenCL-stdlib-heade.patch
index 0000000,0000000..cd81790
new file mode 100644
--- /dev/null
+++ b/debian/patches/0009-Enable-cl_khr_fp64-extension-for-OpenCL-stdlib-heade.patch
@@@ -1,0 -1,0 +1,33 @@@
++From 80907affc6d24f3580dd59f75d4203df491846ae Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Wed, 19 Jun 2013 11:42:40 +0200
++Subject: [PATCH 09/12] Enable cl_khr_fp64 extension for OpenCL stdlib header
++To: beignet at lists.freedesktop.org
++
++This allows the stdlib header to define overloads for doubles.
++---
++ backend/src/ocl_stdlib.h |    4 ++++
++ 1 file changed, 4 insertions(+)
++
++Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:40.602666635 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:48.974666262 +0200
++@@ -53,6 +53,8 @@
++ #define private __private
++ #endif
++ 
+++#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+++
++ /////////////////////////////////////////////////////////////////////////////
++ // OpenCL built-in vector data types
++ /////////////////////////////////////////////////////////////////////////////
++@@ -4430,6 +4432,8 @@
++ DECL_IMAGE(uint4, ui)
++ DECL_IMAGE(float4, f)
++ 
+++#pragma OPENCL EXTENSION cl_khr_fp64 : disable
+++
++ #undef DECL_IMAGE
++ #undef DECL_READ_IMAGE
++ #undef DECL_READ_IMAGE_NOSAMPLER
diff --cc debian/patches/0010-Define-double-vector-types.patch
index 0000000,0000000..c042fb8
new file mode 100644
--- /dev/null
+++ b/debian/patches/0010-Define-double-vector-types.patch
@@@ -1,0 -1,0 +1,23 @@@
++From e8d0c6882addfc71dbc9cf3f973d86c24f73c133 Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Wed, 19 Jun 2013 11:44:22 +0200
++Subject: [PATCH 10/12] Define double vector types
++To: beignet at lists.freedesktop.org
++
++Add the definition for the "doubleN" vector types
++---
++ backend/src/ocl_stdlib.h |    1 +
++ 1 file changed, 1 insertion(+)
++
++Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:48.974666262 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:50.314666202 +0200
++@@ -72,6 +72,7 @@
++ DEF(long);
++ DEF(ulong);
++ DEF(float);
+++DEF(double);
++ #undef DEF
++ /////////////////////////////////////////////////////////////////////////////
++ // OpenCL other built-in data types
diff --cc debian/patches/0011-Enable-generation-of-convert_-and-as_-functions-for-.patch
index 0000000,0000000..26c641b
new file mode 100644
--- /dev/null
+++ b/debian/patches/0011-Enable-generation-of-convert_-and-as_-functions-for-.patch
@@@ -1,0 -1,0 +1,1430 @@@
++From 1b5cfb96810aec74acd09bd01530b59de5155b28 Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Wed, 19 Jun 2013 11:45:15 +0200
++Subject: [PATCH 11/12] Enable generation of convert_ and as_ functions for
++ double
++To: beignet at lists.freedesktop.org
++
++---
++ backend/src/genconfig.sh |    2 +-
++ backend/src/ocl_stdlib.h |  774 ++++++++++++++++++++++++++++++++++++++++++++++
++ 2 files changed, 775 insertions(+), 1 deletion(-)
++
++Index: beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/genconfig.sh	2013-06-19 21:04:40.598666635 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/genconfig.sh	2013-06-19 21:04:51.674666141 +0200
++@@ -2,7 +2,7 @@
++ # This is to be sourced by the generation scripts
++ 
++ # Supported base types and their lengths
++-TYPES="long:8 ulong:8 int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
+++TYPES="long:8 ulong:8 int:4 uint:4 short:2 ushort:2 char:1 uchar:1 double:8 float:4"
++ 
++ # Supported vector lengths
++ VECTOR_LENGTHS="1 2 3 4 8 16"
++Index: beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/ocl_stdlib.h	2013-06-19 21:04:50.314666202 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/ocl_stdlib.h	2013-06-19 21:04:51.678666141 +0200
++@@ -497,6 +497,7 @@
++   ushort4 _ushort4;
++   char8 _char8;
++   uchar8 _uchar8;
+++  double _double;
++   float2 _float2;
++ };
++ 
++@@ -542,6 +543,12 @@
++   return u._uchar8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._double;
+++}
+++
++ INLINE OVERLOADABLE float2 as_float2(long v) {
++   union _type_cast_8_b u;
++   u._long = v;
++@@ -590,6 +597,12 @@
++   return u._uchar8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._double;
+++}
+++
++ INLINE OVERLOADABLE float2 as_float2(ulong v) {
++   union _type_cast_8_b u;
++   u._ulong = v;
++@@ -638,6 +651,12 @@
++   return u._uchar8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._double;
+++}
+++
++ INLINE OVERLOADABLE float2 as_float2(int2 v) {
++   union _type_cast_8_b u;
++   u._int2 = v;
++@@ -686,6 +705,12 @@
++   return u._uchar8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._double;
+++}
+++
++ INLINE OVERLOADABLE float2 as_float2(uint2 v) {
++   union _type_cast_8_b u;
++   u._uint2 = v;
++@@ -734,6 +759,12 @@
++   return u._uchar8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._double;
+++}
+++
++ INLINE OVERLOADABLE float2 as_float2(short4 v) {
++   union _type_cast_8_b u;
++   u._short4 = v;
++@@ -782,6 +813,12 @@
++   return u._uchar8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._double;
+++}
+++
++ INLINE OVERLOADABLE float2 as_float2(ushort4 v) {
++   union _type_cast_8_b u;
++   u._ushort4 = v;
++@@ -830,6 +867,12 @@
++   return u._uchar8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._double;
+++}
+++
++ INLINE OVERLOADABLE float2 as_float2(char8 v) {
++   union _type_cast_8_b u;
++   u._char8 = v;
++@@ -878,12 +921,72 @@
++   return u._char8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._double;
+++}
+++
++ INLINE OVERLOADABLE float2 as_float2(uchar8 v) {
++   union _type_cast_8_b u;
++   u._uchar8 = v;
++   return u._float2;
++ }
++ 
+++INLINE OVERLOADABLE long as_long(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE ulong as_ulong(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._ulong;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE uint2 as_uint2(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._uchar8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(double v) {
+++  union _type_cast_8_b u;
+++  u._double = v;
+++  return u._float2;
+++}
+++
++ INLINE OVERLOADABLE long as_long(float2 v) {
++   union _type_cast_8_b u;
++   u._float2 = v;
++@@ -932,6 +1035,12 @@
++   return u._uchar8;
++ }
++ 
+++INLINE OVERLOADABLE double as_double(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._double;
+++}
+++
++ union _type_cast_12_b {
++   int3 _int3;
++   uint3 _uint3;
++@@ -983,6 +1092,7 @@
++   ushort8 _ushort8;
++   char16 _char16;
++   uchar16 _uchar16;
+++  double2 _double2;
++   float4 _float4;
++ };
++ 
++@@ -1028,6 +1138,12 @@
++   return u._uchar16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._double2;
+++}
+++
++ INLINE OVERLOADABLE float4 as_float4(long2 v) {
++   union _type_cast_16_b u;
++   u._long2 = v;
++@@ -1076,6 +1192,12 @@
++   return u._uchar16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._double2;
+++}
+++
++ INLINE OVERLOADABLE float4 as_float4(ulong2 v) {
++   union _type_cast_16_b u;
++   u._ulong2 = v;
++@@ -1124,6 +1246,12 @@
++   return u._uchar16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._double2;
+++}
+++
++ INLINE OVERLOADABLE float4 as_float4(int4 v) {
++   union _type_cast_16_b u;
++   u._int4 = v;
++@@ -1172,6 +1300,12 @@
++   return u._uchar16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._double2;
+++}
+++
++ INLINE OVERLOADABLE float4 as_float4(uint4 v) {
++   union _type_cast_16_b u;
++   u._uint4 = v;
++@@ -1220,6 +1354,12 @@
++   return u._uchar16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._double2;
+++}
+++
++ INLINE OVERLOADABLE float4 as_float4(short8 v) {
++   union _type_cast_16_b u;
++   u._short8 = v;
++@@ -1268,6 +1408,12 @@
++   return u._uchar16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._double2;
+++}
+++
++ INLINE OVERLOADABLE float4 as_float4(ushort8 v) {
++   union _type_cast_16_b u;
++   u._ushort8 = v;
++@@ -1316,6 +1462,12 @@
++   return u._uchar16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._double2;
+++}
+++
++ INLINE OVERLOADABLE float4 as_float4(char16 v) {
++   union _type_cast_16_b u;
++   u._char16 = v;
++@@ -1364,12 +1516,72 @@
++   return u._char16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._double2;
+++}
+++
++ INLINE OVERLOADABLE float4 as_float4(uchar16 v) {
++   union _type_cast_16_b u;
++   u._uchar16 = v;
++   return u._float4;
++ }
++ 
+++INLINE OVERLOADABLE long2 as_long2(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE ulong2 as_ulong2(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._ulong2;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE uint4 as_uint4(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._uchar16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(double2 v) {
+++  union _type_cast_16_b u;
+++  u._double2 = v;
+++  return u._float4;
+++}
+++
++ INLINE OVERLOADABLE long2 as_long2(float4 v) {
++   union _type_cast_16_b u;
++   u._float4 = v;
++@@ -1418,9 +1630,16 @@
++   return u._uchar16;
++ }
++ 
+++INLINE OVERLOADABLE double2 as_double2(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._double2;
+++}
+++
++ union _type_cast_24_b {
++   long3 _long3;
++   ulong3 _ulong3;
+++  double3 _double3;
++ };
++ 
++ INLINE OVERLOADABLE ulong3 as_ulong3(long3 v) {
++@@ -1429,12 +1648,36 @@
++   return u._ulong3;
++ }
++ 
+++INLINE OVERLOADABLE double3 as_double3(long3 v) {
+++  union _type_cast_24_b u;
+++  u._long3 = v;
+++  return u._double3;
+++}
+++
++ INLINE OVERLOADABLE long3 as_long3(ulong3 v) {
++   union _type_cast_24_b u;
++   u._ulong3 = v;
++   return u._long3;
++ }
++ 
+++INLINE OVERLOADABLE double3 as_double3(ulong3 v) {
+++  union _type_cast_24_b u;
+++  u._ulong3 = v;
+++  return u._double3;
+++}
+++
+++INLINE OVERLOADABLE long3 as_long3(double3 v) {
+++  union _type_cast_24_b u;
+++  u._double3 = v;
+++  return u._long3;
+++}
+++
+++INLINE OVERLOADABLE ulong3 as_ulong3(double3 v) {
+++  union _type_cast_24_b u;
+++  u._double3 = v;
+++  return u._ulong3;
+++}
+++
++ union _type_cast_32_b {
++   long4 _long4;
++   ulong4 _ulong4;
++@@ -1442,6 +1685,7 @@
++   uint8 _uint8;
++   short16 _short16;
++   ushort16 _ushort16;
+++  double4 _double4;
++   float8 _float8;
++ };
++ 
++@@ -1475,6 +1719,12 @@
++   return u._ushort16;
++ }
++ 
+++INLINE OVERLOADABLE double4 as_double4(long4 v) {
+++  union _type_cast_32_b u;
+++  u._long4 = v;
+++  return u._double4;
+++}
+++
++ INLINE OVERLOADABLE float8 as_float8(long4 v) {
++   union _type_cast_32_b u;
++   u._long4 = v;
++@@ -1511,6 +1761,12 @@
++   return u._ushort16;
++ }
++ 
+++INLINE OVERLOADABLE double4 as_double4(ulong4 v) {
+++  union _type_cast_32_b u;
+++  u._ulong4 = v;
+++  return u._double4;
+++}
+++
++ INLINE OVERLOADABLE float8 as_float8(ulong4 v) {
++   union _type_cast_32_b u;
++   u._ulong4 = v;
++@@ -1547,6 +1803,12 @@
++   return u._ushort16;
++ }
++ 
+++INLINE OVERLOADABLE double4 as_double4(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._double4;
+++}
+++
++ INLINE OVERLOADABLE float8 as_float8(int8 v) {
++   union _type_cast_32_b u;
++   u._int8 = v;
++@@ -1583,6 +1845,12 @@
++   return u._ushort16;
++ }
++ 
+++INLINE OVERLOADABLE double4 as_double4(uint8 v) {
+++  union _type_cast_32_b u;
+++  u._uint8 = v;
+++  return u._double4;
+++}
+++
++ INLINE OVERLOADABLE float8 as_float8(uint8 v) {
++   union _type_cast_32_b u;
++   u._uint8 = v;
++@@ -1619,6 +1887,12 @@
++   return u._ushort16;
++ }
++ 
+++INLINE OVERLOADABLE double4 as_double4(short16 v) {
+++  union _type_cast_32_b u;
+++  u._short16 = v;
+++  return u._double4;
+++}
+++
++ INLINE OVERLOADABLE float8 as_float8(short16 v) {
++   union _type_cast_32_b u;
++   u._short16 = v;
++@@ -1655,12 +1929,60 @@
++   return u._short16;
++ }
++ 
+++INLINE OVERLOADABLE double4 as_double4(ushort16 v) {
+++  union _type_cast_32_b u;
+++  u._ushort16 = v;
+++  return u._double4;
+++}
+++
++ INLINE OVERLOADABLE float8 as_float8(ushort16 v) {
++   union _type_cast_32_b u;
++   u._ushort16 = v;
++   return u._float8;
++ }
++ 
+++INLINE OVERLOADABLE long4 as_long4(double4 v) {
+++  union _type_cast_32_b u;
+++  u._double4 = v;
+++  return u._long4;
+++}
+++
+++INLINE OVERLOADABLE ulong4 as_ulong4(double4 v) {
+++  union _type_cast_32_b u;
+++  u._double4 = v;
+++  return u._ulong4;
+++}
+++
+++INLINE OVERLOADABLE int8 as_int8(double4 v) {
+++  union _type_cast_32_b u;
+++  u._double4 = v;
+++  return u._int8;
+++}
+++
+++INLINE OVERLOADABLE uint8 as_uint8(double4 v) {
+++  union _type_cast_32_b u;
+++  u._double4 = v;
+++  return u._uint8;
+++}
+++
+++INLINE OVERLOADABLE short16 as_short16(double4 v) {
+++  union _type_cast_32_b u;
+++  u._double4 = v;
+++  return u._short16;
+++}
+++
+++INLINE OVERLOADABLE ushort16 as_ushort16(double4 v) {
+++  union _type_cast_32_b u;
+++  u._double4 = v;
+++  return u._ushort16;
+++}
+++
+++INLINE OVERLOADABLE float8 as_float8(double4 v) {
+++  union _type_cast_32_b u;
+++  u._double4 = v;
+++  return u._float8;
+++}
+++
++ INLINE OVERLOADABLE long4 as_long4(float8 v) {
++   union _type_cast_32_b u;
++   u._float8 = v;
++@@ -1697,11 +2019,18 @@
++   return u._ushort16;
++ }
++ 
+++INLINE OVERLOADABLE double4 as_double4(float8 v) {
+++  union _type_cast_32_b u;
+++  u._float8 = v;
+++  return u._double4;
+++}
+++
++ union _type_cast_64_b {
++   long8 _long8;
++   ulong8 _ulong8;
++   int16 _int16;
++   uint16 _uint16;
+++  double8 _double8;
++   float16 _float16;
++ };
++ 
++@@ -1723,6 +2052,12 @@
++   return u._uint16;
++ }
++ 
+++INLINE OVERLOADABLE double8 as_double8(long8 v) {
+++  union _type_cast_64_b u;
+++  u._long8 = v;
+++  return u._double8;
+++}
+++
++ INLINE OVERLOADABLE float16 as_float16(long8 v) {
++   union _type_cast_64_b u;
++   u._long8 = v;
++@@ -1747,6 +2082,12 @@
++   return u._uint16;
++ }
++ 
+++INLINE OVERLOADABLE double8 as_double8(ulong8 v) {
+++  union _type_cast_64_b u;
+++  u._ulong8 = v;
+++  return u._double8;
+++}
+++
++ INLINE OVERLOADABLE float16 as_float16(ulong8 v) {
++   union _type_cast_64_b u;
++   u._ulong8 = v;
++@@ -1771,6 +2112,12 @@
++   return u._uint16;
++ }
++ 
+++INLINE OVERLOADABLE double8 as_double8(int16 v) {
+++  union _type_cast_64_b u;
+++  u._int16 = v;
+++  return u._double8;
+++}
+++
++ INLINE OVERLOADABLE float16 as_float16(int16 v) {
++   union _type_cast_64_b u;
++   u._int16 = v;
++@@ -1795,12 +2142,48 @@
++   return u._int16;
++ }
++ 
+++INLINE OVERLOADABLE double8 as_double8(uint16 v) {
+++  union _type_cast_64_b u;
+++  u._uint16 = v;
+++  return u._double8;
+++}
+++
++ INLINE OVERLOADABLE float16 as_float16(uint16 v) {
++   union _type_cast_64_b u;
++   u._uint16 = v;
++   return u._float16;
++ }
++ 
+++INLINE OVERLOADABLE long8 as_long8(double8 v) {
+++  union _type_cast_64_b u;
+++  u._double8 = v;
+++  return u._long8;
+++}
+++
+++INLINE OVERLOADABLE ulong8 as_ulong8(double8 v) {
+++  union _type_cast_64_b u;
+++  u._double8 = v;
+++  return u._ulong8;
+++}
+++
+++INLINE OVERLOADABLE int16 as_int16(double8 v) {
+++  union _type_cast_64_b u;
+++  u._double8 = v;
+++  return u._int16;
+++}
+++
+++INLINE OVERLOADABLE uint16 as_uint16(double8 v) {
+++  union _type_cast_64_b u;
+++  u._double8 = v;
+++  return u._uint16;
+++}
+++
+++INLINE OVERLOADABLE float16 as_float16(double8 v) {
+++  union _type_cast_64_b u;
+++  u._double8 = v;
+++  return u._float16;
+++}
+++
++ INLINE OVERLOADABLE long8 as_long8(float16 v) {
++   union _type_cast_64_b u;
++   u._float16 = v;
++@@ -1825,9 +2208,16 @@
++   return u._uint16;
++ }
++ 
+++INLINE OVERLOADABLE double8 as_double8(float16 v) {
+++  union _type_cast_64_b u;
+++  u._float16 = v;
+++  return u._double8;
+++}
+++
++ union _type_cast_128_b {
++   long16 _long16;
++   ulong16 _ulong16;
+++  double16 _double16;
++ };
++ 
++ INLINE OVERLOADABLE ulong16 as_ulong16(long16 v) {
++@@ -1836,12 +2226,36 @@
++   return u._ulong16;
++ }
++ 
+++INLINE OVERLOADABLE double16 as_double16(long16 v) {
+++  union _type_cast_128_b u;
+++  u._long16 = v;
+++  return u._double16;
+++}
+++
++ INLINE OVERLOADABLE long16 as_long16(ulong16 v) {
++   union _type_cast_128_b u;
++   u._ulong16 = v;
++   return u._long16;
++ }
++ 
+++INLINE OVERLOADABLE double16 as_double16(ulong16 v) {
+++  union _type_cast_128_b u;
+++  u._ulong16 = v;
+++  return u._double16;
+++}
+++
+++INLINE OVERLOADABLE long16 as_long16(double16 v) {
+++  union _type_cast_128_b u;
+++  u._double16 = v;
+++  return u._long16;
+++}
+++
+++INLINE OVERLOADABLE ulong16 as_ulong16(double16 v) {
+++  union _type_cast_128_b u;
+++  u._double16 = v;
+++  return u._ulong16;
+++}
+++
++ // ##END_AS##
++ 
++ // ##BEGIN_CONVERT##
++@@ -1873,6 +2287,10 @@
++   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(long2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE float2 convert_float2(long2 v) {
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++@@ -1905,6 +2323,10 @@
++   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(ulong2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE float2 convert_float2(ulong2 v) {
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++@@ -1937,6 +2359,10 @@
++   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(int2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE float2 convert_float2(int2 v) {
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++@@ -1969,6 +2395,10 @@
++   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(uint2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE float2 convert_float2(uint2 v) {
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++@@ -2001,6 +2431,10 @@
++   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(short2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE float2 convert_float2(short2 v) {
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++@@ -2033,6 +2467,10 @@
++   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(ushort2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE float2 convert_float2(ushort2 v) {
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++@@ -2065,6 +2503,10 @@
++   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(char2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE float2 convert_float2(char2 v) {
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++@@ -2097,10 +2539,50 @@
++   return (char2)((char)(v.s0), (char)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(uchar2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE float2 convert_float2(uchar2 v) {
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE long2 convert_long2(double2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(double2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(double2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint2 convert_uint2(double2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(double2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(double2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(double2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(double2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(double2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE long2 convert_long2(float2 v) {
++   return (long2)((long)(v.s0), (long)(v.s1));
++ }
++@@ -2133,6 +2615,10 @@
++   return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE double2 convert_double2(float2 v) {
+++  return (double2)((double)(v.s0), (double)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE ulong3 convert_ulong3(long3 v) {
++   return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++ }
++@@ -2161,6 +2647,10 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(long3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE float3 convert_float3(long3 v) {
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++@@ -2193,6 +2683,10 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(ulong3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE float3 convert_float3(ulong3 v) {
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++@@ -2225,6 +2719,10 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(int3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE float3 convert_float3(int3 v) {
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++@@ -2257,6 +2755,10 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(uint3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE float3 convert_float3(uint3 v) {
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++@@ -2289,6 +2791,10 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(short3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE float3 convert_float3(short3 v) {
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++@@ -2321,6 +2827,10 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(ushort3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE float3 convert_float3(ushort3 v) {
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++@@ -2353,6 +2863,10 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(char3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE float3 convert_float3(char3 v) {
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++@@ -2385,10 +2899,50 @@
++   return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(uchar3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE float3 convert_float3(uchar3 v) {
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE long3 convert_long3(double3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ulong3 convert_ulong3(double3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(double3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(double3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(double3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(double3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(double3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(double3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE float3 convert_float3(double3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE long3 convert_long3(float3 v) {
++   return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++ }
++@@ -2421,6 +2975,10 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE double3 convert_double3(float3 v) {
+++  return (double3)((double)(v.s0), (double)(v.s1), (double)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE ulong4 convert_ulong4(long4 v) {
++   return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++ }
++@@ -2449,6 +3007,10 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(long4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE float4 convert_float4(long4 v) {
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++@@ -2481,6 +3043,10 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(ulong4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE float4 convert_float4(ulong4 v) {
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++@@ -2513,6 +3079,10 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(int4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE float4 convert_float4(int4 v) {
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++@@ -2545,6 +3115,10 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(uint4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE float4 convert_float4(uint4 v) {
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++@@ -2577,6 +3151,10 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(short4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE float4 convert_float4(short4 v) {
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++@@ -2609,6 +3187,10 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(ushort4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE float4 convert_float4(ushort4 v) {
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++@@ -2641,6 +3223,10 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(char4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE float4 convert_float4(char4 v) {
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++@@ -2673,10 +3259,50 @@
++   return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(uchar4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE float4 convert_float4(uchar4 v) {
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE long4 convert_long4(double4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ulong4 convert_ulong4(double4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(double4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(double4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(double4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(double4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(double4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(double4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(double4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE long4 convert_long4(float4 v) {
++   return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++ }
++@@ -2709,6 +3335,10 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE double4 convert_double4(float4 v) {
+++  return (double4)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE ulong8 convert_ulong8(long8 v) {
++   return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++ }
++@@ -2737,6 +3367,10 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(long8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE float8 convert_float8(long8 v) {
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++@@ -2769,6 +3403,10 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(ulong8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE float8 convert_float8(ulong8 v) {
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++@@ -2801,6 +3439,10 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(int8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE float8 convert_float8(int8 v) {
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++@@ -2833,6 +3475,10 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(uint8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE float8 convert_float8(uint8 v) {
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++@@ -2865,6 +3511,10 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(short8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE float8 convert_float8(short8 v) {
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++@@ -2897,6 +3547,10 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(ushort8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE float8 convert_float8(ushort8 v) {
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++@@ -2929,6 +3583,10 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(char8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE float8 convert_float8(char8 v) {
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++@@ -2961,10 +3619,50 @@
++   return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(uchar8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE float8 convert_float8(uchar8 v) {
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE long8 convert_long8(double8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ulong8 convert_ulong8(double8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(double8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(double8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(double8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(double8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(double8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(double8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(double8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE long8 convert_long8(float8 v) {
++   return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++ }
++@@ -2997,6 +3695,10 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE double8 convert_double8(float8 v) {
+++  return (double8)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE ulong16 convert_ulong16(long16 v) {
++   return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++ }
++@@ -3025,6 +3727,10 @@
++   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(long16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE float16 convert_float16(long16 v) {
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++@@ -3057,6 +3763,10 @@
++   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(ulong16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE float16 convert_float16(ulong16 v) {
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++@@ -3089,6 +3799,10 @@
++   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(int16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE float16 convert_float16(int16 v) {
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++@@ -3121,6 +3835,10 @@
++   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(uint16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE float16 convert_float16(uint16 v) {
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++@@ -3153,6 +3871,10 @@
++   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(short16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE float16 convert_float16(short16 v) {
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++@@ -3185,6 +3907,10 @@
++   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(ushort16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE float16 convert_float16(ushort16 v) {
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++@@ -3217,6 +3943,10 @@
++   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(char16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE float16 convert_float16(char16 v) {
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++@@ -3249,10 +3979,50 @@
++   return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(uchar16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE float16 convert_float16(uchar16 v) {
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE long16 convert_long16(double16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ulong16 convert_ulong16(double16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(double16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(double16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(double16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(double16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(double16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(double16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(double16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE long16 convert_long16(float16 v) {
++   return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++ }
++@@ -3285,6 +4055,10 @@
++   return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE double16 convert_double16(float16 v) {
+++  return (double16)((double)(v.s0), (double)(v.s1), (double)(v.s2), (double)(v.s3), (double)(v.s4), (double)(v.s5), (double)(v.s6), (double)(v.s7), (double)(v.s8), (double)(v.s9), (double)(v.sA), (double)(v.sB), (double)(v.sC), (double)(v.sD), (double)(v.sE), (double)(v.sF));
+++}
+++
++ // ##END_CONVERT##
++ 
++ /////////////////////////////////////////////////////////////////////////////
diff --cc debian/patches/0012-GBE-Fixed-one-bug-in-scalarize-pass.patch
index 0000000,0000000..d0c8f6c
new file mode 100644
--- /dev/null
+++ b/debian/patches/0012-GBE-Fixed-one-bug-in-scalarize-pass.patch
@@@ -1,0 -1,0 +1,40 @@@
++From 4c2f4a53d5c9a9eca0b8d55586fa1d8f070faddf Mon Sep 17 00:00:00 2001
++From: Zhigang Gong <zhigang.gong at linux.intel.com>
++Date: Wed, 19 Jun 2013 18:36:30 +0800
++Subject: [PATCH 12/12] GBE: Fixed one bug in scalarize pass
++To: beignet at lists.freedesktop.org
++
++I hit a random segfault in void Scalarize::dce() when integrating
++an OpenCL kernel into Chromium's GPU process. After discussing with
++Yang Rong, I found a bug in this function. It uses two loops
++to erase the dead instructions, but it doesn't set the pointer to
++NULL in the first loop after it has erased the instruction. Thus
++in the second loop, when it calls (*i)->getParent(), (*i) may
++already be deleted, so it may refer to a freed region and cause
++a segfault.
++
++Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
++---
++ backend/src/llvm/llvm_scalarize.cpp |    6 ++++--
++ 1 file changed, 4 insertions(+), 2 deletions(-)
++
++Index: beignet-0.1+git20130619+42967d2/backend/src/llvm/llvm_scalarize.cpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/llvm/llvm_scalarize.cpp	2013-06-19 21:03:23.570670069 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/llvm/llvm_scalarize.cpp	2013-06-19 21:04:53.078666079 +0200
++@@ -825,11 +825,13 @@
++     //two passes delete for some phinode
++     for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
++       (*i)->dropAllReferences();
++-      if((*i)->use_empty())
+++      if((*i)->use_empty()) {
++         (*i)->eraseFromParent();
+++        (*i) = NULL;
+++      }
++     }
++     for (std::vector<Instruction*>::reverse_iterator i = deadList.rbegin(), e = deadList.rend(); i != e; ++i) {
++-      if((*i)->getParent())
+++      if((*i) && (*i)->getParent())
++         (*i)->eraseFromParent();
++     }
++     deadList.clear();
diff --cc debian/patches/debug
index d8e8997,0000000..b0f8843
mode 100644,000000..100644
--- a/debian/patches/debug
+++ b/debian/patches/debug
@@@ -1,16 -1,0 +1,16 @@@
 +Description: Enhance debug output
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-05-21
 +
- Index: beignet-0.1+git20130521+a7ea35c/src/cl_utils.h
++Index: beignet-0.1+git20130619+42967d2/src/cl_utils.h
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/src/cl_utils.h	2013-05-21 10:39:13.823946702 +0200
- +++ beignet-0.1+git20130521+a7ea35c/src/cl_utils.h	2013-05-21 10:39:20.751946393 +0200
++--- beignet-0.1+git20130619+42967d2.orig/src/cl_utils.h	2013-06-19 21:04:25.066667328 +0200
+++++ beignet-0.1+git20130619+42967d2/src/cl_utils.h	2013-06-19 21:04:28.066667194 +0200
 +@@ -80,6 +80,7 @@
 + 
 + #define FATAL(...)                                          \
 + do {                                                        \
 ++  fprintf(stderr, "in function %s:\n", __FUNCTION__);       \
 +   fprintf(stderr, "error: ");                               \
 +   fprintf(stderr, __VA_ARGS__);                             \
 +   fprintf(stderr, "\n");                                    \
diff --cc debian/patches/deprecated-in-utest
index 0000000,0000000..152a2cc
new file mode 100644
--- /dev/null
+++ b/debian/patches/deprecated-in-utest
@@@ -1,0 -1,0 +1,17 @@@
++Description: Utest requires deprecated function names
++Author: Simon Richter <sjr at debian.org>
++Last-Update: 2013-06-19
++
++Index: beignet-0.1+git20130619+42967d2/utests/utest_helper.hpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/utests/utest_helper.hpp	2013-06-19 21:04:23.714667388 +0200
+++++ beignet-0.1+git20130619+42967d2/utests/utest_helper.hpp	2013-06-19 21:04:35.066666882 +0200
++@@ -25,6 +25,8 @@
++ #ifndef __UTEST_HELPER_HPP__
++ #define __UTEST_HELPER_HPP__
++ 
+++#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
+++
++ #include "CL/cl.h"
++ #include "CL/cl_intel.h"
++ #include "utest.hpp"
diff --cc debian/patches/flags
index c90c1d4,0000000..ac207f3
mode 100644,000000..100644
--- a/debian/patches/flags
+++ b/debian/patches/flags
@@@ -1,94 -1,0 +1,94 @@@
 +Description: Debian compliant compiler flags handling
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-05-21
 +
- Index: beignet-0.1+git20130521+a7ea35c/CMakeLists.txt
++Index: beignet-0.1+git20130619+42967d2/CMakeLists.txt
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/CMakeLists.txt	2013-05-21 10:40:02.635944526 +0200
- +++ beignet-0.1+git20130521+a7ea35c/CMakeLists.txt	2013-05-21 10:40:37.351942978 +0200
++--- beignet-0.1+git20130619+42967d2.orig/CMakeLists.txt	2013-06-19 21:04:24.770667341 +0200
+++++ beignet-0.1+git20130619+42967d2/CMakeLists.txt	2013-06-19 21:04:31.958667020 +0200
 +@@ -18,7 +18,6 @@
 + 
 + INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
 + 
 +-SET(CMAKE_VERBOSE_MAKEFILE "false")
 + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/CMake/")
 + SET(EMULATE_IVB false CACHE BOOL "To emulate IVB")
 + SET(EMULATE_SNB false CACHE BOOL "To emulate SNB")
- Index: beignet-0.1+git20130521+a7ea35c/backend/CMakeLists.txt
++Index: beignet-0.1+git20130619+42967d2/backend/CMakeLists.txt
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/backend/CMakeLists.txt	2013-05-21 10:40:03.103944505 +0200
- +++ beignet-0.1+git20130521+a7ea35c/backend/CMakeLists.txt	2013-05-21 10:40:37.351942978 +0200
++--- beignet-0.1+git20130619+42967d2.orig/backend/CMakeLists.txt	2013-06-19 21:04:24.770667341 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/CMakeLists.txt	2013-06-19 21:04:31.958667020 +0200
 +@@ -45,39 +45,39 @@
 + if (COMPILER STREQUAL "GCC")
 +   set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -funroll-loops -Wstrict-aliasing=2 -fstrict-aliasing -msse2 -msse3 -mssse3 -msse4.1 -fPIC -Wall")
 +   set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS}  ${LLVM_CFLAGS}")
 +-  set (CMAKE_CXX_FLAGS "${CMAKE_C_CXX_FLAGS}  -Wno-invalid-offsetof -fno-rtti -std=c++0x")
 ++  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS}  -Wno-invalid-offsetof -fno-rtti -std=c++0x")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-E")
 +-  set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-undefined ${LLVM_LFLAGS}")
 +-  set (CMAKE_CXX_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_CXX_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_C_FLAGS "${CMAKE_C_CXX_FLAGS}")
 ++  set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined ${LLVM_LFLAGS}")
 ++  set (CMAKE_CXX_FLAGS_DEBUG          "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_CXX_FLAGS_RELEASE        "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_CXX_FLAGS}")
 +   set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
 +   set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
 +   set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,-E")
 +-  set (CMAKE_C_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
 +-  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
 +-  set (CMAKE_C_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_C_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS_DEBUG          "-DGBE_DEBUG=1")
 ++  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_C_FLAGS_MINSIZEREL     "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS_RELEASE        "-DNDEBUG -DGBE_DEBUG=0")
 + elseif (COMPILER STREQUAL "CLANG")
 +   set (CMAKE_C_COMPILER             "clang")
 +   set (CMAKE_C_FLAGS                "-Wall -std=c99")
 +-  set (CMAKE_C_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
 +-  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
 +-  set (CMAKE_C_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_C_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS_DEBUG          "-DGBE_DEBUG=1")
 ++  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_C_FLAGS_MINSIZEREL     "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS_RELEASE        "-DNDEBUG -DGBE_DEBUG=0")
 +   set (CMAKE_CXX_COMPILER             "clang++")
 +   set (CMAKE_CXX_FLAGS "-fstrict-aliasing -msse2 -fPIC -Wall -Wno-format-security -Wno-invalid-offsetof -std=c++0x")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG}")
 +-  set (CMAKE_CXX_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_CXX_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_CXX_FLAGS_DEBUG          "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_CXX_FLAGS_RELEASE        "-DNDEBUG -DGBE_DEBUG=0")
 +   set (CMAKE_AR      "/usr/bin/llvm-ar")
 +   set (CMAKE_LINKER  "/usr/bin/llvm-ld")
 +   set (CMAKE_NM      "/usr/bin/llvm-nm")
 +@@ -91,10 +91,10 @@
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG} -Wl,-E")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MODE_FLAG}")
 +-  set (CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DGBE_DEBUG=1")
 +-  set (CCMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O2 -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O2 -DGBE_DEBUG=0")
 +-  set (CCMAKE_CXX_FLAGS_MINSIZEREL "-Os -DGBE_DEBUG=0")
 ++  set (CMAKE_CXX_FLAGS_DEBUG "-DGBE_DEBUG=1")
 ++  set (CCMAKE_CXX_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CCMAKE_CXX_FLAGS_MINSIZEREL "-DGBE_DEBUG=0")
 +   set (CMAKE_EXE_LINKER_FLAGS "")
 + endif ()
 + 
diff --cc debian/patches/khronos
index 8f4f6ac,0000000..0b418f1
mode 100644,000000..100644
--- a/debian/patches/khronos
+++ b/debian/patches/khronos
@@@ -1,3100 -1,0 +1,6876 @@@
 +Description: Use Khronos Group headers
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-05-21
 +
- Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_ext.h
++Index: beignet-0.1+git20130619+42967d2/include/CL/cl_ext.h
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl_ext.h	2013-05-21 10:38:37.207948335 +0200
- +++ beignet-0.1+git20130521+a7ea35c/include/CL/cl_ext.h	2013-05-21 10:41:03.323941820 +0200
- @@ -1,251 +1 @@
++--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_ext.h	2013-06-19 21:04:24.122667370 +0200
+++++ beignet-0.1+git20130619+42967d2/include/CL/cl_ext.h	2013-06-19 21:04:33.466666953 +0200
++@@ -1,213 +1 @@
 +-/*******************************************************************************
- - * Copyright (c) 2008 - 2012 The Khronos Group Inc.
++- * Copyright (c) 2008-2010 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- ******************************************************************************/
 +-
 +-/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
 +-
 +-/* cl_ext.h contains OpenCL extensions which don't have external */
 +-/* (OpenGL, D3D) dependencies.                                   */
 +-
 +-#ifndef __CL_EXT_H
 +-#define __CL_EXT_H
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-#ifdef __APPLE__
 +-	#include <OpenCL/cl.h>
 +-    #include <AvailabilityMacros.h>
 +-#else
 +-	#include <CL/cl.h>
 +-#endif
 +-
++-/* cl_khr_fp64 extension - no extension #define since it has no functions  */
++-#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
++-
 +-/* cl_khr_fp16 extension - no extension #define since it has no functions  */
 +-#define CL_DEVICE_HALF_FP_CONFIG                    0x1033
 +-
 +-/* Memory object destruction
 +- *
 +- * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
 +- *
 +- * Registers a user callback function that will be called when the memory object is deleted and its resources 
 +- * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback 
 +- * stack associated with memobj. The registered user callback functions are called in the reverse order in 
 +- * which they were registered. The user callback functions are called and then the memory object is deleted 
 +- * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be 
 +- * notified when the memory referenced by host_ptr, specified when the memory object is created and used as 
 +- * the storage bits for the memory object, can be reused or freed.
 +- *
 +- * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
 +- *
 +- * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
 +- * before using.
 +- */
 +-#define cl_APPLE_SetMemObjectDestructor 1
 +-cl_int	CL_API_ENTRY clSetMemObjectDestructorAPPLE(  cl_mem /* memobj */, 
 +-                                        void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
 +-                                        void * /*user_data */ )             CL_EXT_SUFFIX__VERSION_1_0;  
 +-
 +-
 +-/* Context Logging Functions
 +- *
 +- * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
 +- * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
 +- * before using.
 +- *
 +- * clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger 
 +- */
 +-#define cl_APPLE_ContextLoggingFunctions 1
 +-extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE(  const char * /* errstr */, 
 +-                                            const void * /* private_info */, 
 +-                                            size_t       /* cb */, 
 +-                                            void *       /* user_data */ )  CL_EXT_SUFFIX__VERSION_1_0;
 +-
 +-/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
 +-extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE(   const char * /* errstr */, 
 +-                                          const void * /* private_info */, 
 +-                                          size_t       /* cb */, 
 +-                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
 +-
 +-/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
 +-extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(   const char * /* errstr */, 
 +-                                          const void * /* private_info */, 
 +-                                          size_t       /* cb */, 
 +-                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
 +-
 +-
 +-/************************ 
 +-* cl_khr_icd extension *                                                  
 +-************************/
 +-#define cl_khr_icd 1
 +-
 +-/* cl_platform_info                                                        */
 +-#define CL_PLATFORM_ICD_SUFFIX_KHR                  0x0920
 +-
 +-/* Additional Error Codes                                                  */
 +-#define CL_PLATFORM_NOT_FOUND_KHR                   -1001
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
 +-                       cl_platform_id * /* platforms */,
 +-                       cl_uint *        /* num_platforms */);
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
 +-    cl_uint          /* num_entries */,
 +-    cl_platform_id * /* platforms */,
 +-    cl_uint *        /* num_platforms */);
 +-
 +-
- -/* Extension: cl_khr_image2D_buffer
- - *
- - * This extension allows a 2D image to be created from a cl_mem buffer without a copy.
- - * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
- - * Both the sampler and sampler-less read_image built-in functions are supported for 2D images
- - * and 2D images created from a buffer.  Similarly, the write_image built-ins are also supported
- - * for 2D images created from a buffer.
- - *
- - * When the 2D image from buffer is created, the client must specify the width,
- - * height, image format (i.e. channel order and channel data type) and optionally the row pitch
- - *
- - * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
- - * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
- - */
- -    
- -/*************************************
- - * cl_khr_initalize_memory extension *
- - *************************************/
- -    
- -#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x200E
- -    
- -    
- -/**************************************
- - * cl_khr_terminate_context extension *
- - **************************************/
- -    
- -#define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x200F
- -#define CL_CONTEXT_TERMINATE_KHR                    0x2010
- -
- -#define cl_khr_terminate_context 1
- -extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
- -
- -typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
- -    
- -    
- -/*
- - * Extension: cl_khr_spir
- - *
- - * This extension adds support to create an OpenCL program object from a 
- - * Standard Portable Intermediate Representation (SPIR) instance
- - */
- -
 +-/******************************************
 +-* cl_nv_device_attribute_query extension *
 +-******************************************/
 +-/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
 +-#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
 +-#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
 +-#define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
 +-#define CL_DEVICE_WARP_SIZE_NV                      0x4003
 +-#define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
 +-#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
 +-#define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006
 +-
 +-
 +-/*********************************
 +-* cl_amd_device_attribute_query *
 +-*********************************/
 +-#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
 +-
++-
 +-#ifdef CL_VERSION_1_1
 +-   /***********************************
 +-    * cl_ext_device_fission extension *
 +-    ***********************************/
 +-    #define cl_ext_device_fission   1
 +-    
 +-    extern CL_API_ENTRY cl_int CL_API_CALL
 +-    clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
 +-    
 +-    typedef CL_API_ENTRY cl_int 
 +-    (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 +-
 +-    extern CL_API_ENTRY cl_int CL_API_CALL
 +-    clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
 +-    
 +-    typedef CL_API_ENTRY cl_int 
 +-    (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 +-
 +-    typedef cl_ulong  cl_device_partition_property_ext;
 +-    extern CL_API_ENTRY cl_int CL_API_CALL
 +-    clCreateSubDevicesEXT(  cl_device_id /*in_device*/,
 +-                            const cl_device_partition_property_ext * /* properties */,
 +-                            cl_uint /*num_entries*/,
 +-                            cl_device_id * /*out_devices*/,
 +-                            cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 +-
 +-    typedef CL_API_ENTRY cl_int 
 +-    ( CL_API_CALL * clCreateSubDevicesEXT_fn)(  cl_device_id /*in_device*/,
 +-                                                const cl_device_partition_property_ext * /* properties */,
 +-                                                cl_uint /*num_entries*/,
 +-                                                cl_device_id * /*out_devices*/,
 +-                                                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 +-
 +-    /* cl_device_partition_property_ext */
 +-    #define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
 +-    #define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
 +-    #define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
 +-    #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
 +-    
 +-    /* clDeviceGetInfo selectors */
 +-    #define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
 +-    #define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
 +-    #define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
 +-    #define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
 +-    #define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
 +-    
 +-    /* error codes */
 +-    #define CL_DEVICE_PARTITION_FAILED_EXT              -1057
 +-    #define CL_INVALID_PARTITION_COUNT_EXT              -1058
 +-    #define CL_INVALID_PARTITION_NAME_EXT               -1059
 +-    
 +-    /* CL_AFFINITY_DOMAINs */
 +-    #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
 +-    #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
 +-    #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
 +-    #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
 +-    #define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
 +-    #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
 +-    
 +-    /* cl_device_partition_property_ext list terminators */
 +-    #define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
 +-    #define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
 +-    #define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
 +-
 +-
 +-
 +-#endif /* CL_VERSION_1_1 */
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-
 +-#endif /* __CL_EXT_H */
 ++#include_next <CL/cl_ext.h>
- Index: beignet-0.1+git20130521+a7ea35c/include/CL/opencl.h
++Index: beignet-0.1+git20130619+42967d2/include/CL/opencl.h
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/include/CL/opencl.h	2013-05-21 10:38:37.207948335 +0200
- +++ beignet-0.1+git20130521+a7ea35c/include/CL/opencl.h	2013-05-21 10:41:03.323941820 +0200
++--- beignet-0.1+git20130619+42967d2.orig/include/CL/opencl.h	2013-06-19 21:04:24.122667370 +0200
+++++ beignet-0.1+git20130619+42967d2/include/CL/opencl.h	2013-06-19 21:04:33.466666953 +0200
 +@@ -1,54 +1 @@
 +-/*******************************************************************************
- - * Copyright (c) 2008-2012 The Khronos Group Inc.
++- * Copyright (c) 2008-2010 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- ******************************************************************************/
 +-
 +-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
 +-
 +-#ifndef __OPENCL_H
 +-#define __OPENCL_H
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-#ifdef __APPLE__
 +-
 +-#include <OpenCL/cl.h>
 +-#include <OpenCL/cl_gl.h>
 +-#include <OpenCL/cl_gl_ext.h>
 +-#include <OpenCL/cl_ext.h>
 +-
 +-#else
 +-
 +-#include <CL/cl.h>
 +-#include <CL/cl_gl.h>
 +-#include <CL/cl_gl_ext.h>
 +-#include <CL/cl_ext.h>
 +-
 +-#endif
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  /* __OPENCL_H   */
 +-
 ++#include_next <CL/opencl.h>
- Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_d3d10.h
++Index: beignet-0.1+git20130619+42967d2/include/CL/cl_d3d10.h
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl_d3d10.h	2013-05-21 10:38:37.207948335 +0200
- +++ beignet-0.1+git20130521+a7ea35c/include/CL/cl_d3d10.h	2013-05-21 10:41:03.323941820 +0200
++--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_d3d10.h	2013-06-19 21:04:24.122667370 +0200
+++++ beignet-0.1+git20130619+42967d2/include/CL/cl_d3d10.h	2013-06-19 21:04:33.470666953 +0200
 +@@ -1,126 +1 @@
 +-/**********************************************************************************
- - * Copyright (c) 2008-2012 The Khronos Group Inc.
++- * Copyright (c) 2008-2010 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- **********************************************************************************/
 +-
 +-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
 +-
 +-#ifndef __OPENCL_CL_D3D10_H
 +-#define __OPENCL_CL_D3D10_H
 +-
 +-#include <d3d10.h>
 +-#include <CL/cl.h>
 +-#include <CL/cl_platform.h>
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-/******************************************************************************
 +- * cl_khr_d3d10_sharing                                                       */
 +-#define cl_khr_d3d10_sharing 1
 +-
 +-typedef cl_uint cl_d3d10_device_source_khr;
 +-typedef cl_uint cl_d3d10_device_set_khr;
 +-
 +-/******************************************************************************/
 +-
 +-// Error Codes
 +-#define CL_INVALID_D3D10_DEVICE_KHR                  -1002
 +-#define CL_INVALID_D3D10_RESOURCE_KHR                -1003
 +-#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR       -1004
 +-#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR           -1005
 +-
 +-// cl_d3d10_device_source_nv
 +-#define CL_D3D10_DEVICE_KHR                          0x4010
 +-#define CL_D3D10_DXGI_ADAPTER_KHR                    0x4011
 +-
 +-// cl_d3d10_device_set_nv
 +-#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR           0x4012
 +-#define CL_ALL_DEVICES_FOR_D3D10_KHR                 0x4013
 +-
 +-// cl_context_info
 +-#define CL_CONTEXT_D3D10_DEVICE_KHR                  0x4014
 +-#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
 +-
 +-// cl_mem_info
 +-#define CL_MEM_D3D10_RESOURCE_KHR                    0x4015
 +-
 +-// cl_image_info
 +-#define CL_IMAGE_D3D10_SUBRESOURCE_KHR               0x4016
 +-
 +-// cl_command_type
 +-#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR         0x4017
 +-#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR         0x4018
 +-
 +-/******************************************************************************/
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
 +-    cl_platform_id             platform,
 +-    cl_d3d10_device_source_khr d3d_device_source,
 +-    void *                     d3d_object,
 +-    cl_d3d10_device_set_khr    d3d_device_set,
 +-    cl_uint                    num_entries,
 +-    cl_device_id *             devices,
 +-    cl_uint *                  num_devices) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
 +-    cl_context     context,
 +-    cl_mem_flags   flags,
 +-    ID3D10Buffer * resource,
 +-    cl_int *       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
 +-    cl_context        context,
 +-    cl_mem_flags      flags,
 +-    ID3D10Texture2D * resource,
 +-    UINT              subresource,
 +-    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
 +-    cl_context        context,
 +-    cl_mem_flags      flags,
 +-    ID3D10Texture3D * resource,
 +-    UINT              subresource,
 +-    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
 +-    cl_command_queue command_queue,
 +-    cl_uint          num_objects,
 +-    const cl_mem *   mem_objects,
 +-    cl_uint          num_events_in_wait_list,
 +-    const cl_event * event_wait_list,
 +-    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
 +-    cl_command_queue command_queue,
 +-    cl_uint          num_objects,
- -    const cl_mem *   mem_objects,
++-    cl_mem *         mem_objects,
 +-    cl_uint          num_events_in_wait_list,
 +-    const cl_event * event_wait_list,
 +-    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  // __OPENCL_CL_D3D10_H
 +-
 ++#include_next <CL/cl_d3d10.h>
- Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl.h
++Index: beignet-0.1+git20130619+42967d2/include/CL/cl.h
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl.h	2013-05-21 10:38:37.207948335 +0200
- +++ beignet-0.1+git20130521+a7ea35c/include/CL/cl.h	2013-05-21 10:41:03.327941820 +0200
- @@ -1,1214 +1 @@
++--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl.h	2013-06-19 21:04:24.122667370 +0200
+++++ beignet-0.1+git20130619+42967d2/include/CL/cl.h	2013-06-19 21:04:33.474666953 +0200
++@@ -1,998 +1 @@
 +-/*******************************************************************************
- - * Copyright (c) 2008 - 2012 The Khronos Group Inc.
++- * Copyright (c) 2008-2010 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- ******************************************************************************/
 +-
++-/* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */
++-
 +-#ifndef __OPENCL_CL_H
 +-#define __OPENCL_CL_H
 +-
 +-#ifdef __APPLE__
 +-#include <OpenCL/cl_platform.h>
 +-#else
 +-#include <CL/cl_platform.h>
 +-#endif	
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-/******************************************************************************/
 +-
 +-typedef struct _cl_platform_id *    cl_platform_id;
 +-typedef struct _cl_device_id *      cl_device_id;
 +-typedef struct _cl_context *        cl_context;
 +-typedef struct _cl_command_queue *  cl_command_queue;
 +-typedef struct _cl_mem *            cl_mem;
 +-typedef struct _cl_program *        cl_program;
 +-typedef struct _cl_kernel *         cl_kernel;
 +-typedef struct _cl_event *          cl_event;
 +-typedef struct _cl_sampler *        cl_sampler;
 +-
 +-typedef cl_uint             cl_bool;                     /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ 
 +-typedef cl_ulong            cl_bitfield;
 +-typedef cl_bitfield         cl_device_type;
 +-typedef cl_uint             cl_platform_info;
 +-typedef cl_uint             cl_device_info;
 +-typedef cl_bitfield         cl_device_fp_config;
 +-typedef cl_uint             cl_device_mem_cache_type;
 +-typedef cl_uint             cl_device_local_mem_type;
 +-typedef cl_bitfield         cl_device_exec_capabilities;
 +-typedef cl_bitfield         cl_command_queue_properties;
- -typedef intptr_t            cl_device_partition_property;
- -typedef cl_bitfield         cl_device_affinity_domain;
 +-
- -typedef intptr_t            cl_context_properties;
++-typedef intptr_t			cl_context_properties;
 +-typedef cl_uint             cl_context_info;
 +-typedef cl_uint             cl_command_queue_info;
 +-typedef cl_uint             cl_channel_order;
 +-typedef cl_uint             cl_channel_type;
 +-typedef cl_bitfield         cl_mem_flags;
 +-typedef cl_uint             cl_mem_object_type;
 +-typedef cl_uint             cl_mem_info;
- -typedef cl_bitfield         cl_mem_migration_flags;
 +-typedef cl_uint             cl_image_info;
 +-typedef cl_uint             cl_buffer_create_type;
 +-typedef cl_uint             cl_addressing_mode;
 +-typedef cl_uint             cl_filter_mode;
 +-typedef cl_uint             cl_sampler_info;
 +-typedef cl_bitfield         cl_map_flags;
 +-typedef cl_uint             cl_program_info;
 +-typedef cl_uint             cl_program_build_info;
- -typedef cl_uint             cl_program_binary_type;
 +-typedef cl_int              cl_build_status;
 +-typedef cl_uint             cl_kernel_info;
- -typedef cl_uint             cl_kernel_arg_info;
- -typedef cl_uint             cl_kernel_arg_address_qualifier;
- -typedef cl_uint             cl_kernel_arg_access_qualifier;
- -typedef cl_bitfield         cl_kernel_arg_type_qualifier;
 +-typedef cl_uint             cl_kernel_work_group_info;
 +-typedef cl_uint             cl_event_info;
 +-typedef cl_uint             cl_command_type;
 +-typedef cl_uint             cl_profiling_info;
 +-
- -
 +-typedef struct _cl_image_format {
 +-    cl_channel_order        image_channel_order;
 +-    cl_channel_type         image_channel_data_type;
 +-} cl_image_format;
 +-
- -typedef struct _cl_image_desc {
- -    cl_mem_object_type      image_type;
- -    size_t                  image_width;
- -    size_t                  image_height;
- -    size_t                  image_depth;
- -    size_t                  image_array_size;
- -    size_t                  image_row_pitch;
- -    size_t                  image_slice_pitch;
- -    cl_uint                 num_mip_levels;
- -    cl_uint                 num_samples;
- -    cl_mem                  buffer;
- -} cl_image_desc;
 +-
 +-typedef struct _cl_buffer_region {
 +-    size_t                  origin;
 +-    size_t                  size;
 +-} cl_buffer_region;
 +-
- -
 +-/******************************************************************************/
 +-
 +-/* Error Codes */
 +-#define CL_SUCCESS                                  0
 +-#define CL_DEVICE_NOT_FOUND                         -1
 +-#define CL_DEVICE_NOT_AVAILABLE                     -2
 +-#define CL_COMPILER_NOT_AVAILABLE                   -3
 +-#define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
 +-#define CL_OUT_OF_RESOURCES                         -5
 +-#define CL_OUT_OF_HOST_MEMORY                       -6
 +-#define CL_PROFILING_INFO_NOT_AVAILABLE             -7
 +-#define CL_MEM_COPY_OVERLAP                         -8
 +-#define CL_IMAGE_FORMAT_MISMATCH                    -9
 +-#define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
 +-#define CL_BUILD_PROGRAM_FAILURE                    -11
 +-#define CL_MAP_FAILURE                              -12
 +-#define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
 +-#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
- -#define CL_COMPILE_PROGRAM_FAILURE                  -15
- -#define CL_LINKER_NOT_AVAILABLE                     -16
- -#define CL_LINK_PROGRAM_FAILURE                     -17
- -#define CL_DEVICE_PARTITION_FAILED                  -18
- -#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
 +-
 +-#define CL_INVALID_VALUE                            -30
 +-#define CL_INVALID_DEVICE_TYPE                      -31
 +-#define CL_INVALID_PLATFORM                         -32
 +-#define CL_INVALID_DEVICE                           -33
 +-#define CL_INVALID_CONTEXT                          -34
 +-#define CL_INVALID_QUEUE_PROPERTIES                 -35
 +-#define CL_INVALID_COMMAND_QUEUE                    -36
 +-#define CL_INVALID_HOST_PTR                         -37
 +-#define CL_INVALID_MEM_OBJECT                       -38
 +-#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR          -39
 +-#define CL_INVALID_IMAGE_SIZE                       -40
 +-#define CL_INVALID_SAMPLER                          -41
 +-#define CL_INVALID_BINARY                           -42
 +-#define CL_INVALID_BUILD_OPTIONS                    -43
 +-#define CL_INVALID_PROGRAM                          -44
 +-#define CL_INVALID_PROGRAM_EXECUTABLE               -45
 +-#define CL_INVALID_KERNEL_NAME                      -46
 +-#define CL_INVALID_KERNEL_DEFINITION                -47
 +-#define CL_INVALID_KERNEL                           -48
 +-#define CL_INVALID_ARG_INDEX                        -49
 +-#define CL_INVALID_ARG_VALUE                        -50
 +-#define CL_INVALID_ARG_SIZE                         -51
 +-#define CL_INVALID_KERNEL_ARGS                      -52
 +-#define CL_INVALID_WORK_DIMENSION                   -53
 +-#define CL_INVALID_WORK_GROUP_SIZE                  -54
 +-#define CL_INVALID_WORK_ITEM_SIZE                   -55
 +-#define CL_INVALID_GLOBAL_OFFSET                    -56
 +-#define CL_INVALID_EVENT_WAIT_LIST                  -57
 +-#define CL_INVALID_EVENT                            -58
 +-#define CL_INVALID_OPERATION                        -59
 +-#define CL_INVALID_GL_OBJECT                        -60
 +-#define CL_INVALID_BUFFER_SIZE                      -61
 +-#define CL_INVALID_MIP_LEVEL                        -62
 +-#define CL_INVALID_GLOBAL_WORK_SIZE                 -63
 +-#define CL_INVALID_PROPERTY                         -64
- -#define CL_INVALID_IMAGE_DESCRIPTOR                 -65
- -#define CL_INVALID_COMPILER_OPTIONS                 -66
- -#define CL_INVALID_LINKER_OPTIONS                   -67
- -#define CL_INVALID_DEVICE_PARTITION_COUNT           -68
 +-
 +-/* OpenCL Version */
 +-#define CL_VERSION_1_0                              1
 +-#define CL_VERSION_1_1                              1
- -#define CL_VERSION_1_2                              1
 +-
 +-/* cl_bool */
 +-#define CL_FALSE                                    0
 +-#define CL_TRUE                                     1
- -#define CL_BLOCKING                                 CL_TRUE
- -#define CL_NON_BLOCKING                             CL_FALSE
 +-
 +-/* cl_platform_info */
 +-#define CL_PLATFORM_PROFILE                         0x0900
 +-#define CL_PLATFORM_VERSION                         0x0901
 +-#define CL_PLATFORM_NAME                            0x0902
 +-#define CL_PLATFORM_VENDOR                          0x0903
 +-#define CL_PLATFORM_EXTENSIONS                      0x0904
 +-
 +-/* cl_device_type - bitfield */
 +-#define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
 +-#define CL_DEVICE_TYPE_CPU                          (1 << 1)
 +-#define CL_DEVICE_TYPE_GPU                          (1 << 2)
 +-#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
- -#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
 +-#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
 +-
 +-/* cl_device_info */
 +-#define CL_DEVICE_TYPE                              0x1000
 +-#define CL_DEVICE_VENDOR_ID                         0x1001
 +-#define CL_DEVICE_MAX_COMPUTE_UNITS                 0x1002
 +-#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS          0x1003
 +-#define CL_DEVICE_MAX_WORK_GROUP_SIZE               0x1004
 +-#define CL_DEVICE_MAX_WORK_ITEM_SIZES               0x1005
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR       0x1006
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT      0x1007
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT        0x1008
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG       0x1009
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT      0x100A
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE     0x100B
 +-#define CL_DEVICE_MAX_CLOCK_FREQUENCY               0x100C
 +-#define CL_DEVICE_ADDRESS_BITS                      0x100D
 +-#define CL_DEVICE_MAX_READ_IMAGE_ARGS               0x100E
 +-#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS              0x100F
 +-#define CL_DEVICE_MAX_MEM_ALLOC_SIZE                0x1010
 +-#define CL_DEVICE_IMAGE2D_MAX_WIDTH                 0x1011
 +-#define CL_DEVICE_IMAGE2D_MAX_HEIGHT                0x1012
 +-#define CL_DEVICE_IMAGE3D_MAX_WIDTH                 0x1013
 +-#define CL_DEVICE_IMAGE3D_MAX_HEIGHT                0x1014
 +-#define CL_DEVICE_IMAGE3D_MAX_DEPTH                 0x1015
 +-#define CL_DEVICE_IMAGE_SUPPORT                     0x1016
 +-#define CL_DEVICE_MAX_PARAMETER_SIZE                0x1017
 +-#define CL_DEVICE_MAX_SAMPLERS                      0x1018
 +-#define CL_DEVICE_MEM_BASE_ADDR_ALIGN               0x1019
 +-#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE          0x101A
 +-#define CL_DEVICE_SINGLE_FP_CONFIG                  0x101B
 +-#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE             0x101C
 +-#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE         0x101D
 +-#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE             0x101E
 +-#define CL_DEVICE_GLOBAL_MEM_SIZE                   0x101F
 +-#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE          0x1020
 +-#define CL_DEVICE_MAX_CONSTANT_ARGS                 0x1021
 +-#define CL_DEVICE_LOCAL_MEM_TYPE                    0x1022
 +-#define CL_DEVICE_LOCAL_MEM_SIZE                    0x1023
 +-#define CL_DEVICE_ERROR_CORRECTION_SUPPORT          0x1024
 +-#define CL_DEVICE_PROFILING_TIMER_RESOLUTION        0x1025
 +-#define CL_DEVICE_ENDIAN_LITTLE                     0x1026
 +-#define CL_DEVICE_AVAILABLE                         0x1027
 +-#define CL_DEVICE_COMPILER_AVAILABLE                0x1028
 +-#define CL_DEVICE_EXECUTION_CAPABILITIES            0x1029
 +-#define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
 +-#define CL_DEVICE_NAME                              0x102B
 +-#define CL_DEVICE_VENDOR                            0x102C
 +-#define CL_DRIVER_VERSION                           0x102D
 +-#define CL_DEVICE_PROFILE                           0x102E
 +-#define CL_DEVICE_VERSION                           0x102F
 +-#define CL_DEVICE_EXTENSIONS                        0x1030
 +-#define CL_DEVICE_PLATFORM                          0x1031
- -#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
++-/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
 +-/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
 +-#define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR          0x1036
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT         0x1037
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT           0x1038
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG          0x1039
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT         0x103A
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
 +-#define CL_DEVICE_OPENCL_C_VERSION                  0x103D
- -#define CL_DEVICE_LINKER_AVAILABLE                  0x103E
- -#define CL_DEVICE_BUILT_IN_KERNELS                  0x103F
- -#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE             0x1040
- -#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE              0x1041
- -#define CL_DEVICE_PARENT_DEVICE                     0x1042
- -#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES         0x1043
- -#define CL_DEVICE_PARTITION_PROPERTIES              0x1044
- -#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN         0x1045
- -#define CL_DEVICE_PARTITION_TYPE                    0x1046
- -#define CL_DEVICE_REFERENCE_COUNT                   0x1047
- -#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC       0x1048
- -#define CL_DEVICE_PRINTF_BUFFER_SIZE                0x1049
- -#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT             0x104A
- -#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT      0x104B
 +-
 +-/* cl_device_fp_config - bitfield */
 +-#define CL_FP_DENORM                                (1 << 0)
 +-#define CL_FP_INF_NAN                               (1 << 1)
 +-#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
 +-#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
 +-#define CL_FP_ROUND_TO_INF                          (1 << 4)
 +-#define CL_FP_FMA                                   (1 << 5)
 +-#define CL_FP_SOFT_FLOAT                            (1 << 6)
- -#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
 +-
 +-/* cl_device_mem_cache_type */
 +-#define CL_NONE                                     0x0
 +-#define CL_READ_ONLY_CACHE                          0x1
 +-#define CL_READ_WRITE_CACHE                         0x2
 +-
 +-/* cl_device_local_mem_type */
 +-#define CL_LOCAL                                    0x1
 +-#define CL_GLOBAL                                   0x2
 +-
 +-/* cl_device_exec_capabilities - bitfield */
 +-#define CL_EXEC_KERNEL                              (1 << 0)
 +-#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
 +-
 +-/* cl_command_queue_properties - bitfield */
 +-#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
 +-#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
 +-
 +-/* cl_context_info  */
 +-#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
 +-#define CL_CONTEXT_DEVICES                          0x1081
 +-#define CL_CONTEXT_PROPERTIES                       0x1082
 +-#define CL_CONTEXT_NUM_DEVICES                      0x1083
 +-
- -/* cl_context_properties */
++-/* cl_context_info + cl_context_properties */
 +-#define CL_CONTEXT_PLATFORM                         0x1084
- -#define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
- -    
- -/* cl_device_partition_property */
- -#define CL_DEVICE_PARTITION_EQUALLY                 0x1086
- -#define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
- -#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END      0x0
- -#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN      0x1088
- -    
- -/* cl_device_affinity_domain */
- -#define CL_DEVICE_AFFINITY_DOMAIN_NUMA                     (1 << 0)
- -#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE                 (1 << 1)
- -#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE                 (1 << 2)
- -#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE                 (1 << 3)
- -#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE                 (1 << 4)
- -#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE       (1 << 5)
 +-
 +-/* cl_command_queue_info */
 +-#define CL_QUEUE_CONTEXT                            0x1090
 +-#define CL_QUEUE_DEVICE                             0x1091
 +-#define CL_QUEUE_REFERENCE_COUNT                    0x1092
 +-#define CL_QUEUE_PROPERTIES                         0x1093
 +-
 +-/* cl_mem_flags - bitfield */
 +-#define CL_MEM_READ_WRITE                           (1 << 0)
 +-#define CL_MEM_WRITE_ONLY                           (1 << 1)
 +-#define CL_MEM_READ_ONLY                            (1 << 2)
 +-#define CL_MEM_USE_HOST_PTR                         (1 << 3)
 +-#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
 +-#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
- -// reserved                                         (1 << 6)    
- -#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
- -#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
- -#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
- -
- -/* cl_mem_migration_flags - bitfield */
- -#define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
- -#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED     (1 << 1)
 +-
 +-/* cl_channel_order */
 +-#define CL_R                                        0x10B0
 +-#define CL_A                                        0x10B1
 +-#define CL_RG                                       0x10B2
 +-#define CL_RA                                       0x10B3
 +-#define CL_RGB                                      0x10B4
 +-#define CL_RGBA                                     0x10B5
 +-#define CL_BGRA                                     0x10B6
 +-#define CL_ARGB                                     0x10B7
 +-#define CL_INTENSITY                                0x10B8
 +-#define CL_LUMINANCE                                0x10B9
 +-#define CL_Rx                                       0x10BA
 +-#define CL_RGx                                      0x10BB
 +-#define CL_RGBx                                     0x10BC
- -#define CL_DEPTH                                    0x10BD
- -#define CL_DEPTH_STENCIL                            0x10BE
 +-
 +-/* cl_channel_type */
 +-#define CL_SNORM_INT8                               0x10D0
 +-#define CL_SNORM_INT16                              0x10D1
 +-#define CL_UNORM_INT8                               0x10D2
 +-#define CL_UNORM_INT16                              0x10D3
 +-#define CL_UNORM_SHORT_565                          0x10D4
 +-#define CL_UNORM_SHORT_555                          0x10D5
 +-#define CL_UNORM_INT_101010                         0x10D6
 +-#define CL_SIGNED_INT8                              0x10D7
 +-#define CL_SIGNED_INT16                             0x10D8
 +-#define CL_SIGNED_INT32                             0x10D9
 +-#define CL_UNSIGNED_INT8                            0x10DA
 +-#define CL_UNSIGNED_INT16                           0x10DB
 +-#define CL_UNSIGNED_INT32                           0x10DC
 +-#define CL_HALF_FLOAT                               0x10DD
 +-#define CL_FLOAT                                    0x10DE
- -#define CL_UNORM_INT24                              0x10DF
 +-
 +-/* cl_mem_object_type */
 +-#define CL_MEM_OBJECT_BUFFER                        0x10F0
 +-#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
 +-#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
- -#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
- -#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
- -#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
- -#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
 +-
 +-/* cl_mem_info */
 +-#define CL_MEM_TYPE                                 0x1100
 +-#define CL_MEM_FLAGS                                0x1101
 +-#define CL_MEM_SIZE                                 0x1102
 +-#define CL_MEM_HOST_PTR                             0x1103
 +-#define CL_MEM_MAP_COUNT                            0x1104
 +-#define CL_MEM_REFERENCE_COUNT                      0x1105
 +-#define CL_MEM_CONTEXT                              0x1106
 +-#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
 +-#define CL_MEM_OFFSET                               0x1108
 +-
 +-/* cl_image_info */
 +-#define CL_IMAGE_FORMAT                             0x1110
 +-#define CL_IMAGE_ELEMENT_SIZE                       0x1111
 +-#define CL_IMAGE_ROW_PITCH                          0x1112
 +-#define CL_IMAGE_SLICE_PITCH                        0x1113
 +-#define CL_IMAGE_WIDTH                              0x1114
 +-#define CL_IMAGE_HEIGHT                             0x1115
 +-#define CL_IMAGE_DEPTH                              0x1116
- -#define CL_IMAGE_ARRAY_SIZE                         0x1117
- -#define CL_IMAGE_BUFFER                             0x1118
- -#define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
- -#define CL_IMAGE_NUM_SAMPLES                        0x111A
 +-
 +-/* cl_addressing_mode */
 +-#define CL_ADDRESS_NONE                             0x1130
 +-#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
 +-#define CL_ADDRESS_CLAMP                            0x1132
 +-#define CL_ADDRESS_REPEAT                           0x1133
 +-#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134
 +-
 +-/* cl_filter_mode */
 +-#define CL_FILTER_NEAREST                           0x1140
 +-#define CL_FILTER_LINEAR                            0x1141
 +-
 +-/* cl_sampler_info */
 +-#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
 +-#define CL_SAMPLER_CONTEXT                          0x1151
 +-#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
 +-#define CL_SAMPLER_ADDRESSING_MODE                  0x1153
 +-#define CL_SAMPLER_FILTER_MODE                      0x1154
 +-
 +-/* cl_map_flags - bitfield */
 +-#define CL_MAP_READ                                 (1 << 0)
 +-#define CL_MAP_WRITE                                (1 << 1)
- -#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
 +-
 +-/* cl_program_info */
 +-#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
 +-#define CL_PROGRAM_CONTEXT                          0x1161
 +-#define CL_PROGRAM_NUM_DEVICES                      0x1162
 +-#define CL_PROGRAM_DEVICES                          0x1163
 +-#define CL_PROGRAM_SOURCE                           0x1164
 +-#define CL_PROGRAM_BINARY_SIZES                     0x1165
 +-#define CL_PROGRAM_BINARIES                         0x1166
- -#define CL_PROGRAM_NUM_KERNELS                      0x1167
- -#define CL_PROGRAM_KERNEL_NAMES                     0x1168
 +-
 +-/* cl_program_build_info */
 +-#define CL_PROGRAM_BUILD_STATUS                     0x1181
 +-#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
 +-#define CL_PROGRAM_BUILD_LOG                        0x1183
- -#define CL_PROGRAM_BINARY_TYPE                      0x1184
- -    
- -/* cl_program_binary_type */
- -#define CL_PROGRAM_BINARY_TYPE_NONE                 0x0
- -#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT      0x1
- -#define CL_PROGRAM_BINARY_TYPE_LIBRARY              0x2
- -#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE           0x4
 +-
 +-/* cl_build_status */
 +-#define CL_BUILD_SUCCESS                            0
 +-#define CL_BUILD_NONE                               -1
 +-#define CL_BUILD_ERROR                              -2
 +-#define CL_BUILD_IN_PROGRESS                        -3
 +-
 +-/* cl_kernel_info */
 +-#define CL_KERNEL_FUNCTION_NAME                     0x1190
 +-#define CL_KERNEL_NUM_ARGS                          0x1191
 +-#define CL_KERNEL_REFERENCE_COUNT                   0x1192
 +-#define CL_KERNEL_CONTEXT                           0x1193
 +-#define CL_KERNEL_PROGRAM                           0x1194
- -#define CL_KERNEL_ATTRIBUTES                        0x1195
- -
- -/* cl_kernel_arg_info */
- -#define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
- -#define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
- -#define CL_KERNEL_ARG_TYPE_NAME                     0x1198
- -#define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
- -#define CL_KERNEL_ARG_NAME                          0x119A
- -
- -/* cl_kernel_arg_address_qualifier */
- -#define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
- -#define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
- -#define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
- -#define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
- -
- -/* cl_kernel_arg_access_qualifier */
- -#define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
- -#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
- -#define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
- -#define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
- -    
- -/* cl_kernel_arg_type_qualifer */
- -#define CL_KERNEL_ARG_TYPE_NONE                     0
- -#define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
- -#define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
- -#define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
 +-
 +-/* cl_kernel_work_group_info */
 +-#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
 +-#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
 +-#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
 +-#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
 +-#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
- -#define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
 +-
 +-/* cl_event_info  */
 +-#define CL_EVENT_COMMAND_QUEUE                      0x11D0
 +-#define CL_EVENT_COMMAND_TYPE                       0x11D1
 +-#define CL_EVENT_REFERENCE_COUNT                    0x11D2
 +-#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
 +-#define CL_EVENT_CONTEXT                            0x11D4
 +-
 +-/* cl_command_type */
 +-#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
 +-#define CL_COMMAND_TASK                             0x11F1
 +-#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
 +-#define CL_COMMAND_READ_BUFFER                      0x11F3
 +-#define CL_COMMAND_WRITE_BUFFER                     0x11F4
 +-#define CL_COMMAND_COPY_BUFFER                      0x11F5
 +-#define CL_COMMAND_READ_IMAGE                       0x11F6
 +-#define CL_COMMAND_WRITE_IMAGE                      0x11F7
 +-#define CL_COMMAND_COPY_IMAGE                       0x11F8
 +-#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
 +-#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
 +-#define CL_COMMAND_MAP_BUFFER                       0x11FB
 +-#define CL_COMMAND_MAP_IMAGE                        0x11FC
 +-#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
 +-#define CL_COMMAND_MARKER                           0x11FE
 +-#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
 +-#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
 +-#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
 +-#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
 +-#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
 +-#define CL_COMMAND_USER                             0x1204
- -#define CL_COMMAND_BARRIER                          0x1205
- -#define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
- -#define CL_COMMAND_FILL_BUFFER                      0x1207
- -#define CL_COMMAND_FILL_IMAGE                       0x1208
 +-
 +-/* command execution status */
 +-#define CL_COMPLETE                                 0x0
 +-#define CL_RUNNING                                  0x1
 +-#define CL_SUBMITTED                                0x2
 +-#define CL_QUEUED                                   0x3
- -
++-  
 +-/* cl_buffer_create_type  */
 +-#define CL_BUFFER_CREATE_TYPE_REGION                0x1220
 +-
 +-/* cl_profiling_info  */
 +-#define CL_PROFILING_COMMAND_QUEUED                 0x1280
 +-#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
 +-#define CL_PROFILING_COMMAND_START                  0x1282
 +-#define CL_PROFILING_COMMAND_END                    0x1283
 +-
 +-/********************************************************************************************************/
 +-
 +-/* Platform API */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetPlatformIDs(cl_uint          /* num_entries */,
 +-                 cl_platform_id * /* platforms */,
 +-                 cl_uint *        /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL 
 +-clGetPlatformInfo(cl_platform_id   /* platform */, 
 +-                  cl_platform_info /* param_name */,
 +-                  size_t           /* param_value_size */, 
 +-                  void *           /* param_value */,
 +-                  size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Device APIs */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetDeviceIDs(cl_platform_id   /* platform */,
 +-               cl_device_type   /* device_type */, 
 +-               cl_uint          /* num_entries */, 
 +-               cl_device_id *   /* devices */, 
 +-               cl_uint *        /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetDeviceInfo(cl_device_id    /* device */,
 +-                cl_device_info  /* param_name */, 
 +-                size_t          /* param_value_size */, 
 +-                void *          /* param_value */,
 +-                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
- -    
- -extern CL_API_ENTRY cl_int CL_API_CALL
- -clCreateSubDevices(cl_device_id                         /* in_device */,
- -                   const cl_device_partition_property * /* properties */,
- -                   cl_uint                              /* num_devices */,
- -                   cl_device_id *                       /* out_devices */,
- -                   cl_uint *                            /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
 +-
- -extern CL_API_ENTRY cl_int CL_API_CALL
- -clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
- -    
- -extern CL_API_ENTRY cl_int CL_API_CALL
- -clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
- -    
 +-/* Context APIs  */
 +-extern CL_API_ENTRY cl_context CL_API_CALL
 +-clCreateContext(const cl_context_properties * /* properties */,
- -                cl_uint                 /* num_devices */,
- -                const cl_device_id *    /* devices */,
++-                cl_uint                       /* num_devices */,
++-                const cl_device_id *          /* devices */,
 +-                void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
- -                void *                  /* user_data */,
- -                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
++-                void *                        /* user_data */,
++-                cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_context CL_API_CALL
 +-clCreateContextFromType(const cl_context_properties * /* properties */,
- -                        cl_device_type          /* device_type */,
++-                        cl_device_type                /* device_type */,
 +-                        void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
- -                        void *                  /* user_data */,
- -                        cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
++-                        void *                        /* user_data */,
++-                        cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetContextInfo(cl_context         /* context */, 
 +-                 cl_context_info    /* param_name */, 
 +-                 size_t             /* param_value_size */, 
 +-                 void *             /* param_value */, 
 +-                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Command Queue APIs */
 +-extern CL_API_ENTRY cl_command_queue CL_API_CALL
 +-clCreateCommandQueue(cl_context                     /* context */, 
 +-                     cl_device_id                   /* device */, 
 +-                     cl_command_queue_properties    /* properties */,
 +-                     cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetCommandQueueInfo(cl_command_queue      /* command_queue */,
 +-                      cl_command_queue_info /* param_name */,
 +-                      size_t                /* param_value_size */,
 +-                      void *                /* param_value */,
 +-                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
++-#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
++-#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
++-/* 
++- *  WARNING:
++- *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
++- *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
++- *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
++- *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
++- *
++- *  Software developers previously relying on this API are instructed to set the command queue 
++- *  properties when creating the queue, instead. 
++- */
++-extern CL_API_ENTRY cl_int CL_API_CALL
++-clSetCommandQueueProperty(cl_command_queue              /* command_queue */,
++-                          cl_command_queue_properties   /* properties */, 
++-                          cl_bool                        /* enable */,
++-                          cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
++-#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
++-
 +-/* Memory Object APIs */
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateBuffer(cl_context   /* context */,
 +-               cl_mem_flags /* flags */,
 +-               size_t       /* size */,
 +-               void *       /* host_ptr */,
 +-               cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateSubBuffer(cl_mem                   /* buffer */,
 +-                  cl_mem_flags             /* flags */,
 +-                  cl_buffer_create_type    /* buffer_create_type */,
 +-                  const void *             /* buffer_create_info */,
 +-                  cl_int *                 /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
 +-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
- -clCreateImage(cl_context              /* context */,
- -              cl_mem_flags            /* flags */,
- -              const cl_image_format * /* image_format */,
- -              const cl_image_desc *   /* image_desc */, 
- -              void *                  /* host_ptr */,
- -              cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
++-clCreateImage2D(cl_context              /* context */,
++-                cl_mem_flags            /* flags */,
++-                const cl_image_format * /* image_format */,
++-                size_t                  /* image_width */,
++-                size_t                  /* image_height */,
++-                size_t                  /* image_row_pitch */, 
++-                void *                  /* host_ptr */,
++-                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
++-                        
++-extern CL_API_ENTRY cl_mem CL_API_CALL
++-clCreateImage3D(cl_context              /* context */,
++-                cl_mem_flags            /* flags */,
++-                const cl_image_format * /* image_format */,
++-                size_t                  /* image_width */, 
++-                size_t                  /* image_height */,
++-                size_t                  /* image_depth */, 
++-                size_t                  /* image_row_pitch */, 
++-                size_t                  /* image_slice_pitch */, 
++-                void *                  /* host_ptr */,
++-                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                        
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetSupportedImageFormats(cl_context           /* context */,
 +-                           cl_mem_flags         /* flags */,
 +-                           cl_mem_object_type   /* image_type */,
 +-                           cl_uint              /* num_entries */,
 +-                           cl_image_format *    /* image_formats */,
 +-                           cl_uint *            /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
 +-                                    
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetMemObjectInfo(cl_mem           /* memobj */,
 +-                   cl_mem_info      /* param_name */, 
 +-                   size_t           /* param_value_size */,
 +-                   void *           /* param_value */,
 +-                   size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetImageInfo(cl_mem           /* image */,
 +-               cl_image_info    /* param_name */, 
 +-               size_t           /* param_value_size */,
 +-               void *           /* param_value */,
 +-               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clSetMemObjectDestructorCallback(  cl_mem /* memobj */, 
 +-                                    void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
 +-                                    void * /*user_data */ )             CL_API_SUFFIX__VERSION_1_1;  
 +-
- -/* Sampler APIs */
++-/* Sampler APIs  */
 +-extern CL_API_ENTRY cl_sampler CL_API_CALL
 +-clCreateSampler(cl_context          /* context */,
 +-                cl_bool             /* normalized_coords */, 
 +-                cl_addressing_mode  /* addressing_mode */, 
 +-                cl_filter_mode      /* filter_mode */,
 +-                cl_int *            /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetSamplerInfo(cl_sampler         /* sampler */,
 +-                 cl_sampler_info    /* param_name */,
 +-                 size_t             /* param_value_size */,
 +-                 void *             /* param_value */,
 +-                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-/* Program Object APIs  */
 +-extern CL_API_ENTRY cl_program CL_API_CALL
 +-clCreateProgramWithSource(cl_context        /* context */,
 +-                          cl_uint           /* count */,
 +-                          const char **     /* strings */,
 +-                          const size_t *    /* lengths */,
 +-                          cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_program CL_API_CALL
 +-clCreateProgramWithBinary(cl_context                     /* context */,
 +-                          cl_uint                        /* num_devices */,
 +-                          const cl_device_id *           /* device_list */,
 +-                          const size_t *                 /* lengths */,
 +-                          const unsigned char **         /* binaries */,
 +-                          cl_int *                       /* binary_status */,
 +-                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
- -extern CL_API_ENTRY cl_program CL_API_CALL
- -clCreateProgramWithBuiltInKernels(cl_context            /* context */,
- -                                  cl_uint               /* num_devices */,
- -                                  const cl_device_id *  /* device_list */,
- -                                  const char *          /* kernel_names */,
- -                                  cl_int *              /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
- -
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clBuildProgram(cl_program           /* program */,
 +-               cl_uint              /* num_devices */,
 +-               const cl_device_id * /* device_list */,
 +-               const char *         /* options */, 
 +-               void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
 +-               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
- -clCompileProgram(cl_program           /* program */,
- -                 cl_uint              /* num_devices */,
- -                 const cl_device_id * /* device_list */,
- -                 const char *         /* options */, 
- -                 cl_uint              /* num_input_headers */,
- -                 const cl_program *   /* input_headers */,
- -                 const char **        /* header_include_names */,
- -                 void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
- -                 void *               /* user_data */) CL_API_SUFFIX__VERSION_1_2;
- -
- -extern CL_API_ENTRY cl_program CL_API_CALL
- -clLinkProgram(cl_context           /* context */,
- -              cl_uint              /* num_devices */,
- -              const cl_device_id * /* device_list */,
- -              const char *         /* options */, 
- -              cl_uint              /* num_input_programs */,
- -              const cl_program *   /* input_programs */,
- -              void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
- -              void *               /* user_data */,
- -              cl_int *             /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
- -
- -
- -extern CL_API_ENTRY cl_int CL_API_CALL
- -clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
++-clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetProgramInfo(cl_program         /* program */,
 +-                 cl_program_info    /* param_name */,
 +-                 size_t             /* param_value_size */,
 +-                 void *             /* param_value */,
 +-                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetProgramBuildInfo(cl_program            /* program */,
 +-                      cl_device_id          /* device */,
 +-                      cl_program_build_info /* param_name */,
 +-                      size_t                /* param_value_size */,
 +-                      void *                /* param_value */,
 +-                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-/* Kernel Object APIs */
 +-extern CL_API_ENTRY cl_kernel CL_API_CALL
 +-clCreateKernel(cl_program      /* program */,
 +-               const char *    /* kernel_name */,
 +-               cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clCreateKernelsInProgram(cl_program     /* program */,
 +-                         cl_uint        /* num_kernels */,
 +-                         cl_kernel *    /* kernels */,
 +-                         cl_uint *      /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainKernel(cl_kernel    /* kernel */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clSetKernelArg(cl_kernel    /* kernel */,
 +-               cl_uint      /* arg_index */,
 +-               size_t       /* arg_size */,
 +-               const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetKernelInfo(cl_kernel       /* kernel */,
 +-                cl_kernel_info  /* param_name */,
 +-                size_t          /* param_value_size */,
 +-                void *          /* param_value */,
 +-                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
- -clGetKernelArgInfo(cl_kernel       /* kernel */,
- -                   cl_uint         /* arg_indx */,
- -                   cl_kernel_arg_info  /* param_name */,
- -                   size_t          /* param_value_size */,
- -                   void *          /* param_value */,
- -                   size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
- -
- -extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetKernelWorkGroupInfo(cl_kernel                  /* kernel */,
 +-                         cl_device_id               /* device */,
 +-                         cl_kernel_work_group_info  /* param_name */,
 +-                         size_t                     /* param_value_size */,
 +-                         void *                     /* param_value */,
 +-                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
- -/* Event Object APIs */
++-/* Event Object APIs  */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clWaitForEvents(cl_uint             /* num_events */,
 +-                const cl_event *    /* event_list */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetEventInfo(cl_event         /* event */,
 +-               cl_event_info    /* param_name */,
 +-               size_t           /* param_value_size */,
 +-               void *           /* param_value */,
 +-               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-extern CL_API_ENTRY cl_event CL_API_CALL
 +-clCreateUserEvent(cl_context    /* context */,
 +-                  cl_int *      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;               
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clSetUserEventStatus(cl_event   /* event */,
 +-                     cl_int     /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
 +-                     
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clSetEventCallback( cl_event    /* event */,
 +-                    cl_int      /* command_exec_callback_type */,
 +-                    void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
 +-                    void *      /* user_data */) CL_API_SUFFIX__VERSION_1_1;
 +-
- -/* Profiling APIs */
++-/* Profiling APIs  */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetEventProfilingInfo(cl_event            /* event */,
 +-                        cl_profiling_info   /* param_name */,
 +-                        size_t              /* param_value_size */,
 +-                        void *              /* param_value */,
 +-                        size_t *            /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                                
 +-/* Flush and Finish APIs */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Enqueued Commands APIs */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueReadBuffer(cl_command_queue    /* command_queue */,
 +-                    cl_mem              /* buffer */,
 +-                    cl_bool             /* blocking_read */,
 +-                    size_t              /* offset */,
- -                    size_t              /* size */, 
++-                    size_t              /* cb */, 
 +-                    void *              /* ptr */,
 +-                    cl_uint             /* num_events_in_wait_list */,
 +-                    const cl_event *    /* event_wait_list */,
 +-                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueReadBufferRect(cl_command_queue    /* command_queue */,
 +-                        cl_mem              /* buffer */,
 +-                        cl_bool             /* blocking_read */,
- -                        const size_t *      /* buffer_offset */,
- -                        const size_t *      /* host_offset */, 
++-                        const size_t *      /* buffer_origin */,
++-                        const size_t *      /* host_origin */, 
 +-                        const size_t *      /* region */,
 +-                        size_t              /* buffer_row_pitch */,
 +-                        size_t              /* buffer_slice_pitch */,
 +-                        size_t              /* host_row_pitch */,
 +-                        size_t              /* host_slice_pitch */,                        
 +-                        void *              /* ptr */,
 +-                        cl_uint             /* num_events_in_wait_list */,
 +-                        const cl_event *    /* event_wait_list */,
 +-                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueWriteBuffer(cl_command_queue   /* command_queue */, 
 +-                     cl_mem             /* buffer */, 
 +-                     cl_bool            /* blocking_write */, 
 +-                     size_t             /* offset */, 
- -                     size_t             /* size */, 
++-                     size_t             /* cb */, 
 +-                     const void *       /* ptr */, 
 +-                     cl_uint            /* num_events_in_wait_list */, 
 +-                     const cl_event *   /* event_wait_list */, 
 +-                     cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueWriteBufferRect(cl_command_queue    /* command_queue */,
 +-                         cl_mem              /* buffer */,
 +-                         cl_bool             /* blocking_write */,
- -                         const size_t *      /* buffer_offset */,
- -                         const size_t *      /* host_offset */, 
++-                         const size_t *      /* buffer_origin */,
++-                         const size_t *      /* host_origin */, 
 +-                         const size_t *      /* region */,
 +-                         size_t              /* buffer_row_pitch */,
 +-                         size_t              /* buffer_slice_pitch */,
 +-                         size_t              /* host_row_pitch */,
 +-                         size_t              /* host_slice_pitch */,                        
 +-                         const void *        /* ptr */,
 +-                         cl_uint             /* num_events_in_wait_list */,
 +-                         const cl_event *    /* event_wait_list */,
 +-                         cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
- -clEnqueueFillBuffer(cl_command_queue   /* command_queue */,
- -                    cl_mem             /* buffer */, 
- -                    const void *       /* pattern */, 
- -                    size_t             /* pattern_size */, 
- -                    size_t             /* offset */, 
- -                    size_t             /* size */, 
- -                    cl_uint            /* num_events_in_wait_list */, 
- -                    const cl_event *   /* event_wait_list */, 
- -                    cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
- -                            
- -extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyBuffer(cl_command_queue    /* command_queue */, 
 +-                    cl_mem              /* src_buffer */,
 +-                    cl_mem              /* dst_buffer */, 
 +-                    size_t              /* src_offset */,
 +-                    size_t              /* dst_offset */,
- -                    size_t              /* size */, 
++-                    size_t              /* cb */, 
 +-                    cl_uint             /* num_events_in_wait_list */,
 +-                    const cl_event *    /* event_wait_list */,
 +-                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyBufferRect(cl_command_queue    /* command_queue */, 
 +-                        cl_mem              /* src_buffer */,
 +-                        cl_mem              /* dst_buffer */, 
 +-                        const size_t *      /* src_origin */,
 +-                        const size_t *      /* dst_origin */,
 +-                        const size_t *      /* region */, 
 +-                        size_t              /* src_row_pitch */,
 +-                        size_t              /* src_slice_pitch */,
 +-                        size_t              /* dst_row_pitch */,
 +-                        size_t              /* dst_slice_pitch */,
 +-                        cl_uint             /* num_events_in_wait_list */,
 +-                        const cl_event *    /* event_wait_list */,
 +-                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueReadImage(cl_command_queue     /* command_queue */,
 +-                   cl_mem               /* image */,
 +-                   cl_bool              /* blocking_read */, 
 +-                   const size_t *       /* origin[3] */,
 +-                   const size_t *       /* region[3] */,
 +-                   size_t               /* row_pitch */,
 +-                   size_t               /* slice_pitch */, 
 +-                   void *               /* ptr */,
 +-                   cl_uint              /* num_events_in_wait_list */,
 +-                   const cl_event *     /* event_wait_list */,
 +-                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueWriteImage(cl_command_queue    /* command_queue */,
 +-                    cl_mem              /* image */,
 +-                    cl_bool             /* blocking_write */, 
 +-                    const size_t *      /* origin[3] */,
 +-                    const size_t *      /* region[3] */,
 +-                    size_t              /* input_row_pitch */,
 +-                    size_t              /* input_slice_pitch */, 
 +-                    const void *        /* ptr */,
 +-                    cl_uint             /* num_events_in_wait_list */,
 +-                    const cl_event *    /* event_wait_list */,
 +-                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
- -clEnqueueFillImage(cl_command_queue   /* command_queue */,
- -                   cl_mem             /* image */, 
- -                   const void *       /* fill_color */, 
- -                   const size_t *     /* origin[3] */, 
- -                   const size_t *     /* region[3] */, 
- -                   cl_uint            /* num_events_in_wait_list */, 
- -                   const cl_event *   /* event_wait_list */, 
- -                   cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
- -                            
- -extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyImage(cl_command_queue     /* command_queue */,
 +-                   cl_mem               /* src_image */,
 +-                   cl_mem               /* dst_image */, 
 +-                   const size_t *       /* src_origin[3] */,
 +-                   const size_t *       /* dst_origin[3] */,
 +-                   const size_t *       /* region[3] */, 
 +-                   cl_uint              /* num_events_in_wait_list */,
 +-                   const cl_event *     /* event_wait_list */,
 +-                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
 +-                           cl_mem           /* src_image */,
 +-                           cl_mem           /* dst_buffer */, 
 +-                           const size_t *   /* src_origin[3] */,
 +-                           const size_t *   /* region[3] */, 
 +-                           size_t           /* dst_offset */,
 +-                           cl_uint          /* num_events_in_wait_list */,
 +-                           const cl_event * /* event_wait_list */,
 +-                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
 +-                           cl_mem           /* src_buffer */,
 +-                           cl_mem           /* dst_image */, 
 +-                           size_t           /* src_offset */,
 +-                           const size_t *   /* dst_origin[3] */,
 +-                           const size_t *   /* region[3] */, 
 +-                           cl_uint          /* num_events_in_wait_list */,
 +-                           const cl_event * /* event_wait_list */,
 +-                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY void * CL_API_CALL
 +-clEnqueueMapBuffer(cl_command_queue /* command_queue */,
 +-                   cl_mem           /* buffer */,
 +-                   cl_bool          /* blocking_map */, 
 +-                   cl_map_flags     /* map_flags */,
 +-                   size_t           /* offset */,
- -                   size_t           /* size */,
++-                   size_t           /* cb */,
 +-                   cl_uint          /* num_events_in_wait_list */,
 +-                   const cl_event * /* event_wait_list */,
 +-                   cl_event *       /* event */,
 +-                   cl_int *         /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY void * CL_API_CALL
 +-clEnqueueMapImage(cl_command_queue  /* command_queue */,
 +-                  cl_mem            /* image */, 
 +-                  cl_bool           /* blocking_map */, 
 +-                  cl_map_flags      /* map_flags */, 
 +-                  const size_t *    /* origin[3] */,
 +-                  const size_t *    /* region[3] */,
 +-                  size_t *          /* image_row_pitch */,
 +-                  size_t *          /* image_slice_pitch */,
 +-                  cl_uint           /* num_events_in_wait_list */,
 +-                  const cl_event *  /* event_wait_list */,
 +-                  cl_event *        /* event */,
 +-                  cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
 +-                        cl_mem           /* memobj */,
 +-                        void *           /* mapped_ptr */,
 +-                        cl_uint          /* num_events_in_wait_list */,
 +-                        const cl_event *  /* event_wait_list */,
 +-                        cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
- -clEnqueueMigrateMemObjects(cl_command_queue       /* command_queue */,
- -                           cl_uint                /* num_mem_objects */,
- -                           const cl_mem *         /* mem_objects */,
- -                           cl_mem_migration_flags /* flags */,
- -                           cl_uint                /* num_events_in_wait_list */,
- -                           const cl_event *       /* event_wait_list */,
- -                           cl_event *             /* event */) CL_API_SUFFIX__VERSION_1_2;
- -
- -extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
 +-                       cl_kernel        /* kernel */,
 +-                       cl_uint          /* work_dim */,
 +-                       const size_t *   /* global_work_offset */,
 +-                       const size_t *   /* global_work_size */,
 +-                       const size_t *   /* local_work_size */,
 +-                       cl_uint          /* num_events_in_wait_list */,
 +-                       const cl_event * /* event_wait_list */,
 +-                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueTask(cl_command_queue  /* command_queue */,
 +-              cl_kernel         /* kernel */,
 +-              cl_uint           /* num_events_in_wait_list */,
 +-              const cl_event *  /* event_wait_list */,
 +-              cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueNativeKernel(cl_command_queue  /* command_queue */,
- -					  void (CL_CALLBACK * /*user_func*/)(void *), 
++-					  void (CL_CALLBACK *user_func)(void *), 
 +-                      void *            /* args */,
 +-                      size_t            /* cb_args */, 
 +-                      cl_uint           /* num_mem_objects */,
 +-                      const cl_mem *    /* mem_list */,
 +-                      const void **     /* args_mem_loc */,
 +-                      cl_uint           /* num_events_in_wait_list */,
 +-                      const cl_event *  /* event_wait_list */,
 +-                      cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
- -clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
- -                            cl_uint           /* num_events_in_wait_list */,
- -                            const cl_event *  /* event_wait_list */,
- -                            cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
++-clEnqueueMarker(cl_command_queue    /* command_queue */,
++-                cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
- -clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
- -                             cl_uint           /* num_events_in_wait_list */,
- -                             const cl_event *  /* event_wait_list */,
- -                             cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
++-clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
++-                       cl_uint          /* num_events */,
++-                       const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
 +-
++-extern CL_API_ENTRY cl_int CL_API_CALL
++-clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Extension function access
 +- *
 +- * Returns the extension function address for the given function name,
 +- * or NULL if a valid function can not be found.  The client must
 +- * check to make sure the address is not NULL, before using or 
 +- * calling the returned function address.
 +- */
- -extern CL_API_ENTRY void * CL_API_CALL 
- -clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
- -                                         const char *   /* func_name */) CL_API_SUFFIX__VERSION_1_2;
- -    
- -
- -// Deprecated OpenCL 1.1 APIs
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
- -clCreateImage2D(cl_context              /* context */,
- -                cl_mem_flags            /* flags */,
- -                const cl_image_format * /* image_format */,
- -                size_t                  /* image_width */,
- -                size_t                  /* image_height */,
- -                size_t                  /* image_row_pitch */, 
- -                void *                  /* host_ptr */,
- -                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
- -    
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
- -clCreateImage3D(cl_context              /* context */,
- -                cl_mem_flags            /* flags */,
- -                const cl_image_format * /* image_format */,
- -                size_t                  /* image_width */, 
- -                size_t                  /* image_height */,
- -                size_t                  /* image_depth */, 
- -                size_t                  /* image_row_pitch */, 
- -                size_t                  /* image_slice_pitch */, 
- -                void *                  /* host_ptr */,
- -                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
- -    
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
- -clEnqueueMarker(cl_command_queue    /* command_queue */,
- -                cl_event *          /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
- -    
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
- -clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
- -                        cl_uint          /* num_events */,
- -                        const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
- -    
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
- -clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
- -
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
- -clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
- -    
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
- -clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
++-extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  /* __OPENCL_CL_H */
 +-
 ++#include_next <CL/cl.h>
- Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_platform.h
++Index: beignet-0.1+git20130619+42967d2/include/CL/cl_platform.h
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl_platform.h	2013-05-21 10:38:37.207948335 +0200
- +++ beignet-0.1+git20130521+a7ea35c/include/CL/cl_platform.h	2013-05-21 10:41:03.327941820 +0200
- @@ -1,1254 +1 @@
++--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_platform.h	2013-06-19 21:04:24.122667370 +0200
+++++ beignet-0.1+git20130619+42967d2/include/CL/cl_platform.h	2013-06-19 21:04:33.478666953 +0200
++@@ -1,1198 +1 @@
 +-/**********************************************************************************
- - * Copyright (c) 2008-2012 The Khronos Group Inc.
++- * Copyright (c) 2008-2010 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- **********************************************************************************/
 +-
 +-/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */
 +-
 +-#ifndef __CL_PLATFORM_H
 +-#define __CL_PLATFORM_H
 +-
 +-#ifdef __APPLE__
 +-    /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
 +-    #include <AvailabilityMacros.h>
 +-#endif
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-#if defined(_WIN32)
 +-    #define CL_API_ENTRY
 +-    #define CL_API_CALL     __stdcall
 +-    #define CL_CALLBACK     __stdcall
 +-#else
 +-    #define CL_API_ENTRY
 +-    #define CL_API_CALL
 +-    #define CL_CALLBACK
 +-#endif
 +-
 +-#ifdef __APPLE__
- -    #define CL_EXTENSION_WEAK_LINK       __attribute__((weak_import))
- -    #define CL_API_SUFFIX__VERSION_1_0                  AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
- -    #define CL_EXT_SUFFIX__VERSION_1_0                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
- -    #define CL_API_SUFFIX__VERSION_1_1                  AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
- -    #define GCL_API_SUFFIX__VERSION_1_1                 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
- -    #define CL_EXT_SUFFIX__VERSION_1_1                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
- -    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED       CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
- -    
- -    #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
- -        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
- -        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
- -        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
- -        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
- -        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
- -    #else
- -        #warning  This path should never happen outside of internal operating system development.  AvailabilityMacros do not function correctly here!
- -        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
- -        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
- -        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
- -        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
- -    #endif
++-    #define CL_EXTENSION_WEAK_LINK                  __attribute__((weak_import))       
++-    #define CL_API_SUFFIX__VERSION_1_0              AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
++-    #define CL_EXT_SUFFIX__VERSION_1_0              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
++-    #define CL_API_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
++-    #define CL_EXT_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
++-    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
 +-#else
- -    #define CL_EXTENSION_WEAK_LINK  
++-    #define CL_EXTENSION_WEAK_LINK                         
 +-    #define CL_API_SUFFIX__VERSION_1_0
 +-    #define CL_EXT_SUFFIX__VERSION_1_0
 +-    #define CL_API_SUFFIX__VERSION_1_1
 +-    #define CL_EXT_SUFFIX__VERSION_1_1
- -    #define CL_API_SUFFIX__VERSION_1_2
- -    #define CL_EXT_SUFFIX__VERSION_1_2
- -    
- -    #ifdef __GNUC__
- -        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
- -            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
- -            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
- -        #else
- -            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
- -            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
- -        #endif
- -    
- -        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
- -            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED    
- -            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
- -        #else
- -            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
- -            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
- -        #endif
- -    #elif _WIN32
- -        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
- -            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED    
- -            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
- -        #else
- -            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED 
- -            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)     
- -        #endif
- -    
- -        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
- -            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
- -            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
- -        #else
- -            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED 
- -            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)     
- -        #endif
- -    #else
- -        #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
- -        #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
- -    
- -        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
- -        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
- -    #endif
++-    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
 +-#endif
 +-
 +-#if (defined (_WIN32) && defined(_MSC_VER))
 +-
 +-/* scalar types  */
 +-typedef signed   __int8         cl_char;
 +-typedef unsigned __int8         cl_uchar;
 +-typedef signed   __int16        cl_short;
 +-typedef unsigned __int16        cl_ushort;
 +-typedef signed   __int32        cl_int;
 +-typedef unsigned __int32        cl_uint;
 +-typedef signed   __int64        cl_long;
 +-typedef unsigned __int64        cl_ulong;
 +-
 +-typedef unsigned __int16        cl_half;
 +-typedef float                   cl_float;
 +-typedef double                  cl_double;
 +-
 +-/* Macro names and corresponding values defined by OpenCL */
 +-#define CL_CHAR_BIT         8
 +-#define CL_SCHAR_MAX        127
 +-#define CL_SCHAR_MIN        (-127-1)
 +-#define CL_CHAR_MAX         CL_SCHAR_MAX
 +-#define CL_CHAR_MIN         CL_SCHAR_MIN
 +-#define CL_UCHAR_MAX        255
 +-#define CL_SHRT_MAX         32767
 +-#define CL_SHRT_MIN         (-32767-1)
 +-#define CL_USHRT_MAX        65535
 +-#define CL_INT_MAX          2147483647
 +-#define CL_INT_MIN          (-2147483647-1)
 +-#define CL_UINT_MAX         0xffffffffU
 +-#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
 +-#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
 +-#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
 +-
 +-#define CL_FLT_DIG          6
 +-#define CL_FLT_MANT_DIG     24
 +-#define CL_FLT_MAX_10_EXP   +38
 +-#define CL_FLT_MAX_EXP      +128
 +-#define CL_FLT_MIN_10_EXP   -37
 +-#define CL_FLT_MIN_EXP      -125
 +-#define CL_FLT_RADIX        2
 +-#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
 +-#define CL_FLT_MIN          1.175494350822287507969e-38f
 +-#define CL_FLT_EPSILON      0x1.0p-23f
 +-
 +-#define CL_DBL_DIG          15
 +-#define CL_DBL_MANT_DIG     53
 +-#define CL_DBL_MAX_10_EXP   +308
 +-#define CL_DBL_MAX_EXP      +1024
 +-#define CL_DBL_MIN_10_EXP   -307
 +-#define CL_DBL_MIN_EXP      -1021
 +-#define CL_DBL_RADIX        2
 +-#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
 +-#define CL_DBL_MIN          2.225073858507201383090e-308
 +-#define CL_DBL_EPSILON      2.220446049250313080847e-16
 +-
 +-#define  CL_M_E             2.718281828459045090796
 +-#define  CL_M_LOG2E         1.442695040888963387005
 +-#define  CL_M_LOG10E        0.434294481903251816668
 +-#define  CL_M_LN2           0.693147180559945286227
 +-#define  CL_M_LN10          2.302585092994045901094
 +-#define  CL_M_PI            3.141592653589793115998
 +-#define  CL_M_PI_2          1.570796326794896557999
 +-#define  CL_M_PI_4          0.785398163397448278999
 +-#define  CL_M_1_PI          0.318309886183790691216
 +-#define  CL_M_2_PI          0.636619772367581382433
 +-#define  CL_M_2_SQRTPI      1.128379167095512558561
 +-#define  CL_M_SQRT2         1.414213562373095145475
 +-#define  CL_M_SQRT1_2       0.707106781186547572737
 +-
 +-#define  CL_M_E_F           2.71828174591064f
 +-#define  CL_M_LOG2E_F       1.44269502162933f
 +-#define  CL_M_LOG10E_F      0.43429449200630f
 +-#define  CL_M_LN2_F         0.69314718246460f
 +-#define  CL_M_LN10_F        2.30258512496948f
 +-#define  CL_M_PI_F          3.14159274101257f
 +-#define  CL_M_PI_2_F        1.57079637050629f
 +-#define  CL_M_PI_4_F        0.78539818525314f
 +-#define  CL_M_1_PI_F        0.31830987334251f
 +-#define  CL_M_2_PI_F        0.63661974668503f
 +-#define  CL_M_2_SQRTPI_F    1.12837922573090f
 +-#define  CL_M_SQRT2_F       1.41421353816986f
 +-#define  CL_M_SQRT1_2_F     0.70710676908493f
 +-
 +-#define CL_NAN              (CL_INFINITY - CL_INFINITY)
 +-#define CL_HUGE_VALF        ((cl_float) 1e50)
 +-#define CL_HUGE_VAL         ((cl_double) 1e500)
 +-#define CL_MAXFLOAT         CL_FLT_MAX
 +-#define CL_INFINITY         CL_HUGE_VALF
 +-
 +-#else
 +-
 +-#include <stdint.h>
 +-
 +-/* scalar types  */
 +-typedef int8_t          cl_char;
 +-typedef uint8_t         cl_uchar;
 +-typedef int16_t         cl_short    __attribute__((aligned(2)));
 +-typedef uint16_t        cl_ushort   __attribute__((aligned(2)));
 +-typedef int32_t         cl_int      __attribute__((aligned(4)));
 +-typedef uint32_t        cl_uint     __attribute__((aligned(4)));
 +-typedef int64_t         cl_long     __attribute__((aligned(8)));
 +-typedef uint64_t        cl_ulong    __attribute__((aligned(8)));
 +-
 +-typedef uint16_t        cl_half     __attribute__((aligned(2)));
 +-typedef float           cl_float    __attribute__((aligned(4)));
 +-typedef double          cl_double   __attribute__((aligned(8)));
 +-
 +-/* Macro names and corresponding values defined by OpenCL */
 +-#define CL_CHAR_BIT         8
 +-#define CL_SCHAR_MAX        127
 +-#define CL_SCHAR_MIN        (-127-1)
 +-#define CL_CHAR_MAX         CL_SCHAR_MAX
 +-#define CL_CHAR_MIN         CL_SCHAR_MIN
 +-#define CL_UCHAR_MAX        255
 +-#define CL_SHRT_MAX         32767
 +-#define CL_SHRT_MIN         (-32767-1)
 +-#define CL_USHRT_MAX        65535
 +-#define CL_INT_MAX          2147483647
 +-#define CL_INT_MIN          (-2147483647-1)
 +-#define CL_UINT_MAX         0xffffffffU
 +-#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
 +-#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
 +-#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
 +-
 +-#define CL_FLT_DIG          6
 +-#define CL_FLT_MANT_DIG     24
 +-#define CL_FLT_MAX_10_EXP   +38
 +-#define CL_FLT_MAX_EXP      +128
 +-#define CL_FLT_MIN_10_EXP   -37
 +-#define CL_FLT_MIN_EXP      -125
 +-#define CL_FLT_RADIX        2
 +-#define CL_FLT_MAX          0x1.fffffep127f
 +-#define CL_FLT_MIN          0x1.0p-126f
 +-#define CL_FLT_EPSILON      0x1.0p-23f
 +-
 +-#define CL_DBL_DIG          15
 +-#define CL_DBL_MANT_DIG     53
 +-#define CL_DBL_MAX_10_EXP   +308
 +-#define CL_DBL_MAX_EXP      +1024
 +-#define CL_DBL_MIN_10_EXP   -307
 +-#define CL_DBL_MIN_EXP      -1021
 +-#define CL_DBL_RADIX        2
 +-#define CL_DBL_MAX          0x1.fffffffffffffp1023
 +-#define CL_DBL_MIN          0x1.0p-1022
 +-#define CL_DBL_EPSILON      0x1.0p-52
 +-
 +-#define  CL_M_E             2.718281828459045090796
 +-#define  CL_M_LOG2E         1.442695040888963387005
 +-#define  CL_M_LOG10E        0.434294481903251816668
 +-#define  CL_M_LN2           0.693147180559945286227
 +-#define  CL_M_LN10          2.302585092994045901094
 +-#define  CL_M_PI            3.141592653589793115998
 +-#define  CL_M_PI_2          1.570796326794896557999
 +-#define  CL_M_PI_4          0.785398163397448278999
 +-#define  CL_M_1_PI          0.318309886183790691216
 +-#define  CL_M_2_PI          0.636619772367581382433
 +-#define  CL_M_2_SQRTPI      1.128379167095512558561
 +-#define  CL_M_SQRT2         1.414213562373095145475
 +-#define  CL_M_SQRT1_2       0.707106781186547572737
 +-
 +-#define  CL_M_E_F           2.71828174591064f
 +-#define  CL_M_LOG2E_F       1.44269502162933f
 +-#define  CL_M_LOG10E_F      0.43429449200630f
 +-#define  CL_M_LN2_F         0.69314718246460f
 +-#define  CL_M_LN10_F        2.30258512496948f
 +-#define  CL_M_PI_F          3.14159274101257f
 +-#define  CL_M_PI_2_F        1.57079637050629f
 +-#define  CL_M_PI_4_F        0.78539818525314f
 +-#define  CL_M_1_PI_F        0.31830987334251f
 +-#define  CL_M_2_PI_F        0.63661974668503f
 +-#define  CL_M_2_SQRTPI_F    1.12837922573090f
 +-#define  CL_M_SQRT2_F       1.41421353816986f
 +-#define  CL_M_SQRT1_2_F     0.70710676908493f
 +-
 +-#if defined( __GNUC__ )
 +-   #define CL_HUGE_VALF     __builtin_huge_valf()
 +-   #define CL_HUGE_VAL      __builtin_huge_val()
 +-   #define CL_NAN           __builtin_nanf( "" )
 +-#else
 +-   #define CL_HUGE_VALF     ((cl_float) 1e50)
 +-   #define CL_HUGE_VAL      ((cl_double) 1e500)
 +-   float nanf( const char * );
 +-   #define CL_NAN           nanf( "" )  
 +-#endif
 +-#define CL_MAXFLOAT         CL_FLT_MAX
 +-#define CL_INFINITY         CL_HUGE_VALF
 +-
 +-#endif
 +-
 +-#include <stddef.h>
 +-
- -/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
++-/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */
 +-typedef unsigned int cl_GLuint;
 +-typedef int          cl_GLint;
 +-typedef unsigned int cl_GLenum;
 +-
 +-/*
 +- * Vector types 
 +- *
 +- *  Note:   OpenCL requires that all types be naturally aligned. 
 +- *          This means that vector types must be naturally aligned.
 +- *          For example, a vector of four floats must be aligned to
 +- *          a 16 byte boundary (calculated as 4 * the natural 4-byte 
 +- *          alignment of the float).  The alignment qualifiers here
 +- *          will only function properly if your compiler supports them
 +- *          and if you don't actively work to defeat them.  For example,
 +- *          in order for a cl_float4 to be 16 byte aligned in a struct,
 +- *          the start of the struct must itself be 16-byte aligned. 
 +- *
 +- *          Maintaining proper alignment is the user's responsibility.
 +- */
 +-
 +-/* Define basic vector types */
 +-#if defined( __VEC__ )
 +-   #include <altivec.h>   /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
 +-   typedef vector unsigned char     __cl_uchar16;
 +-   typedef vector signed char       __cl_char16;
 +-   typedef vector unsigned short    __cl_ushort8;
 +-   typedef vector signed short      __cl_short8;
 +-   typedef vector unsigned int      __cl_uint4;
 +-   typedef vector signed int        __cl_int4;
 +-   typedef vector float             __cl_float4;
 +-   #define  __CL_UCHAR16__  1
 +-   #define  __CL_CHAR16__   1
 +-   #define  __CL_USHORT8__  1
 +-   #define  __CL_SHORT8__   1
 +-   #define  __CL_UINT4__    1
 +-   #define  __CL_INT4__     1
 +-   #define  __CL_FLOAT4__   1
 +-#endif
 +-
 +-#if defined( __SSE__ )
 +-    #if defined( __MINGW64__ )
 +-        #include <intrin.h>
 +-    #else
 +-        #include <xmmintrin.h>
 +-    #endif
 +-    #if defined( __GNUC__ )
 +-        typedef float __cl_float4   __attribute__((vector_size(16)));
 +-    #else
 +-        typedef __m128 __cl_float4;
 +-    #endif
 +-    #define __CL_FLOAT4__   1
 +-#endif
 +-
 +-#if defined( __SSE2__ )
 +-    #if defined( __MINGW64__ )
 +-        #include <intrin.h>
 +-    #else
 +-        #include <emmintrin.h>
 +-    #endif
 +-    #if defined( __GNUC__ )
 +-        typedef cl_uchar    __cl_uchar16    __attribute__((vector_size(16)));
 +-        typedef cl_char     __cl_char16     __attribute__((vector_size(16)));
 +-        typedef cl_ushort   __cl_ushort8    __attribute__((vector_size(16)));
 +-        typedef cl_short    __cl_short8     __attribute__((vector_size(16)));
 +-        typedef cl_uint     __cl_uint4      __attribute__((vector_size(16)));
 +-        typedef cl_int      __cl_int4       __attribute__((vector_size(16)));
 +-        typedef cl_ulong    __cl_ulong2     __attribute__((vector_size(16)));
 +-        typedef cl_long     __cl_long2      __attribute__((vector_size(16)));
 +-        typedef cl_double   __cl_double2    __attribute__((vector_size(16)));
 +-    #else
 +-        typedef __m128i __cl_uchar16;
 +-        typedef __m128i __cl_char16;
 +-        typedef __m128i __cl_ushort8;
 +-        typedef __m128i __cl_short8;
 +-        typedef __m128i __cl_uint4;
 +-        typedef __m128i __cl_int4;
 +-        typedef __m128i __cl_ulong2;
 +-        typedef __m128i __cl_long2;
 +-        typedef __m128d __cl_double2;
 +-    #endif
 +-    #define __CL_UCHAR16__  1
 +-    #define __CL_CHAR16__   1
 +-    #define __CL_USHORT8__  1
 +-    #define __CL_SHORT8__   1
 +-    #define __CL_INT4__     1
 +-    #define __CL_UINT4__    1
 +-    #define __CL_ULONG2__   1
 +-    #define __CL_LONG2__    1
 +-    #define __CL_DOUBLE2__  1
 +-#endif
 +-
 +-#if defined( __MMX__ )
 +-    #include <mmintrin.h>
 +-    #if defined( __GNUC__ )
 +-        typedef cl_uchar    __cl_uchar8     __attribute__((vector_size(8)));
 +-        typedef cl_char     __cl_char8      __attribute__((vector_size(8)));
 +-        typedef cl_ushort   __cl_ushort4    __attribute__((vector_size(8)));
 +-        typedef cl_short    __cl_short4     __attribute__((vector_size(8)));
 +-        typedef cl_uint     __cl_uint2      __attribute__((vector_size(8)));
 +-        typedef cl_int      __cl_int2       __attribute__((vector_size(8)));
 +-        typedef cl_ulong    __cl_ulong1     __attribute__((vector_size(8)));
 +-        typedef cl_long     __cl_long1      __attribute__((vector_size(8)));
 +-        typedef cl_float    __cl_float2     __attribute__((vector_size(8)));
 +-    #else
 +-        typedef __m64       __cl_uchar8;
 +-        typedef __m64       __cl_char8;
 +-        typedef __m64       __cl_ushort4;
 +-        typedef __m64       __cl_short4;
 +-        typedef __m64       __cl_uint2;
 +-        typedef __m64       __cl_int2;
 +-        typedef __m64       __cl_ulong1;
 +-        typedef __m64       __cl_long1;
 +-        typedef __m64       __cl_float2;
 +-    #endif
 +-    #define __CL_UCHAR8__   1
 +-    #define __CL_CHAR8__    1
 +-    #define __CL_USHORT4__  1
 +-    #define __CL_SHORT4__   1
 +-    #define __CL_INT2__     1
 +-    #define __CL_UINT2__    1
 +-    #define __CL_ULONG1__   1
 +-    #define __CL_LONG1__    1
 +-    #define __CL_FLOAT2__   1
 +-#endif
 +-
 +-#if defined( __AVX__ )
 +-    #if defined( __MINGW64__ )
 +-        #include <intrin.h>
 +-    #else
 +-        #include <immintrin.h> 
 +-    #endif
 +-    #if defined( __GNUC__ )
 +-        typedef cl_float    __cl_float8     __attribute__((vector_size(32)));
 +-        typedef cl_double   __cl_double4    __attribute__((vector_size(32)));
 +-    #else
 +-        typedef __m256      __cl_float8;
 +-        typedef __m256d     __cl_double4;
 +-    #endif
 +-    #define __CL_FLOAT8__   1
 +-    #define __CL_DOUBLE4__  1
 +-#endif
 +-
 +-/* Define alignment keys */
 +-#if defined( __GNUC__ )
 +-    #define CL_ALIGNED(_x)          __attribute__ ((aligned(_x)))
 +-#elif defined( _WIN32) && (_MSC_VER)
 +-    /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements     */
 +-    /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx                                                 */
 +-    /* #include <crtdefs.h>                                                                                             */
 +-    /* #define CL_ALIGNED(_x)          _CRT_ALIGN(_x)                                                                   */
 +-    #define CL_ALIGNED(_x)
 +-#else
 +-   #warning  Need to implement some method to align data here
 +-   #define  CL_ALIGNED(_x)
 +-#endif
 +-
 +-/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-    /* .xyzw and .s0123...{f|F} are supported */
 +-    #define CL_HAS_NAMED_VECTOR_FIELDS 1
 +-    /* .hi and .lo are supported */
 +-    #define CL_HAS_HI_LO_VECTOR_FIELDS 1
 +-#endif
 +-
 +-/* Define cl_vector types */
 +-
 +-/* ---- cl_charn ---- */
 +-typedef union
 +-{
 +-    cl_char  CL_ALIGNED(2) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_char  x, y; };
 +-   __extension__ struct{ cl_char  s0, s1; };
 +-   __extension__ struct{ cl_char  lo, hi; };
 +-#endif
 +-#if defined( __CL_CHAR2__) 
 +-    __cl_char2     v2;
 +-#endif
 +-}cl_char2;
 +-
 +-typedef union
 +-{
 +-    cl_char  CL_ALIGNED(4) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_char  x, y, z, w; };
 +-   __extension__ struct{ cl_char  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_char2 lo, hi; };
 +-#endif
 +-#if defined( __CL_CHAR2__) 
 +-    __cl_char2     v2[2];
 +-#endif
 +-#if defined( __CL_CHAR4__) 
 +-    __cl_char4     v4;
 +-#endif
 +-}cl_char4;
 +-
 +-/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
 +-typedef  cl_char4  cl_char3;
 +-
 +-typedef union
 +-{
 +-    cl_char   CL_ALIGNED(8) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_char  x, y, z, w; };
 +-   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_char4 lo, hi; };
 +-#endif
 +-#if defined( __CL_CHAR2__) 
 +-    __cl_char2     v2[4];
 +-#endif
 +-#if defined( __CL_CHAR4__) 
 +-    __cl_char4     v4[2];
 +-#endif
 +-#if defined( __CL_CHAR8__ )
 +-    __cl_char8     v8;
 +-#endif
 +-}cl_char8;
 +-
 +-typedef union
 +-{
 +-    cl_char  CL_ALIGNED(16) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_char  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_char8 lo, hi; };
 +-#endif
 +-#if defined( __CL_CHAR2__) 
 +-    __cl_char2     v2[8];
 +-#endif
 +-#if defined( __CL_CHAR4__) 
 +-    __cl_char4     v4[4];
 +-#endif
 +-#if defined( __CL_CHAR8__ )
 +-    __cl_char8     v8[2];
 +-#endif
 +-#if defined( __CL_CHAR16__ )
 +-    __cl_char16    v16;
 +-#endif
 +-}cl_char16;
 +-
 +-
 +-/* ---- cl_ucharn ---- */
 +-typedef union
 +-{
 +-    cl_uchar  CL_ALIGNED(2) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uchar  x, y; };
 +-   __extension__ struct{ cl_uchar  s0, s1; };
 +-   __extension__ struct{ cl_uchar  lo, hi; };
 +-#endif
 +-#if defined( __cl_uchar2__) 
 +-    __cl_uchar2     v2;
 +-#endif
 +-}cl_uchar2;
 +-
 +-typedef union
 +-{
 +-    cl_uchar  CL_ALIGNED(4) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uchar  x, y, z, w; };
 +-   __extension__ struct{ cl_uchar  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_uchar2 lo, hi; };
 +-#endif
 +-#if defined( __CL_UCHAR2__) 
 +-    __cl_uchar2     v2[2];
 +-#endif
 +-#if defined( __CL_UCHAR4__) 
 +-    __cl_uchar4     v4;
 +-#endif
 +-}cl_uchar4;
 +-
 +-/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
 +-typedef  cl_uchar4  cl_uchar3;
 +-
 +-typedef union
 +-{
 +-    cl_uchar   CL_ALIGNED(8) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uchar  x, y, z, w; };
 +-   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_uchar4 lo, hi; };
 +-#endif
 +-#if defined( __CL_UCHAR2__) 
 +-    __cl_uchar2     v2[4];
 +-#endif
 +-#if defined( __CL_UCHAR4__) 
 +-    __cl_uchar4     v4[2];
 +-#endif
 +-#if defined( __CL_UCHAR8__ )
 +-    __cl_uchar8     v8;
 +-#endif
 +-}cl_uchar8;
 +-
 +-typedef union
 +-{
 +-    cl_uchar  CL_ALIGNED(16) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uchar  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_uchar8 lo, hi; };
 +-#endif
 +-#if defined( __CL_UCHAR2__) 
 +-    __cl_uchar2     v2[8];
 +-#endif
 +-#if defined( __CL_UCHAR4__) 
 +-    __cl_uchar4     v4[4];
 +-#endif
 +-#if defined( __CL_UCHAR8__ )
 +-    __cl_uchar8     v8[2];
 +-#endif
 +-#if defined( __CL_UCHAR16__ )
 +-    __cl_uchar16    v16;
 +-#endif
 +-}cl_uchar16;
 +-
 +-
 +-/* ---- cl_shortn ---- */
 +-typedef union
 +-{
 +-    cl_short  CL_ALIGNED(4) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_short  x, y; };
 +-   __extension__ struct{ cl_short  s0, s1; };
 +-   __extension__ struct{ cl_short  lo, hi; };
 +-#endif
 +-#if defined( __CL_SHORT2__) 
 +-    __cl_short2     v2;
 +-#endif
 +-}cl_short2;
 +-
 +-typedef union
 +-{
 +-    cl_short  CL_ALIGNED(8) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_short  x, y, z, w; };
 +-   __extension__ struct{ cl_short  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_short2 lo, hi; };
 +-#endif
 +-#if defined( __CL_SHORT2__) 
 +-    __cl_short2     v2[2];
 +-#endif
 +-#if defined( __CL_SHORT4__) 
 +-    __cl_short4     v4;
 +-#endif
 +-}cl_short4;
 +-
 +-/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
 +-typedef  cl_short4  cl_short3;
 +-
 +-typedef union
 +-{
 +-    cl_short   CL_ALIGNED(16) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_short  x, y, z, w; };
 +-   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_short4 lo, hi; };
 +-#endif
 +-#if defined( __CL_SHORT2__) 
 +-    __cl_short2     v2[4];
 +-#endif
 +-#if defined( __CL_SHORT4__) 
 +-    __cl_short4     v4[2];
 +-#endif
 +-#if defined( __CL_SHORT8__ )
 +-    __cl_short8     v8;
 +-#endif
 +-}cl_short8;
 +-
 +-typedef union
 +-{
 +-    cl_short  CL_ALIGNED(32) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_short8 lo, hi; };
 +-#endif
 +-#if defined( __CL_SHORT2__) 
 +-    __cl_short2     v2[8];
 +-#endif
 +-#if defined( __CL_SHORT4__) 
 +-    __cl_short4     v4[4];
 +-#endif
 +-#if defined( __CL_SHORT8__ )
 +-    __cl_short8     v8[2];
 +-#endif
 +-#if defined( __CL_SHORT16__ )
 +-    __cl_short16    v16;
 +-#endif
 +-}cl_short16;
 +-
 +-
 +-/* ---- cl_ushortn ---- */
 +-typedef union
 +-{
 +-    cl_ushort  CL_ALIGNED(4) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ushort  x, y; };
 +-   __extension__ struct{ cl_ushort  s0, s1; };
 +-   __extension__ struct{ cl_ushort  lo, hi; };
 +-#endif
 +-#if defined( __CL_USHORT2__) 
 +-    __cl_ushort2     v2;
 +-#endif
 +-}cl_ushort2;
 +-
 +-typedef union
 +-{
 +-    cl_ushort  CL_ALIGNED(8) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ushort  x, y, z, w; };
 +-   __extension__ struct{ cl_ushort  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_ushort2 lo, hi; };
 +-#endif
 +-#if defined( __CL_USHORT2__) 
 +-    __cl_ushort2     v2[2];
 +-#endif
 +-#if defined( __CL_USHORT4__) 
 +-    __cl_ushort4     v4;
 +-#endif
 +-}cl_ushort4;
 +-
 +-/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
 +-typedef  cl_ushort4  cl_ushort3;
 +-
 +-typedef union
 +-{
 +-    cl_ushort   CL_ALIGNED(16) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ushort  x, y, z, w; };
 +-   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_ushort4 lo, hi; };
 +-#endif
 +-#if defined( __CL_USHORT2__) 
 +-    __cl_ushort2     v2[4];
 +-#endif
 +-#if defined( __CL_USHORT4__) 
 +-    __cl_ushort4     v4[2];
 +-#endif
 +-#if defined( __CL_USHORT8__ )
 +-    __cl_ushort8     v8;
 +-#endif
 +-}cl_ushort8;
 +-
 +-typedef union
 +-{
 +-    cl_ushort  CL_ALIGNED(32) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_ushort8 lo, hi; };
 +-#endif
 +-#if defined( __CL_USHORT2__) 
 +-    __cl_ushort2     v2[8];
 +-#endif
 +-#if defined( __CL_USHORT4__) 
 +-    __cl_ushort4     v4[4];
 +-#endif
 +-#if defined( __CL_USHORT8__ )
 +-    __cl_ushort8     v8[2];
 +-#endif
 +-#if defined( __CL_USHORT16__ )
 +-    __cl_ushort16    v16;
 +-#endif
 +-}cl_ushort16;
 +-
 +-/* ---- cl_intn ---- */
 +-typedef union
 +-{
 +-    cl_int  CL_ALIGNED(8) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_int  x, y; };
 +-   __extension__ struct{ cl_int  s0, s1; };
 +-   __extension__ struct{ cl_int  lo, hi; };
 +-#endif
 +-#if defined( __CL_INT2__) 
 +-    __cl_int2     v2;
 +-#endif
 +-}cl_int2;
 +-
 +-typedef union
 +-{
 +-    cl_int  CL_ALIGNED(16) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_int  x, y, z, w; };
 +-   __extension__ struct{ cl_int  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_int2 lo, hi; };
 +-#endif
 +-#if defined( __CL_INT2__) 
 +-    __cl_int2     v2[2];
 +-#endif
 +-#if defined( __CL_INT4__) 
 +-    __cl_int4     v4;
 +-#endif
 +-}cl_int4;
 +-
 +-/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
 +-typedef  cl_int4  cl_int3;
 +-
 +-typedef union
 +-{
 +-    cl_int   CL_ALIGNED(32) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_int  x, y, z, w; };
 +-   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_int4 lo, hi; };
 +-#endif
 +-#if defined( __CL_INT2__) 
 +-    __cl_int2     v2[4];
 +-#endif
 +-#if defined( __CL_INT4__) 
 +-    __cl_int4     v4[2];
 +-#endif
 +-#if defined( __CL_INT8__ )
 +-    __cl_int8     v8;
 +-#endif
 +-}cl_int8;
 +-
 +-typedef union
 +-{
 +-    cl_int  CL_ALIGNED(64) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_int8 lo, hi; };
 +-#endif
 +-#if defined( __CL_INT2__) 
 +-    __cl_int2     v2[8];
 +-#endif
 +-#if defined( __CL_INT4__) 
 +-    __cl_int4     v4[4];
 +-#endif
 +-#if defined( __CL_INT8__ )
 +-    __cl_int8     v8[2];
 +-#endif
 +-#if defined( __CL_INT16__ )
 +-    __cl_int16    v16;
 +-#endif
 +-}cl_int16;
 +-
 +-
 +-/* ---- cl_uintn ---- */
 +-typedef union
 +-{
 +-    cl_uint  CL_ALIGNED(8) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uint  x, y; };
 +-   __extension__ struct{ cl_uint  s0, s1; };
 +-   __extension__ struct{ cl_uint  lo, hi; };
 +-#endif
 +-#if defined( __CL_UINT2__) 
 +-    __cl_uint2     v2;
 +-#endif
 +-}cl_uint2;
 +-
 +-typedef union
 +-{
 +-    cl_uint  CL_ALIGNED(16) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uint  x, y, z, w; };
 +-   __extension__ struct{ cl_uint  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_uint2 lo, hi; };
 +-#endif
 +-#if defined( __CL_UINT2__) 
 +-    __cl_uint2     v2[2];
 +-#endif
 +-#if defined( __CL_UINT4__) 
 +-    __cl_uint4     v4;
 +-#endif
 +-}cl_uint4;
 +-
 +-/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
 +-typedef  cl_uint4  cl_uint3;
 +-
 +-typedef union
 +-{
 +-    cl_uint   CL_ALIGNED(32) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uint  x, y, z, w; };
 +-   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_uint4 lo, hi; };
 +-#endif
 +-#if defined( __CL_UINT2__) 
 +-    __cl_uint2     v2[4];
 +-#endif
 +-#if defined( __CL_UINT4__) 
 +-    __cl_uint4     v4[2];
 +-#endif
 +-#if defined( __CL_UINT8__ )
 +-    __cl_uint8     v8;
 +-#endif
 +-}cl_uint8;
 +-
 +-typedef union
 +-{
 +-    cl_uint  CL_ALIGNED(64) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_uint8 lo, hi; };
 +-#endif
 +-#if defined( __CL_UINT2__) 
 +-    __cl_uint2     v2[8];
 +-#endif
 +-#if defined( __CL_UINT4__) 
 +-    __cl_uint4     v4[4];
 +-#endif
 +-#if defined( __CL_UINT8__ )
 +-    __cl_uint8     v8[2];
 +-#endif
 +-#if defined( __CL_UINT16__ )
 +-    __cl_uint16    v16;
 +-#endif
 +-}cl_uint16;
 +-
 +-/* ---- cl_longn ---- */
 +-typedef union
 +-{
 +-    cl_long  CL_ALIGNED(16) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_long  x, y; };
 +-   __extension__ struct{ cl_long  s0, s1; };
 +-   __extension__ struct{ cl_long  lo, hi; };
 +-#endif
 +-#if defined( __CL_LONG2__) 
 +-    __cl_long2     v2;
 +-#endif
 +-}cl_long2;
 +-
 +-typedef union
 +-{
 +-    cl_long  CL_ALIGNED(32) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_long  x, y, z, w; };
 +-   __extension__ struct{ cl_long  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_long2 lo, hi; };
 +-#endif
 +-#if defined( __CL_LONG2__) 
 +-    __cl_long2     v2[2];
 +-#endif
 +-#if defined( __CL_LONG4__) 
 +-    __cl_long4     v4;
 +-#endif
 +-}cl_long4;
 +-
 +-/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
 +-typedef  cl_long4  cl_long3;
 +-
 +-typedef union
 +-{
 +-    cl_long   CL_ALIGNED(64) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_long  x, y, z, w; };
 +-   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_long4 lo, hi; };
 +-#endif
 +-#if defined( __CL_LONG2__) 
 +-    __cl_long2     v2[4];
 +-#endif
 +-#if defined( __CL_LONG4__) 
 +-    __cl_long4     v4[2];
 +-#endif
 +-#if defined( __CL_LONG8__ )
 +-    __cl_long8     v8;
 +-#endif
 +-}cl_long8;
 +-
 +-typedef union
 +-{
 +-    cl_long  CL_ALIGNED(128) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_long8 lo, hi; };
 +-#endif
 +-#if defined( __CL_LONG2__) 
 +-    __cl_long2     v2[8];
 +-#endif
 +-#if defined( __CL_LONG4__) 
 +-    __cl_long4     v4[4];
 +-#endif
 +-#if defined( __CL_LONG8__ )
 +-    __cl_long8     v8[2];
 +-#endif
 +-#if defined( __CL_LONG16__ )
 +-    __cl_long16    v16;
 +-#endif
 +-}cl_long16;
 +-
 +-
 +-/* ---- cl_ulongn ---- */
 +-typedef union
 +-{
 +-    cl_ulong  CL_ALIGNED(16) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ulong  x, y; };
 +-   __extension__ struct{ cl_ulong  s0, s1; };
 +-   __extension__ struct{ cl_ulong  lo, hi; };
 +-#endif
 +-#if defined( __CL_ULONG2__) 
 +-    __cl_ulong2     v2;
 +-#endif
 +-}cl_ulong2;
 +-
 +-typedef union
 +-{
 +-    cl_ulong  CL_ALIGNED(32) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ulong  x, y, z, w; };
 +-   __extension__ struct{ cl_ulong  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_ulong2 lo, hi; };
 +-#endif
 +-#if defined( __CL_ULONG2__) 
 +-    __cl_ulong2     v2[2];
 +-#endif
 +-#if defined( __CL_ULONG4__) 
 +-    __cl_ulong4     v4;
 +-#endif
 +-}cl_ulong4;
 +-
 +-/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
 +-typedef  cl_ulong4  cl_ulong3;
 +-
 +-typedef union
 +-{
 +-    cl_ulong   CL_ALIGNED(64) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ulong  x, y, z, w; };
 +-   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_ulong4 lo, hi; };
 +-#endif
 +-#if defined( __CL_ULONG2__) 
 +-    __cl_ulong2     v2[4];
 +-#endif
 +-#if defined( __CL_ULONG4__) 
 +-    __cl_ulong4     v4[2];
 +-#endif
 +-#if defined( __CL_ULONG8__ )
 +-    __cl_ulong8     v8;
 +-#endif
 +-}cl_ulong8;
 +-
 +-typedef union
 +-{
 +-    cl_ulong  CL_ALIGNED(128) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_ulong8 lo, hi; };
 +-#endif
 +-#if defined( __CL_ULONG2__) 
 +-    __cl_ulong2     v2[8];
 +-#endif
 +-#if defined( __CL_ULONG4__) 
 +-    __cl_ulong4     v4[4];
 +-#endif
 +-#if defined( __CL_ULONG8__ )
 +-    __cl_ulong8     v8[2];
 +-#endif
 +-#if defined( __CL_ULONG16__ )
 +-    __cl_ulong16    v16;
 +-#endif
 +-}cl_ulong16;
 +-
 +-
 +-/* --- cl_floatn ---- */
 +-
 +-typedef union
 +-{
 +-    cl_float  CL_ALIGNED(8) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_float  x, y; };
 +-   __extension__ struct{ cl_float  s0, s1; };
 +-   __extension__ struct{ cl_float  lo, hi; };
 +-#endif
 +-#if defined( __CL_FLOAT2__) 
 +-    __cl_float2     v2;
 +-#endif
 +-}cl_float2;
 +-
 +-typedef union
 +-{
 +-    cl_float  CL_ALIGNED(16) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_float   x, y, z, w; };
 +-   __extension__ struct{ cl_float   s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_float2  lo, hi; };
 +-#endif
 +-#if defined( __CL_FLOAT2__) 
 +-    __cl_float2     v2[2];
 +-#endif
 +-#if defined( __CL_FLOAT4__) 
 +-    __cl_float4     v4;
 +-#endif
 +-}cl_float4;
 +-
 +-/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
 +-typedef  cl_float4  cl_float3;
 +-
 +-typedef union
 +-{
 +-    cl_float   CL_ALIGNED(32) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_float   x, y, z, w; };
 +-   __extension__ struct{ cl_float   s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_float4  lo, hi; };
 +-#endif
 +-#if defined( __CL_FLOAT2__) 
 +-    __cl_float2     v2[4];
 +-#endif
 +-#if defined( __CL_FLOAT4__) 
 +-    __cl_float4     v4[2];
 +-#endif
 +-#if defined( __CL_FLOAT8__ )
 +-    __cl_float8     v8;
 +-#endif
 +-}cl_float8;
 +-
 +-typedef union
 +-{
 +-    cl_float  CL_ALIGNED(64) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_float  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_float8 lo, hi; };
 +-#endif
 +-#if defined( __CL_FLOAT2__) 
 +-    __cl_float2     v2[8];
 +-#endif
 +-#if defined( __CL_FLOAT4__) 
 +-    __cl_float4     v4[4];
 +-#endif
 +-#if defined( __CL_FLOAT8__ )
 +-    __cl_float8     v8[2];
 +-#endif
 +-#if defined( __CL_FLOAT16__ )
 +-    __cl_float16    v16;
 +-#endif
 +-}cl_float16;
 +-
 +-/* --- cl_doublen ---- */
 +-
 +-typedef union
 +-{
 +-    cl_double  CL_ALIGNED(16) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_double  x, y; };
 +-   __extension__ struct{ cl_double s0, s1; };
 +-   __extension__ struct{ cl_double lo, hi; };
 +-#endif
 +-#if defined( __CL_DOUBLE2__) 
 +-    __cl_double2     v2;
 +-#endif
 +-}cl_double2;
 +-
 +-typedef union
 +-{
 +-    cl_double  CL_ALIGNED(32) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_double  x, y, z, w; };
 +-   __extension__ struct{ cl_double  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_double2 lo, hi; };
 +-#endif
 +-#if defined( __CL_DOUBLE2__) 
 +-    __cl_double2     v2[2];
 +-#endif
 +-#if defined( __CL_DOUBLE4__) 
 +-    __cl_double4     v4;
 +-#endif
 +-}cl_double4;
 +-
 +-/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
 +-typedef  cl_double4  cl_double3;
 +-
 +-typedef union
 +-{
 +-    cl_double   CL_ALIGNED(64) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_double  x, y, z, w; };
 +-   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_double4 lo, hi; };
 +-#endif
 +-#if defined( __CL_DOUBLE2__) 
 +-    __cl_double2     v2[4];
 +-#endif
 +-#if defined( __CL_DOUBLE4__) 
 +-    __cl_double4     v4[2];
 +-#endif
 +-#if defined( __CL_DOUBLE8__ )
 +-    __cl_double8     v8;
 +-#endif
 +-}cl_double8;
 +-
 +-typedef union
 +-{
 +-    cl_double  CL_ALIGNED(128) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_double  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_double8 lo, hi; };
 +-#endif
 +-#if defined( __CL_DOUBLE2__) 
 +-    __cl_double2     v2[8];
 +-#endif
 +-#if defined( __CL_DOUBLE4__) 
 +-    __cl_double4     v4[4];
 +-#endif
 +-#if defined( __CL_DOUBLE8__ )
 +-    __cl_double8     v8[2];
 +-#endif
 +-#if defined( __CL_DOUBLE16__ )
 +-    __cl_double16    v16;
 +-#endif
 +-}cl_double16;
 +-
 +-/* Macro to facilitate debugging 
 +- * Usage:
 +- *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. 
- - *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO \"
++- *   The first line ends with:   CL_PROGRAM_STRING_BEGIN \"
 +- *   Each line thereafter of OpenCL C source must end with: \n\
 +- *   The last line ends in ";
 +- *
 +- *   Example:
 +- *
- - *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
++- *   const char *my_program = CL_PROGRAM_STRING_BEGIN "\
 +- *   kernel void foo( int a, float * b )             \n\
 +- *   {                                               \n\
 +- *      // my comment                                \n\
 +- *      *b[ get_global_id(0)] = a;                   \n\
 +- *   }                                               \n\
 +- *   ";
 +- *
 +- * This should correctly set up the line, (column) and file information for your source 
 +- * string so you can do source level debugging.
 +- */
 +-#define  __CL_STRINGIFY( _x )               # _x
 +-#define  _CL_STRINGIFY( _x )                __CL_STRINGIFY( _x )
 +-#define  CL_PROGRAM_STRING_DEBUG_INFO       "#line "  _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" 
 +-  
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  /* __CL_PLATFORM_H  */
 ++#include_next <CL/cl_platform.h>
- Index: beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h
++Index: beignet-0.1+git20130619+42967d2/include/CL/cl_gl.h
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/include/CL/cl_gl.h	2013-05-21 10:38:37.207948335 +0200
- +++ beignet-0.1+git20130521+a7ea35c/include/CL/cl_gl.h	2013-05-21 10:41:03.327941820 +0200
- @@ -1,161 +1 @@
++--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_gl.h	2013-06-19 21:04:24.122667370 +0200
+++++ beignet-0.1+git20130619+42967d2/include/CL/cl_gl.h	2013-06-19 21:04:33.478666953 +0200
++@@ -1,155 +1 @@
 +-/**********************************************************************************
- - * Copyright (c) 2008 - 2012 The Khronos Group Inc.
++- * Copyright (c) 2008-2010 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- **********************************************************************************/
 +-
++-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
++-
++-/*
++- * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have
++- * OpenGL dependencies. The application is responsible for #including
++- * OpenGL or OpenGL ES headers before #including cl_gl.h.
++- */
++-
 +-#ifndef __OPENCL_CL_GL_H
 +-#define __OPENCL_CL_GL_H
 +-
 +-#ifdef __APPLE__
 +-#include <OpenCL/cl.h>
++-#include <OpenGL/CGLDevice.h>
 +-#else
 +-#include <CL/cl.h>
 +-#endif	
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-typedef cl_uint     cl_gl_object_type;
 +-typedef cl_uint     cl_gl_texture_info;
 +-typedef cl_uint     cl_gl_platform_info;
 +-typedef struct __GLsync *cl_GLsync;
 +-
- -/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken           */
- -#define CL_GL_OBJECT_BUFFER                     0x2000
- -#define CL_GL_OBJECT_TEXTURE2D                  0x2001
- -#define CL_GL_OBJECT_TEXTURE3D                  0x2002
- -#define CL_GL_OBJECT_RENDERBUFFER               0x2003
- -#define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
- -#define CL_GL_OBJECT_TEXTURE1D                  0x200F
- -#define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
- -#define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
- -
- -/* cl_gl_texture_info           */
- -#define CL_GL_TEXTURE_TARGET                    0x2004
- -#define CL_GL_MIPMAP_LEVEL                      0x2005
++-/* cl_gl_object_type */
++-#define CL_GL_OBJECT_BUFFER             0x2000
++-#define CL_GL_OBJECT_TEXTURE2D          0x2001
++-#define CL_GL_OBJECT_TEXTURE3D          0x2002
++-#define CL_GL_OBJECT_RENDERBUFFER       0x2003
 +-
++-/* cl_gl_texture_info */
++-#define CL_GL_TEXTURE_TARGET            0x2004
++-#define CL_GL_MIPMAP_LEVEL              0x2005
 +-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateFromGLBuffer(cl_context     /* context */,
 +-                     cl_mem_flags   /* flags */,
 +-                     cl_GLuint      /* bufobj */,
 +-                     int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
- -clCreateFromGLTexture(cl_context      /* context */,
- -                      cl_mem_flags    /* flags */,
- -                      cl_GLenum       /* target */,
- -                      cl_GLint        /* miplevel */,
- -                      cl_GLuint       /* texture */,
- -                      cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
- -    
++-clCreateFromGLTexture2D(cl_context      /* context */,
++-                        cl_mem_flags    /* flags */,
++-                        cl_GLenum       /* target */,
++-                        cl_GLint        /* miplevel */,
++-                        cl_GLuint       /* texture */,
++-                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
++-
++-extern CL_API_ENTRY cl_mem CL_API_CALL
++-clCreateFromGLTexture3D(cl_context      /* context */,
++-                        cl_mem_flags    /* flags */,
++-                        cl_GLenum       /* target */,
++-                        cl_GLint        /* miplevel */,
++-                        cl_GLuint       /* texture */,
++-                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
++-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateFromGLRenderbuffer(cl_context   /* context */,
 +-                           cl_mem_flags /* flags */,
 +-                           cl_GLuint    /* renderbuffer */,
 +-                           cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetGLObjectInfo(cl_mem                /* memobj */,
 +-                  cl_gl_object_type *   /* gl_object_type */,
- -                  cl_GLuint *           /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
++-                  cl_GLuint *              /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
 +-                  
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetGLTextureInfo(cl_mem               /* memobj */,
 +-                   cl_gl_texture_info   /* param_name */,
 +-                   size_t               /* param_value_size */,
 +-                   void *               /* param_value */,
 +-                   size_t *             /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueAcquireGLObjects(cl_command_queue      /* command_queue */,
 +-                          cl_uint               /* num_objects */,
 +-                          const cl_mem *        /* mem_objects */,
 +-                          cl_uint               /* num_events_in_wait_list */,
 +-                          const cl_event *      /* event_wait_list */,
 +-                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueReleaseGLObjects(cl_command_queue      /* command_queue */,
 +-                          cl_uint               /* num_objects */,
 +-                          const cl_mem *        /* mem_objects */,
 +-                          cl_uint               /* num_events_in_wait_list */,
 +-                          const cl_event *      /* event_wait_list */,
 +-                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
- -
- -// Deprecated OpenCL 1.1 APIs
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
- -clCreateFromGLTexture2D(cl_context      /* context */,
- -                        cl_mem_flags    /* flags */,
- -                        cl_GLenum       /* target */,
- -                        cl_GLint        /* miplevel */,
- -                        cl_GLuint       /* texture */,
- -                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
- -    
- -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
- -clCreateFromGLTexture3D(cl_context      /* context */,
- -                        cl_mem_flags    /* flags */,
- -                        cl_GLenum       /* target */,
- -                        cl_GLint        /* miplevel */,
- -                        cl_GLuint       /* texture */,
- -                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
- -    
 +-/* cl_khr_gl_sharing extension  */
- -    
++-
 +-#define cl_khr_gl_sharing 1
- -    
++-
 +-typedef cl_uint     cl_gl_context_info;
- -    
++-
 +-/* Additional Error Codes  */
 +-#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
- -    
++-
 +-/* cl_gl_context_info  */
 +-#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
 +-#define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
- -    
++-
 +-/* Additional cl_context_properties  */
 +-#define CL_GL_CONTEXT_KHR                       0x2008
 +-#define CL_EGL_DISPLAY_KHR                      0x2009
 +-#define CL_GLX_DISPLAY_KHR                      0x200A
 +-#define CL_WGL_HDC_KHR                          0x200B
 +-#define CL_CGL_SHAREGROUP_KHR                   0x200C
- -    
++-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
 +-                      cl_gl_context_info            /* param_name */,
 +-                      size_t                        /* param_value_size */,
 +-                      void *                        /* param_value */,
 +-                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
- -    
++-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
 +-    const cl_context_properties * properties,
 +-    cl_gl_context_info            param_name,
 +-    size_t                        param_value_size,
 +-    void *                        param_value,
 +-    size_t *                      param_value_size_ret);
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
- -#endif  /* __OPENCL_CL_GL_H */
++-#endif  /* __OPENCL_CL_GL_H  */
 ++#include_next <CL/cl_gl.h>
++Index: beignet-0.1+git20130619+42967d2/include/CL/cl_gl_ext.h
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl_gl_ext.h	2013-06-19 21:04:24.122667370 +0200
+++++ beignet-0.1+git20130619+42967d2/include/CL/cl_gl_ext.h	2013-06-19 21:04:33.478666953 +0200
++@@ -1,69 +1 @@
++-/**********************************************************************************
++- * Copyright (c) 2008-2010 The Khronos Group Inc.
++- *
++- * Permission is hereby granted, free of charge, to any person obtaining a
++- * copy of this software and/or associated documentation files (the
++- * "Materials"), to deal in the Materials without restriction, including
++- * without limitation the rights to use, copy, modify, merge, publish,
++- * distribute, sublicense, and/or sell copies of the Materials, and to
++- * permit persons to whom the Materials are furnished to do so, subject to
++- * the following conditions:
++- *
++- * The above copyright notice and this permission notice shall be included
++- * in all copies or substantial portions of the Materials.
++- *
++- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
++- **********************************************************************************/
++-
++-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
++-
++-/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have           */
++-/* OpenGL dependencies.                                                         */
++-
++-#ifndef __OPENCL_CL_GL_EXT_H
++-#define __OPENCL_CL_GL_EXT_H
++-
++-#ifdef __cplusplus
++-extern "C" {
++-#endif
++-
++-#ifdef __APPLE__
++-    #include <OpenCL/cl_gl.h>
++-#else
++-    #include <CL/cl_gl.h>
++-#endif
++-
++-/*
++- * For each extension, follow this template
++- * /* cl_VEN_extname extension  */
++-/* #define cl_VEN_extname 1
++- * ... define new types, if any
++- * ... define new tokens, if any
++- * ... define new APIs, if any
++- *
++- *  If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
++- *  This allows us to avoid having to decide whether to include GL headers or GLES here.
++- */
++-
++-/* 
++- *  cl_khr_gl_event  extension
++- *  See section 9.9 in the OpenCL 1.1 spec for more information
++- */
++-#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
++-
++-extern CL_API_ENTRY cl_event CL_API_CALL
++-clCreateEventFromGLsyncKHR(cl_context           /* context */,
++-                           cl_GLsync            /* cl_GLsync */,
++-                           cl_int *             /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
++-
++-#ifdef __cplusplus
++-}
++-#endif
++-
++-#endif	/* __OPENCL_CL_GL_EXT_H  */
+++#include_next <CL/cl_gl_ext.h>
++Index: beignet-0.1+git20130619+42967d2/include/CL/cl.hpp
++===================================================================
++--- beignet-0.1+git20130619+42967d2.orig/include/CL/cl.hpp	2013-06-19 21:04:24.122667370 +0200
+++++ beignet-0.1+git20130619+42967d2/include/CL/cl.hpp	2013-06-19 21:04:33.482666952 +0200
++@@ -1,4011 +1 @@
++-/*******************************************************************************
++- * Copyright (c) 2008-2010 The Khronos Group Inc.
++- *
++- * Permission is hereby granted, free of charge, to any person obtaining a
++- * copy of this software and/or associated documentation files (the
++- * "Materials"), to deal in the Materials without restriction, including
++- * without limitation the rights to use, copy, modify, merge, publish,
++- * distribute, sublicense, and/or sell copies of the Materials, and to
++- * permit persons to whom the Materials are furnished to do so, subject to
++- * the following conditions:
++- *
++- * The above copyright notice and this permission notice shall be included
++- * in all copies or substantial portions of the Materials.
++- *
++- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
++- ******************************************************************************/
++-
++-/*! \file
++- *
++- *   \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33)    
++- *   \author Benedict R. Gaster and Laurent Morichetti
++- *   
++- *   Additions and fixes from Brian Cole, March 3rd 2010.
++- *   
++- *   \version 1.1
++- *   \date June 2010
++- *
++- *   Optional extension support
++- *
++- *         cl
++- *         cl_ext_device_fission
++- *				#define USE_CL_DEVICE_FISSION
++- */
++-
++-/*! \mainpage
++- * \section intro Introduction
++- * For many large applications C++ is the language of choice and so it seems
++- * reasonable to define C++ bindings for OpenCL.
++- *
++- *
++- * The interface is contained with a single C++ header file \em cl.hpp and all
++- * definitions are contained within the namespace \em cl. There is no additional
++- * requirement to include \em cl.h and to use either the C++ or original C
++- * bindings it is enough to simply include \em cl.hpp.
++- *
++- * The bindings themselves are lightweight and correspond closely to the
++- * underlying C API. Using the C++ bindings introduces no additional execution
++- * overhead.
++- *
++- * For detail documentation on the bindings see:
++- *
++- * The OpenCL C++ Wrapper API 1.1 (revision 04)
++- *  http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf
++- *
++- * \section example Example
++- *
++- * The following example shows a general use case for the C++
++- * bindings, including support for the optional exception feature and
++- * also the supplied vector and string classes, see following sections for
++- * decriptions of these features.
++- *
++- * \code
++- * #define __CL_ENABLE_EXCEPTIONS
++- * 
++- * #if defined(__APPLE__) || defined(__MACOSX)
++- * #include <OpenCL/cl.hpp>
++- * #else
++- * #include <CL/cl.hpp>
++- * #endif
++- * #include <cstdio>
++- * #include <cstdlib>
++- * #include <iostream>
++- * 
++- *  const char * helloStr  = "__kernel void "
++- *                           "hello(void) "
++- *                           "{ "
++- *                           "  "
++- *                           "} ";
++- * 
++- *  int
++- *  main(void)
++- *  {
++- *     cl_int err = CL_SUCCESS;
++- *     try {
++- *
++- *       std::vector<cl::Platform> platforms;
++- *       cl::Platform::get(&platforms);
++- *       if (platforms.size() == 0) {
++- *           std::cout << "Platform size 0\n";
++- *           return -1;
++- *       }
++- *
++- *       cl_context_properties properties[] = 
++- *          { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};
++- *       cl::Context context(CL_DEVICE_TYPE_CPU, properties); 
++- * 
++- *       std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
++- * 
++- *       cl::Program::Sources source(1,
++- *           std::make_pair(helloStr,strlen(helloStr)));
++- *       cl::Program program_ = cl::Program(context, source);
++- *       program_.build(devices);
++- * 
++- *       cl::Kernel kernel(program_, "hello", &err);
++- * 
++- *       cl::Event event;
++- *       cl::CommandQueue queue(context, devices[0], 0, &err);
++- *       queue.enqueueNDRangeKernel(
++- *           kernel, 
++- *           cl::NullRange, 
++- *           cl::NDRange(4,4),
++- *           cl::NullRange,
++- *           NULL,
++- *           &event); 
++- * 
++- *       event.wait();
++- *     }
++- *     catch (cl::Error err) {
++- *        std::cerr 
++- *           << "ERROR: "
++- *           << err.what()
++- *           << "("
++- *           << err.err()
++- *           << ")"
++- *           << std::endl;
++- *     }
++- * 
++- *    return EXIT_SUCCESS;
++- *  }
++- * 
++- * \endcode
++- *
++- */
++-#ifndef CL_HPP_
++-#define CL_HPP_
++-
++-#ifdef _WIN32
++-#include <windows.h>
++-#include <malloc.h>
++-#if defined(USE_DX_INTEROP)
++-#include <CL/cl_d3d10.h>
++-#endif
++-#endif // _WIN32
++-
++-// 
++-#if defined(USE_CL_DEVICE_FISSION)
++-#include <CL/cl_ext.h>
++-#endif
++-
++-#if defined(__APPLE__) || defined(__MACOSX)
++-#include <OpenGL/OpenGL.h>
++-#include <OpenCL/opencl.h>
++-#else
++-#include <GL/gl.h>
++-#include <CL/opencl.h>
++-#endif // !__APPLE__
++-
++-#if !defined(CL_CALLBACK)
++-#define CL_CALLBACK
++-#endif //CL_CALLBACK
++-
++-#include <utility>
++-
++-#if !defined(__NO_STD_VECTOR)
++-#include <vector>
++-#endif
++-
++-#if !defined(__NO_STD_STRING)
++-#include <string>
++-#endif 
++-
++-#if defined(linux) || defined(__APPLE__) || defined(__MACOSX)
++-# include <alloca.h>
++-#endif // linux
++-
++-#include <cstring>
++-
++-/*! \namespace cl
++- *
++- * \brief The OpenCL C++ bindings are defined within this namespace.
++- *
++- */
++-namespace cl {
++-
++-#define __INIT_CL_EXT_FCN_PTR(name) \
++-    if(!pfn_##name) { \
++-        pfn_##name = (PFN_##name) \
++-            clGetExtensionFunctionAddress(#name); \
++-        if(!pfn_##name) { \
++-        } \
++-    }
++-
++-class Program;
++-class Device;
++-class Context;
++-class CommandQueue;
++-class Memory;
++-
++-#if defined(__CL_ENABLE_EXCEPTIONS)
++-#include <exception>
++-/*! \class Error
++- * \brief Exception class
++- */
++-class Error : public std::exception
++-{
++-private:
++-    cl_int err_;
++-    const char * errStr_;
++-public:
++-    /*! Create a new CL error exception for a given error code
++-     *  and corresponding message.
++-     */
++-    Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr)
++-    {}
++-
++-    ~Error() throw() {}
++-
++-    /*! \brief Get error string associated with exception
++-     *
++-     * \return A memory pointer to the error message string.
++-     */
++-    virtual const char * what() const throw ()
++-    {
++-        if (errStr_ == NULL) {
++-            return "empty";
++-        }
++-        else {
++-            return errStr_;
++-        }
++-    }
++-
++-    /*! \brief Get error code associated with exception
++-     *
++-     *  \return The error code.
++-     */
++-    const cl_int err(void) const { return err_; }
++-};
++-
++-#define __ERR_STR(x) #x
++-#else
++-#define __ERR_STR(x) NULL
++-#endif // __CL_ENABLE_EXCEPTIONS
++-
++-//! \cond DOXYGEN_DETAIL
++-#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
++-#define __GET_DEVICE_INFO_ERR               __ERR_STR(clgetDeviceInfo)
++-#define __GET_PLATFORM_INFO_ERR             __ERR_STR(clGetPlatformInfo)
++-#define __GET_DEVICE_IDS_ERR                __ERR_STR(clGetDeviceIDs)
++-#define __GET_PLATFORM_IDS_ERR              __ERR_STR(clGetPlatformIDs)
++-#define __GET_CONTEXT_INFO_ERR              __ERR_STR(clGetContextInfo)
++-#define __GET_EVENT_INFO_ERR                __ERR_STR(clGetEventInfo)
++-#define __GET_EVENT_PROFILE_INFO_ERR        __ERR_STR(clGetEventProfileInfo)
++-#define __GET_MEM_OBJECT_INFO_ERR           __ERR_STR(clGetMemObjectInfo)
++-#define __GET_IMAGE_INFO_ERR                __ERR_STR(clGetImageInfo)
++-#define __GET_SAMPLER_INFO_ERR              __ERR_STR(clGetSamplerInfo)
++-#define __GET_KERNEL_INFO_ERR               __ERR_STR(clGetKernelInfo)
++-#define __GET_KERNEL_WORK_GROUP_INFO_ERR    __ERR_STR(clGetKernelWorkGroupInfo)
++-#define __GET_PROGRAM_INFO_ERR              __ERR_STR(clGetProgramInfo)
++-#define __GET_PROGRAM_BUILD_INFO_ERR        __ERR_STR(clGetProgramBuildInfo)
++-#define __GET_COMMAND_QUEUE_INFO_ERR        __ERR_STR(clGetCommandQueueInfo)
++-
++-#define __CREATE_CONTEXT_FROM_TYPE_ERR      __ERR_STR(clCreateContextFromType)
++-#define __GET_SUPPORTED_IMAGE_FORMATS_ERR   __ERR_STR(clGetSupportedImageFormats)
++-
++-#define __CREATE_BUFFER_ERR                 __ERR_STR(clCreateBuffer)
++-#define __CREATE_SUBBUFFER_ERR              __ERR_STR(clCreateSubBuffer)
++-#define __CREATE_GL_BUFFER_ERR              __ERR_STR(clCreateFromGLBuffer)
++-#define __GET_GL_OBJECT_INFO_ERR            __ERR_STR(clGetGLObjectInfo)
++-#define __CREATE_IMAGE2D_ERR                __ERR_STR(clCreateImage2D)
++-#define __CREATE_IMAGE3D_ERR                __ERR_STR(clCreateImage3D)
++-#define __CREATE_SAMPLER_ERR                __ERR_STR(clCreateSampler)
++-#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback)
++-
++-#define __CREATE_USER_EVENT_ERR             __ERR_STR(clCreateUserEvent)
++-#define __SET_USER_EVENT_STATUS_ERR         __ERR_STR(clSetUserEventStatus)
++-#define __SET_EVENT_CALLBACK_ERR            __ERR_STR(clSetEventCallback)
++-#define __WAIT_FOR_EVENTS_ERR               __ERR_STR(clWaitForEvents)
++-
++-#define __CREATE_KERNEL_ERR                 __ERR_STR(clCreateKernel)
++-#define __SET_KERNEL_ARGS_ERR               __ERR_STR(clSetKernelArg)
++-#define __CREATE_PROGRAM_WITH_SOURCE_ERR    __ERR_STR(clCreateProgramWithSource)
++-#define __CREATE_PROGRAM_WITH_BINARY_ERR    __ERR_STR(clCreateProgramWithBinary)
++-#define __BUILD_PROGRAM_ERR                 __ERR_STR(clBuildProgram)
++-#define __CREATE_KERNELS_IN_PROGRAM_ERR     __ERR_STR(clCreateKernelsInProgram)
++-
++-#define __CREATE_COMMAND_QUEUE_ERR          __ERR_STR(clCreateCommandQueue)
++-#define __SET_COMMAND_QUEUE_PROPERTY_ERR    __ERR_STR(clSetCommandQueueProperty)
++-#define __ENQUEUE_READ_BUFFER_ERR           __ERR_STR(clEnqueueReadBuffer)
++-#define __ENQUEUE_READ_BUFFER_RECT_ERR      __ERR_STR(clEnqueueReadBufferRect)
++-#define __ENQUEUE_WRITE_BUFFER_ERR          __ERR_STR(clEnqueueWriteBuffer)
++-#define __ENQUEUE_WRITE_BUFFER_RECT_ERR     __ERR_STR(clEnqueueWriteBufferRect)
++-#define __ENQEUE_COPY_BUFFER_ERR            __ERR_STR(clEnqueueCopyBuffer)
++-#define __ENQEUE_COPY_BUFFER_RECT_ERR       __ERR_STR(clEnqueueCopyBufferRect)
++-#define __ENQUEUE_READ_IMAGE_ERR            __ERR_STR(clEnqueueReadImage)
++-#define __ENQUEUE_WRITE_IMAGE_ERR           __ERR_STR(clEnqueueWriteImage)
++-#define __ENQUEUE_COPY_IMAGE_ERR            __ERR_STR(clEnqueueCopyImage)
++-#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR  __ERR_STR(clEnqueueCopyImageToBuffer)
++-#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR  __ERR_STR(clEnqueueCopyBufferToImage)
++-#define __ENQUEUE_MAP_BUFFER_ERR            __ERR_STR(clEnqueueMapBuffer)
++-#define __ENQUEUE_MAP_IMAGE_ERR             __ERR_STR(clEnqueueMapImage)
++-#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR      __ERR_STR(clEnqueueUnMapMemObject)
++-#define __ENQUEUE_NDRANGE_KERNEL_ERR        __ERR_STR(clEnqueueNDRangeKernel)
++-#define __ENQUEUE_TASK_ERR                  __ERR_STR(clEnqueueTask)
++-#define __ENQUEUE_NATIVE_KERNEL             __ERR_STR(clEnqueueNativeKernel)
++-#define __ENQUEUE_MARKER_ERR                __ERR_STR(clEnqueueMarker)
++-#define __ENQUEUE_WAIT_FOR_EVENTS_ERR       __ERR_STR(clEnqueueWaitForEvents)
++-#define __ENQUEUE_BARRIER_ERR               __ERR_STR(clEnqueueBarrier)
++-
++-#define __ENQUEUE_ACQUIRE_GL_ERR            __ERR_STR(clEnqueueAcquireGLObjects)
++-#define __ENQUEUE_RELEASE_GL_ERR            __ERR_STR(clEnqueueReleaseGLObjects)
++-
++-#define __UNLOAD_COMPILER_ERR               __ERR_STR(clUnloadCompiler)
++-
++-#define __FLUSH_ERR                         __ERR_STR(clFlush)
++-#define __FINISH_ERR                        __ERR_STR(clFinish)
++-
++-#define __CREATE_SUB_DEVICES                __ERR_STR(clCreateSubDevicesEXT)
++-#endif // __CL_USER_OVERRIDE_ERROR_STRINGS
++-//! \endcond
++-
++-/*! \class string
++- * \brief Simple string class, that provides a limited subset of std::string
++- * functionality but avoids many of the issues that come with that class.
++- */
++-class string
++-{
++-private:
++-    ::size_t size_;
++-    char * str_;
++-public:
++-    string(void) : size_(0), str_(NULL)
++-    {
++-    }
++-
++-    string(char * str, ::size_t size) :
++-        size_(size),
++-        str_(NULL)
++-    {
++-        str_ = new char[size_+1];
++-        if (str_ != NULL) {
++-            memcpy(str_, str, size_  * sizeof(char));
++-            str_[size_] = '\0';
++-        }
++-        else {
++-            size_ = 0;
++-        }
++-    }
++-
++-    string(char * str) :
++-        str_(NULL)
++-    {
++-        size_= ::strlen(str);
++-        str_ = new char[size_ + 1];
++-        if (str_ != NULL) {
++-            memcpy(str_, str, (size_ + 1) * sizeof(char));
++-        }
++-        else {
++-            size_ = 0;
++-        }
++-    }
++-
++-    string& operator=(const string& rhs)
++-    {
++-        if (this == &rhs) {
++-            return *this;
++-        }
++-
++-        if (rhs.size_ == 0 || rhs.str_ == NULL) {
++-            size_ = 0;
++-            str_  = NULL;
++-        } 
++-        else {
++-            size_ = rhs.size_;
++-            str_ = new char[size_ + 1];
++-            if (str_ != NULL) {
++-                memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char));
++-            }
++-            else {
++-                size_ = 0;
++-            }
++-        }
++-
++-        return *this;
++-    }
++-
++-    string(const string& rhs)
++-    {
++-        *this = rhs;
++-    }
++-
++-    ~string()
++-    {
++-        if (str_ != NULL) {
++-            delete[] str_;
++-        }
++-    }
++-
++-    ::size_t size(void) const   { return size_; }
++-    ::size_t length(void) const { return size(); }
++-
++-    const char * c_str(void) const { return (str_) ? str_ : "";}
++-};
++-
++-#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING)
++-#include <string>
++-typedef std::string STRING_CLASS;
++-#elif !defined(__USE_DEV_STRING) 
++-typedef cl::string STRING_CLASS;
++-#endif
++-
++-#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
++-#include <vector>
++-#define VECTOR_CLASS std::vector
++-#elif !defined(__USE_DEV_VECTOR) 
++-#define VECTOR_CLASS cl::vector 
++-#endif
++-
++-#if !defined(__MAX_DEFAULT_VECTOR_SIZE)
++-#define __MAX_DEFAULT_VECTOR_SIZE 10
++-#endif
++-
++-/*! \class vector
++- * \brief Fixed sized vector implementation that mirroring 
++- * std::vector functionality.
++- */
++-template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>
++-class vector
++-{
++-private:
++-    T data_[N];
++-    unsigned int size_;
++-    bool empty_;
++-public:
++-    vector() : 
++-        size_(-1),
++-        empty_(true)
++-    {}
++-
++-    ~vector() {}
++-
++-    unsigned int size(void) const
++-    {
++-        return size_ + 1;
++-    }
++-
++-    void clear()
++-    {
++-        size_ = -1;
++-        empty_ = true;
++-    }
++-
++-    void push_back (const T& x)
++-    { 
++-        if (size() < N) {
++-            size_++;  
++-            data_[size_] = x;
++-            empty_ = false;
++-        }
++-    }
++-
++-    void pop_back(void)
++-    {
++-        if (!empty_) {
++-            data_[size_].~T();
++-            size_--;
++-            if (size_ == -1) {
++-                empty_ = true;
++-            }
++-        }
++-    }
++-  
++-    vector(const vector<T, N>& vec) : 
++-        size_(vec.size_),
++-        empty_(vec.empty_)
++-    {
++-        if (!empty_) {
++-            memcpy(&data_[0], &vec.data_[0], size() * sizeof(T));
++-        }
++-    } 
++-
++-    vector(unsigned int size, const T& val = T()) :
++-        size_(-1),
++-        empty_(true)
++-    {
++-        for (unsigned int i = 0; i < size; i++) {
++-            push_back(val);
++-        }
++-    }
++-
++-    vector<T, N>& operator=(const vector<T, N>& rhs)
++-    {
++-        if (this == &rhs) {
++-            return *this;
++-        }
++-
++-        size_  = rhs.size_;
++-        empty_ = rhs.empty_;
++-
++-        if (!empty_) {	
++-            memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T));
++-        }
++-    
++-        return *this;
++-    }
++-
++-    bool operator==(vector<T,N> &vec)
++-    {
++-        if (empty_ && vec.empty_) {
++-            return true;
++-        }
++-
++-        if (size() != vec.size()) {
++-            return false;
++-        }
++-
++-        return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false;
++-    }
++-  
++-    operator T* ()             { return data_; }
++-    operator const T* () const { return data_; }
++-   
++-    bool empty (void) const
++-    {
++-        return empty_;
++-    }
++-  
++-    unsigned int max_size (void) const
++-    {
++-        return N;
++-    }
++-
++-    unsigned int capacity () const
++-    {
++-        return sizeof(T) * N;
++-    }
++-
++-    T& operator[](int index)
++-    {
++-        return data_[index];
++-    }
++-  
++-    T operator[](int index) const
++-    {
++-        return data_[index];
++-    }
++-  
++-    template<class I>
++-    void assign(I start, I end)
++-    {
++-        clear();   
++-        while(start < end) {
++-            push_back(*start);
++-            start++;
++-        }
++-    }
++-
++-    /*! \class iterator
++-     * \brief Iterator class for vectors
++-     */
++-    class iterator
++-    {
++-    private:
++-        vector<T,N> vec_;
++-        int index_;
++-        bool initialized_;
++-    public:
++-        iterator(void) : 
++-            index_(-1),
++-            initialized_(false)
++-        {
++-            index_ = -1;
++-            initialized_ = false;
++-        }
++-
++-        ~iterator(void) {}
++-
++-        static iterator begin(vector<T,N> &vec)
++-        {
++-            iterator i;
++-
++-            if (!vec.empty()) {
++-                i.index_ = 0;
++-            }
++-
++-            i.vec_ = vec;
++-            i.initialized_ = true;
++-            return i;
++-        }
++-
++-        static iterator end(vector<T,N> &vec)
++-        {
++-            iterator i;
++-
++-            if (!vec.empty()) {
++-                i.index_ = vec.size();
++-            }
++-            i.vec_ = vec;
++-            i.initialized_ = true;
++-            return i;
++-        }
++-    
++-        bool operator==(iterator i)
++-        {
++-            return ((vec_ == i.vec_) && 
++-                    (index_ == i.index_) && 
++-                    (initialized_ == i.initialized_));
++-        }
++-
++-        bool operator!=(iterator i)
++-        {
++-            return (!(*this==i));
++-        }
++-
++-        void operator++()
++-        {
++-            index_++;
++-        }
++-
++-        void operator++(int x)
++-        {
++-            index_ += x;
++-        }
++-
++-        void operator--()
++-        {
++-            index_--;
++-        }
++-
++-        void operator--(int x)
++-        {
++-            index_ -= x;
++-        }
++-
++-        T operator *()
++-        {
++-            return vec_[index_];
++-        }
++-    };
++-
++-    iterator begin(void)
++-    {
++-        return iterator::begin(*this);
++-    }
++-
++-    iterator end(void)
++-    {
++-        return iterator::end(*this);
++-    }
++-
++-    T& front(void)
++-    {
++-        return data_[0];
++-    }
++-
++-    T& back(void)
++-    {
++-        return data_[size_];
++-    }
++-
++-    const T& front(void) const
++-    {
++-        return data_[0];
++-    }
++-
++-    const T& back(void) const
++-    {
++-        return data_[size_];
++-    }
++-};  
++-    
++-/*!
++- * \brief size_t class used to interface between C++ and
++- * OpenCL C calls that require arrays of size_t values, who's
++- * size is known statically.
++- */
++-template <int N>
++-struct size_t : public cl::vector< ::size_t, N> { };
++-
++-namespace detail {
++-
++-// GetInfo help struct
++-template <typename Functor, typename T>
++-struct GetInfoHelper
++-{
++-    static cl_int
++-    get(Functor f, cl_uint name, T* param)
++-    {
++-        return f(name, sizeof(T), param, NULL);
++-    }
++-};
++-
++-// Specialized GetInfoHelper for VECTOR_CLASS params
++-template <typename Func, typename T>
++-struct GetInfoHelper<Func, VECTOR_CLASS<T> >
++-{
++-    static cl_int get(Func f, cl_uint name, VECTOR_CLASS<T>* param)
++-    {
++-        ::size_t required;
++-        cl_int err = f(name, 0, NULL, &required);
++-        if (err != CL_SUCCESS) {
++-            return err;
++-        }
++-
++-        T* value = (T*) alloca(required);
++-        err = f(name, required, value, NULL);
++-        if (err != CL_SUCCESS) {
++-            return err;
++-        }
++-
++-        param->assign(&value[0], &value[required/sizeof(T)]);
++-        return CL_SUCCESS;
++-    }
++-};
++-
++-// Specialized for getInfo<CL_PROGRAM_BINARIES>
++-template <typename Func>
++-struct GetInfoHelper<Func, VECTOR_CLASS<char *> >
++-{
++-    static cl_int
++-    get(Func f, cl_uint name, VECTOR_CLASS<char *>* param)
++-    {
++-      cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL);
++-      if (err != CL_SUCCESS) {
++-        return err;
++-      }
++-      
++-      return CL_SUCCESS;
++-    }
++-};
++-
++-// Specialized GetInfoHelper for STRING_CLASS params
++-template <typename Func>
++-struct GetInfoHelper<Func, STRING_CLASS>
++-{
++-    static cl_int get(Func f, cl_uint name, STRING_CLASS* param)
++-    {
++-        ::size_t required;
++-        cl_int err = f(name, 0, NULL, &required);
++-        if (err != CL_SUCCESS) {
++-            return err;
++-        }
++-
++-        char* value = (char*) alloca(required);
++-        err = f(name, required, value, NULL);
++-        if (err != CL_SUCCESS) {
++-            return err;
++-        }
++-
++-        *param = value;
++-        return CL_SUCCESS;
++-    }
++-};
++-
++-#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \
++-namespace detail { \
++-template <typename Func> \
++-struct GetInfoHelper<Func, CPP_TYPE> \
++-{ \
++-    static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \
++-    { \
++-      cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \
++-      if (err != CL_SUCCESS) { \
++-        return err; \
++-      } \
++-      \
++-      return ReferenceHandler<CPP_TYPE::cl_type>::retain((*param)()); \
++-    } \
++-}; \
++-} 
++-
++-
++-#define __PARAM_NAME_INFO_1_0(F) \
++-    F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \
++-    F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \
++-    F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \
++-    F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \
++-    F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \
++-    \
++-    F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \
++-    F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \
++-    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \
++-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \
++-    F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \
++-    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \
++-    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \
++-    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \
++-    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \
++-    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \
++-    F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \
++-    F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \
++-    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \
++-    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\
++-    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \
++-    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \
++-    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \
++-    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \
++-    F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \
++-    F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \
++-    F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \
++-    F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \
++-    F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \
++-    F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \
++-    F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \
++-    F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \
++-    F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \
++-    F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \
++-    F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \
++-    F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \
++-    F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \
++-    F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \
++-    F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \
++-    \
++-    F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \
++-    F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS<Device>) \
++-    F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS<cl_context_properties>) \
++-    \
++-    F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \
++-    F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \
++-    F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \
++-    F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \
++-    \
++-    F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \
++-    F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \
++-    F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \
++-    F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \
++-    \
++-    F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \
++-    F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \
++-    F(cl_mem_info, CL_MEM_SIZE, ::size_t) \
++-    F(cl_mem_info, CL_MEM_HOST_PTR, void*) \
++-    F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \
++-    F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \
++-    F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \
++-    \
++-    F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \
++-    F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \
++-    F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \
++-    F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \
++-    F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \
++-    F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \
++-    F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \
++-    \
++-    F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \
++-    F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \
++-    F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \
++-    F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \
++-    F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \
++-    \
++-    F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \
++-    F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \
++-    F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \
++-    F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<cl_device_id>) \
++-    F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \
++-    F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \
++-    F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS<char *>) \
++-    \
++-    F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \
++-    F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \
++-    F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \
++-    \
++-    F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \
++-    F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \
++-    F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \
++-    F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \
++-    F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \
++-    \
++-    F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \
++-    F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \
++-    F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \
++-    \
++-    F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \
++-    F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \
++-    F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \
++-    F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties)
++-
++-#if defined(CL_VERSION_1_1)
++-#define __PARAM_NAME_INFO_1_1(F) \
++-    F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\
++-    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \
++-    F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \
++-    F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \
++-    F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \
++-    \
++-    F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \
++-    F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \
++-    \
++-    F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \
++-    F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \
++-    \
++-    F(cl_event_info, CL_EVENT_CONTEXT, cl::Context)
++-#endif // CL_VERSION_1_1
++-
++-#if defined(USE_CL_DEVICE_FISSION)
++-#define __PARAM_NAME_DEVICE_FISSION(F) \
++-    F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \
++-	F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
++-	F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
++-	F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \
++-	F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS<cl_device_partition_property_ext>)
++-#endif // USE_CL_DEVICE_FISSION
++-
++-template <typename enum_type, cl_int Name>
++-struct param_traits {};
++-
++-#define __DECLARE_PARAM_TRAITS(token, param_name, T) \
++-struct token;                                        \
++-template<>                                           \
++-struct param_traits<detail:: token,param_name>       \
++-{                                                    \
++-    enum { value = param_name };                     \
++-    typedef T param_type;                            \
++-};
++-
++-__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS);
++-#if defined(CL_VERSION_1_1)
++-__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS);
++-#endif // CL_VERSION_1_1
++-
++-#if defined(USE_CL_DEVICE_FISSION)
++-__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS);
++-#endif // USE_CL_DEVICE_FISSION
++-
++-#undef __DECLARE_PARAM_TRAITS
++-
++-// Convenience functions
++-
++-template <typename Func, typename T>
++-inline cl_int
++-getInfo(Func f, cl_uint name, T* param)
++-{
++-    return GetInfoHelper<Func, T>::get(f, name, param);
++-}
++-
++-template <typename Func, typename Arg0>
++-struct GetInfoFunctor0
++-{
++-    Func f_; const Arg0& arg0_;
++-    cl_int operator ()(
++-        cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
++-    { return f_(arg0_, param, size, value, size_ret); }
++-};
++-
++-template <typename Func, typename Arg0, typename Arg1>
++-struct GetInfoFunctor1
++-{
++-    Func f_; const Arg0& arg0_; const Arg1& arg1_;
++-    cl_int operator ()(
++-        cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
++-    { return f_(arg0_, arg1_, param, size, value, size_ret); }
++-};
++-
++-template <typename Func, typename Arg0, typename T>
++-inline cl_int
++-getInfo(Func f, const Arg0& arg0, cl_uint name, T* param)
++-{
++-    GetInfoFunctor0<Func, Arg0> f0 = { f, arg0 };
++-    return GetInfoHelper<GetInfoFunctor0<Func, Arg0>, T>
++-        ::get(f0, name, param);
++-}
++-
++-template <typename Func, typename Arg0, typename Arg1, typename T>
++-inline cl_int
++-getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param)
++-{
++-    GetInfoFunctor1<Func, Arg0, Arg1> f0 = { f, arg0, arg1 };
++-    return GetInfoHelper<GetInfoFunctor1<Func, Arg0, Arg1>, T>
++-        ::get(f0, name, param);
++-}
++-
++-template<typename T>
++-struct ReferenceHandler
++-{ };
++-
++-template <>
++-struct ReferenceHandler<cl_device_id>
++-{
++-    // cl_device_id does not have retain().
++-    static cl_int retain(cl_device_id)
++-    { return CL_INVALID_DEVICE; }
++-    // cl_device_id does not have release().
++-    static cl_int release(cl_device_id)
++-    { return CL_INVALID_DEVICE; }
++-};
++-
++-template <>
++-struct ReferenceHandler<cl_platform_id>
++-{
++-    // cl_platform_id does not have retain().
++-    static cl_int retain(cl_platform_id)
++-    { return CL_INVALID_PLATFORM; }
++-    // cl_platform_id does not have release().
++-    static cl_int release(cl_platform_id)
++-    { return CL_INVALID_PLATFORM; }
++-};
++-
++-template <>
++-struct ReferenceHandler<cl_context>
++-{
++-    static cl_int retain(cl_context context)
++-    { return ::clRetainContext(context); }
++-    static cl_int release(cl_context context)
++-    { return ::clReleaseContext(context); }
++-};
++-
++-template <>
++-struct ReferenceHandler<cl_command_queue>
++-{
++-    static cl_int retain(cl_command_queue queue)
++-    { return ::clRetainCommandQueue(queue); }
++-    static cl_int release(cl_command_queue queue)
++-    { return ::clReleaseCommandQueue(queue); }
++-};
++-
++-template <>
++-struct ReferenceHandler<cl_mem>
++-{
++-    static cl_int retain(cl_mem memory)
++-    { return ::clRetainMemObject(memory); }
++-    static cl_int release(cl_mem memory)
++-    { return ::clReleaseMemObject(memory); }
++-};
++-
++-template <>
++-struct ReferenceHandler<cl_sampler>
++-{
++-    static cl_int retain(cl_sampler sampler)
++-    { return ::clRetainSampler(sampler); }
++-    static cl_int release(cl_sampler sampler)
++-    { return ::clReleaseSampler(sampler); }
++-};
++-
++-template <>
++-struct ReferenceHandler<cl_program>
++-{
++-    static cl_int retain(cl_program program)
++-    { return ::clRetainProgram(program); }
++-    static cl_int release(cl_program program)
++-    { return ::clReleaseProgram(program); }
++-};
++-
++-template <>
++-struct ReferenceHandler<cl_kernel>
++-{
++-    static cl_int retain(cl_kernel kernel)
++-    { return ::clRetainKernel(kernel); }
++-    static cl_int release(cl_kernel kernel)
++-    { return ::clReleaseKernel(kernel); }
++-};
++-
++-template <>
++-struct ReferenceHandler<cl_event>
++-{
++-    static cl_int retain(cl_event event)
++-    { return ::clRetainEvent(event); }
++-    static cl_int release(cl_event event)
++-    { return ::clReleaseEvent(event); }
++-};
++-
++-template <typename T>
++-class Wrapper
++-{
++-public:
++-    typedef T cl_type;
++-
++-protected:
++-    cl_type object_;
++-
++-public:
++-    Wrapper() : object_(NULL) { }
++-
++-    ~Wrapper()
++-    {
++-        if (object_ != NULL) { release(); }
++-    }
++-
++-    Wrapper(const Wrapper<cl_type>& rhs)
++-    {
++-        object_ = rhs.object_;
++-        if (object_ != NULL) { retain(); }
++-    }
++-
++-    Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
++-    {
++-        if (object_ != NULL) { release(); }
++-        object_ = rhs.object_;
++-        if (object_ != NULL) { retain(); }
++-        return *this;
++-    }
++-
++-    cl_type operator ()() const { return object_; }
++-
++-    cl_type& operator ()() { return object_; }
++-
++-protected:
++-
++-    cl_int retain() const
++-    {
++-        return ReferenceHandler<cl_type>::retain(object_);
++-    }
++-
++-    cl_int release() const
++-    {
++-        return ReferenceHandler<cl_type>::release(object_);
++-    }
++-};
++-
++-#if defined(__CL_ENABLE_EXCEPTIONS)
++-static inline cl_int errHandler (
++-    cl_int err,
++-    const char * errStr = NULL) throw(Error)
++-{
++-    if (err != CL_SUCCESS) {
++-        throw Error(err, errStr);
++-    }
++-    return err;
++-}
++-#else
++-static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
++-{
++-    return err;
++-}
++-#endif // __CL_ENABLE_EXCEPTIONS
++-
++-} // namespace detail
++-//! \endcond
++-
++-/*! \stuct ImageFormat
++- * \brief ImageFormat interface fro cl_image_format.
++- */
++-struct ImageFormat : public cl_image_format
++-{
++-    ImageFormat(){}
++-
++-    ImageFormat(cl_channel_order order, cl_channel_type type)
++-    {
++-        image_channel_order = order;
++-        image_channel_data_type = type;
++-    }
++-
++-    ImageFormat& operator = (const ImageFormat& rhs)
++-    {
++-        if (this != &rhs) {
++-            this->image_channel_data_type = rhs.image_channel_data_type;
++-            this->image_channel_order     = rhs.image_channel_order;
++-        }
++-        return *this;
++-    }
++-};
++-
++-/*! \class Device
++- * \brief Device interface for cl_device_id.
++- */
++-class Device : public detail::Wrapper<cl_device_id>
++-{
++-public:
++-    Device(cl_device_id device) { object_ = device; }
++-
++-    Device() : detail::Wrapper<cl_type>() { }
++-
++-    Device(const Device& device) : detail::Wrapper<cl_type>(device) { }
++-
++-    Device& operator = (const Device& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    template <typename T>
++-    cl_int getInfo(cl_device_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetDeviceInfo, object_, name, param),
++-            __GET_DEVICE_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_device_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_device_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-#if defined(USE_CL_DEVICE_FISSION)
++-	cl_int createSubDevices(
++-		const cl_device_partition_property_ext * properties,
++-		VECTOR_CLASS<Device>* devices)
++-	{
++-		typedef CL_API_ENTRY cl_int 
++-			( CL_API_CALL * PFN_clCreateSubDevicesEXT)(
++-				cl_device_id /*in_device*/,
++-                const cl_device_partition_property_ext * /* properties */,
++-                cl_uint /*num_entries*/,
++-                cl_device_id * /*out_devices*/,
++-                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
++-
++-		static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL;
++-		__INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT);
++-
++-		cl_uint n = 0;
++-        cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __CREATE_SUB_DEVICES);
++-        }
++-
++-        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
++-        err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __CREATE_SUB_DEVICES);
++-        }
++-
++-        devices->assign(&ids[0], &ids[n]);
++-        return CL_SUCCESS;
++- 	}
++-#endif
++-};
++-
++-/*! \class Platform
++- *  \brief Platform interface.
++- */
++-class Platform : public detail::Wrapper<cl_platform_id>
++-{
++-public:
++-    static const Platform null();
++-
++-    Platform(cl_platform_id platform) { object_ = platform; }
++-
++-    Platform() : detail::Wrapper<cl_type>()  { }
++-
++-    Platform(const Platform& platform) : detail::Wrapper<cl_type>(platform) { }
++-
++-    Platform& operator = (const Platform& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetPlatformInfo, object_, name, param),
++-            __GET_PLATFORM_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_platform_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_platform_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    cl_int getDevices(
++-        cl_device_type type,
++-        VECTOR_CLASS<Device>* devices) const
++-    {
++-        cl_uint n = 0;
++-        cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
++-        }
++-
++-        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
++-        err = ::clGetDeviceIDs(object_, type, n, ids, NULL);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
++-        }
++-
++-        devices->assign(&ids[0], &ids[n]);
++-        return CL_SUCCESS;
++-    }
++-
++-#if defined(USE_DX_INTEROP)
++-   /*! \brief Get the list of available D3D10 devices.
++-     *
++-     *  \param d3d_device_source.
++-     *
++-     *  \param d3d_object.
++-     *
++-     *  \param d3d_device_set.
++-     *
++-     *  \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device
++-     *  values returned in devices can be used to identify a specific OpenCL
++-     *  device. If \a devices argument is NULL, this argument is ignored.
++-     *
++-     *  \return One of the following values:
++-     *    - CL_SUCCESS if the function is executed successfully.
++-     *
++-     *  The application can query specific capabilities of the OpenCL device(s)
++-     *  returned by cl::getDevices. This can be used by the application to
++-     *  determine which device(s) to use.
++-     *
++-     * \note In the case that exceptions are enabled and a return value
++-     * other than CL_SUCCESS is generated, then cl::Error exception is
++-     * generated.
++-     */
++-    cl_int getDevices(
++-        cl_d3d10_device_source_khr d3d_device_source,
++-        void *                     d3d_object,
++-        cl_d3d10_device_set_khr    d3d_device_set,
++-        VECTOR_CLASS<Device>* devices) const
++-    {
++-        typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)(
++-            cl_platform_id platform, 
++-            cl_d3d10_device_source_khr d3d_device_source, 
++-            void * d3d_object,
++-            cl_d3d10_device_set_khr d3d_device_set,
++-            cl_uint num_entries,
++-            cl_device_id * devices,
++-            cl_uint* num_devices);
++-
++-        static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL;
++-        __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR);
++-
++-        cl_uint n = 0;
++-        cl_int err = pfn_clGetDeviceIDsFromD3D10KHR(
++-            object_, 
++-            d3d_device_source, 
++-            d3d_object,
++-            d3d_device_set, 
++-            0, 
++-            NULL, 
++-            &n);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
++-        }
++-
++-        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
++-        err = pfn_clGetDeviceIDsFromD3D10KHR(
++-            object_, 
++-            d3d_device_source, 
++-            d3d_object,
++-            d3d_device_set,
++-            n, 
++-            ids, 
++-            NULL);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
++-        }
++-
++-        devices->assign(&ids[0], &ids[n]);
++-        return CL_SUCCESS;
++-    }
++-#endif
++-
++-    static cl_int get(
++-        VECTOR_CLASS<Platform>* platforms)
++-    {
++-        cl_uint n = 0;
++-        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
++-        }
++-
++-        cl_platform_id* ids = (cl_platform_id*) alloca(
++-            n * sizeof(cl_platform_id));
++-        err = ::clGetPlatformIDs(n, ids, NULL);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
++-        }
++-
++-        platforms->assign(&ids[0], &ids[n]);
++-        return CL_SUCCESS;
++-    }
++-};
++-
++-static inline cl_int
++-UnloadCompiler()
++-{
++-    return ::clUnloadCompiler();
++-}
++-
++-class Context : public detail::Wrapper<cl_context>
++-{
++-public:
++-    Context(
++-        const VECTOR_CLASS<Device>& devices,
++-        cl_context_properties* properties = NULL,
++-        void (CL_CALLBACK * notifyFptr)(
++-            const char *,
++-            const void *,
++-            ::size_t,
++-            void *) = NULL,
++-        void* data = NULL,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateContext(
++-            properties, (cl_uint) devices.size(),
++-            (cl_device_id*) &devices.front(),
++-            notifyFptr, data, &error);
++-
++-        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Context(
++-        cl_device_type type,
++-        cl_context_properties* properties = NULL,
++-        void (CL_CALLBACK * notifyFptr)(
++-            const char *,
++-            const void *,
++-            ::size_t,
++-            void *) = NULL,
++-        void* data = NULL,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateContextFromType(
++-            properties, type, notifyFptr, data, &error);
++-
++-        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Context() : detail::Wrapper<cl_type>() { }
++-
++-    Context(const Context& context) : detail::Wrapper<cl_type>(context) { }
++-
++-    Context& operator = (const Context& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    template <typename T>
++-    cl_int getInfo(cl_context_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetContextInfo, object_, name, param),
++-            __GET_CONTEXT_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_context_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_context_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    cl_int getSupportedImageFormats(
++-        cl_mem_flags flags,
++-        cl_mem_object_type type,
++-        VECTOR_CLASS<ImageFormat>* formats) const
++-    {
++-        cl_uint numEntries;
++-        cl_int err = ::clGetSupportedImageFormats(
++-           object_, 
++-           flags,
++-           type, 
++-           0, 
++-           NULL, 
++-           &numEntries);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
++-        }
++-
++-        ImageFormat* value = (ImageFormat*)
++-            alloca(numEntries * sizeof(ImageFormat));
++-        err = ::clGetSupportedImageFormats(
++-            object_, 
++-            flags, 
++-            type, 
++-            numEntries,
++-            (cl_image_format*) value, 
++-            NULL);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
++-        }
++-
++-        formats->assign(&value[0], &value[numEntries]);
++-        return CL_SUCCESS;
++-    }
++-};
++-
++-__GET_INFO_HELPER_WITH_RETAIN(cl::Context)
++-
++-/*! \class Event
++- * \brief Event interface for cl_event.
++- */
++-class Event : public detail::Wrapper<cl_event>
++-{
++-public:
++-    Event() : detail::Wrapper<cl_type>() { }
++-
++-    Event(const Event& event) : detail::Wrapper<cl_type>(event) { }
++-
++-    Event& operator = (const Event& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    template <typename T>
++-    cl_int getInfo(cl_event_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetEventInfo, object_, name, param),
++-            __GET_EVENT_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_event_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_event_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    template <typename T>
++-    cl_int getProfilingInfo(cl_profiling_info name, T* param) const
++-    {
++-        return detail::errHandler(detail::getInfo(
++-            &::clGetEventProfilingInfo, object_, name, param),
++-            __GET_EVENT_PROFILE_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_profiling_info, name>::param_type
++-    getProfilingInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_profiling_info, name>::param_type param;
++-        cl_int result = getProfilingInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    cl_int wait() const
++-    {
++-        return detail::errHandler(
++-            ::clWaitForEvents(1, &object_),
++-            __WAIT_FOR_EVENTS_ERR);
++-    }
++-
++-#if defined(CL_VERSION_1_1)
++-    cl_int setCallback(
++-        cl_int type,
++-        void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *),		
++-        void * user_data = NULL)
++-    {
++-        return detail::errHandler(
++-            ::clSetEventCallback(
++-                object_,
++-                type,
++-                pfn_notify,
++-                user_data), 
++-            __SET_EVENT_CALLBACK_ERR);
++-    }
++-#endif
++-
++-    static cl_int
++-    waitForEvents(const VECTOR_CLASS<Event>& events)
++-    {
++-        return detail::errHandler(
++-            ::clWaitForEvents(
++-                (cl_uint) events.size(), (cl_event*)&events.front()),
++-            __WAIT_FOR_EVENTS_ERR);
++-    }
++-};
++-
++-__GET_INFO_HELPER_WITH_RETAIN(cl::Event)
++-
++-#if defined(CL_VERSION_1_1)
++-/*! \class UserEvent
++- * \brief User event interface for cl_event.
++- */
++-class UserEvent : public Event
++-{
++-public:
++-    UserEvent(
++-        const Context& context,
++-        cl_int * err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateUserEvent(
++-            context(),
++-            &error);
++-
++-        detail::errHandler(error, __CREATE_USER_EVENT_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    UserEvent() : Event() { }
++-
++-    UserEvent(const UserEvent& event) : Event(event) { }
++-
++-    UserEvent& operator = (const UserEvent& rhs)
++-    {
++-        if (this != &rhs) {
++-            Event::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    cl_int setStatus(cl_int status)
++-    {
++-        return detail::errHandler(
++-            ::clSetUserEventStatus(object_,status), 
++-            __SET_USER_EVENT_STATUS_ERR);
++-    }
++-};
++-#endif
++-
++-inline static cl_int
++-WaitForEvents(const VECTOR_CLASS<Event>& events)
++-{
++-    return detail::errHandler(
++-        ::clWaitForEvents(
++-            (cl_uint) events.size(), (cl_event*)&events.front()),
++-        __WAIT_FOR_EVENTS_ERR);
++-}
++-
++-/*! \class Memory
++- * \brief Memory interface for cl_mem.
++- */
++-class Memory : public detail::Wrapper<cl_mem>
++-{
++-public:
++-    Memory() : detail::Wrapper<cl_type>() { }
++-
++-    Memory(const Memory& memory) : detail::Wrapper<cl_type>(memory) { }
++-
++-    Memory& operator = (const Memory& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    template <typename T>
++-    cl_int getInfo(cl_mem_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetMemObjectInfo, object_, name, param),
++-            __GET_MEM_OBJECT_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_mem_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_mem_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-#if defined(CL_VERSION_1_1)
++-    cl_int setDestructorCallback(
++-        void (CL_CALLBACK * pfn_notify)(cl_mem, void *),		
++-        void * user_data = NULL)
++-    {
++-        return detail::errHandler(
++-            ::clSetMemObjectDestructorCallback(
++-                object_,
++-                pfn_notify,
++-                user_data), 
++-            __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR);
++-    }
++-#endif
++-
++-};
++-
++-__GET_INFO_HELPER_WITH_RETAIN(cl::Memory)
++-
++-/*! \class Buffer
++- * \brief Memory buffer interface.
++- */
++-class Buffer : public Memory
++-{
++-public:
++-    Buffer(
++-        const Context& context,
++-        cl_mem_flags flags,
++-        ::size_t size,
++-        void* host_ptr = NULL,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);
++-
++-        detail::errHandler(error, __CREATE_BUFFER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Buffer() : Memory() { }
++-
++-    Buffer(const Buffer& buffer) : Memory(buffer) { }
++-
++-    Buffer& operator = (const Buffer& rhs)
++-    {
++-        if (this != &rhs) {
++-            Memory::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-#if defined(CL_VERSION_1_1)
++-    Buffer createSubBuffer(
++-        cl_mem_flags flags,
++-        cl_buffer_create_type buffer_create_type,
++-        const void * buffer_create_info,
++-        cl_int * err = NULL)
++-    {
++-        Buffer result;
++-        cl_int error;
++-        result.object_ = ::clCreateSubBuffer(
++-            object_, 
++-            flags, 
++-            buffer_create_type, 
++-            buffer_create_info, 
++-            &error);
++-
++-        detail::errHandler(error, __CREATE_SUBBUFFER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-
++-        return result;
++-	}		
++-#endif
++-};
++-
++-#if defined (USE_DX_INTEROP)
++-class BufferD3D10 : public Buffer
++-{
++-public:
++-    typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)(
++-    cl_context context, cl_mem_flags flags, ID3D10Buffer*  buffer,
++-    cl_int* errcode_ret);
++-
++-    BufferD3D10(
++-        const Context& context,
++-        cl_mem_flags flags,
++-        ID3D10Buffer* bufobj,
++-        cl_int * err = NULL)
++-    {
++-        static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
++-        __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);
++-
++-        cl_int error;
++-        object_ = pfn_clCreateFromD3D10BufferKHR(
++-            context(),
++-            flags,
++-            bufobj,
++-            &error);
++-
++-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    BufferD3D10() : Buffer() { }
++-
++-    BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }
++-
++-    BufferD3D10& operator = (const BufferD3D10& rhs)
++-    {
++-        if (this != &rhs) {
++-            Buffer::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-};
++-#endif
++-
++-/*! \class BufferGL
++- * \brief Memory buffer interface for GL interop.
++- */
++-class BufferGL : public Buffer
++-{
++-public:
++-    BufferGL(
++-        const Context& context,
++-        cl_mem_flags flags,
++-        GLuint bufobj,
++-        cl_int * err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateFromGLBuffer(
++-            context(),
++-            flags,
++-            bufobj,
++-            &error);
++-
++-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    BufferGL() : Buffer() { }
++-
++-    BufferGL(const BufferGL& buffer) : Buffer(buffer) { }
++-
++-    BufferGL& operator = (const BufferGL& rhs)
++-    {
++-        if (this != &rhs) {
++-            Buffer::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    cl_int getObjectInfo(
++-        cl_gl_object_type *type,
++-        GLuint * gl_object_name)
++-    {
++-        return detail::errHandler(
++-            ::clGetGLObjectInfo(object_,type,gl_object_name),
++-            __GET_GL_OBJECT_INFO_ERR);
++-    }
++-};
++-
++-/*! \class BufferRenderGL
++- * \brief Memory buffer interface for GL interop with renderbuffer.
++- */
++-class BufferRenderGL : public Buffer
++-{
++-public:
++-    BufferRenderGL(
++-        const Context& context,
++-        cl_mem_flags flags,
++-        GLuint bufobj,
++-        cl_int * err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateFromGLRenderbuffer(
++-            context(),
++-            flags,
++-            bufobj,
++-            &error);
++-
++-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    BufferRenderGL() : Buffer() { }
++-
++-    BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { }
++-
++-    BufferRenderGL& operator = (const BufferRenderGL& rhs)
++-    {
++-        if (this != &rhs) {
++-            Buffer::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    cl_int getObjectInfo(
++-        cl_gl_object_type *type,
++-        GLuint * gl_object_name)
++-    {
++-        return detail::errHandler(
++-            ::clGetGLObjectInfo(object_,type,gl_object_name),
++-            __GET_GL_OBJECT_INFO_ERR);
++-    }
++-};
++-
++-/*! \class Image
++- * \brief Base class  interface for all images.
++- */
++-class Image : public Memory
++-{
++-protected:
++-    Image() : Memory() { }
++-
++-    Image(const Image& image) : Memory(image) { }
++-
++-    Image& operator = (const Image& rhs)
++-    {
++-        if (this != &rhs) {
++-            Memory::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-public:
++-    template <typename T>
++-    cl_int getImageInfo(cl_image_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetImageInfo, object_, name, param),
++-            __GET_IMAGE_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_image_info, name>::param_type
++-    getImageInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_image_info, name>::param_type param;
++-        cl_int result = getImageInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-};
++-
++-/*! \class Image2D
++- * \brief Image interface for 2D images.
++- */
++-class Image2D : public Image
++-{
++-public:
++-    Image2D(
++-        const Context& context,
++-        cl_mem_flags flags,
++-        ImageFormat format,
++-        ::size_t width,
++-        ::size_t height,
++-        ::size_t row_pitch = 0,
++-        void* host_ptr = NULL,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateImage2D(
++-            context(), flags,&format, width, height, row_pitch, host_ptr, &error);
++-
++-        detail::errHandler(error, __CREATE_IMAGE2D_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Image2D() { }
++-
++-    Image2D(const Image2D& image2D) : Image(image2D) { }
++-
++-    Image2D& operator = (const Image2D& rhs)
++-    {
++-        if (this != &rhs) {
++-            Image::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-};
++-
++-/*! \class Image2DGL
++- * \brief 2D image interface for GL interop.
++- */
++-class Image2DGL : public Image2D
++-{
++-public:
++-    Image2DGL(
++-        const Context& context,
++-        cl_mem_flags flags,
++-        GLenum target,
++-        GLint  miplevel,
++-        GLuint texobj,
++-        cl_int * err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateFromGLTexture2D(
++-            context(),
++-            flags,
++-            target,
++-            miplevel,
++-            texobj,
++-            &error);
++-
++-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Image2DGL() : Image2D() { }
++-
++-    Image2DGL(const Image2DGL& image) : Image2D(image) { }
++-
++-    Image2DGL& operator = (const Image2DGL& rhs)
++-    {
++-        if (this != &rhs) {
++-            Image2D::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-};
++-
++-/*! \class Image3D
++- * \brief Image interface for 3D images.
++- */
++-class Image3D : public Image
++-{
++-public:
++-    Image3D(
++-        const Context& context,
++-        cl_mem_flags flags,
++-        ImageFormat format,
++-        ::size_t width,
++-        ::size_t height,
++-        ::size_t depth,
++-        ::size_t row_pitch = 0,
++-        ::size_t slice_pitch = 0,
++-        void* host_ptr = NULL,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateImage3D(
++-            context(), flags, &format, width, height, depth, row_pitch,
++-            slice_pitch, host_ptr, &error);
++-
++-        detail::errHandler(error, __CREATE_IMAGE3D_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Image3D() { }
++-
++-    Image3D(const Image3D& image3D) : Image(image3D) { }
++-
++-    Image3D& operator = (const Image3D& rhs)
++-    {
++-        if (this != &rhs) {
++-            Image::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-};
++-
++-/*! \class Image2DGL
++- * \brief 2D image interface for GL interop.
++- */
++-class Image3DGL : public Image3D
++-{
++-public:
++-    Image3DGL(
++-        const Context& context,
++-        cl_mem_flags flags,
++-        GLenum target,
++-        GLint  miplevel,
++-        GLuint texobj,
++-        cl_int * err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateFromGLTexture3D(
++-            context(),
++-            flags,
++-            target,
++-            miplevel,
++-            texobj,
++-            &error);
++-
++-        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Image3DGL() : Image3D() { }
++-
++-    Image3DGL(const Image3DGL& image) : Image3D(image) { }
++-
++-    Image3DGL& operator = (const Image3DGL& rhs)
++-    {
++-        if (this != &rhs) {
++-            Image3D::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-};
++-
++-/*! \class Sampler
++- * \brief Sampler interface for cl_sampler.
++- */
++-class Sampler : public detail::Wrapper<cl_sampler>
++-{
++-public:
++-    Sampler() { }
++-
++-    Sampler(
++-        const Context& context,
++-        cl_bool normalized_coords,
++-        cl_addressing_mode addressing_mode,
++-        cl_filter_mode filter_mode,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateSampler(
++-            context(), 
++-            normalized_coords,
++-            addressing_mode,
++-            filter_mode,
++-            &error);
++-
++-        detail::errHandler(error, __CREATE_SAMPLER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
++-
++-    Sampler& operator = (const Sampler& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    template <typename T>
++-    cl_int getInfo(cl_sampler_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetSamplerInfo, object_, name, param),
++-            __GET_SAMPLER_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_sampler_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_sampler_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-};
++-
++-__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler)
++-
++-class Program;
++-class CommandQueue;
++-class Kernel;
++-
++-/*! \class NDRange
++- * \brief NDRange interface
++- */
++-class NDRange
++-{
++-private:
++-    size_t<3> sizes_;
++-    cl_uint dimensions_;
++-
++-public:
++-    NDRange()
++-        : dimensions_(0)
++-    { }
++-
++-    NDRange(::size_t size0)
++-        : dimensions_(1)
++-    {
++-        sizes_.push_back(size0);
++-    }
++-
++-    NDRange(::size_t size0, ::size_t size1)
++-        : dimensions_(2)
++-    {
++-        sizes_.push_back(size0);
++-        sizes_.push_back(size1);
++-    }
++-
++-    NDRange(::size_t size0, ::size_t size1, ::size_t size2)
++-        : dimensions_(3)
++-    {
++-        sizes_.push_back(size0);
++-        sizes_.push_back(size1);
++-        sizes_.push_back(size2);
++-    }
++-
++-    operator const ::size_t*() const { return (const ::size_t*) sizes_; }
++-    ::size_t dimensions() const { return dimensions_; }
++-};
++-
++-static const NDRange NullRange;
++-
++-/*!
++- * \struct LocalSpaceArg
++- * \brief Local address raper for use with Kernel::setArg
++- */
++-struct LocalSpaceArg
++-{
++-    ::size_t size_;
++-};
++-
++-namespace detail {
++-
++-template <typename T>
++-struct KernelArgumentHandler
++-{
++-    static ::size_t size(const T&) { return sizeof(T); }
++-    static T* ptr(T& value) { return &value; }
++-};
++-
++-template <>
++-struct KernelArgumentHandler<LocalSpaceArg>
++-{
++-    static ::size_t size(const LocalSpaceArg& value) { return value.size_; }
++-    static void* ptr(LocalSpaceArg&) { return NULL; }
++-};
++-
++-} 
++-//! \endcond
++-
++-inline LocalSpaceArg
++-__local(::size_t size)
++-{
++-    LocalSpaceArg ret = { size };
++-    return ret;
++-}
++-
++-class KernelFunctor;
++-
++-/*! \class Kernel
++- * \brief Kernel interface that implements cl_kernel
++- */
++-class Kernel : public detail::Wrapper<cl_kernel>
++-{
++-public:
++-    inline Kernel(const Program& program, const char* name, cl_int* err = NULL);
++-
++-    Kernel() { }
++-
++-    Kernel(const Kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
++-
++-    Kernel& operator = (const Kernel& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    template <typename T>
++-    cl_int getInfo(cl_kernel_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetKernelInfo, object_, name, param),
++-            __GET_KERNEL_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_kernel_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_kernel_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    template <typename T>
++-    cl_int getWorkGroupInfo(
++-        const Device& device, cl_kernel_work_group_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(
++-                &::clGetKernelWorkGroupInfo, object_, device(), name, param),
++-                __GET_KERNEL_WORK_GROUP_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_kernel_work_group_info, name>::param_type
++-        getWorkGroupInfo(const Device& device, cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-        detail::cl_kernel_work_group_info, name>::param_type param;
++-        cl_int result = getWorkGroupInfo(device, name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    template <typename T>
++-    cl_int setArg(cl_uint index, T value)
++-    {
++-        return detail::errHandler(
++-            ::clSetKernelArg(
++-                object_,
++-                index,
++-                detail::KernelArgumentHandler<T>::size(value),
++-                detail::KernelArgumentHandler<T>::ptr(value)),
++-            __SET_KERNEL_ARGS_ERR);
++-    }
++-
++-    cl_int setArg(cl_uint index, ::size_t size, void* argPtr)
++-    {
++-        return detail::errHandler(
++-            ::clSetKernelArg(object_, index, size, argPtr),
++-            __SET_KERNEL_ARGS_ERR);
++-    }
++-
++-    KernelFunctor bind(
++-        const CommandQueue& queue,
++-        const NDRange& offset,
++-        const NDRange& global,
++-        const NDRange& local);
++-
++-    KernelFunctor bind(
++-        const CommandQueue& queue,
++-        const NDRange& global,
++-        const NDRange& local);
++-};
++-
++-__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel)
++-
++-/*! \class Program
++- * \brief Program interface that implements cl_program.
++- */
++-class Program : public detail::Wrapper<cl_program>
++-{
++-public:
++-    typedef VECTOR_CLASS<std::pair<const void*, ::size_t> > Binaries;
++-    typedef VECTOR_CLASS<std::pair<const char*, ::size_t> > Sources;
++-
++-    Program(
++-        const Context& context,
++-        const Sources& sources,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-
++-        const ::size_t n = (::size_t)sources.size();
++-        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
++-        const char** strings = (const char**) alloca(n * sizeof(const char*));
++-
++-        for (::size_t i = 0; i < n; ++i) {
++-            strings[i] = sources[(int)i].first;
++-            lengths[i] = sources[(int)i].second;
++-        }
++-
++-        object_ = ::clCreateProgramWithSource(
++-            context(), (cl_uint)n, strings, lengths, &error);
++-
++-        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Program(
++-        const Context& context,
++-        const VECTOR_CLASS<Device>& devices,
++-        const Binaries& binaries,
++-        VECTOR_CLASS<cl_int>* binaryStatus = NULL,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-        const ::size_t n = binaries.size();
++-        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
++-        const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*));
++-
++-        for (::size_t i = 0; i < n; ++i) {
++-            images[i] = (const unsigned char*)binaries[(int)i].first;
++-            lengths[i] = binaries[(int)i].second;
++-        }
++-
++-        object_ = ::clCreateProgramWithBinary(
++-            context(), (cl_uint) devices.size(),
++-            (cl_device_id*)&devices.front(),
++-            lengths, images, binaryStatus != NULL
++-               ? (cl_int*) &binaryStatus->front()
++-               : NULL, &error);
++-
++-        detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    Program() { }
++-
++-    Program(const Program& program) : detail::Wrapper<cl_type>(program) { }
++-
++-    Program& operator = (const Program& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    cl_int build(
++-        const VECTOR_CLASS<Device>& devices,
++-        const char* options = NULL,
++-        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
++-        void* data = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clBuildProgram(
++-                object_,
++-                (cl_uint)
++-                devices.size(),
++-                (cl_device_id*)&devices.front(),
++-                options,
++-                notifyFptr,
++-                data),
++-                __BUILD_PROGRAM_ERR);
++-    }
++-
++-    template <typename T>
++-    cl_int getInfo(cl_program_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(&::clGetProgramInfo, object_, name, param),
++-            __GET_PROGRAM_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_program_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_program_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    template <typename T>
++-    cl_int getBuildInfo(
++-        const Device& device, cl_program_build_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(
++-                &::clGetProgramBuildInfo, object_, device(), name, param),
++-                __GET_PROGRAM_BUILD_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_program_build_info, name>::param_type
++-    getBuildInfo(const Device& device, cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_program_build_info, name>::param_type param;
++-        cl_int result = getBuildInfo(device, name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    cl_int createKernels(VECTOR_CLASS<Kernel>* kernels)
++-    {
++-        cl_uint numKernels;
++-        cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
++-        }
++-
++-        Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel));
++-        err = ::clCreateKernelsInProgram(
++-            object_, numKernels, (cl_kernel*) value, NULL);
++-        if (err != CL_SUCCESS) {
++-            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
++-        }
++-
++-        kernels->assign(&value[0], &value[numKernels]);
++-        return CL_SUCCESS;
++-    }
++-};
++-
++-__GET_INFO_HELPER_WITH_RETAIN(cl::Program)
++-
++-inline Kernel::Kernel(const Program& program, const char* name, cl_int* err)
++-{
++-    cl_int error;
++-
++-    object_ = ::clCreateKernel(program(), name, &error);
++-    detail::errHandler(error, __CREATE_KERNEL_ERR);
++-
++-    if (err != NULL) {
++-        *err = error;
++-    }
++-
++-}
++-
++-/*! \class CommandQueue
++- * \brief CommandQueue interface for cl_command_queue.
++- */
++-class CommandQueue : public detail::Wrapper<cl_command_queue>
++-{
++-public:
++-    CommandQueue(
++-        const Context& context,
++-        const Device& device,
++-        cl_command_queue_properties properties = 0,
++-        cl_int* err = NULL)
++-    {
++-        cl_int error;
++-        object_ = ::clCreateCommandQueue(
++-            context(), device(), properties, &error);
++-
++-        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-    }
++-
++-    CommandQueue() { }
++-
++-    CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
++-
++-    CommandQueue& operator = (const CommandQueue& rhs)
++-    {
++-        if (this != &rhs) {
++-            detail::Wrapper<cl_type>::operator=(rhs);
++-        }
++-        return *this;
++-    }
++-
++-    template <typename T>
++-    cl_int getInfo(cl_command_queue_info name, T* param) const
++-    {
++-        return detail::errHandler(
++-            detail::getInfo(
++-                &::clGetCommandQueueInfo, object_, name, param),
++-                __GET_COMMAND_QUEUE_INFO_ERR);
++-    }
++-
++-    template <cl_int name> typename
++-    detail::param_traits<detail::cl_command_queue_info, name>::param_type
++-    getInfo(cl_int* err = NULL) const
++-    {
++-        typename detail::param_traits<
++-            detail::cl_command_queue_info, name>::param_type param;
++-        cl_int result = getInfo(name, &param);
++-        if (err != NULL) {
++-            *err = result;
++-        }
++-        return param;
++-    }
++-
++-    cl_int enqueueReadBuffer(
++-        const Buffer& buffer,
++-        cl_bool blocking,
++-        ::size_t offset,
++-        ::size_t size,
++-        void* ptr,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueReadBuffer(
++-                object_, buffer(), blocking, offset, size,
++-                ptr,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_READ_BUFFER_ERR);
++-    }
++-
++-    cl_int enqueueWriteBuffer(
++-        const Buffer& buffer,
++-        cl_bool blocking,
++-        ::size_t offset,
++-        ::size_t size,
++-        const void* ptr,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueWriteBuffer(
++-                object_, buffer(), blocking, offset, size,
++-                ptr,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-                __ENQUEUE_WRITE_BUFFER_ERR);
++-    }
++-
++-    cl_int enqueueCopyBuffer(
++-        const Buffer& src,
++-        const Buffer& dst,
++-        ::size_t src_offset,
++-        ::size_t dst_offset,
++-        ::size_t size,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueCopyBuffer(
++-                object_, src(), dst(), src_offset, dst_offset, size,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQEUE_COPY_BUFFER_ERR);
++-    }
++-
++-#if defined(CL_VERSION_1_1)
++-    cl_int enqueueReadBufferRect(
++-        const Buffer& buffer,
++-        cl_bool blocking,
++-        const size_t<3>& buffer_offset,
++-        const size_t<3>& host_offset,
++-        const size_t<3>& region,
++-        ::size_t buffer_row_pitch,
++-        ::size_t buffer_slice_pitch,
++-        ::size_t host_row_pitch,
++-        ::size_t host_slice_pitch,
++-        void *ptr,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueReadBufferRect(
++-                object_, 
++-                buffer(), 
++-                blocking, 
++-                (const ::size_t *)buffer_offset,
++-                (const ::size_t *)host_offset,
++-                (const ::size_t *)region,
++-                buffer_row_pitch,
++-                buffer_slice_pitch,
++-                host_row_pitch,
++-                host_slice_pitch,
++-                ptr,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-                __ENQUEUE_READ_BUFFER_RECT_ERR);
++-    }
++-
++-
++-    cl_int enqueueWriteBufferRect(
++-        const Buffer& buffer,
++-        cl_bool blocking,
++-        const size_t<3>& buffer_offset,
++-        const size_t<3>& host_offset,
++-        const size_t<3>& region,
++-        ::size_t buffer_row_pitch,
++-        ::size_t buffer_slice_pitch,
++-        ::size_t host_row_pitch,
++-        ::size_t host_slice_pitch,
++-        void *ptr,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueWriteBufferRect(
++-                object_, 
++-                buffer(), 
++-                blocking, 
++-                (const ::size_t *)buffer_offset,
++-                (const ::size_t *)host_offset,
++-                (const ::size_t *)region,
++-                buffer_row_pitch,
++-                buffer_slice_pitch,
++-                host_row_pitch,
++-                host_slice_pitch,
++-                ptr,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-                __ENQUEUE_WRITE_BUFFER_RECT_ERR);
++-    }
++-
++-    cl_int enqueueCopyBufferRect(
++-        const Buffer& src,
++-        const Buffer& dst,
++-        const size_t<3>& src_origin,
++-        const size_t<3>& dst_origin,
++-        const size_t<3>& region,
++-        ::size_t src_row_pitch,
++-        ::size_t src_slice_pitch,
++-        ::size_t dst_row_pitch,
++-        ::size_t dst_slice_pitch,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueCopyBufferRect(
++-                object_, 
++-                src(), 
++-                dst(), 
++-                (const ::size_t *)src_origin, 
++-                (const ::size_t *)dst_origin, 
++-                (const ::size_t *)region,
++-                src_row_pitch,
++-                src_slice_pitch,
++-                dst_row_pitch,
++-                dst_slice_pitch,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQEUE_COPY_BUFFER_RECT_ERR);
++-    }
++-#endif
++-
++-    cl_int enqueueReadImage(
++-        const Image& image,
++-        cl_bool blocking,
++-        const size_t<3>& origin,
++-        const size_t<3>& region,
++-        ::size_t row_pitch,
++-        ::size_t slice_pitch,
++-        void* ptr,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueReadImage(
++-                object_, image(), blocking, (const ::size_t *) origin,
++-                (const ::size_t *) region, row_pitch, slice_pitch, ptr,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_READ_IMAGE_ERR);
++-    }
++-
++-    cl_int enqueueWriteImage(
++-        const Image& image,
++-        cl_bool blocking,
++-        const size_t<3>& origin,
++-        const size_t<3>& region,
++-        ::size_t row_pitch,
++-        ::size_t slice_pitch,
++-        void* ptr,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueWriteImage(
++-                object_, image(), blocking, (const ::size_t *) origin,
++-                (const ::size_t *) region, row_pitch, slice_pitch, ptr,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_WRITE_IMAGE_ERR);
++-    }
++-
++-    cl_int enqueueCopyImage(
++-        const Image& src,
++-        const Image& dst,
++-        const size_t<3>& src_origin,
++-        const size_t<3>& dst_origin,
++-        const size_t<3>& region,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueCopyImage(
++-                object_, src(), dst(), (const ::size_t *) src_origin,
++-                (const ::size_t *)dst_origin, (const ::size_t *) region,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_COPY_IMAGE_ERR);
++-    }
++-
++-    cl_int enqueueCopyImageToBuffer(
++-        const Image& src,
++-        const Buffer& dst,
++-        const size_t<3>& src_origin,
++-        const size_t<3>& region,
++-        ::size_t dst_offset,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueCopyImageToBuffer(
++-                object_, src(), dst(), (const ::size_t *) src_origin,
++-                (const ::size_t *) region, dst_offset,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR);
++-    }
++-
++-    cl_int enqueueCopyBufferToImage(
++-        const Buffer& src,
++-        const Image& dst,
++-        ::size_t src_offset,
++-        const size_t<3>& dst_origin,
++-        const size_t<3>& region,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueCopyBufferToImage(
++-                object_, src(), dst(), src_offset,
++-                (const ::size_t *) dst_origin, (const ::size_t *) region,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR);
++-    }
++-
++-    void* enqueueMapBuffer(
++-        const Buffer& buffer,
++-        cl_bool blocking,
++-        cl_map_flags flags,
++-        ::size_t offset,
++-        ::size_t size,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL,
++-        cl_int* err = NULL) const
++-    {
++-        cl_int error;
++-        void * result = ::clEnqueueMapBuffer(
++-            object_, buffer(), blocking, flags, offset, size,
++-            (events != NULL) ? (cl_uint) events->size() : 0,
++-            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-            (cl_event*) event,
++-            &error);
++-
++-        detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
++-        if (err != NULL) {
++-            *err = error;
++-        }
++-        return result;
++-    }
++-
++-    void* enqueueMapImage(
++-        const Image& buffer,
++-        cl_bool blocking,
++-        cl_map_flags flags,
++-        const size_t<3>& origin,
++-        const size_t<3>& region,
++-        ::size_t * row_pitch,
++-        ::size_t * slice_pitch,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL,
++-        cl_int* err = NULL) const
++-    {
++-        cl_int error;
++-        void * result = ::clEnqueueMapImage(
++-            object_, buffer(), blocking, flags,
++-            (const ::size_t *) origin, (const ::size_t *) region,
++-            row_pitch, slice_pitch,
++-            (events != NULL) ? (cl_uint) events->size() : 0,
++-            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-            (cl_event*) event,
++-            &error);
++-
++-        detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR);
++-        if (err != NULL) {
++-              *err = error;
++-        }
++-        return result;
++-    }
++-
++-    cl_int enqueueUnmapMemObject(
++-        const Memory& memory,
++-        void* mapped_ptr,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueUnmapMemObject(
++-                object_, memory(), mapped_ptr,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
++-    }
++-
++-    cl_int enqueueNDRangeKernel(
++-        const Kernel& kernel,
++-        const NDRange& offset,
++-        const NDRange& global,
++-        const NDRange& local,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueNDRangeKernel(
++-                object_, kernel(), (cl_uint) global.dimensions(),
++-                offset.dimensions() != 0 ? (const ::size_t*) offset : NULL,
++-                (const ::size_t*) global,
++-                local.dimensions() != 0 ? (const ::size_t*) local : NULL,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_NDRANGE_KERNEL_ERR);
++-    }
++-
++-    cl_int enqueueTask(
++-        const Kernel& kernel,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueTask(
++-                object_, kernel(),
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_TASK_ERR);
++-    }
++-
++-    cl_int enqueueNativeKernel(
++-        void (*userFptr)(void *),
++-        std::pair<void*, ::size_t> args,
++-        const VECTOR_CLASS<Memory>* mem_objects = NULL,
++-        const VECTOR_CLASS<const void*>* mem_locs = NULL,
++-        const VECTOR_CLASS<Event>* events = NULL,
++-        Event* event = NULL) const
++-    {
++-        cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) 
++-            ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem))
++-            : NULL;
++-
++-        if (mems != NULL) {
++-            for (unsigned int i = 0; i < mem_objects->size(); i++) {
++-                mems[i] = ((*mem_objects)[i])();
++-            }
++-        }
++-
++-        return detail::errHandler(
++-            ::clEnqueueNativeKernel(
++-                object_, userFptr, args.first, args.second,
++-                (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
++-                mems,
++-                (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_NATIVE_KERNEL);
++-    }
++-
++-    cl_int enqueueMarker(Event* event = NULL) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueMarker(object_, (cl_event*) event),
++-            __ENQUEUE_MARKER_ERR);
++-    }
++-
++-    cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueWaitForEvents(
++-                object_,
++-                (cl_uint) events.size(),
++-                (const cl_event*) &events.front()),
++-            __ENQUEUE_WAIT_FOR_EVENTS_ERR);
++-    }
++-
++-    cl_int enqueueAcquireGLObjects(
++-         const VECTOR_CLASS<Memory>* mem_objects = NULL,
++-         const VECTOR_CLASS<Event>* events = NULL,
++-         Event* event = NULL) const
++-     {
++-         return detail::errHandler(
++-             ::clEnqueueAcquireGLObjects(
++-                 object_,
++-                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
++-                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
++-                 (events != NULL) ? (cl_uint) events->size() : 0,
++-                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                 (cl_event*) event),
++-             __ENQUEUE_ACQUIRE_GL_ERR);
++-     }
++-
++-    cl_int enqueueReleaseGLObjects(
++-         const VECTOR_CLASS<Memory>* mem_objects = NULL,
++-         const VECTOR_CLASS<Event>* events = NULL,
++-         Event* event = NULL) const
++-     {
++-         return detail::errHandler(
++-             ::clEnqueueReleaseGLObjects(
++-                 object_,
++-                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
++-                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
++-                 (events != NULL) ? (cl_uint) events->size() : 0,
++-                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
++-                 (cl_event*) event),
++-             __ENQUEUE_RELEASE_GL_ERR);
++-     }
++-
++-#if defined (USE_DX_INTEROP)
++-typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)(
++-    cl_command_queue command_queue, cl_uint num_objects,
++-    const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
++-    const cl_event* event_wait_list, cl_event* event);
++-typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
++-    cl_command_queue command_queue, cl_uint num_objects,
++-    const cl_mem* mem_objects,  cl_uint num_events_in_wait_list,
++-    const cl_event* event_wait_list, cl_event* event);
++-
++-    cl_int enqueueAcquireD3D10Objects(
++-         const VECTOR_CLASS<Memory>* mem_objects = NULL,
++-         const VECTOR_CLASS<Event>* events = NULL,
++-         Event* event = NULL) const
++-     {
++-         static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
++-         __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
++-		
++-         return detail::errHandler(
++-             pfn_clEnqueueAcquireD3D10ObjectsKHR(
++-                 object_,
++-                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
++-                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
++-                 (events != NULL) ? (cl_uint) events->size() : 0,
++-                 (events != NULL) ? (cl_event*) &events->front() : NULL,
++-                 (cl_event*) event),
++-             __ENQUEUE_ACQUIRE_GL_ERR);
++-     }
++-
++-    cl_int enqueueReleaseD3D10Objects(
++-         const VECTOR_CLASS<Memory>* mem_objects = NULL,
++-         const VECTOR_CLASS<Event>* events = NULL,
++-         Event* event = NULL) const
++-    {
++-        static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
++-        __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);
++-
++-        return detail::errHandler(
++-            pfn_clEnqueueReleaseD3D10ObjectsKHR(
++-                object_,
++-                (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
++-                (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
++-                (events != NULL) ? (cl_uint) events->size() : 0,
++-                (events != NULL) ? (cl_event*) &events->front() : NULL,
++-                (cl_event*) event),
++-            __ENQUEUE_RELEASE_GL_ERR);
++-    }
++-#endif
++-
++-    cl_int enqueueBarrier() const
++-    {
++-        return detail::errHandler(
++-            ::clEnqueueBarrier(object_),
++-            __ENQUEUE_BARRIER_ERR);
++-    }
++-
++-    cl_int flush() const
++-    {
++-        return detail::errHandler(::clFlush(object_), __FLUSH_ERR);
++-    }
++-
++-    cl_int finish() const
++-    {
++-        return detail::errHandler(::clFinish(object_), __FINISH_ERR);
++-    }
++-};
++-
++-__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue)
++-
++-/*! \class KernelFunctor
++- * \brief Kernel functor interface
++- *
++- * \note Currently only functors of zero to ten arguments are supported. It
++- * is straightforward to add more and a more general solution, similar to
++- * Boost.Lambda could be followed if required in the future.
++- */
++-class KernelFunctor
++-{
++-private:
++-    Kernel kernel_;
++-    CommandQueue queue_;
++-    NDRange offset_;
++-    NDRange global_;
++-    NDRange local_;
++-
++-    cl_int err_;
++-public:
++-    KernelFunctor() { }
++-
++-    KernelFunctor(
++-        const Kernel& kernel,
++-        const CommandQueue& queue,
++-        const NDRange& offset,
++-        const NDRange& global,
++-        const NDRange& local) :
++-            kernel_(kernel),
++-            queue_(queue),
++-            offset_(offset),
++-            global_(global),
++-            local_(local),
++-            err_(CL_SUCCESS)
++-    {}
++-
++-    KernelFunctor& operator=(const KernelFunctor& rhs);
++-
++-    KernelFunctor(const KernelFunctor& rhs);
++-
++-    cl_int getError() { return err_; }
++-
++-    inline Event operator()(const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<typename A1>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<class A1, class A2>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<class A1, class A2, class A3>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<class A1, class A2, class A3, class A4>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<class A1, class A2, class A3, class A4, class A5>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<class A1, class A2, class A3, class A4, class A5, class A6>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<class A1, class A2, class A3, class A4,
++-             class A5, class A6, class A7>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6, 
++-        const A7& a7,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<class A1, class A2, class A3, class A4, class A5,
++-             class A6, class A7, class A8>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6, 
++-        const A7& a7, 
++-        const A8& a8,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-
++-    template<class A1, class A2, class A3, class A4, class A5,
++-             class A6, class A7, class A8, class A9>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6, 
++-        const A7& a7, 
++-        const A8& a8, 
++-        const A9& a9,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-    
++-    template<class A1, class A2, class A3, class A4, class A5,
++-             class A6, class A7, class A8, class A9, class A10>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6,
++-        const A7& a7, 
++-        const A8& a8, 
++-        const A9& a9, 
++-        const A10& a10,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-    
++-    template<class A1, class A2, class A3, class A4, class A5,
++-             class A6, class A7, class A8, class A9, class A10,
++-             class A11>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6,
++-        const A7& a7, 
++-        const A8& a8, 
++-        const A9& a9, 
++-        const A10& a10, 
++-        const A11& a11,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-    
++-    template<class A1, class A2, class A3, class A4, class A5,
++-             class A6, class A7, class A8, class A9, class A10,
++-             class A11, class A12>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6,
++-        const A7& a7, 
++-        const A8& a8, 
++-        const A9& a9, 
++-        const A10& a10, 
++-        const A11& a11, 
++-        const A12& a12,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-    
++-    template<class A1, class A2, class A3, class A4, class A5,
++-             class A6, class A7, class A8, class A9, class A10,
++-             class A11, class A12, class A13>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6,
++-        const A7& a7, 
++-        const A8& a8, 
++-        const A9& a9, 
++-        const A10& a10, 
++-        const A11& a11, 
++-        const A12& a12, 
++-        const A13& a13,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-    
++-    template<class A1, class A2, class A3, class A4, class A5,
++-             class A6, class A7, class A8, class A9, class A10,
++-             class A11, class A12, class A13, class A14>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6,
++-        const A7& a7, 
++-        const A8& a8, 
++-        const A9& a9, 
++-        const A10& a10, 
++-        const A11& a11,
++-        const A12& a12, 
++-        const A13& a13, 
++-        const A14& a14,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-    
++-    template<class A1, class A2, class A3, class A4, class A5,
++-             class A6, class A7, class A8, class A9, class A10,
++-             class A11, class A12, class A13, class A14, class A15>
++-    inline Event operator()(
++-        const A1& a1, 
++-        const A2& a2, 
++-        const A3& a3, 
++-        const A4& a4, 
++-        const A5& a5, 
++-        const A6& a6,
++-        const A7& a7, 
++-        const A8& a8, 
++-        const A9& a9, 
++-        const A10& a10, 
++-        const A11& a11,
++-        const A12& a12, 
++-        const A13& a13, 
++-        const A14& a14, 
++-        const A15& a15,
++-        const VECTOR_CLASS<Event>* events = NULL);
++-};
++-
++-inline KernelFunctor Kernel::bind(
++-    const CommandQueue& queue,
++-    const NDRange& offset,
++-    const NDRange& global,
++-    const NDRange& local)
++-{
++-    return KernelFunctor(*this,queue,offset,global,local);
++-}
++-
++-inline KernelFunctor Kernel::bind(
++-    const CommandQueue& queue,
++-    const NDRange& global,
++-    const NDRange& local)
++-{
++-    return KernelFunctor(*this,queue,NullRange,global,local);
++-}
++-
++-inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs)
++-{
++-    if (this == &rhs) {
++-        return *this;
++-    }
++-    
++-    kernel_ = rhs.kernel_;
++-    queue_  = rhs.queue_;
++-    offset_ = rhs.offset_;
++-    global_ = rhs.global_;
++-    local_  = rhs.local_;
++-    
++-    return *this;
++-}
++-
++-inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) :
++-    kernel_(rhs.kernel_),
++-    queue_(rhs.queue_),
++-    offset_(rhs.offset_),
++-    global_(rhs.global_),
++-    local_(rhs.local_)
++-{
++-}
++-
++-Event KernelFunctor::operator()(const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2, typename A3>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2, typename A3, typename A4>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2, typename A3, typename A4, typename A5>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2, typename A3, typename A4, typename A5,
++-         typename A6>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5, 
++-    const A6& a6,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2, typename A3, typename A4,
++-         typename A5, typename A6, typename A7>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5, 
++-    const A6& a6, 
++-    const A7& a7,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2, typename A3, typename A4, typename A5,
++-         typename A6, typename A7, typename A8>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5, 
++-    const A6& a6, 
++-    const A7& a7, 
++-    const A8& a8,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-    kernel_.setArg(7,a8);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2, typename A3, typename A4, typename A5,
++-         typename A6, typename A7, typename A8, typename A9>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5,
++-    const A6& a6, 
++-    const A7& a7, 
++-    const A8& a8, 
++-    const A9& a9,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-    kernel_.setArg(7,a8);
++-    kernel_.setArg(8,a9);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<typename A1, typename A2, typename A3, typename A4, typename A5,
++-         typename A6, typename A7, typename A8, typename A9, typename A10>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5, 
++-    const A6& a6,
++-    const A7& a7, 
++-    const A8& a8, 
++-    const A9& a9, 
++-    const A10& a10,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-    kernel_.setArg(7,a8);
++-    kernel_.setArg(8,a9);
++-    kernel_.setArg(9,a10);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<class A1, class A2, class A3, class A4, class A5,
++-         class A6, class A7, class A8, class A9, class A10,
++-         class A11>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5, 
++-    const A6& a6,
++-    const A7& a7, 
++-    const A8& a8, 
++-    const A9& a9, 
++-    const A10& a10, 
++-    const A11& a11,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-    kernel_.setArg(7,a8);
++-    kernel_.setArg(8,a9);
++-    kernel_.setArg(9,a10);
++-    kernel_.setArg(10,a11);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<class A1, class A2, class A3, class A4, class A5,
++-         class A6, class A7, class A8, class A9, class A10,
++-         class A11, class A12>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5, 
++-    const A6& a6,
++-    const A7& a7, 
++-    const A8& a8, 
++-    const A9& a9, 
++-    const A10& a10, 
++-    const A11& a11, 
++-    const A12& a12,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-    kernel_.setArg(7,a8);
++-    kernel_.setArg(8,a9);
++-    kernel_.setArg(9,a10);
++-    kernel_.setArg(10,a11);
++-    kernel_.setArg(11,a12);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<class A1, class A2, class A3, class A4, class A5,
++-         class A6, class A7, class A8, class A9, class A10,
++-         class A11, class A12, class A13>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5, 
++-    const A6& a6,
++-    const A7& a7, 
++-    const A8& a8, 
++-    const A9& a9, 
++-    const A10& a10, 
++-    const A11& a11, 
++-    const A12& a12, 
++-    const A13& a13,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-    
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-    kernel_.setArg(7,a8);
++-    kernel_.setArg(8,a9);
++-    kernel_.setArg(9,a10);
++-    kernel_.setArg(10,a11);
++-    kernel_.setArg(11,a12);
++-    kernel_.setArg(12,a13);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<class A1, class A2, class A3, class A4, class A5,
++-         class A6, class A7, class A8, class A9, class A10,
++-         class A11, class A12, class A13, class A14>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5, 
++-    const A6& a6,
++-    const A7& a7, 
++-    const A8& a8, 
++-    const A9& a9, 
++-    const A10& a10, 
++-    const A11& a11,
++-    const A12& a12, 
++-    const A13& a13, 
++-    const A14& a14,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-    
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-    kernel_.setArg(7,a8);
++-    kernel_.setArg(8,a9);
++-    kernel_.setArg(9,a10);
++-    kernel_.setArg(10,a11);
++-    kernel_.setArg(11,a12);
++-    kernel_.setArg(12,a13);
++-    kernel_.setArg(13,a14);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-template<class A1, class A2, class A3, class A4, class A5,
++-         class A6, class A7, class A8, class A9, class A10,
++-         class A11, class A12, class A13, class A14, class A15>
++-Event KernelFunctor::operator()(
++-    const A1& a1, 
++-    const A2& a2, 
++-    const A3& a3, 
++-    const A4& a4, 
++-    const A5& a5,
++-    const A6& a6, 
++-    const A7& a7, 
++-    const A8& a8, 
++-    const A9& a9, 
++-    const A10& a10, 
++-    const A11& a11,
++-    const A12& a12, 
++-    const A13& a13, 
++-    const A14& a14, 
++-    const A15& a15,
++-    const VECTOR_CLASS<Event>* events)
++-{
++-    Event event;
++-    
++-    kernel_.setArg(0,a1);
++-    kernel_.setArg(1,a2);
++-    kernel_.setArg(2,a3);
++-    kernel_.setArg(3,a4);
++-    kernel_.setArg(4,a5);
++-    kernel_.setArg(5,a6);
++-    kernel_.setArg(6,a7);
++-    kernel_.setArg(7,a8);
++-    kernel_.setArg(8,a9);
++-    kernel_.setArg(9,a10);
++-    kernel_.setArg(10,a11);
++-    kernel_.setArg(11,a12);
++-    kernel_.setArg(12,a13);
++-    kernel_.setArg(13,a14);
++-    kernel_.setArg(14,a15);
++-
++-    err_ = queue_.enqueueNDRangeKernel(
++-        kernel_,
++-        offset_,
++-        global_,
++-        local_,
++-        NULL,    // bgaster_fixme - do we want to allow wait event lists?
++-        &event);
++-
++-    return event;
++-}
++-
++-#undef __ERR_STR
++-#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
++-#undef __GET_DEVICE_INFO_ERR
++-#undef __GET_PLATFORM_INFO_ERR
++-#undef __GET_DEVICE_IDS_ERR
++-#undef __GET_CONTEXT_INFO_ERR
++-#undef __GET_EVENT_INFO_ERR
++-#undef __GET_EVENT_PROFILE_INFO_ERR
++-#undef __GET_MEM_OBJECT_INFO_ERR
++-#undef __GET_IMAGE_INFO_ERR
++-#undef __GET_SAMPLER_INFO_ERR
++-#undef __GET_KERNEL_INFO_ERR
++-#undef __GET_KERNEL_WORK_GROUP_INFO_ERR
++-#undef __GET_PROGRAM_INFO_ERR
++-#undef __GET_PROGRAM_BUILD_INFO_ERR
++-#undef __GET_COMMAND_QUEUE_INFO_ERR
++-
++-#undef __CREATE_CONTEXT_FROM_TYPE_ERR
++-#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR
++-
++-#undef __CREATE_BUFFER_ERR
++-#undef __CREATE_SUBBUFFER_ERR
++-#undef __CREATE_IMAGE2D_ERR
++-#undef __CREATE_IMAGE3D_ERR
++-#undef __CREATE_SAMPLER_ERR
++-#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR
++-
++-#undef __CREATE_USER_EVENT_ERR
++-#undef __SET_USER_EVENT_STATUS_ERR
++-#undef __SET_EVENT_CALLBACK_ERR
++-
++-#undef __WAIT_FOR_EVENTS_ERR
++-
++-#undef __CREATE_KERNEL_ERR
++-#undef __SET_KERNEL_ARGS_ERR
++-#undef __CREATE_PROGRAM_WITH_SOURCE_ERR
++-#undef __CREATE_PROGRAM_WITH_BINARY_ERR
++-#undef __BUILD_PROGRAM_ERR
++-#undef __CREATE_KERNELS_IN_PROGRAM_ERR
++-
++-#undef __CREATE_COMMAND_QUEUE_ERR
++-#undef __SET_COMMAND_QUEUE_PROPERTY_ERR
++-#undef __ENQUEUE_READ_BUFFER_ERR
++-#undef __ENQUEUE_WRITE_BUFFER_ERR
++-#undef __ENQUEUE_READ_BUFFER_RECT_ERR
++-#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR
++-#undef __ENQEUE_COPY_BUFFER_ERR
++-#undef __ENQEUE_COPY_BUFFER_RECT_ERR
++-#undef __ENQUEUE_READ_IMAGE_ERR
++-#undef __ENQUEUE_WRITE_IMAGE_ERR
++-#undef __ENQUEUE_COPY_IMAGE_ERR
++-#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR
++-#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR
++-#undef __ENQUEUE_MAP_BUFFER_ERR
++-#undef __ENQUEUE_MAP_IMAGE_ERR
++-#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR
++-#undef __ENQUEUE_NDRANGE_KERNEL_ERR
++-#undef __ENQUEUE_TASK_ERR
++-#undef __ENQUEUE_NATIVE_KERNEL
++-
++-#undef __UNLOAD_COMPILER_ERR
++-#endif //__CL_USER_OVERRIDE_ERROR_STRINGS
++-
++-#undef __GET_INFO_HELPER_WITH_RETAIN
++-
++-// Extensions
++-#undef __INIT_CL_EXT_FCN_PTR
++-#undef __CREATE_SUB_DEVICES
++-
++-#if defined(USE_CL_DEVICE_FISSION)
++-#undef __PARAM_NAME_DEVICE_FISSION
++-#endif // USE_CL_DEVICE_FISSION
++-
++-} // namespace cl
++-
++-#endif // CL_HPP_
+++#include_next <CL/cl.hpp>
diff --cc debian/patches/private
index 81f26a0,0000000..99da0da
mode 100644,000000..100644
--- a/debian/patches/private
+++ b/debian/patches/private
@@@ -1,35 -1,0 +1,35 @@@
 +Description: Install as private library
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-05-21
 +
- Index: beignet-0.1+git20130521+a7ea35c/backend/src/CMakeLists.txt
++Index: beignet-0.1+git20130619+42967d2/backend/src/CMakeLists.txt
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/backend/src/CMakeLists.txt	2013-05-21 10:38:35.571948408 +0200
- +++ beignet-0.1+git20130521+a7ea35c/backend/src/CMakeLists.txt	2013-05-21 10:44:32.000000000 +0200
- @@ -120,6 +120,6 @@
++--- beignet-0.1+git20130619+42967d2.orig/backend/src/CMakeLists.txt	2013-06-19 21:04:23.346667404 +0200
+++++ beignet-0.1+git20130619+42967d2/backend/src/CMakeLists.txt	2013-06-19 21:04:36.470666819 +0200
++@@ -121,6 +121,6 @@
 +                       ${CMAKE_THREAD_LIBS_INIT}
 +                       ${CMAKE_DL_LIBS})
 + 
 +-install (TARGETS gbe LIBRARY DESTINATION lib)
 ++install (TARGETS gbe LIBRARY DESTINATION lib/beignet)
 + install (FILES backend/program.h DESTINATION include/gen)
 + 
- Index: beignet-0.1+git20130521+a7ea35c/src/CMakeLists.txt
++Index: beignet-0.1+git20130619+42967d2/src/CMakeLists.txt
 +===================================================================
- --- beignet-0.1+git20130521+a7ea35c.orig/src/CMakeLists.txt	2013-05-21 10:38:35.571948408 +0200
- +++ beignet-0.1+git20130521+a7ea35c/src/CMakeLists.txt	2013-05-21 10:45:20.603930350 +0200
++--- beignet-0.1+git20130619+42967d2.orig/src/CMakeLists.txt	2013-06-19 21:04:23.346667404 +0200
+++++ beignet-0.1+git20130619+42967d2/src/CMakeLists.txt	2013-06-19 21:04:36.470666819 +0200
 +@@ -47,6 +47,8 @@
 + 
 + SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-Bsymbolic")
 + 
 ++SET(CMAKE_INSTALL_RPATH /usr/lib/beignet)
 ++
 + link_directories (${LLVM_LIBRARY_DIR})
 + add_library(cl SHARED ${OPENCL_SRC})
 + target_link_libraries(
 +@@ -60,4 +62,4 @@
 +                       ${OPENGL_LIBRARIES}
 +                       ${OPTIONAL_EGL_LIBRARY}
 +                       ${OPTIONAL_GBM_LIBRARY})
 +-install (TARGETS cl LIBRARY DESTINATION lib)
 ++install (TARGETS cl LIBRARY DESTINATION lib/beignet)
diff --cc debian/patches/series
index 9971719,0000000..3b9d91f
mode 100644,000000..100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@@ -1,8 -1,0 +1,16 @@@
 +debug
 +flags
 +khronos
++deprecated-in-utest
 +private
 +0001-Generate-all-supported-as_-functions.patch
 +0002-Define-all-convert_-functions.patch
 +0003-Add-long-and-ulong-types-to-generated-functions.patch
 +0004-Add-vector-argument-test-case.patch
++0005-Fix-several-typos-in-unit-test.patch
++0006-Support-64-bit-float.patch
++0007-test-case-for-64-bit-float.patch
++0009-Enable-cl_khr_fp64-extension-for-OpenCL-stdlib-heade.patch
++0010-Define-double-vector-types.patch
++0011-Enable-generation-of-convert_-and-as_-functions-for-.patch
++0012-GBE-Fixed-one-bug-in-scalarize-pass.patch
diff --cc debian/source/include-binaries
index 0000000,0000000..3481d43
new file mode 100644
--- /dev/null
+++ b/debian/source/include-binaries
@@@ -1,0 -1,0 +1,152 @@@
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/context.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen/gen_mesa_disasm.c.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_context.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_encoder.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_insn_scheduling.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_insn_selection.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_program.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/gen_reg_allocation.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/backend/program.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/constant.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/context.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/function.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/image.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/instruction.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/liveness.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/lowering.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/profile.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/register.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/sampler.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/type.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/unit.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ir/value.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/llvm/llvm_gen_backend.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/llvm/llvm_passes.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/llvm/llvm_scalarize.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/llvm/llvm_to_gen.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ocl_common_defines_str.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/ocl_stdlib_str.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/alloc.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/assert.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/cvar.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/intrusive_list.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/mutex.cpp.o
++obj-x86_64-linux-gnu/backend/src/CMakeFiles/gbe.dir/sys/platform.cpp.o
++obj-x86_64-linux-gnu/backend/src/libgbe.so
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_alloc.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_api.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_command_queue.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_command_queue_gen7.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_context.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_device_id.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_driver.cpp.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_driver_defs.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_event.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_extensions.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_image.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_kernel.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_khr_icd.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_mem.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_platform_id.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_program.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/cl_sampler.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/intel/intel_batchbuffer.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/intel/intel_driver.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/intel/intel_gpgpu.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/x11/dricommon.c.o
++obj-x86_64-linux-gnu/src/CMakeFiles/cl.dir/x11/va_dri2.c.o
++obj-x86_64-linux-gnu/src/libcl.so
++obj-x86_64-linux-gnu/utests/CMakeFiles/flat_address_space.dir/runtime_flat_address_space.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utest_run.dir/utest_run.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/buildin_work_dim.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/builtin_global_size.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/cl_create_kernel.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_argument_structure.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_arith_shift_right.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array0.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array1.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array2.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_array3.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_box_blur.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_box_blur_float.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_box_blur_image.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_byte_scatter.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_ceil.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_cl_finish.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_convert_uchar_sat.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_buffer.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_buffer_row.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_image.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_image1.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_copy_image_3d.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_displacement_map_element.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_double.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_double_2.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fabs.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fill_image.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fill_image0.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fill_image_3d.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_fill_image_3d_2.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_argument.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_argument0.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_argument1.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_argument2.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_constant.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_constant0.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_function_constant1.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_get_image_info.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_global_constant.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_global_constant_2.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_global_memory_barrier.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_group_size.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_if_else.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_insert_to_constant.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_insn_selection_masked_min_max.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_insn_selection_max.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_insn_selection_min.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_integer_division.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_integer_remainder.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_local_memory_barrier.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_local_memory_barrier_wg64.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_local_memory_two_ptr.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_lower_return0.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_lower_return1.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_lower_return2.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_mandelbrot.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_mandelbrot_alternate.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_math.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_movforphi_undef.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_multiple_kernels.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_saturate.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_saturate_sub.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_shader_toy.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_shift_right.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_short_scatter.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_sub_bytes.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_sub_shorts.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_switch.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint16_copy.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint2_copy.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint3_copy.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint3_unaligned_copy.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_uint8_copy.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_unstructured_branch0.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_unstructured_branch1.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_unstructured_branch2.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_unstructured_branch3.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_vector_load_store.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_volatile.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_write_only.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_write_only_bytes.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/compiler_write_only_shorts.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/runtime_createcontext.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/runtime_null_kernel_arg.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest_assert.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest_error.c.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest_file_map.cpp.o
++obj-x86_64-linux-gnu/utests/CMakeFiles/utests.dir/utest_helper.cpp.o
++obj-x86_64-linux-gnu/utests/flat_address_space
++obj-x86_64-linux-gnu/utests/libutests.so
++obj-x86_64-linux-gnu/utests/utest_run

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git



More information about the Pkg-opencl-devel mailing list