[Pkg-opencl-devel] [beignet] 22/66: Imported Debian patch 0.1+git20130514+19e9c58-1

Andreas Beckmann anbe at moszumanska.debian.org
Fri Oct 31 07:27:03 UTC 2014


This is an automated email from the git hooks/post-receive script.

anbe pushed a commit to branch master
in repository beignet.

commit 8bf3bb02dcbfe3645983aaaae203d4a5a4cd6b43
Merge: 18b6859 f4bef4e
Author: Simon Richter <sjr at debian.org>
Date:   Tue May 14 20:04:29 2013 +0200

    Imported Debian patch 0.1+git20130514+19e9c58-1

 CMakeLists.txt                                     |    4 +-
 backend/src/backend/gen_context.cpp                |   14 +-
 backend/src/backend/gen_insn_selection.cpp         |   37 +-
 backend/src/ir/instruction.cpp                     |   12 +-
 backend/src/llvm/llvm_gen_backend.cpp              |   70 +-
 backend/src/llvm/llvm_gen_ocl_function.hxx         |   14 +
 backend/src/llvm/llvm_passes.cpp                   |    4 +-
 backend/src/ocl_stdlib.h                           |   46 +-
 debian/changelog                                   |    7 +
 debian/control                                     |    6 +-
 ...0001-Generate-all-supported-as_-functions.patch | 1283 +++++++++++++++
 .../0002-Define-all-convert_-functions.patch       |  946 +++++++++++
 ...3-Add-long-and-ulong-types-to-conversions.patch | 1719 ++++++++++++++++++++
 ...-Make-libgbm-optional-without-EGL-support.patch |   45 +
 .../0005-Define-clamp-value-lower-upper.patch      |  131 ++
 ...Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch |   58 +
 .../0007-Correct-type-of-device-properties.patch   |   34 +
 debian/patches/0008-Update-gitignore-files.patch   |   65 +
 ...the-sampler-implementation-to-comply-with.patch |  418 +++++
 ...0-CL-Support-kernel-side-defined-samplers.patch |  378 +++++
 ...ts-Add-one-test-cases-for-sampler-support.patch |  150 ++
 .../0012-GBE-remove-sampler-address-space.patch    |   74 +
 ...lar-register-support-in-loadImmInstructio.patch |   41 +
 ...rate-all-samplers-allocation-at-compile-t.patch |  407 +++++
 ...me-Optimize-Sample-TypedWrite-instruction.patch | 1027 ++++++++++++
 debian/patches/clang-3.0                           |   20 -
 debian/patches/const64                             |    8 +-
 debian/patches/implement-gefa                      |   21 -
 debian/patches/khronos                             |   36 +-
 debian/patches/missing-header                      |    6 +-
 debian/patches/respect-flags                       |   23 +-
 debian/patches/series                              |   17 +-
 debian/patches/soname                              |   12 +-
 debian/patches/verbose                             |    6 +-
 kernels/test_copy_image_3d.cl                      |   11 +
 kernels/test_fill_image_3d.cl                      |   14 +
 kernels/test_fill_image_3d_2.cl                    |   10 +
 src/cl_api.c                                       |   56 +-
 src/cl_mem.c                                       |   32 +-
 utests/CMakeLists.txt                              |    3 +
 utests/compiler_copy_image_3d.cpp                  |   55 +
 utests/compiler_fill_image_3d.cpp                  |   44 +
 utests/compiler_fill_image_3d_2.cpp                |   42 +
 43 files changed, 7257 insertions(+), 149 deletions(-)

diff --cc debian/changelog
index d845e58,0000000..0c025f5
mode 100644,000000..100644
--- a/debian/changelog
+++ b/debian/changelog
@@@ -1,80 -1,0 +1,87 @@@
++beignet (0.1+git20130514+19e9c58-1) experimental; urgency=low
++
++  * New upstream release
++  * Added a number of tentative patches
++
++ -- Simon Richter <sjr at debian.org>  Tue, 14 May 2013 20:04:29 +0200
++
 +beignet (0.1+git20130502+63e60ed-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Mon, 06 May 2013 06:30:32 +0200
 +
 +beignet (0.1+git20130426+0c8f6fe-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 26 Apr 2013 14:42:21 +0200
 +
 +beignet (0.1+git20130422+003fac5-2) experimental; urgency=low
 +
 +  * Add patch for select()
 +  * Add patch for fmin() / fmax()
 +
 + -- Simon Richter <sjr at debian.org>  Mon, 22 Apr 2013 18:26:01 +0200
 +
 +beignet (0.1+git20130422+003fac5-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Mon, 22 Apr 2013 15:10:54 +0200
 +
 +beignet (0.1+git20130419+9c11c18-1) experimental; urgency=low
 +
 +  * Add more functionality patches
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 19 Apr 2013 14:14:39 +0200
 +
 +beignet (0.1+git20130418+0546d2e-2) experimental; urgency=low
 +
 +  * Add functionality patches
 +  * Use clang 3.0 command line syntax
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 19 Apr 2013 09:53:23 +0200
 +
 +beignet (0.1+git20130418+0546d2e-1) experimental; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Thu, 18 Apr 2013 11:51:37 +0200
 +
 +beignet (0.1-1) unstable; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Tue, 16 Apr 2013 17:16:18 +0200
 +
 +beignet (0.0.0+git2013.04.11+e6b503e-1) unstable; urgency=low
 +
 +  * New upstream release
 +
 + -- Simon Richter <sjr at debian.org>  Mon, 15 Apr 2013 18:22:45 +0200
 +
 +beignet (0.0.0+git2013.04.01+d1b234c-4) unstable; urgency=low
 +
 +  * Build fix for kfreebsd-*
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 12 Apr 2013 11:22:36 +0200
 +
 +beignet (0.0.0+git2013.04.01+d1b234c-3) unstable; urgency=low
 +
 +  * Adjust Build-Depends, Architecture list
 +
 + -- Simon Richter <sjr at debian.org>  Fri, 12 Apr 2013 10:32:36 +0200
 +
 +beignet (0.0.0+git2013.04.01+d1b234c-2) unstable; urgency=low
 +
 +  * Add patch to support size queries in device info
 +
 + -- Simon Richter <sjr at debian.org>  Thu, 11 Apr 2013 14:00:59 +0200
 +
 +beignet (0.0.0+git2013.04.01+d1b234c-1) unstable; urgency=low
 +
 +  * Initial release.
 +
 + -- Simon Richter <sjr at debian.org>  Tue, 09 Apr 2013 17:14:00 +0200
diff --cc debian/control
index d75231b,0000000..c3c69b7
mode 100644,000000..100644
--- a/debian/control
+++ b/debian/control
@@@ -1,32 -1,0 +1,32 @@@
 +Source: beignet
 +Priority: extra
 +Maintainer: Simon Richter <sjr at debian.org>
- Build-Depends: debhelper (>= 9), cmake, pkg-config, ocl-icd-dev, ocl-icd-opencl-dev, libdrm-dev, libgbm-dev, libxfixes-dev, libxext-dev, llvm-3.2-dev | llvm-dev (>= 1:3.2)
- Build-Conflicts: libegl1-mesa-dev (<< 9)
++Build-Depends: debhelper (>= 9), cmake, pkg-config, ocl-icd-dev, ocl-icd-opencl-dev, libdrm-dev, libxfixes-dev, libxext-dev, llvm-3.2-dev | llvm-dev (>= 1:3.2)
++Build-Conflicts: libegl1-mesa-dev (<< 9), libgbm-dev
 +Standards-Version: 3.9.4
 +Section: libs
 +Homepage: http://cgit.freedesktop.org/beignet/
 +
 +Package: beignet-dev
 +Section: libdevel
 +Architecture: i386 amd64 kfreebsd-i386 kfreebsd-amd64
 +Depends: beignet0.0.1 (= ${binary:Version}), ${misc:Depends}
 +Description: Intel OpenCL library
 + OpenCL (Open Computing Language) is a multivendor open standard for
 + general-purpose parallel programming of heterogeneous systems that include
 + CPUs, GPUs and other processors.
 + .
 + This package contains the development files for directly linking against
 + the Intel implementation.
 +
 +Package: beignet0.0.1
 +Section: libs
 +Architecture: i386 amd64 kfreebsd-i386 kfreebsd-amd64
- Depends: ${shlibs:Depends}, ${misc:Depends}, clang (<< 1:3.1)
++Depends: ${shlibs:Depends}, ${misc:Depends}, clang (>= 1:3.1)
 +Provides: opencl-icd
 +Description: Intel OpenCL library
 + OpenCL (Open Computing Language) is a multivendor open standard for
 + general-purpose parallel programming of heterogeneous systems that include
 + CPUs, GPUs and other processors.
 + .
 + This package contains the shared library for the Intel implementation.
diff --cc debian/patches/0001-Generate-all-supported-as_-functions.patch
index 0000000,0000000..86ad1e7
new file mode 100644
--- /dev/null
+++ b/debian/patches/0001-Generate-all-supported-as_-functions.patch
@@@ -1,0 -1,0 +1,1283 @@@
++From 5cee017bb0148bd253ba1b4b6f986f4e0571e3ac Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Tue, 7 May 2013 15:41:45 +0200
++Subject: [PATCH 01/15] Generate all supported as_* functions
++To: beignet at lists.freedesktop.org
++
++This adds support for all type conversions currently possible.
++
++The conversion functions can be updated by invoking the
++update_conversions.sh script.
++---
++ backend/src/gen_conversions.sh    |   89 +++
++ backend/src/ocl_stdlib.h          | 1138 ++++++++++++++++++++++++++++++++++++-
++ backend/src/update_conversions.sh |   11 +
++ 3 files changed, 1231 insertions(+), 7 deletions(-)
++ create mode 100755 backend/src/gen_conversions.sh
++ create mode 100755 backend/src/update_conversions.sh
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh	2013-05-14 20:08:14.714024732 +0200
++@@ -0,0 +1,89 @@
+++#! /bin/sh -e
+++
+++# Supported base types and their lengths
+++TYPES="int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
+++
+++# Supported vector lengths
+++VECTOR_LENGTHS="1 2 3 4 8 16"
+++
+++## No user serviceable parts below here
+++
+++# Generate list of union sizes
+++for type in $TYPES; do
+++        size=`IFS=:; set -- dummy $type; echo $3`
+++        for vector_length in $VECTOR_LENGTHS; do
+++                union_sizes="$union_sizes `expr $vector_length \* $size`"
+++        done
+++done
+++union_sizes="`echo $union_sizes | tr ' ' '\n' | sort -n | uniq`"
+++
+++# For each union size
+++for union_size in $union_sizes; do
+++
+++        # Define a union that contains all vector types that have the same size as the union
+++        unionname="union _type_cast_${union_size}_b"
+++        echo "$unionname {"
+++        for type in $TYPES; do
+++                basetype=`IFS=:; set -- dummy $type; echo $2`
+++                basesize=`IFS=:; set -- dummy $type; echo $3`
+++                for vector_length in $VECTOR_LENGTHS; do
+++                        vector_size_in_union="`expr $vector_length \* $basesize`"
+++                        if test $union_size -ne $vector_size_in_union; then
+++                                continue
+++                        fi
+++                        if test $vector_length -eq 1; then
+++                                vectortype=$basetype
+++                        else
+++                                vectortype=$basetype$vector_length
+++                        fi
+++                        echo "  $vectortype _$vectortype;"
+++                done
+++                
+++        done
+++        echo "};"
+++        echo
+++
+++        # For each tuple of vector types that has the same size as the current union size,
+++        # define an as_* function that converts types without changing binary representation.
+++        for ftype in $TYPES; do
+++                fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
+++                fbasesize=`IFS=:; set -- dummy $ftype; echo $3`
+++                for fvector_length in $VECTOR_LENGTHS; do
+++                        fvector_size_in_union="`expr $fvector_length \* $fbasesize`"
+++                        if test $union_size -ne $fvector_size_in_union; then
+++                                continue
+++                        fi
+++                        if test $fvector_length -eq 1; then
+++                                fvectortype=$fbasetype
+++                        else
+++                                fvectortype=$fbasetype$fvector_length
+++                        fi
+++                        for ttype in $TYPES; do
+++                                tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
+++                                tbasesize=`IFS=:; set -- dummy $ttype; echo $3`
+++                                if test $fbasetype = $tbasetype; then
+++                                        continue
+++                                fi
+++                                for tvector_length in $VECTOR_LENGTHS; do
+++                                        tvector_size_in_union="`expr $tvector_length \* $tbasesize`"
+++                                        if test $union_size -ne $tvector_size_in_union; then
+++                                                continue
+++                                        fi
+++                                        if test $tvector_length -eq 1; then
+++                                                tvectortype=$tbasetype
+++                                        else
+++                                                tvectortype=$tbasetype$tvector_length
+++                                        fi
+++                                        echo "INLINE OVERLOADABLE $tvectortype as_$tvectortype($fvectortype v) {"
+++                                        echo "  $unionname u;"
+++                                        echo "  u._$fvectortype = v;"
+++                                        echo "  return u._$tvectortype;"
+++                                        echo "}"
+++                                        echo
+++                                done
+++                        done
+++                done
+++                
+++        done
+++
+++done
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:08:14.718024731 +0200
++@@ -79,18 +79,1142 @@
++ typedef __texture struct _image3d_t* image3d_t;
++ typedef __sampler uint* sampler_t;
++ typedef size_t event_t;
+++
++ /////////////////////////////////////////////////////////////////////////////
++ // OpenCL conversions & type casting
++ /////////////////////////////////////////////////////////////////////////////
++-union type_cast_4_b {
++-  float f;
++-  uchar4 u4;
++-};
++-uchar4 INLINE_OVERLOADABLE as_uchar4(float f) {
++-    union type_cast_4_b u;
++-    u.f = f;
++-    return u.u4;
+++
+++// ##BEGIN_CONVERSIONS##
+++union _type_cast_1_b {
+++  char _char;
+++  uchar _uchar;
+++};
+++
+++INLINE OVERLOADABLE uchar as_uchar(char v) {
+++  union _type_cast_1_b u;
+++  u._char = v;
+++  return u._uchar;
+++}
+++
+++INLINE OVERLOADABLE char as_char(uchar v) {
+++  union _type_cast_1_b u;
+++  u._uchar = v;
+++  return u._char;
+++}
+++
+++union _type_cast_2_b {
+++  short _short;
+++  ushort _ushort;
+++  char2 _char2;
+++  uchar2 _uchar2;
+++};
+++
+++INLINE OVERLOADABLE ushort as_ushort(short v) {
+++  union _type_cast_2_b u;
+++  u._short = v;
+++  return u._ushort;
+++}
+++
+++INLINE OVERLOADABLE char2 as_char2(short v) {
+++  union _type_cast_2_b u;
+++  u._short = v;
+++  return u._char2;
+++}
+++
+++INLINE OVERLOADABLE uchar2 as_uchar2(short v) {
+++  union _type_cast_2_b u;
+++  u._short = v;
+++  return u._uchar2;
+++}
+++
+++INLINE OVERLOADABLE short as_short(ushort v) {
+++  union _type_cast_2_b u;
+++  u._ushort = v;
+++  return u._short;
+++}
+++
+++INLINE OVERLOADABLE char2 as_char2(ushort v) {
+++  union _type_cast_2_b u;
+++  u._ushort = v;
+++  return u._char2;
+++}
+++
+++INLINE OVERLOADABLE uchar2 as_uchar2(ushort v) {
+++  union _type_cast_2_b u;
+++  u._ushort = v;
+++  return u._uchar2;
+++}
+++
+++INLINE OVERLOADABLE short as_short(char2 v) {
+++  union _type_cast_2_b u;
+++  u._char2 = v;
+++  return u._short;
+++}
+++
+++INLINE OVERLOADABLE ushort as_ushort(char2 v) {
+++  union _type_cast_2_b u;
+++  u._char2 = v;
+++  return u._ushort;
+++}
+++
+++INLINE OVERLOADABLE uchar2 as_uchar2(char2 v) {
+++  union _type_cast_2_b u;
+++  u._char2 = v;
+++  return u._uchar2;
+++}
+++
+++INLINE OVERLOADABLE short as_short(uchar2 v) {
+++  union _type_cast_2_b u;
+++  u._uchar2 = v;
+++  return u._short;
+++}
+++
+++INLINE OVERLOADABLE ushort as_ushort(uchar2 v) {
+++  union _type_cast_2_b u;
+++  u._uchar2 = v;
+++  return u._ushort;
+++}
+++
+++INLINE OVERLOADABLE char2 as_char2(uchar2 v) {
+++  union _type_cast_2_b u;
+++  u._uchar2 = v;
+++  return u._char2;
+++}
+++
+++union _type_cast_3_b {
+++  char3 _char3;
+++  uchar3 _uchar3;
+++};
+++
+++INLINE OVERLOADABLE uchar3 as_uchar3(char3 v) {
+++  union _type_cast_3_b u;
+++  u._char3 = v;
+++  return u._uchar3;
+++}
+++
+++INLINE OVERLOADABLE char3 as_char3(uchar3 v) {
+++  union _type_cast_3_b u;
+++  u._uchar3 = v;
+++  return u._char3;
+++}
+++
+++union _type_cast_4_b {
+++  int _int;
+++  uint _uint;
+++  short2 _short2;
+++  ushort2 _ushort2;
+++  char4 _char4;
+++  uchar4 _uchar4;
+++  float _float;
+++};
+++
+++INLINE OVERLOADABLE uint as_uint(int v) {
+++  union _type_cast_4_b u;
+++  u._int = v;
+++  return u._uint;
+++}
+++
+++INLINE OVERLOADABLE short2 as_short2(int v) {
+++  union _type_cast_4_b u;
+++  u._int = v;
+++  return u._short2;
+++}
+++
+++INLINE OVERLOADABLE ushort2 as_ushort2(int v) {
+++  union _type_cast_4_b u;
+++  u._int = v;
+++  return u._ushort2;
+++}
+++
+++INLINE OVERLOADABLE char4 as_char4(int v) {
+++  union _type_cast_4_b u;
+++  u._int = v;
+++  return u._char4;
+++}
+++
+++INLINE OVERLOADABLE uchar4 as_uchar4(int v) {
+++  union _type_cast_4_b u;
+++  u._int = v;
+++  return u._uchar4;
+++}
+++
+++INLINE OVERLOADABLE float as_float(int v) {
+++  union _type_cast_4_b u;
+++  u._int = v;
+++  return u._float;
+++}
+++
+++INLINE OVERLOADABLE int as_int(uint v) {
+++  union _type_cast_4_b u;
+++  u._uint = v;
+++  return u._int;
+++}
+++
+++INLINE OVERLOADABLE short2 as_short2(uint v) {
+++  union _type_cast_4_b u;
+++  u._uint = v;
+++  return u._short2;
+++}
+++
+++INLINE OVERLOADABLE ushort2 as_ushort2(uint v) {
+++  union _type_cast_4_b u;
+++  u._uint = v;
+++  return u._ushort2;
+++}
+++
+++INLINE OVERLOADABLE char4 as_char4(uint v) {
+++  union _type_cast_4_b u;
+++  u._uint = v;
+++  return u._char4;
+++}
+++
+++INLINE OVERLOADABLE uchar4 as_uchar4(uint v) {
+++  union _type_cast_4_b u;
+++  u._uint = v;
+++  return u._uchar4;
+++}
+++
+++INLINE OVERLOADABLE float as_float(uint v) {
+++  union _type_cast_4_b u;
+++  u._uint = v;
+++  return u._float;
+++}
+++
+++INLINE OVERLOADABLE int as_int(short2 v) {
+++  union _type_cast_4_b u;
+++  u._short2 = v;
+++  return u._int;
+++}
+++
+++INLINE OVERLOADABLE uint as_uint(short2 v) {
+++  union _type_cast_4_b u;
+++  u._short2 = v;
+++  return u._uint;
+++}
+++
+++INLINE OVERLOADABLE ushort2 as_ushort2(short2 v) {
+++  union _type_cast_4_b u;
+++  u._short2 = v;
+++  return u._ushort2;
+++}
+++
+++INLINE OVERLOADABLE char4 as_char4(short2 v) {
+++  union _type_cast_4_b u;
+++  u._short2 = v;
+++  return u._char4;
+++}
+++
+++INLINE OVERLOADABLE uchar4 as_uchar4(short2 v) {
+++  union _type_cast_4_b u;
+++  u._short2 = v;
+++  return u._uchar4;
+++}
+++
+++INLINE OVERLOADABLE float as_float(short2 v) {
+++  union _type_cast_4_b u;
+++  u._short2 = v;
+++  return u._float;
+++}
+++
+++INLINE OVERLOADABLE int as_int(ushort2 v) {
+++  union _type_cast_4_b u;
+++  u._ushort2 = v;
+++  return u._int;
+++}
+++
+++INLINE OVERLOADABLE uint as_uint(ushort2 v) {
+++  union _type_cast_4_b u;
+++  u._ushort2 = v;
+++  return u._uint;
+++}
+++
+++INLINE OVERLOADABLE short2 as_short2(ushort2 v) {
+++  union _type_cast_4_b u;
+++  u._ushort2 = v;
+++  return u._short2;
+++}
+++
+++INLINE OVERLOADABLE char4 as_char4(ushort2 v) {
+++  union _type_cast_4_b u;
+++  u._ushort2 = v;
+++  return u._char4;
+++}
+++
+++INLINE OVERLOADABLE uchar4 as_uchar4(ushort2 v) {
+++  union _type_cast_4_b u;
+++  u._ushort2 = v;
+++  return u._uchar4;
+++}
+++
+++INLINE OVERLOADABLE float as_float(ushort2 v) {
+++  union _type_cast_4_b u;
+++  u._ushort2 = v;
+++  return u._float;
+++}
+++
+++INLINE OVERLOADABLE int as_int(char4 v) {
+++  union _type_cast_4_b u;
+++  u._char4 = v;
+++  return u._int;
+++}
+++
+++INLINE OVERLOADABLE uint as_uint(char4 v) {
+++  union _type_cast_4_b u;
+++  u._char4 = v;
+++  return u._uint;
+++}
+++
+++INLINE OVERLOADABLE short2 as_short2(char4 v) {
+++  union _type_cast_4_b u;
+++  u._char4 = v;
+++  return u._short2;
+++}
+++
+++INLINE OVERLOADABLE ushort2 as_ushort2(char4 v) {
+++  union _type_cast_4_b u;
+++  u._char4 = v;
+++  return u._ushort2;
+++}
+++
+++INLINE OVERLOADABLE uchar4 as_uchar4(char4 v) {
+++  union _type_cast_4_b u;
+++  u._char4 = v;
+++  return u._uchar4;
+++}
+++
+++INLINE OVERLOADABLE float as_float(char4 v) {
+++  union _type_cast_4_b u;
+++  u._char4 = v;
+++  return u._float;
+++}
+++
+++INLINE OVERLOADABLE int as_int(uchar4 v) {
+++  union _type_cast_4_b u;
+++  u._uchar4 = v;
+++  return u._int;
+++}
+++
+++INLINE OVERLOADABLE uint as_uint(uchar4 v) {
+++  union _type_cast_4_b u;
+++  u._uchar4 = v;
+++  return u._uint;
+++}
+++
+++INLINE OVERLOADABLE short2 as_short2(uchar4 v) {
+++  union _type_cast_4_b u;
+++  u._uchar4 = v;
+++  return u._short2;
+++}
+++
+++INLINE OVERLOADABLE ushort2 as_ushort2(uchar4 v) {
+++  union _type_cast_4_b u;
+++  u._uchar4 = v;
+++  return u._ushort2;
+++}
+++
+++INLINE OVERLOADABLE char4 as_char4(uchar4 v) {
+++  union _type_cast_4_b u;
+++  u._uchar4 = v;
+++  return u._char4;
+++}
+++
+++INLINE OVERLOADABLE float as_float(uchar4 v) {
+++  union _type_cast_4_b u;
+++  u._uchar4 = v;
+++  return u._float;
+++}
+++
+++INLINE OVERLOADABLE int as_int(float v) {
+++  union _type_cast_4_b u;
+++  u._float = v;
+++  return u._int;
+++}
+++
+++INLINE OVERLOADABLE uint as_uint(float v) {
+++  union _type_cast_4_b u;
+++  u._float = v;
+++  return u._uint;
+++}
+++
+++INLINE OVERLOADABLE short2 as_short2(float v) {
+++  union _type_cast_4_b u;
+++  u._float = v;
+++  return u._short2;
+++}
+++
+++INLINE OVERLOADABLE ushort2 as_ushort2(float v) {
+++  union _type_cast_4_b u;
+++  u._float = v;
+++  return u._ushort2;
+++}
+++
+++INLINE OVERLOADABLE char4 as_char4(float v) {
+++  union _type_cast_4_b u;
+++  u._float = v;
+++  return u._char4;
+++}
+++
+++INLINE OVERLOADABLE uchar4 as_uchar4(float v) {
+++  union _type_cast_4_b u;
+++  u._float = v;
+++  return u._uchar4;
+++}
+++
+++union _type_cast_6_b {
+++  short3 _short3;
+++  ushort3 _ushort3;
+++};
+++
+++INLINE OVERLOADABLE ushort3 as_ushort3(short3 v) {
+++  union _type_cast_6_b u;
+++  u._short3 = v;
+++  return u._ushort3;
+++}
+++
+++INLINE OVERLOADABLE short3 as_short3(ushort3 v) {
+++  union _type_cast_6_b u;
+++  u._ushort3 = v;
+++  return u._short3;
+++}
+++
+++union _type_cast_8_b {
+++  int2 _int2;
+++  uint2 _uint2;
+++  short4 _short4;
+++  ushort4 _ushort4;
+++  char8 _char8;
+++  uchar8 _uchar8;
+++  float2 _float2;
+++};
+++
+++INLINE OVERLOADABLE uint2 as_uint2(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._uchar8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._float2;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._uchar8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._float2;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE uint2 as_uint2(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._uchar8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._float2;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE uint2 as_uint2(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._uchar8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._float2;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE uint2 as_uint2(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._uchar8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._float2;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE uint2 as_uint2(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._float2;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE uint2 as_uint2(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._uchar8;
+++}
+++
+++union _type_cast_12_b {
+++  int3 _int3;
+++  uint3 _uint3;
+++  float3 _float3;
+++};
+++
+++INLINE OVERLOADABLE uint3 as_uint3(int3 v) {
+++  union _type_cast_12_b u;
+++  u._int3 = v;
+++  return u._uint3;
+++}
+++
+++INLINE OVERLOADABLE float3 as_float3(int3 v) {
+++  union _type_cast_12_b u;
+++  u._int3 = v;
+++  return u._float3;
+++}
+++
+++INLINE OVERLOADABLE int3 as_int3(uint3 v) {
+++  union _type_cast_12_b u;
+++  u._uint3 = v;
+++  return u._int3;
+++}
+++
+++INLINE OVERLOADABLE float3 as_float3(uint3 v) {
+++  union _type_cast_12_b u;
+++  u._uint3 = v;
+++  return u._float3;
+++}
+++
+++INLINE OVERLOADABLE int3 as_int3(float3 v) {
+++  union _type_cast_12_b u;
+++  u._float3 = v;
+++  return u._int3;
+++}
+++
+++INLINE OVERLOADABLE uint3 as_uint3(float3 v) {
+++  union _type_cast_12_b u;
+++  u._float3 = v;
+++  return u._uint3;
+++}
+++
+++union _type_cast_16_b {
+++  int4 _int4;
+++  uint4 _uint4;
+++  short8 _short8;
+++  ushort8 _ushort8;
+++  char16 _char16;
+++  uchar16 _uchar16;
+++  float4 _float4;
+++};
+++
+++INLINE OVERLOADABLE uint4 as_uint4(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._uchar16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._float4;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._uchar16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._float4;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE uint4 as_uint4(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._uchar16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._float4;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE uint4 as_uint4(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._uchar16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._float4;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE uint4 as_uint4(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._uchar16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._float4;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE uint4 as_uint4(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._float4;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE uint4 as_uint4(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._uchar16;
+++}
+++
+++union _type_cast_32_b {
+++  int8 _int8;
+++  uint8 _uint8;
+++  short16 _short16;
+++  ushort16 _ushort16;
+++  float8 _float8;
+++};
+++
+++INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._uint8;
+++}
+++
+++INLINE OVERLOADABLE short16 as_short16(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._short16;
+++}
+++
+++INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._ushort16;
+++}
+++
+++INLINE OVERLOADABLE float8 as_float8(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._float8;
+++}
+++
+++INLINE OVERLOADABLE int8 as_int8(uint8 v) {
+++  union _type_cast_32_b u;
+++  u._uint8 = v;
+++  return u._int8;
+++}
+++
+++INLINE OVERLOADABLE short16 as_short16(uint8 v) {
+++  union _type_cast_32_b u;
+++  u._uint8 = v;
+++  return u._short16;
+++}
+++
+++INLINE OVERLOADABLE ushort16 as_ushort16(uint8 v) {
+++  union _type_cast_32_b u;
+++  u._uint8 = v;
+++  return u._ushort16;
+++}
+++
+++INLINE OVERLOADABLE float8 as_float8(uint8 v) {
+++  union _type_cast_32_b u;
+++  u._uint8 = v;
+++  return u._float8;
+++}
+++
+++INLINE OVERLOADABLE int8 as_int8(short16 v) {
+++  union _type_cast_32_b u;
+++  u._short16 = v;
+++  return u._int8;
+++}
+++
+++INLINE OVERLOADABLE uint8 as_uint8(short16 v) {
+++  union _type_cast_32_b u;
+++  u._short16 = v;
+++  return u._uint8;
+++}
+++
+++INLINE OVERLOADABLE ushort16 as_ushort16(short16 v) {
+++  union _type_cast_32_b u;
+++  u._short16 = v;
+++  return u._ushort16;
+++}
+++
+++INLINE OVERLOADABLE float8 as_float8(short16 v) {
+++  union _type_cast_32_b u;
+++  u._short16 = v;
+++  return u._float8;
+++}
+++
+++INLINE OVERLOADABLE int8 as_int8(ushort16 v) {
+++  union _type_cast_32_b u;
+++  u._ushort16 = v;
+++  return u._int8;
+++}
+++
+++INLINE OVERLOADABLE uint8 as_uint8(ushort16 v) {
+++  union _type_cast_32_b u;
+++  u._ushort16 = v;
+++  return u._uint8;
+++}
+++
+++INLINE OVERLOADABLE short16 as_short16(ushort16 v) {
+++  union _type_cast_32_b u;
+++  u._ushort16 = v;
+++  return u._short16;
+++}
+++
+++INLINE OVERLOADABLE float8 as_float8(ushort16 v) {
+++  union _type_cast_32_b u;
+++  u._ushort16 = v;
+++  return u._float8;
+++}
+++
+++INLINE OVERLOADABLE int8 as_int8(float8 v) {
+++  union _type_cast_32_b u;
+++  u._float8 = v;
+++  return u._int8;
+++}
+++
+++INLINE OVERLOADABLE uint8 as_uint8(float8 v) {
+++  union _type_cast_32_b u;
+++  u._float8 = v;
+++  return u._uint8;
+++}
+++
+++INLINE OVERLOADABLE short16 as_short16(float8 v) {
+++  union _type_cast_32_b u;
+++  u._float8 = v;
+++  return u._short16;
+++}
+++
+++INLINE OVERLOADABLE ushort16 as_ushort16(float8 v) {
+++  union _type_cast_32_b u;
+++  u._float8 = v;
+++  return u._ushort16;
+++}
+++
+++union _type_cast_64_b {
+++  int16 _int16;
+++  uint16 _uint16;
+++  float16 _float16;
+++};
+++
+++INLINE OVERLOADABLE uint16 as_uint16(int16 v) {
+++  union _type_cast_64_b u;
+++  u._int16 = v;
+++  return u._uint16;
+++}
+++
+++INLINE OVERLOADABLE float16 as_float16(int16 v) {
+++  union _type_cast_64_b u;
+++  u._int16 = v;
+++  return u._float16;
+++}
+++
+++INLINE OVERLOADABLE int16 as_int16(uint16 v) {
+++  union _type_cast_64_b u;
+++  u._uint16 = v;
+++  return u._int16;
+++}
+++
+++INLINE OVERLOADABLE float16 as_float16(uint16 v) {
+++  union _type_cast_64_b u;
+++  u._uint16 = v;
+++  return u._float16;
+++}
+++
+++INLINE OVERLOADABLE int16 as_int16(float16 v) {
+++  union _type_cast_64_b u;
+++  u._float16 = v;
+++  return u._int16;
+++}
+++
+++INLINE OVERLOADABLE uint16 as_uint16(float16 v) {
+++  union _type_cast_64_b u;
+++  u._float16 = v;
+++  return u._uint16;
++ }
+++
+++// ##END_CONVERSIONS##
+++
++ #define DEF(type, n, type2) type##n INLINE_OVERLOADABLE convert_##type##n(type2##n d) { \
++     return (type##n)((type)(d.s0), (type)(d.s1), (type)(d.s2), (type)(d.s3)); \
++  }
++Index: beignet-0.1+git20130514+19e9c58/backend/src/update_conversions.sh
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/backend/src/update_conversions.sh	2013-05-14 20:08:14.718024731 +0200
++@@ -0,0 +1,11 @@
+++#! /bin/sh -e
+++
+++STDLIB_HEADER=ocl_stdlib.h
+++
+++exec >$STDLIB_HEADER.tmp
+++sed -n -e '1,/##BEGIN_CONVERSIONS##/p' $STDLIB_HEADER
+++./gen_conversions.sh
+++sed -n -e '/##END_CONVERSIONS##/,$p' $STDLIB_HEADER
+++exec >&2
+++
+++mv $STDLIB_HEADER.tmp $STDLIB_HEADER
diff --cc debian/patches/0002-Define-all-convert_-functions.patch
index 0000000,0000000..06586ec
new file mode 100644
--- /dev/null
+++ b/debian/patches/0002-Define-all-convert_-functions.patch
@@@ -1,0 -1,0 +1,946 @@@
++From b3a5ab4df6690230feae128a26d9fa7a4cefdd2b Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Tue, 14 May 2013 16:45:46 +0200
++Subject: [PATCH 02/15] Define all convert_* functions.
++To: beignet at lists.freedesktop.org
++
++These functions convert between vectors of the same length by casting each
++member in turn.
++---
++ backend/src/gen_conversions.sh |   49 +++
++ backend/src/ocl_stdlib.h       |  863 ++++++++++++++++++++++++++++++++++++++--
++ 2 files changed, 889 insertions(+), 23 deletions(-)
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/gen_conversions.sh	2013-05-14 20:08:14.714024732 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh	2013-05-14 20:08:21.594024425 +0200
++@@ -87,3 +87,52 @@
++         done
++ 
++ done
+++
+++# For all vector lengths and types, generate conversion functions
+++for vector_length in $VECTOR_LENGTHS; do
+++        if test $vector_length -eq 1; then
+++                continue;
+++        fi
+++        for ftype in $TYPES; do
+++                fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
+++                for ttype in $TYPES; do
+++                        tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
+++                        if test $fbasetype = $tbasetype; then
+++                                continue
+++                        fi
+++                        fvectortype=$fbasetype$vector_length
+++                        tvectortype=$tbasetype$vector_length
+++                        construct="($tbasetype)(v.s0)"
+++                        if test $vector_length -gt 1; then
+++                                construct="$construct, ($tbasetype)(v.s1)"
+++                        fi
+++                        if test $vector_length -gt 2; then
+++                                construct="$construct, ($tbasetype)(v.s2)"
+++                        fi
+++                        if test $vector_length -gt 3; then
+++                                construct="$construct, ($tbasetype)(v.s3)"
+++                        fi
+++                        if test $vector_length -gt 4; then
+++                                construct="$construct, ($tbasetype)(v.s4)"
+++                                construct="$construct, ($tbasetype)(v.s5)"
+++                                construct="$construct, ($tbasetype)(v.s6)"
+++                                construct="$construct, ($tbasetype)(v.s7)"
+++                        fi
+++                        if test $vector_length -gt 8; then
+++                                construct="$construct, ($tbasetype)(v.s8)"
+++                                construct="$construct, ($tbasetype)(v.s9)"
+++                                construct="$construct, ($tbasetype)(v.sA)"
+++                                construct="$construct, ($tbasetype)(v.sB)"
+++                                construct="$construct, ($tbasetype)(v.sC)"
+++                                construct="$construct, ($tbasetype)(v.sD)"
+++                                construct="$construct, ($tbasetype)(v.sE)"
+++                                construct="$construct, ($tbasetype)(v.sF)"
+++                        fi
+++                        
+++                        echo "INLINE OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v) {"
+++                        echo "  return ($tvectortype)($construct);"
+++                        echo "}"
+++                        echo
+++                done
+++        done
+++done
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:08:14.718024731 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:08:21.598024425 +0200
++@@ -1213,31 +1213,848 @@
++   return u._uint16;
++ }
++ 
+++INLINE OVERLOADABLE uint2 convert_uint2(int2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(int2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(int2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(int2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(int2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(int2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(uint2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(uint2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(uint2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(uint2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(uint2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(uint2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(short2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint2 convert_uint2(short2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(short2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(short2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(short2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(short2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(ushort2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint2 convert_uint2(ushort2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(ushort2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(ushort2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(ushort2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(ushort2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(char2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint2 convert_uint2(char2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(char2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(char2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(char2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(char2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(uchar2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint2 convert_uint2(uchar2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(uchar2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(uchar2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(uchar2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(uchar2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(float2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint2 convert_uint2(float2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(float2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(float2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(int3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(int3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(int3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(int3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(int3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE float3 convert_float3(int3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(uint3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(uint3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(uint3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(uint3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(uint3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE float3 convert_float3(uint3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(short3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(short3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(short3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(short3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(short3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE float3 convert_float3(short3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(ushort3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(ushort3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(ushort3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(ushort3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(ushort3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE float3 convert_float3(ushort3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(char3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(char3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(char3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(char3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(char3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE float3 convert_float3(char3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(uchar3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(uchar3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(uchar3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(uchar3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(uchar3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE float3 convert_float3(uchar3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(float3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(float3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(float3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(float3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(float3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(float3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(int4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(int4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(int4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(int4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(int4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(int4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(uint4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(uint4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(uint4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(uint4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(uint4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(uint4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(short4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(short4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(short4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(short4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(short4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(short4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(ushort4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(ushort4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(ushort4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(ushort4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(ushort4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(ushort4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(char4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(char4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(char4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(char4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(char4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(char4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(uchar4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(uchar4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(uchar4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(uchar4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(uchar4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(uchar4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(float4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(float4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(float4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(float4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(float4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(float4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(int8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(int8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(int8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(int8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(int8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(int8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(uint8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(uint8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(uint8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(uint8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(uint8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(uint8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(short8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(short8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(short8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(short8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(short8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(short8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(ushort8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(ushort8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(ushort8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(ushort8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(ushort8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(ushort8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(char8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(char8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(char8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(char8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(char8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(char8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(uchar8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(uchar8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(uchar8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(uchar8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(uchar8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(uchar8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(float8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(float8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(float8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(float8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(float8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(float8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(int16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(int16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(int16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(int16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(int16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(int16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(uint16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(uint16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(uint16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(uint16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(uint16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(uint16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(short16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(short16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(short16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(short16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(short16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(short16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(ushort16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(ushort16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(ushort16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(ushort16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(ushort16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(ushort16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(char16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(char16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(char16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(char16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(char16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(char16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(uchar16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(uchar16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(uchar16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(uchar16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(uchar16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(uchar16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(float16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(float16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(float16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(float16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(float16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(float16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
++ // ##END_CONVERSIONS##
++ 
++-#define DEF(type, n, type2) type##n INLINE_OVERLOADABLE convert_##type##n(type2##n d) { \
++-    return (type##n)((type)(d.s0), (type)(d.s1), (type)(d.s2), (type)(d.s3)); \
++- }
++-#define DEF2(type) DEF(type, 4, char); \
++-                   DEF(type, 4, uchar); \
++-                   DEF(type, 4, short); \
++-                   DEF(type, 4, ushort); \
++-                   DEF(type, 4, int); \
++-                   DEF(type, 4, uint); \
++-                   DEF(type, 4, long); \
++-                   DEF(type, 4, ulong); \
++-                   DEF(type, 4, float);
++-DEF2(char);
++-DEF2(uchar);
++-DEF2(short);
++-DEF2(ushort);
++-DEF2(int);
++-DEF2(uint);
++-DEF2(long);
++-DEF2(ulong);
++-DEF2(float);
++-#undef DEF2
++-#undef DEF
++ /////////////////////////////////////////////////////////////////////////////
++ // OpenCL preprocessor directives & macros
++ /////////////////////////////////////////////////////////////////////////////
diff --cc debian/patches/0003-Add-long-and-ulong-types-to-conversions.patch
index 0000000,0000000..22a7945
new file mode 100644
--- /dev/null
+++ b/debian/patches/0003-Add-long-and-ulong-types-to-conversions.patch
@@@ -1,0 -1,0 +1,1719 @@@
++From a10c3357283b8e2714a41b2d31f0f9831073202a Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Tue, 14 May 2013 17:00:45 +0200
++Subject: [PATCH 03/15] Add long and ulong types to conversions.
++To: beignet at lists.freedesktop.org
++
++This enables as_* and convert_* for the long and ulong data types.
++---
++ backend/src/gen_conversions.sh |    2 +-
++ backend/src/ocl_stdlib.h       | 1248 +++++++++++++++++++++++++++++++++++++++-
++ 2 files changed, 1234 insertions(+), 16 deletions(-)
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/gen_conversions.sh	2013-05-14 20:08:21.594024425 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh	2013-05-14 20:08:23.422024343 +0200
++@@ -1,7 +1,7 @@
++ #! /bin/sh -e
++ 
++ # Supported base types and their lengths
++-TYPES="int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
+++TYPES="long:8 ulong:8 int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
++ 
++ # Supported vector lengths
++ VECTOR_LENGTHS="1 2 3 4 8 16"
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:08:21.598024425 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:08:23.426024343 +0200
++@@ -478,6 +478,8 @@
++ }
++ 
++ union _type_cast_8_b {
+++  long _long;
+++  ulong _ulong;
++   int2 _int2;
++   uint2 _uint2;
++   short4 _short4;
++@@ -487,6 +489,114 @@
++   float2 _float2;
++ };
++ 
+++INLINE OVERLOADABLE ulong as_ulong(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._ulong;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE uint2 as_uint2(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._uchar8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(long v) {
+++  union _type_cast_8_b u;
+++  u._long = v;
+++  return u._float2;
+++}
+++
+++INLINE OVERLOADABLE long as_long(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE int2 as_int2(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._int2;
+++}
+++
+++INLINE OVERLOADABLE uint2 as_uint2(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._uint2;
+++}
+++
+++INLINE OVERLOADABLE short4 as_short4(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._short4;
+++}
+++
+++INLINE OVERLOADABLE ushort4 as_ushort4(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._ushort4;
+++}
+++
+++INLINE OVERLOADABLE char8 as_char8(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._char8;
+++}
+++
+++INLINE OVERLOADABLE uchar8 as_uchar8(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._uchar8;
+++}
+++
+++INLINE OVERLOADABLE float2 as_float2(ulong v) {
+++  union _type_cast_8_b u;
+++  u._ulong = v;
+++  return u._float2;
+++}
+++
+++INLINE OVERLOADABLE long as_long(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE ulong as_ulong(int2 v) {
+++  union _type_cast_8_b u;
+++  u._int2 = v;
+++  return u._ulong;
+++}
+++
++ INLINE OVERLOADABLE uint2 as_uint2(int2 v) {
++   union _type_cast_8_b u;
++   u._int2 = v;
++@@ -523,6 +633,18 @@
++   return u._float2;
++ }
++ 
+++INLINE OVERLOADABLE long as_long(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE ulong as_ulong(uint2 v) {
+++  union _type_cast_8_b u;
+++  u._uint2 = v;
+++  return u._ulong;
+++}
+++
++ INLINE OVERLOADABLE int2 as_int2(uint2 v) {
++   union _type_cast_8_b u;
++   u._uint2 = v;
++@@ -559,6 +681,18 @@
++   return u._float2;
++ }
++ 
+++INLINE OVERLOADABLE long as_long(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE ulong as_ulong(short4 v) {
+++  union _type_cast_8_b u;
+++  u._short4 = v;
+++  return u._ulong;
+++}
+++
++ INLINE OVERLOADABLE int2 as_int2(short4 v) {
++   union _type_cast_8_b u;
++   u._short4 = v;
++@@ -595,6 +729,18 @@
++   return u._float2;
++ }
++ 
+++INLINE OVERLOADABLE long as_long(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE ulong as_ulong(ushort4 v) {
+++  union _type_cast_8_b u;
+++  u._ushort4 = v;
+++  return u._ulong;
+++}
+++
++ INLINE OVERLOADABLE int2 as_int2(ushort4 v) {
++   union _type_cast_8_b u;
++   u._ushort4 = v;
++@@ -631,6 +777,18 @@
++   return u._float2;
++ }
++ 
+++INLINE OVERLOADABLE long as_long(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE ulong as_ulong(char8 v) {
+++  union _type_cast_8_b u;
+++  u._char8 = v;
+++  return u._ulong;
+++}
+++
++ INLINE OVERLOADABLE int2 as_int2(char8 v) {
++   union _type_cast_8_b u;
++   u._char8 = v;
++@@ -667,6 +825,18 @@
++   return u._float2;
++ }
++ 
+++INLINE OVERLOADABLE long as_long(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE ulong as_ulong(uchar8 v) {
+++  union _type_cast_8_b u;
+++  u._uchar8 = v;
+++  return u._ulong;
+++}
+++
++ INLINE OVERLOADABLE int2 as_int2(uchar8 v) {
++   union _type_cast_8_b u;
++   u._uchar8 = v;
++@@ -703,6 +873,18 @@
++   return u._float2;
++ }
++ 
+++INLINE OVERLOADABLE long as_long(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._long;
+++}
+++
+++INLINE OVERLOADABLE ulong as_ulong(float2 v) {
+++  union _type_cast_8_b u;
+++  u._float2 = v;
+++  return u._ulong;
+++}
+++
++ INLINE OVERLOADABLE int2 as_int2(float2 v) {
++   union _type_cast_8_b u;
++   u._float2 = v;
++@@ -782,6 +964,8 @@
++ }
++ 
++ union _type_cast_16_b {
+++  long2 _long2;
+++  ulong2 _ulong2;
++   int4 _int4;
++   uint4 _uint4;
++   short8 _short8;
++@@ -791,6 +975,114 @@
++   float4 _float4;
++ };
++ 
+++INLINE OVERLOADABLE ulong2 as_ulong2(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._ulong2;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE uint4 as_uint4(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._uchar16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(long2 v) {
+++  union _type_cast_16_b u;
+++  u._long2 = v;
+++  return u._float4;
+++}
+++
+++INLINE OVERLOADABLE long2 as_long2(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE int4 as_int4(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._int4;
+++}
+++
+++INLINE OVERLOADABLE uint4 as_uint4(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._uint4;
+++}
+++
+++INLINE OVERLOADABLE short8 as_short8(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._short8;
+++}
+++
+++INLINE OVERLOADABLE ushort8 as_ushort8(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._ushort8;
+++}
+++
+++INLINE OVERLOADABLE char16 as_char16(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._char16;
+++}
+++
+++INLINE OVERLOADABLE uchar16 as_uchar16(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._uchar16;
+++}
+++
+++INLINE OVERLOADABLE float4 as_float4(ulong2 v) {
+++  union _type_cast_16_b u;
+++  u._ulong2 = v;
+++  return u._float4;
+++}
+++
+++INLINE OVERLOADABLE long2 as_long2(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE ulong2 as_ulong2(int4 v) {
+++  union _type_cast_16_b u;
+++  u._int4 = v;
+++  return u._ulong2;
+++}
+++
++ INLINE OVERLOADABLE uint4 as_uint4(int4 v) {
++   union _type_cast_16_b u;
++   u._int4 = v;
++@@ -827,6 +1119,18 @@
++   return u._float4;
++ }
++ 
+++INLINE OVERLOADABLE long2 as_long2(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE ulong2 as_ulong2(uint4 v) {
+++  union _type_cast_16_b u;
+++  u._uint4 = v;
+++  return u._ulong2;
+++}
+++
++ INLINE OVERLOADABLE int4 as_int4(uint4 v) {
++   union _type_cast_16_b u;
++   u._uint4 = v;
++@@ -863,6 +1167,18 @@
++   return u._float4;
++ }
++ 
+++INLINE OVERLOADABLE long2 as_long2(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE ulong2 as_ulong2(short8 v) {
+++  union _type_cast_16_b u;
+++  u._short8 = v;
+++  return u._ulong2;
+++}
+++
++ INLINE OVERLOADABLE int4 as_int4(short8 v) {
++   union _type_cast_16_b u;
++   u._short8 = v;
++@@ -899,6 +1215,18 @@
++   return u._float4;
++ }
++ 
+++INLINE OVERLOADABLE long2 as_long2(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE ulong2 as_ulong2(ushort8 v) {
+++  union _type_cast_16_b u;
+++  u._ushort8 = v;
+++  return u._ulong2;
+++}
+++
++ INLINE OVERLOADABLE int4 as_int4(ushort8 v) {
++   union _type_cast_16_b u;
++   u._ushort8 = v;
++@@ -935,6 +1263,18 @@
++   return u._float4;
++ }
++ 
+++INLINE OVERLOADABLE long2 as_long2(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE ulong2 as_ulong2(char16 v) {
+++  union _type_cast_16_b u;
+++  u._char16 = v;
+++  return u._ulong2;
+++}
+++
++ INLINE OVERLOADABLE int4 as_int4(char16 v) {
++   union _type_cast_16_b u;
++   u._char16 = v;
++@@ -971,6 +1311,18 @@
++   return u._float4;
++ }
++ 
+++INLINE OVERLOADABLE long2 as_long2(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE ulong2 as_ulong2(uchar16 v) {
+++  union _type_cast_16_b u;
+++  u._uchar16 = v;
+++  return u._ulong2;
+++}
+++
++ INLINE OVERLOADABLE int4 as_int4(uchar16 v) {
++   union _type_cast_16_b u;
++   u._uchar16 = v;
++@@ -1007,6 +1359,18 @@
++   return u._float4;
++ }
++ 
+++INLINE OVERLOADABLE long2 as_long2(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._long2;
+++}
+++
+++INLINE OVERLOADABLE ulong2 as_ulong2(float4 v) {
+++  union _type_cast_16_b u;
+++  u._float4 = v;
+++  return u._ulong2;
+++}
+++
++ INLINE OVERLOADABLE int4 as_int4(float4 v) {
++   union _type_cast_16_b u;
++   u._float4 = v;
++@@ -1043,7 +1407,26 @@
++   return u._uchar16;
++ }
++ 
+++union _type_cast_24_b {
+++  long3 _long3;
+++  ulong3 _ulong3;
+++};
+++
+++INLINE OVERLOADABLE ulong3 as_ulong3(long3 v) {
+++  union _type_cast_24_b u;
+++  u._long3 = v;
+++  return u._ulong3;
+++}
+++
+++INLINE OVERLOADABLE long3 as_long3(ulong3 v) {
+++  union _type_cast_24_b u;
+++  u._ulong3 = v;
+++  return u._long3;
+++}
+++
++ union _type_cast_32_b {
+++  long4 _long4;
+++  ulong4 _ulong4;
++   int8 _int8;
++   uint8 _uint8;
++   short16 _short16;
++@@ -1051,30 +1434,126 @@
++   float8 _float8;
++ };
++ 
++-INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
+++INLINE OVERLOADABLE ulong4 as_ulong4(long4 v) {
++   union _type_cast_32_b u;
++-  u._int8 = v;
+++  u._long4 = v;
+++  return u._ulong4;
+++}
+++
+++INLINE OVERLOADABLE int8 as_int8(long4 v) {
+++  union _type_cast_32_b u;
+++  u._long4 = v;
+++  return u._int8;
+++}
+++
+++INLINE OVERLOADABLE uint8 as_uint8(long4 v) {
+++  union _type_cast_32_b u;
+++  u._long4 = v;
++   return u._uint8;
++ }
++ 
++-INLINE OVERLOADABLE short16 as_short16(int8 v) {
+++INLINE OVERLOADABLE short16 as_short16(long4 v) {
++   union _type_cast_32_b u;
++-  u._int8 = v;
+++  u._long4 = v;
++   return u._short16;
++ }
++ 
++-INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
+++INLINE OVERLOADABLE ushort16 as_ushort16(long4 v) {
++   union _type_cast_32_b u;
++-  u._int8 = v;
+++  u._long4 = v;
++   return u._ushort16;
++ }
++ 
++-INLINE OVERLOADABLE float8 as_float8(int8 v) {
+++INLINE OVERLOADABLE float8 as_float8(long4 v) {
+++  union _type_cast_32_b u;
+++  u._long4 = v;
+++  return u._float8;
+++}
+++
+++INLINE OVERLOADABLE long4 as_long4(ulong4 v) {
+++  union _type_cast_32_b u;
+++  u._ulong4 = v;
+++  return u._long4;
+++}
+++
+++INLINE OVERLOADABLE int8 as_int8(ulong4 v) {
+++  union _type_cast_32_b u;
+++  u._ulong4 = v;
+++  return u._int8;
+++}
+++
+++INLINE OVERLOADABLE uint8 as_uint8(ulong4 v) {
+++  union _type_cast_32_b u;
+++  u._ulong4 = v;
+++  return u._uint8;
+++}
+++
+++INLINE OVERLOADABLE short16 as_short16(ulong4 v) {
+++  union _type_cast_32_b u;
+++  u._ulong4 = v;
+++  return u._short16;
+++}
+++
+++INLINE OVERLOADABLE ushort16 as_ushort16(ulong4 v) {
+++  union _type_cast_32_b u;
+++  u._ulong4 = v;
+++  return u._ushort16;
+++}
+++
+++INLINE OVERLOADABLE float8 as_float8(ulong4 v) {
+++  union _type_cast_32_b u;
+++  u._ulong4 = v;
+++  return u._float8;
+++}
+++
+++INLINE OVERLOADABLE long4 as_long4(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._long4;
+++}
+++
+++INLINE OVERLOADABLE ulong4 as_ulong4(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._ulong4;
+++}
+++
+++INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._uint8;
+++}
+++
+++INLINE OVERLOADABLE short16 as_short16(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._short16;
+++}
+++
+++INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
+++  union _type_cast_32_b u;
+++  u._int8 = v;
+++  return u._ushort16;
+++}
+++
+++INLINE OVERLOADABLE float8 as_float8(int8 v) {
++   union _type_cast_32_b u;
++   u._int8 = v;
++   return u._float8;
++ }
++ 
+++INLINE OVERLOADABLE long4 as_long4(uint8 v) {
+++  union _type_cast_32_b u;
+++  u._uint8 = v;
+++  return u._long4;
+++}
+++
+++INLINE OVERLOADABLE ulong4 as_ulong4(uint8 v) {
+++  union _type_cast_32_b u;
+++  u._uint8 = v;
+++  return u._ulong4;
+++}
+++
++ INLINE OVERLOADABLE int8 as_int8(uint8 v) {
++   union _type_cast_32_b u;
++   u._uint8 = v;
++@@ -1099,6 +1578,18 @@
++   return u._float8;
++ }
++ 
+++INLINE OVERLOADABLE long4 as_long4(short16 v) {
+++  union _type_cast_32_b u;
+++  u._short16 = v;
+++  return u._long4;
+++}
+++
+++INLINE OVERLOADABLE ulong4 as_ulong4(short16 v) {
+++  union _type_cast_32_b u;
+++  u._short16 = v;
+++  return u._ulong4;
+++}
+++
++ INLINE OVERLOADABLE int8 as_int8(short16 v) {
++   union _type_cast_32_b u;
++   u._short16 = v;
++@@ -1123,6 +1614,18 @@
++   return u._float8;
++ }
++ 
+++INLINE OVERLOADABLE long4 as_long4(ushort16 v) {
+++  union _type_cast_32_b u;
+++  u._ushort16 = v;
+++  return u._long4;
+++}
+++
+++INLINE OVERLOADABLE ulong4 as_ulong4(ushort16 v) {
+++  union _type_cast_32_b u;
+++  u._ushort16 = v;
+++  return u._ulong4;
+++}
+++
++ INLINE OVERLOADABLE int8 as_int8(ushort16 v) {
++   union _type_cast_32_b u;
++   u._ushort16 = v;
++@@ -1147,6 +1650,18 @@
++   return u._float8;
++ }
++ 
+++INLINE OVERLOADABLE long4 as_long4(float8 v) {
+++  union _type_cast_32_b u;
+++  u._float8 = v;
+++  return u._long4;
+++}
+++
+++INLINE OVERLOADABLE ulong4 as_ulong4(float8 v) {
+++  union _type_cast_32_b u;
+++  u._float8 = v;
+++  return u._ulong4;
+++}
+++
++ INLINE OVERLOADABLE int8 as_int8(float8 v) {
++   union _type_cast_32_b u;
++   u._float8 = v;
++@@ -1172,11 +1687,73 @@
++ }
++ 
++ union _type_cast_64_b {
+++  long8 _long8;
+++  ulong8 _ulong8;
++   int16 _int16;
++   uint16 _uint16;
++   float16 _float16;
++ };
++ 
+++INLINE OVERLOADABLE ulong8 as_ulong8(long8 v) {
+++  union _type_cast_64_b u;
+++  u._long8 = v;
+++  return u._ulong8;
+++}
+++
+++INLINE OVERLOADABLE int16 as_int16(long8 v) {
+++  union _type_cast_64_b u;
+++  u._long8 = v;
+++  return u._int16;
+++}
+++
+++INLINE OVERLOADABLE uint16 as_uint16(long8 v) {
+++  union _type_cast_64_b u;
+++  u._long8 = v;
+++  return u._uint16;
+++}
+++
+++INLINE OVERLOADABLE float16 as_float16(long8 v) {
+++  union _type_cast_64_b u;
+++  u._long8 = v;
+++  return u._float16;
+++}
+++
+++INLINE OVERLOADABLE long8 as_long8(ulong8 v) {
+++  union _type_cast_64_b u;
+++  u._ulong8 = v;
+++  return u._long8;
+++}
+++
+++INLINE OVERLOADABLE int16 as_int16(ulong8 v) {
+++  union _type_cast_64_b u;
+++  u._ulong8 = v;
+++  return u._int16;
+++}
+++
+++INLINE OVERLOADABLE uint16 as_uint16(ulong8 v) {
+++  union _type_cast_64_b u;
+++  u._ulong8 = v;
+++  return u._uint16;
+++}
+++
+++INLINE OVERLOADABLE float16 as_float16(ulong8 v) {
+++  union _type_cast_64_b u;
+++  u._ulong8 = v;
+++  return u._float16;
+++}
+++
+++INLINE OVERLOADABLE long8 as_long8(int16 v) {
+++  union _type_cast_64_b u;
+++  u._int16 = v;
+++  return u._long8;
+++}
+++
+++INLINE OVERLOADABLE ulong8 as_ulong8(int16 v) {
+++  union _type_cast_64_b u;
+++  u._int16 = v;
+++  return u._ulong8;
+++}
+++
++ INLINE OVERLOADABLE uint16 as_uint16(int16 v) {
++   union _type_cast_64_b u;
++   u._int16 = v;
++@@ -1189,6 +1766,18 @@
++   return u._float16;
++ }
++ 
+++INLINE OVERLOADABLE long8 as_long8(uint16 v) {
+++  union _type_cast_64_b u;
+++  u._uint16 = v;
+++  return u._long8;
+++}
+++
+++INLINE OVERLOADABLE ulong8 as_ulong8(uint16 v) {
+++  union _type_cast_64_b u;
+++  u._uint16 = v;
+++  return u._ulong8;
+++}
+++
++ INLINE OVERLOADABLE int16 as_int16(uint16 v) {
++   union _type_cast_64_b u;
++   u._uint16 = v;
++@@ -1201,6 +1790,18 @@
++   return u._float16;
++ }
++ 
+++INLINE OVERLOADABLE long8 as_long8(float16 v) {
+++  union _type_cast_64_b u;
+++  u._float16 = v;
+++  return u._long8;
+++}
+++
+++INLINE OVERLOADABLE ulong8 as_ulong8(float16 v) {
+++  union _type_cast_64_b u;
+++  u._float16 = v;
+++  return u._ulong8;
+++}
+++
++ INLINE OVERLOADABLE int16 as_int16(float16 v) {
++   union _type_cast_64_b u;
++   u._float16 = v;
++@@ -1213,6 +1814,95 @@
++   return u._uint16;
++ }
++ 
+++union _type_cast_128_b {
+++  long16 _long16;
+++  ulong16 _ulong16;
+++};
+++
+++INLINE OVERLOADABLE ulong16 as_ulong16(long16 v) {
+++  union _type_cast_128_b u;
+++  u._long16 = v;
+++  return u._ulong16;
+++}
+++
+++INLINE OVERLOADABLE long16 as_long16(ulong16 v) {
+++  union _type_cast_128_b u;
+++  u._ulong16 = v;
+++  return u._long16;
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(long2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(long2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint2 convert_uint2(long2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(long2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(long2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(long2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(long2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(long2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE long2 convert_long2(ulong2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE int2 convert_int2(ulong2 v) {
+++  return (int2)((int)(v.s0), (int)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uint2 convert_uint2(ulong2 v) {
+++  return (uint2)((uint)(v.s0), (uint)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE short2 convert_short2(ulong2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(ulong2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(ulong2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(ulong2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE float2 convert_float2(ulong2 v) {
+++  return (float2)((float)(v.s0), (float)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE long2 convert_long2(int2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(int2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE uint2 convert_uint2(int2 v) {
++   return (uint2)((uint)(v.s0), (uint)(v.s1));
++ }
++@@ -1237,6 +1927,14 @@
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE long2 convert_long2(uint2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(uint2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE int2 convert_int2(uint2 v) {
++   return (int2)((int)(v.s0), (int)(v.s1));
++ }
++@@ -1261,6 +1959,14 @@
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE long2 convert_long2(short2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(short2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE int2 convert_int2(short2 v) {
++   return (int2)((int)(v.s0), (int)(v.s1));
++ }
++@@ -1285,6 +1991,14 @@
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE long2 convert_long2(ushort2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(ushort2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE int2 convert_int2(ushort2 v) {
++   return (int2)((int)(v.s0), (int)(v.s1));
++ }
++@@ -1309,6 +2023,14 @@
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE long2 convert_long2(char2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(char2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE int2 convert_int2(char2 v) {
++   return (int2)((int)(v.s0), (int)(v.s1));
++ }
++@@ -1333,6 +2055,14 @@
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE long2 convert_long2(uchar2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(uchar2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE int2 convert_int2(uchar2 v) {
++   return (int2)((int)(v.s0), (int)(v.s1));
++ }
++@@ -1357,6 +2087,14 @@
++   return (float2)((float)(v.s0), (float)(v.s1));
++ }
++ 
+++INLINE OVERLOADABLE long2 convert_long2(float2 v) {
+++  return (long2)((long)(v.s0), (long)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong2 convert_ulong2(float2 v) {
+++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
+++}
+++
++ INLINE OVERLOADABLE int2 convert_int2(float2 v) {
++   return (int2)((int)(v.s0), (int)(v.s1));
++ }
++@@ -1365,20 +2103,92 @@
++   return (uint2)((uint)(v.s0), (uint)(v.s1));
++ }
++ 
++-INLINE OVERLOADABLE short2 convert_short2(float2 v) {
++-  return (short2)((short)(v.s0), (short)(v.s1));
+++INLINE OVERLOADABLE short2 convert_short2(float2 v) {
+++  return (short2)((short)(v.s0), (short)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
+++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE char2 convert_char2(float2 v) {
+++  return (char2)((char)(v.s0), (char)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
+++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++}
+++
+++INLINE OVERLOADABLE ulong3 convert_ulong3(long3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(long3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(long3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(long3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(long3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(long3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(long3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE float3 convert_float3(long3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE long3 convert_long3(ulong3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE int3 convert_int3(ulong3 v) {
+++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uint3 convert_uint3(ulong3 v) {
+++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE short3 convert_short3(ulong3 v) {
+++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ushort3 convert_ushort3(ulong3 v) {
+++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE char3 convert_char3(ulong3 v) {
+++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE uchar3 convert_uchar3(ulong3 v) {
+++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
++-INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
++-  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
+++INLINE OVERLOADABLE float3 convert_float3(ulong3 v) {
+++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++ 
++-INLINE OVERLOADABLE char2 convert_char2(float2 v) {
++-  return (char2)((char)(v.s0), (char)(v.s1));
+++INLINE OVERLOADABLE long3 convert_long3(int3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++ }
++ 
++-INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
++-  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
+++INLINE OVERLOADABLE ulong3 convert_ulong3(int3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++ }
++ 
++ INLINE OVERLOADABLE uint3 convert_uint3(int3 v) {
++@@ -1405,6 +2215,14 @@
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE long3 convert_long3(uint3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ulong3 convert_ulong3(uint3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE int3 convert_int3(uint3 v) {
++   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++ }
++@@ -1429,6 +2247,14 @@
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE long3 convert_long3(short3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ulong3 convert_ulong3(short3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE int3 convert_int3(short3 v) {
++   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++ }
++@@ -1453,6 +2279,14 @@
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE long3 convert_long3(ushort3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ulong3 convert_ulong3(ushort3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE int3 convert_int3(ushort3 v) {
++   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++ }
++@@ -1477,6 +2311,14 @@
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE long3 convert_long3(char3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ulong3 convert_ulong3(char3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE int3 convert_int3(char3 v) {
++   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++ }
++@@ -1501,6 +2343,14 @@
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE long3 convert_long3(uchar3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ulong3 convert_ulong3(uchar3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE int3 convert_int3(uchar3 v) {
++   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++ }
++@@ -1525,6 +2375,14 @@
++   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE long3 convert_long3(float3 v) {
+++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+++}
+++
+++INLINE OVERLOADABLE ulong3 convert_ulong3(float3 v) {
+++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+++}
+++
++ INLINE OVERLOADABLE int3 convert_int3(float3 v) {
++   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++ }
++@@ -1549,6 +2407,78 @@
++   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++ }
++ 
+++INLINE OVERLOADABLE ulong4 convert_ulong4(long4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(long4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(long4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(long4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(long4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(long4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(long4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(long4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE long4 convert_long4(ulong4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE int4 convert_int4(ulong4 v) {
+++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uint4 convert_uint4(ulong4 v) {
+++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE short4 convert_short4(ulong4 v) {
+++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ushort4 convert_ushort4(ulong4 v) {
+++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE char4 convert_char4(ulong4 v) {
+++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE uchar4 convert_uchar4(ulong4 v) {
+++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE float4 convert_float4(ulong4 v) {
+++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE long4 convert_long4(int4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ulong4 convert_ulong4(int4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE uint4 convert_uint4(int4 v) {
++   return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++ }
++@@ -1573,6 +2503,14 @@
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE long4 convert_long4(uint4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ulong4 convert_ulong4(uint4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE int4 convert_int4(uint4 v) {
++   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++ }
++@@ -1597,6 +2535,14 @@
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE long4 convert_long4(short4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ulong4 convert_ulong4(short4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE int4 convert_int4(short4 v) {
++   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++ }
++@@ -1621,6 +2567,14 @@
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE long4 convert_long4(ushort4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ulong4 convert_ulong4(ushort4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE int4 convert_int4(ushort4 v) {
++   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++ }
++@@ -1645,6 +2599,14 @@
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE long4 convert_long4(char4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ulong4 convert_ulong4(char4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE int4 convert_int4(char4 v) {
++   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++ }
++@@ -1669,6 +2631,14 @@
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE long4 convert_long4(uchar4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ulong4 convert_ulong4(uchar4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE int4 convert_int4(uchar4 v) {
++   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++ }
++@@ -1693,6 +2663,14 @@
++   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE long4 convert_long4(float4 v) {
+++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
+++}
+++
+++INLINE OVERLOADABLE ulong4 convert_ulong4(float4 v) {
+++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
+++}
+++
++ INLINE OVERLOADABLE int4 convert_int4(float4 v) {
++   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++ }
++@@ -1717,6 +2695,78 @@
++   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++ }
++ 
+++INLINE OVERLOADABLE ulong8 convert_ulong8(long8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(long8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(long8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(long8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(long8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(long8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(long8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(long8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE long8 convert_long8(ulong8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE int8 convert_int8(ulong8 v) {
+++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uint8 convert_uint8(ulong8 v) {
+++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE short8 convert_short8(ulong8 v) {
+++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ushort8 convert_ushort8(ulong8 v) {
+++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE char8 convert_char8(ulong8 v) {
+++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE uchar8 convert_uchar8(ulong8 v) {
+++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE float8 convert_float8(ulong8 v) {
+++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE long8 convert_long8(int8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ulong8 convert_ulong8(int8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE uint8 convert_uint8(int8 v) {
++   return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++ }
++@@ -1741,6 +2791,14 @@
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE long8 convert_long8(uint8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ulong8 convert_ulong8(uint8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE int8 convert_int8(uint8 v) {
++   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++ }
++@@ -1765,6 +2823,14 @@
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE long8 convert_long8(short8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ulong8 convert_ulong8(short8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE int8 convert_int8(short8 v) {
++   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++ }
++@@ -1789,6 +2855,14 @@
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE long8 convert_long8(ushort8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ulong8 convert_ulong8(ushort8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE int8 convert_int8(ushort8 v) {
++   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++ }
++@@ -1813,6 +2887,14 @@
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE long8 convert_long8(char8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ulong8 convert_ulong8(char8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE int8 convert_int8(char8 v) {
++   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++ }
++@@ -1837,6 +2919,14 @@
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE long8 convert_long8(uchar8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ulong8 convert_ulong8(uchar8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE int8 convert_int8(uchar8 v) {
++   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++ }
++@@ -1861,6 +2951,14 @@
++   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE long8 convert_long8(float8 v) {
+++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
+++}
+++
+++INLINE OVERLOADABLE ulong8 convert_ulong8(float8 v) {
+++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
+++}
+++
++ INLINE OVERLOADABLE int8 convert_int8(float8 v) {
++   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++ }
++@@ -1885,6 +2983,78 @@
++   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++ }
++ 
+++INLINE OVERLOADABLE ulong16 convert_ulong16(long16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(long16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(long16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(long16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(long16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(long16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(long16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(long16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE long16 convert_long16(ulong16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE int16 convert_int16(ulong16 v) {
+++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uint16 convert_uint16(ulong16 v) {
+++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE short16 convert_short16(ulong16 v) {
+++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ushort16 convert_ushort16(ulong16 v) {
+++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE char16 convert_char16(ulong16 v) {
+++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE uchar16 convert_uchar16(ulong16 v) {
+++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE float16 convert_float16(ulong16 v) {
+++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE long16 convert_long16(int16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ulong16 convert_ulong16(int16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE uint16 convert_uint16(int16 v) {
++   return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++ }
++@@ -1909,6 +3079,14 @@
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE long16 convert_long16(uint16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ulong16 convert_ulong16(uint16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE int16 convert_int16(uint16 v) {
++   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++ }
++@@ -1933,6 +3111,14 @@
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE long16 convert_long16(short16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ulong16 convert_ulong16(short16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE int16 convert_int16(short16 v) {
++   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++ }
++@@ -1957,6 +3143,14 @@
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE long16 convert_long16(ushort16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ulong16 convert_ulong16(ushort16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE int16 convert_int16(ushort16 v) {
++   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++ }
++@@ -1981,6 +3175,14 @@
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE long16 convert_long16(char16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ulong16 convert_ulong16(char16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE int16 convert_int16(char16 v) {
++   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++ }
++@@ -2005,6 +3207,14 @@
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE long16 convert_long16(uchar16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ulong16 convert_ulong16(uchar16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE int16 convert_int16(uchar16 v) {
++   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++ }
++@@ -2029,6 +3239,14 @@
++   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++ }
++ 
+++INLINE OVERLOADABLE long16 convert_long16(float16 v) {
+++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
+++}
+++
+++INLINE OVERLOADABLE ulong16 convert_ulong16(float16 v) {
+++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
+++}
+++
++ INLINE OVERLOADABLE int16 convert_int16(float16 v) {
++   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++ }
diff --cc debian/patches/0004-Make-libgbm-optional-without-EGL-support.patch
index 0000000,0000000..274623b
new file mode 100644
--- /dev/null
+++ b/debian/patches/0004-Make-libgbm-optional-without-EGL-support.patch
@@@ -1,0 -1,0 +1,45 @@@
++From 499ec4e50734039504af732964985a3e094434ee Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Mon, 13 May 2013 23:02:16 +0200
++Subject: [PATCH 04/15] Make libgbm optional without EGL support
++To: beignet at lists.freedesktop.org
++
++If EGL or GBM cannot be found, the EGL support is disabled, and then
++neither library is required.
++---
++ src/CMakeLists.txt |   10 ++++++----
++ 1 file changed, 6 insertions(+), 4 deletions(-)
++
++Index: beignet-0.1+git20130514+19e9c58/src/CMakeLists.txt
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/CMakeLists.txt	2013-05-14 20:05:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/CMakeLists.txt	2013-05-14 20:08:57.006022846 +0200
++@@ -28,14 +28,16 @@
++     x11/dricommon.c 
++     x11/va_dri2.c)
++ 
++-if (EGL_FOUND)
+++if (EGL_FOUND AND GBM_FOUND)
++ set (OPENCL_SRC ${OPENCL_SRC} cl_mem_gl.c cl_gl_api.c x11/gbm_dri2_x11_platform.c)
++ SET(CMAKE_CXX_FLAGS "-DHAS_EGL ${CMAKE_CXX_FLAGS}")
++ SET(CMAKE_C_FLAGS "-DHAS_EGL ${CMAKE_C_FLAGS}")
++ SET(OPTIONAL_EGL_LIBRARY "${EGL_LIBRARY}")
++-else(EGL_FOUND)
+++SET(OPTIONAL_GBM_LIBRARY "${GBM_LIBRARY}")
+++else(EGL_FOUND AND GBM_FOUND)
++ SET(OPTIONAL_EGL_LIBRARY "")
++-endif (EGL_FOUND)
+++SET(OPTIONAL_GBM_LIBRARY "")
+++endif (EGL_FOUND AND GBM_FOUND)
++ 
++ if (OCLIcd_FOUND)
++ set (OPENCL_SRC ${OPENCL_SRC} cl_khr_icd.c)
++@@ -57,7 +59,7 @@
++                       ${DRM_LIBRARY}
++                       ${OPENGL_LIBRARIES}
++                       ${OPTIONAL_EGL_LIBRARY}
++-                      ${GBM_LIBRARY})
+++                      ${OPTIONAL_GBM_LIBRARY})
++ set_target_properties(cl
++                         PROPERTIES
++                         VERSION 0.1
diff --cc debian/patches/0005-Define-clamp-value-lower-upper.patch
index 0000000,0000000..976fdbf
new file mode 100644
--- /dev/null
+++ b/debian/patches/0005-Define-clamp-value-lower-upper.patch
@@@ -1,0 -1,0 +1,131 @@@
++From 780dcc213bd8c37c297c5d7a089ad355cb31649d Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Mon, 13 May 2013 09:09:11 +0200
++Subject: [PATCH 05/15] Define clamp(value, lower, upper)
++To: beignet at lists.freedesktop.org
++
++The clamp(value, lower, upper) function is part of the standard library.
++
++ - Define the function, using min() and max() on the lower level
++ - Remove private definitions from kernels
++---
++ backend/src/ocl_stdlib.h                    |   23 +++++++++++++----------
++ kernels/compiler_julia.cl                   |    2 --
++ kernels/compiler_julia_no_break.cl          |    2 --
++ kernels/compiler_menger_sponge.cl           |    2 --
++ kernels/compiler_menger_sponge_no_shadow.cl |    2 --
++ kernels/compiler_nautilus.cl                |    4 +---
++ 6 files changed, 14 insertions(+), 21 deletions(-)
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:08:23.426024343 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:09:02.546022599 +0200
++@@ -3643,21 +3643,24 @@
++ /////////////////////////////////////////////////////////////////////////////
++ // Common Functions (see 6.11.4 of OCL 1.1 spec)
++ /////////////////////////////////////////////////////////////////////////////
++-#define DECL_MIN_MAX(TYPE) \
+++#define DECL_MIN_MAX_CLAMP(TYPE) \
++ INLINE OVERLOADABLE TYPE max(TYPE a, TYPE b) { \
++   return a > b ? a : b; \
++ } \
++ INLINE OVERLOADABLE TYPE min(TYPE a, TYPE b) { \
++   return a < b ? a : b; \
+++} \
+++INLINE OVERLOADABLE TYPE clamp(TYPE v, TYPE l, TYPE u) { \
+++  return max(min(v, u), l); \
++ }
++-DECL_MIN_MAX(float)
++-DECL_MIN_MAX(int)
++-DECL_MIN_MAX(short)
++-DECL_MIN_MAX(char)
++-DECL_MIN_MAX(uint)
++-DECL_MIN_MAX(unsigned short)
++-DECL_MIN_MAX(unsigned char)
++-#undef DECL_MIN_MAX
+++DECL_MIN_MAX_CLAMP(float)
+++DECL_MIN_MAX_CLAMP(int)
+++DECL_MIN_MAX_CLAMP(short)
+++DECL_MIN_MAX_CLAMP(char)
+++DECL_MIN_MAX_CLAMP(uint)
+++DECL_MIN_MAX_CLAMP(unsigned short)
+++DECL_MIN_MAX_CLAMP(unsigned char)
+++#undef DECL_MIN_MAX_CLAMP
++ 
++ INLINE OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) { return max(a,b); }
++ INLINE OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) { return min(a,b); }
++Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_julia.cl
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_julia.cl	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_julia.cl	2013-05-14 20:09:02.546022599 +0200
++@@ -14,8 +14,6 @@
++   return I - 2.0f * dot(N, I) * N;
++ }
++ 
++-inline float clamp(x,m,M) { return max(min(x,M),m); }
++-
++ inline uint pack_fp4(float4 u4) {
++   uint u;
++   u = (((uint) u4.x)) |
++Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_julia_no_break.cl
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_julia_no_break.cl	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_julia_no_break.cl	2013-05-14 20:09:02.546022599 +0200
++@@ -14,8 +14,6 @@
++   return I - 2.0f * dot(N, I) * N;
++ }
++ 
++-inline float clamp(x,m,M) { return max(min(x,M),m); }
++-
++ inline uint pack_fp4(float4 u4) {
++   uint u;
++   u = (((uint) u4.x)) |
++Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_menger_sponge.cl
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_menger_sponge.cl	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_menger_sponge.cl	2013-05-14 20:09:02.546022599 +0200
++@@ -25,8 +25,6 @@
++   return I - 2.0f * dot(N, I) * N;
++ }
++ 
++-inline float clamp(x,m,M) { return max(min(x,M),m); }
++-
++ inline uint pack_fp4(float4 u4) {
++   uint u;
++   u = (((uint) u4.x)) |
++Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_menger_sponge_no_shadow.cl
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_menger_sponge_no_shadow.cl	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_menger_sponge_no_shadow.cl	2013-05-14 20:09:02.546022599 +0200
++@@ -25,8 +25,6 @@
++   return I - 2.0f * dot(N, I) * N;
++ }
++ 
++-inline float clamp(x,m,M) { return max(min(x,M),m); }
++-
++ inline uint pack_fp4(float4 u4) {
++   uint u;
++   u = (((uint) u4.x)) |
++Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_nautilus.cl
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_nautilus.cl	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_nautilus.cl	2013-05-14 20:09:02.546022599 +0200
++@@ -14,8 +14,6 @@
++   return I - 2.0f * dot(N, I) * N;
++ }
++ 
++-inline float clamp(x,m,M) { return max(min(x,M),m); }
++-
++ inline uint pack_fp4(float4 u4) {
++   uint u;
++   u = (((uint) u4.x)) |
++@@ -59,7 +57,7 @@
++   for(int q=0;q<100;q++)
++   {
++      float l = e(o+0.5f*(vec3)(cos(1.1f*(float)(q)),cos(1.6f*(float)(q)),cos(1.4f*(float)(q))))-m;
++-     a+=clamp(4.0f*l,0.0f,1.0f);
+++     a+=floor(clamp(4.0f*l,0.0f,1.0f));
++   }
++   v*=a/100.0f;
++   vec4 gl_FragColor=(vec4)(v,1.0f);
diff --cc debian/patches/0006-Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch
index 0000000,0000000..4984913
new file mode 100644
--- /dev/null
+++ b/debian/patches/0006-Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch
@@@ -1,0 -1,0 +1,58 @@@
++From d7c7354c4857ed9934086e3bdaf2b07e55f46bd1 Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Mon, 13 May 2013 12:43:42 +0200
++Subject: [PATCH 06/15] Add clGetDeviceInfo(..., CL_BUILT_IN_KERNELS, ...)
++To: beignet at lists.freedesktop.org
++
++Currently, there are no built-in kernels, so this function returns an empty
++string.
++---
++ src/cl_device_id.c |    1 +
++ src/cl_device_id.h |    2 ++
++ src/cl_gt_device.h |    1 +
++ 3 files changed, 4 insertions(+)
++
++Index: beignet-0.1+git20130514+19e9c58/src/cl_device_id.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_device_id.c	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_device_id.c	2013-05-14 20:09:06.922022404 +0200
++@@ -230,6 +230,7 @@
++     DECL_STRING_FIELD(PROFILE, profile)
++     DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version)
++     DECL_STRING_FIELD(EXTENSIONS, extensions);
+++    DECL_STRING_FIELD(BUILT_IN_KERNELS, built_in_kernels)
++ 
++     case CL_DRIVER_VERSION:
++       if (param_value_size_ret) {
++Index: beignet-0.1+git20130514+19e9c58/src/cl_device_id.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_device_id.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_device_id.h	2013-05-14 20:09:06.922022404 +0200
++@@ -84,6 +84,7 @@
++   const char *opencl_c_version;
++   const char *extensions;
++   const char *driver_version;
+++  const char *built_in_kernels;
++   size_t name_sz;
++   size_t vendor_sz;
++   size_t version_sz;
++@@ -91,6 +92,7 @@
++   size_t opencl_c_version_sz;
++   size_t extensions_sz;
++   size_t driver_version_sz;
+++  size_t built_in_kernels_sz;
++   /* Kernel specific info that we're assigning statically */
++   size_t wg_sz;
++   size_t compile_wg_sz[3];
++Index: beignet-0.1+git20130514+19e9c58/src/cl_gt_device.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_gt_device.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_gt_device.h	2013-05-14 20:09:06.922022404 +0200
++@@ -72,6 +72,7 @@
++ DECL_INFO_STRING(profile, "FULL_PROFILE")
++ DECL_INFO_STRING(opencl_c_version, "OpenCL 1.10")
++ DECL_INFO_STRING(extensions, "")
+++DECL_INFO_STRING(built_in_kernels, "")
++ DECL_INFO_STRING(driver_version, LIBCL_VERSION_STRING)
++ #undef DECL_INFO_STRING
++ 
diff --cc debian/patches/0007-Correct-type-of-device-properties.patch
index 0000000,0000000..577bd5b
new file mode 100644
--- /dev/null
+++ b/debian/patches/0007-Correct-type-of-device-properties.patch
@@@ -1,0 -1,0 +1,34 @@@
++From 5f6ccf7410d17c9e775ce30e8deb5036d6b79ab4 Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Mon, 13 May 2013 12:43:57 +0200
++Subject: [PATCH 07/15] Correct type of device properties
++To: beignet at lists.freedesktop.org
++
++ - CL_DEVICE_MAX_PARAMETER_SIZE is of type size_t
++ - CL_DEVICE_MAX_WORK_GROUP_SIZE is of type size_t
++---
++ src/cl_device_id.h |    4 ++--
++ 1 file changed, 2 insertions(+), 2 deletions(-)
++
++Index: beignet-0.1+git20130514+19e9c58/src/cl_device_id.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_device_id.h	2013-05-14 20:09:06.922022404 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_device_id.h	2013-05-14 20:09:10.686022236 +0200
++@@ -29,7 +29,7 @@
++   cl_uint  max_thread_per_unit;
++   cl_uint  max_work_item_dimensions;
++   size_t   max_work_item_sizes[3];
++-  cl_uint  max_work_group_size;
+++  size_t   max_work_group_size;
++   cl_uint  preferred_vector_width_char;
++   cl_uint  preferred_vector_width_short;
++   cl_uint  preferred_vector_width_int;
++@@ -56,7 +56,7 @@
++   size_t   image3d_max_height;
++   size_t   image3d_max_depth;
++   cl_uint  max_samplers;
++-  cl_uint  max_parameter_size;
+++  size_t   max_parameter_size;
++   cl_uint  mem_base_addr_align;
++   cl_uint  min_data_type_align_size;
++   cl_device_fp_config single_fp_config;
diff --cc debian/patches/0008-Update-gitignore-files.patch
index 0000000,0000000..1cdfdec
new file mode 100644
--- /dev/null
+++ b/debian/patches/0008-Update-gitignore-files.patch
@@@ -1,0 -1,0 +1,65 @@@
++From 9555f0c6417c0af7f4a72084f28c40d63b1a93dc Mon Sep 17 00:00:00 2001
++From: Simon Richter <Simon.Richter at hogyros.de>
++Date: Mon, 13 May 2013 20:01:08 +0200
++Subject: [PATCH 08/15] Update gitignore files
++To: beignet at lists.freedesktop.org
++
++ - Ignore CMake built files
++ - Ignore .so files only in the subdirectory
++ - Ignore generated .bmp files
++ - Ignore generated config headers
++ - Ignore generated source for OCL
++---
++ .gitignore             |    5 ++++-
++ backend/src/.gitignore |    3 +++
++ src/.gitignore         |    2 ++
++ utests/.gitignore      |   13 +++++++++++++
++ 4 files changed, 22 insertions(+), 1 deletion(-)
++ create mode 100644 src/.gitignore
++ create mode 100644 utests/.gitignore
++
++Index: beignet-0.1+git20130514+19e9c58/.gitignore
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/.gitignore	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/.gitignore	2013-05-14 20:09:13.630022105 +0200
++@@ -1,2 +1,5 @@
++ *.o
++-*.so*
+++CMakeCache.txt
+++CMakeFiles/
+++Makefile
+++cmake_install.cmake
++Index: beignet-0.1+git20130514+19e9c58/backend/src/.gitignore
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/.gitignore	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/.gitignore	2013-05-14 20:09:13.630022105 +0200
++@@ -1 +1,4 @@
+++GBEConfig.h
+++libgbe.so
+++ocl_common_defines_str.cpp
++ ocl_stdlib_str.cpp
++Index: beignet-0.1+git20130514+19e9c58/src/.gitignore
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/src/.gitignore	2013-05-14 20:09:13.630022105 +0200
++@@ -0,0 +1,2 @@
+++OCLConfig.h
+++libcl.so
++Index: beignet-0.1+git20130514+19e9c58/utests/.gitignore
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/utests/.gitignore	2013-05-14 20:09:13.630022105 +0200
++@@ -0,0 +1,13 @@
+++compiler_box_blur.bmp
+++compiler_box_blur_float.bmp
+++compiler_clod.bmp
+++compiler_julia.bmp
+++compiler_julia_no_break.bmp
+++compiler_mandelbrot.bmp
+++compiler_mandelbrot_alternate.bmp
+++compiler_menger_sponge_no_shadow.bmp
+++compiler_nautilus.bmp
+++compiler_ribbon.bmp
+++flat_address_space
+++libutests.so
+++utest_run
diff --cc debian/patches/0009-GBE-refine-the-sampler-implementation-to-comply-with.patch
index 0000000,0000000..361bc64
new file mode 100644
--- /dev/null
+++ b/debian/patches/0009-GBE-refine-the-sampler-implementation-to-comply-with.patch
@@@ -1,0 -1,0 +1,418 @@@
++From 951d7db26d4ae1919d52219677fcf36242b8330a Mon Sep 17 00:00:00 2001
++From: Zhigang Gong <zhigang.gong at linux.intel.com>
++Date: Mon, 13 May 2013 11:32:18 +0800
++Subject: [PATCH 09/15] GBE: refine the sampler implementation to comply with
++ spec.
++To: beignet at lists.freedesktop.org
++
++The previous implementation used a new address space pointer to
++represent a sampler. The reason is that there is no specified data
++type for sampler_t in the LLVM front end, thus we can't determine the
++sampler argument type if we use a normal integer to represent the
++sampler. But that breaks the OCL spec: the spec allows the kernel
++to define and initialize sampler variables on the kernel side.
++
++Now I use a slightly tricky way to fix this problem. First, I decided
++to use a normal unsigned integer to represent sampler_t on the kernel side.
++Then at compile time, I check the read_imagexxx functions' sampler
++arguments. If the argument is a constant value, then it should be a
++kernel side defined sampler, so I insert the sampler type into a
++global sampler set for the current kernel function. If the argument
++is not a constant value, then I check whether it's a kernel
++argument; if it is, I fix up the corresponding kernel arg type
++to SAMPLER there.
++
++To unify the kernel side defined samplers and kernel argument samplers,
++I add two new gbe APIs to export all the kernel side defined sampler
++data and size to the runtime library. Later, the runtime library
++can use this information to append new samplers to the unified sampler
++buffer and bind all the samplers at one time.
++
++Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
++---
++ backend/src/CMakeLists.txt            |    2 +
++ backend/src/backend/program.cpp       |   20 +++++++++-
++ backend/src/backend/program.h         |    8 ++++
++ backend/src/backend/program.hpp       |   11 ++++++
++ backend/src/ir/function.cpp           |    1 +
++ backend/src/ir/function.hpp           |   11 ++++++
++ backend/src/ir/sampler.cpp            |   46 ++++++++++++++++++++++
++ backend/src/ir/sampler.hpp            |   67 +++++++++++++++++++++++++++++++++
++ backend/src/llvm/llvm_gen_backend.cpp |   22 ++++++++++-
++ backend/src/ocl_stdlib.h              |    6 +--
++ 10 files changed, 189 insertions(+), 5 deletions(-)
++ create mode 100644 backend/src/ir/sampler.cpp
++ create mode 100644 backend/src/ir/sampler.hpp
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/CMakeLists.txt	2013-05-14 20:05:52.618031067 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt	2013-05-14 20:09:16.362021983 +0200
++@@ -61,6 +61,8 @@
++     ir/unit.hpp
++     ir/constant.cpp
++     ir/constant.hpp
+++    ir/sampler.cpp
+++    ir/sampler.hpp
++     ir/instruction.cpp
++     ir/instruction.hpp
++     ir/liveness.cpp
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.cpp	2013-05-14 20:07:18.782027225 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.cpp	2013-05-14 20:09:16.366021983 +0200
++@@ -49,10 +49,11 @@
++ namespace gbe {
++ 
++   Kernel::Kernel(const std::string &name) :
++-    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL)
+++    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL), samplerSet(NULL)
++   {}
++   Kernel::~Kernel(void) {
++     if(ctx) GBE_DELETE(ctx);
+++    if(samplerSet) GBE_DELETE(samplerSet);
++     GBE_SAFE_DELETE_ARRAY(args);
++   }
++   int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
++@@ -90,6 +91,7 @@
++     for (const auto &pair : set) {
++       const std::string &name = pair.first;
++       Kernel *kernel = this->compileKernel(unit, name);
+++      kernel->setSamplerSet(pair.second->getSamplerSet());
++       kernels.insert(std::make_pair(name, kernel));
++     }
++     return true;
++@@ -250,6 +252,18 @@
++     return kernel->setConstBufSize(argID, sz);
++   }
++ 
+++  static size_t kernelGetSamplerSize(gbe_kernel gbeKernel) {
+++    if (gbeKernel == NULL) return 0;
+++    const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel;
+++    return kernel->getSamplerSize();
+++  }
+++
+++  static void kernelGetSamplerData(gbe_kernel gbeKernel, uint32_t *samplers) {
+++    if (gbeKernel == NULL) return;
+++    const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel;
+++    kernel->getSamplerData(samplers);
+++  }
+++
++   static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
++     return 0u;
++   }
++@@ -277,6 +291,8 @@
++ GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL;
++ GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
++ GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
+++GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size = NULL;
+++GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data = NULL;
++ 
++ namespace gbe
++ {
++@@ -304,6 +320,8 @@
++       gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize;
++       gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
++       gbe_kernel_use_slm = gbe::kernelUseSLM;
+++      gbe_kernel_get_sampler_size = gbe::kernelGetSamplerSize;
+++      gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData;
++       genSetupCallBacks();
++     }
++   };
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.h	2013-05-14 20:09:16.366021983 +0200
++@@ -114,6 +114,14 @@
++ typedef void (gbe_program_get_global_constant_data_cb)(gbe_program gbeProgram, char *mem);
++ extern gbe_program_get_global_constant_data_cb *gbe_program_get_global_constant_data;
++ 
+++/*! Get the size of defined samplers */
+++typedef size_t (gbe_kernel_get_sampler_size_cb)(gbe_kernel gbeKernel);
+++extern gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size;
+++
+++/*! Get the content of defined samplers */
+++typedef void (gbe_kernel_get_sampler_data_cb)(gbe_kernel gbeKernel, uint32_t *samplers);
+++extern gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data;
+++
++ /*! Destroy and deallocate the given program */
++ typedef void (gbe_program_delete_cb)(gbe_program);
++ extern gbe_program_delete_cb *gbe_program_delete;
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.hpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.hpp	2013-05-14 20:09:16.366021983 +0200
++@@ -29,6 +29,8 @@
++ #include "backend/context.hpp"
++ #include "ir/constant.hpp"
++ #include "ir/unit.hpp"
+++#include "ir/function.hpp"
+++#include "ir/sampler.hpp"
++ #include "sys/hash_map.hpp"
++ #include "sys/vector.hpp"
++ #include <string>
++@@ -108,6 +110,14 @@
++       }
++       return -1;
++     }
+++    /*! Set sampler set. */
+++    void setSamplerSet(ir::SamplerSet *from) {
+++      samplerSet = from;
+++    }
+++    /*! Get defined sampler size */
+++    size_t getSamplerSize(void) const { return samplerSet->getDataSize(); }
+++    /*! Get defined sampler value array */
+++    void getSamplerData(uint32_t *samplers) const { samplerSet->getData(samplers); }
++   protected:
++     friend class Context;      //!< Owns the kernels
++     const std::string name;    //!< Kernel name
++@@ -119,6 +129,7 @@
++     uint32_t stackSize;        //!< Stack size (may be 0 if unused)
++     bool useSLM;               //!< SLM requires a special HW config
++     Context *ctx;              //!< Save context after compiler to alloc constant buffer curbe
+++    ir::SamplerSet *samplerSet;//!< Copy from the corresponding function.
++     GBE_CLASS(Kernel);         //!< Use custom allocators
++   };
++ 
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.cpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.cpp	2013-05-14 20:09:16.366021983 +0200
++@@ -46,6 +46,7 @@
++     name(name), unit(unit), profile(profile), simdWidth(0), useSLM(false)
++   {
++     initProfile(*this);
+++    samplerSet = GBE_NEW(SamplerSet);
++   }
++ 
++   Function::~Function(void) {
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.hpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp	2013-05-14 20:09:16.366021983 +0200
++@@ -28,6 +28,7 @@
++ #include "ir/register.hpp"
++ #include "ir/instruction.hpp"
++ #include "ir/profile.hpp"
+++#include "ir/sampler.hpp"
++ #include "sys/vector.hpp"
++ #include "sys/set.hpp"
++ #include "sys/map.hpp"
++@@ -217,6 +218,12 @@
++       for (auto arg : args) if (arg->reg == reg) return arg;
++       return NULL;
++     }
+++
+++    INLINE FunctionArgument *getArg(const Register &reg) {
+++      for (auto arg : args) if (arg->reg == reg) return arg;
+++      return NULL;
+++    }
+++
++     /*! Get output register */
++     INLINE Register getOutput(uint32_t ID) const { return outputs[ID]; }
++     /*! Get the argument location for the pushed register */
++@@ -281,6 +288,9 @@
++     INLINE bool getUseSLM(void) const { return this->useSLM; }
++     /*! Change the SLM config for the function */
++     INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; }
+++    /*! Get sampler set in this function */
+++    SamplerSet* getSamplerSet(void) {return samplerSet; }
+++    //const SamplerSet& getSamplerSet(void) const {return samplerSet; }
++   private:
++     friend class Context;           //!< Can freely modify a function
++     std::string name;               //!< Function name
++@@ -296,6 +306,7 @@
++     LocationMap locationMap;        //!< Pushed function arguments (loc->reg)
++     uint32_t simdWidth;             //!< 8 or 16 if forced, 0 otherwise
++     bool useSLM;                    //!< Is SLM required?
+++    SamplerSet *samplerSet;
++     GBE_CLASS(Function);            //!< Use custom allocator
++   };
++ 
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp	2013-05-14 20:09:16.366021983 +0200
++@@ -0,0 +1,46 @@
+++/*
+++ * Copyright © 2012 Intel Corporation
+++ *
+++ * This library is free software; you can redistribute it and/or
+++ * modify it under the terms of the GNU Lesser General Public
+++ * License as published by the Free Software Foundation; either
+++ * version 2 of the License, or (at your option) any later version.
+++ *
+++ * This library is distributed in the hope that it will be useful,
+++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+++ * Lesser General Public License for more details.
+++ *
+++ * You should have received a copy of the GNU Lesser General Public
+++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+++ *
+++ */
+++
+++/**
+++ * \file sampler.cpp
+++ *
+++ */
+++#include "sampler.hpp"
+++#include "context.hpp"
+++
+++namespace gbe {
+++namespace ir {
+++
+++  Register SamplerSet::append(uint32_t samplerValue, Context *ctx)
+++  {
+++    int i = 0;
+++
+++    for(auto it = regMap.begin();
+++        it != regMap.end(); ++it, ++i)
+++    {
+++      if (it->first == samplerValue)
+++        return it->second;
+++    }
+++    Register reg = ctx->reg(FAMILY_DWORD);
+++    ctx->LOADI(ir::TYPE_S32, reg, ctx->newIntegerImmediate(i, ir::TYPE_S32));
+++    regMap.insert(std::make_pair(samplerValue, reg));
+++    return reg;
+++  }
+++
+++} /* namespace ir */
+++} /* namespace gbe */
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.hpp
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.hpp	2013-05-14 20:09:16.366021983 +0200
++@@ -0,0 +1,67 @@
+++/*
+++ * Copyright © 2012 Intel Corporation
+++ *
+++ * This library is free software; you can redistribute it and/or
+++ * modify it under the terms of the GNU Lesser General Public
+++ * License as published by the Free Software Foundation; either
+++ * version 2 of the License, or (at your option) any later version.
+++ *
+++ * This library is distributed in the hope that it will be useful,
+++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+++ * Lesser General Public License for more details.
+++ *
+++ * You should have received a copy of the GNU Lesser General Public
+++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+++ *
+++ */
+++
+++/**
+++ * \file sampler.hpp
+++ *
+++ * \author Benjamin Segovia <benjamin.segovia at intel.com>
+++ */
+++#ifndef __GBE_IR_SAMPLER_HPP__
+++#define __GBE_IR_SAMPLER_HPP__
+++
+++#include "ir/register.hpp"
+++#include "sys/map.hpp"
+++
+++
+++namespace gbe {
+++namespace ir {
+++
+++  /*! A sampler set is a set of global samplers which are defined as constant global
+++   * sampler or defined in the outermost kernel scope variables. According to the spec
+++   * all the variable should have a initialized integer value and can't be modified.
+++   */
+++  class Context;
+++
+++  class SamplerSet
+++  {
+++  public:
+++    /*! Append the specified sampler and return the allocated offset.
+++     *  If the specified sampler already exists, only return the previous offset and
+++     *  don't append it again. Return -1, if failed.*/
+++    Register append(uint32_t clkSamplerValue, Context *ctx);
+++    size_t getDataSize(void) { return regMap.size(); }
+++    size_t getDataSize(void) const { return regMap.size(); }
+++    void getData(uint32_t *samplers) const {
+++      for ( auto &it : regMap)
+++        *samplers++ = it.first;
+++    }
+++
+++    void operator = (const SamplerSet& other) {
+++      regMap.insert(other.regMap.begin(), other.regMap.end());
+++    }
+++
+++    SamplerSet(const SamplerSet& other) : regMap(other.regMap.begin(), other.regMap.end()) { }
+++    SamplerSet() {}
+++  private:
+++    map<uint32_t, Register> regMap;
+++    GBE_CLASS(SamplerSet);
+++  };
+++} /* namespace ir */
+++} /* namespace gbe */
+++
+++#endif /* __GBE_IR_SAMPLER_HPP__ */
++Index: beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/llvm/llvm_gen_backend.cpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:16.370021983 +0200
++@@ -1990,7 +1990,27 @@
++           case GEN_OCL_READ_IMAGE15:
++           {
++             GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
++-            GBE_ASSERT(AI != AE); const ir::Register sampler = this->getRegister(*AI); ++AI;
+++            GBE_ASSERT(AI != AE);
+++            Constant *CPV = dyn_cast<Constant>(*AI);
+++            ir::Register sampler;
+++            if (CPV != NULL)
+++            {
+++              // This is not a kernel argument sampler, we need to append it to sampler set,
+++              // and allocate a sampler slot for it.
+++               auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
+++               GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32, "Invalid sampler type");
+++               sampler = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx);
+++            } else {
+++              // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, we have to fix up the argument
+++              // type here. Once we switch to the LLVM and use the new data type sampler_t, we can remove this
+++              // work around.
+++              sampler = this->getRegister(*AI);
+++              ir::FunctionArgument *arg =  ctx.getFunction().getArg(sampler);
+++              GBE_ASSERT(arg != NULL);
+++              arg->type = ir::FunctionArgument::SAMPLER;
+++            }
+++            ++AI;
+++
++             GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
++             GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
++             ir::Register wcoord;
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:09:02.546022599 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:09:16.370021983 +0200
++@@ -46,7 +46,6 @@
++ #define __constant __attribute__((address_space(2)))
++ #define __local __attribute__((address_space(3)))
++ #define __texture __attribute__((address_space(4)))
++-#define __sampler __attribute__((address_space(5)))
++ #define global __global
++ //#define local __local
++ #define constant __constant
++@@ -77,7 +76,8 @@
++ typedef __texture struct _image2d_t* image2d_t;
++ struct _image3d_t;
++ typedef __texture struct _image3d_t* image3d_t;
++-typedef __sampler uint* sampler_t;
+++//typedef __sampler const uint* sampler_t;
+++typedef uint sampler_t;
++ typedef size_t event_t;
++ 
++ /////////////////////////////////////////////////////////////////////////////
++@@ -3966,7 +3966,7 @@
++   INLINE_OVERLOADABLE type read_image ##suffix(image2d_t cl_image, sampler_t sampler, coord_type coord) \
++   {\
++     GET_IMAGE(cl_image, surface_id);\
++-    return __gen_ocl_read_image ##suffix(surface_id, (uint)sampler, coord.s0, coord.s1);\
+++    return __gen_ocl_read_image ##suffix(surface_id, sampler, coord.s0, coord.s1);\
++   }
++ 
++ #define DECL_WRITE_IMAGE(type, suffix, coord_type) \
diff --cc debian/patches/0010-CL-Support-kernel-side-defined-samplers.patch
index 0000000,0000000..302980d
new file mode 100644
--- /dev/null
+++ b/debian/patches/0010-CL-Support-kernel-side-defined-samplers.patch
@@@ -1,0 -1,0 +1,378 @@@
++From 63a49c9c392dc31802167d88a3b309f646977903 Mon Sep 17 00:00:00 2001
++From: Zhigang Gong <zhigang.gong at linux.intel.com>
++Date: Mon, 13 May 2013 11:32:19 +0800
++Subject: [PATCH 10/15] CL: Support kernel side defined samplers.
++To: beignet at lists.freedesktop.org
++
++We changed the way samplers are handled. We gather all the kernel side
++defined samplers and the samplers passed as kernel arguments into one
++sampler array, instead of allocating a single sampler each time.
++
++Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
++---
++ src/cl_command_queue.c      |    4 ++--
++ src/cl_command_queue_gen7.c |    2 ++
++ src/cl_driver.h             |    6 +++---
++ src/cl_driver_defs.c        |    2 +-
++ src/cl_kernel.c             |   36 ++++++++++++++++++++++-----------
++ src/cl_kernel.h             |    3 +++
++ src/cl_sampler.c            |   43 ++++++++++++++++++++++++++++++++++++++++
++ src/cl_sampler.h            |    5 +++++
++ src/intel/intel_gpgpu.c     |   46 ++++++++++++++++++++-----------------------
++ 9 files changed, 104 insertions(+), 43 deletions(-)
++
++Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue.c	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c	2013-05-14 20:09:19.978021822 +0200
++@@ -119,8 +119,6 @@
++                           k->args[i].mem->w, k->args[i].mem->h,
++                           k->args[i].mem->pitch, k->args[i].mem->tiling);
++     } else if (arg_type == GBE_ARG_SAMPLER) {
++-      uint32_t *curbe_index = (uint32_t*)(k->curbe + offset);
++-      cl_gpgpu_insert_sampler(queue->gpgpu, curbe_index, k->args[i].sampler);
++     } else
++       cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
++   }
++@@ -379,6 +377,8 @@
++   else
++     FATAL ("Unknown Gen Device");
++ 
+++  k->arg_sampler_sz = 0;
+++
++ #if USE_FULSIM
++   if (run_it != NULL && strcmp(run_it, "1") == 0) {
++     TRY (cl_fulsim_dump_all_surfaces, queue, k);
++Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue_gen7.c	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c	2013-05-14 20:09:19.978021822 +0200
++@@ -224,6 +224,8 @@
++ 
++   /* Bind user buffers */
++   cl_command_queue_bind_surface(queue, ker);
+++  /* Bind all samplers */
+++  cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->arg_sampler_sz + ker->sampler_sz);
++ 
++   /* Bind a stack if needed */
++   cl_bind_stack(gpgpu, ker);
++Index: beignet-0.1+git20130514+19e9c58/src/cl_driver.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_driver.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_driver.h	2013-05-14 20:09:19.978021822 +0200
++@@ -110,9 +110,9 @@
++ typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t cchint);
++ extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;
++ 
++-/* Insert a sampler */
++-typedef void (cl_gpgpu_insert_sampler_cb)(cl_gpgpu, uint32_t *curbe_index, cl_sampler sampler);
++-extern cl_gpgpu_insert_sampler_cb *cl_gpgpu_insert_sampler;
+++/* bind samplers defined in both kernel and kernel args. */
+++typedef void (cl_gpgpu_bind_sampler_cb)(cl_gpgpu, uint32_t *samplers, size_t sampler_sz);
+++extern cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler;
++ 
++ /* Set a 2d texture */
++ typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state,
++Index: beignet-0.1+git20130514+19e9c58/src/cl_driver_defs.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_driver_defs.c	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_driver_defs.c	2013-05-14 20:09:19.978021822 +0200
++@@ -59,5 +59,5 @@
++ LOCAL cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end = NULL;
++ LOCAL cl_gpgpu_flush_cb *cl_gpgpu_flush = NULL;
++ LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL;
++-LOCAL cl_gpgpu_insert_sampler_cb *cl_gpgpu_insert_sampler = NULL;
+++LOCAL cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler = NULL;
++ 
++Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.c	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.c	2013-05-14 20:09:19.978021822 +0200
++@@ -110,6 +110,7 @@
++   if (arg_type == GBE_ARG_VALUE) {
++     if (UNLIKELY(value == NULL))
++       return CL_INVALID_KERNEL_ARGS;
+++
++     offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
++     assert(offset + sz <= k->curbe_sz);
++     memcpy(k->curbe + offset, value, sz);
++@@ -129,20 +130,22 @@
++     return CL_SUCCESS;
++   }
++ 
++-  /* For a sampler*/
+++  /* Is it a sampler*/
++   if (arg_type == GBE_ARG_SAMPLER) {
++-     cl_sampler sampler;
++-     if (UNLIKELY(value == NULL))
+++    cl_sampler sampler;
+++    memcpy(&sampler, value, sz);
+++    if (UNLIKELY(sampler->magic != CL_MAGIC_SAMPLER_HEADER))
++       return CL_INVALID_KERNEL_ARGS;
++-     sampler = *(cl_sampler*)value;
++-
++-     if (UNLIKELY(sampler->magic != CL_MAGIC_SAMPLER_HEADER))
++-       return CL_INVALID_ARG_VALUE;
++-     k->args[index].local_sz = 0;
++-     k->args[index].is_set = 1;
++-     k->args[index].mem = NULL;
++-     k->args[index].sampler = sampler;
++-     return CL_SUCCESS;
+++    uint32_t slot;
+++    k->args[index].local_sz = 0;
+++    k->args[index].is_set = 1;
+++    k->args[index].mem = NULL;
+++    k->args[index].sampler = sampler;
+++    slot = cl_arg_sampler_insert(k, sampler);
+++    offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+++    assert(offset + sz <= k->curbe_sz);
+++    memcpy(k->curbe + offset, &slot, sizeof(slot));
+++    return CL_SUCCESS;
++   }
++ 
++   /* Otherwise, we just need to check that this is a buffer */
++@@ -203,6 +206,12 @@
++ 
++   /* Create the curbe */
++   k->curbe_sz = gbe_kernel_get_curbe_size(k->opaque);
+++
+++  /* Get sampler data & size */
+++  k->sampler_sz = gbe_kernel_get_sampler_size(k->opaque);
+++  k->arg_sampler_sz = 0;
+++  assert(k->sampler_sz <= GEN_MAX_SAMPLERS);
+++  gbe_kernel_get_sampler_data(k->opaque, k->samplers);
++ }
++ 
++ LOCAL cl_kernel
++@@ -221,6 +230,9 @@
++   to->program = from->program;
++   to->arg_n = from->arg_n;
++   to->curbe_sz = from->curbe_sz;
+++  to->sampler_sz = from->sampler_sz;
+++  to->arg_sampler_sz = from->arg_sampler_sz;
+++  memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
++   TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
++   if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
++ 
++Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.h	2013-05-14 20:09:19.978021822 +0200
++@@ -52,7 +52,10 @@
++   gbe_kernel opaque;          /* (Opaque) compiler structure for the OCL kernel */
++   char *curbe;                /* One curbe per kernel */
++   size_t curbe_sz;            /* Size of it */
+++  uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel */
+++  size_t sampler_sz;          /* sampler size defined in kernel */
++   cl_argument *args;          /* To track argument setting */
+++  size_t arg_sampler_sz;      /* sampler size defined in kernel args */
++   uint32_t arg_n:31;          /* Number of arguments */
++   uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
++ };
++Index: beignet-0.1+git20130514+19e9c58/src/cl_sampler.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_sampler.c	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_sampler.c	2013-05-14 20:09:19.978021822 +0200
++@@ -22,9 +22,50 @@
++ #include "cl_utils.h"
++ #include "cl_alloc.h"
++ #include "cl_khr_icd.h"
+++#include "cl_kernel.h"
++ 
++ #include <assert.h>
++ 
+++uint32_t cl_to_clk(cl_bool normalized_coords,
+++                   cl_addressing_mode address,
+++                   cl_filter_mode filter)
+++{
+++  int clk_address;
+++  int clk_filter;
+++  switch (address) {
+++  case CL_ADDRESS_NONE: clk_address = CLK_ADDRESS_NONE; break;
+++  case CL_ADDRESS_CLAMP: clk_address = CLK_ADDRESS_CLAMP; break;
+++  case CL_ADDRESS_CLAMP_TO_EDGE: clk_address = CLK_ADDRESS_CLAMP_TO_EDGE; break;
+++  case CL_ADDRESS_REPEAT: clk_address = CLK_ADDRESS_REPEAT; break;
+++  case CL_ADDRESS_MIRRORED_REPEAT: clk_address = CLK_ADDRESS_MIRRORED_REPEAT; break;
+++  default:
+++    assert(0);
+++  }
+++  switch(filter) {
+++  case CL_FILTER_NEAREST: clk_filter = CLK_FILTER_NEAREST; break;
+++  case CL_FILTER_LINEAR: clk_filter = CLK_FILTER_LINEAR; break;
+++  default:
+++    assert(0);
+++  }
+++  return (clk_address << __CLK_ADDRESS_BASE)
+++         | (normalized_coords << __CLK_NORMALIZED_BASE)
+++         | (clk_filter << __CLK_FILTER_BASE);
+++}
+++
+++int cl_arg_sampler_insert(cl_kernel k, cl_sampler sampler)
+++{
+++  int i, slot_id;
+++  for(i = 0; i < k->sampler_sz; i++)
+++  {
+++    if (k->samplers[i] == sampler->clkSamplerValue)
+++      return i;
+++  }
+++  slot_id = k->sampler_sz + k->arg_sampler_sz;
+++  k->samplers[slot_id] = sampler->clkSamplerValue;
+++  k->arg_sampler_sz++;
+++  return slot_id;
+++}
+++
++ LOCAL cl_sampler
++ cl_sampler_new(cl_context ctx,
++                cl_bool normalized_coords,
++@@ -54,6 +95,8 @@
++   sampler->ctx = ctx;
++   cl_context_add_ref(ctx);
++ 
+++  sampler->clkSamplerValue = cl_to_clk(normalized_coords, address, filter);
+++
++ exit:
++   if (errcode_ret)
++     *errcode_ret = err;
++Index: beignet-0.1+git20130514+19e9c58/src/cl_sampler.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_sampler.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_sampler.h	2013-05-14 20:09:19.978021822 +0200
++@@ -21,6 +21,7 @@
++ #define __CL_SAMPLER_H__
++ 
++ #include "CL/cl.h"
+++#include "../backend/src/ocl_common_defines.h"
++ #include <stdint.h>
++ 
++ /* How to access images */
++@@ -33,6 +34,7 @@
++   cl_bool normalized_coords; /* Are coordinates normalized? */
++   cl_addressing_mode address;/* CLAMP / REPEAT and so on... */
++   cl_filter_mode filter;     /* LINEAR / NEAREST mostly */
+++  uint32_t clkSamplerValue;
++ };
++ 
++ /* Create a new sampler object */
++@@ -48,5 +50,8 @@
++ /* Add one more reference to this object */
++ extern void cl_sampler_add_ref(cl_sampler);
++ 
+++/* insert a new argument sampler */
+++int cl_arg_sampler_insert(cl_kernel k, cl_sampler sampler);
+++
++ #endif /* __CL_SAMPLER_H__ */
++ 
++Index: beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/intel/intel_gpgpu.c	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c	2013-05-14 20:09:19.982021822 +0200
++@@ -471,16 +471,6 @@
++ }
++ 
++ static int
++-intel_gpgpu_get_free_sampler_index(intel_gpgpu_t *gpgpu)
++-{
++-  int slot;
++-  assert(~gpgpu->sampler_bitmap != 0);
++-  slot = __fls(~gpgpu->sampler_bitmap);
++-  gpgpu->sampler_bitmap |= (1 << slot);
++-  return slot;
++-}
++-
++-static int
++ intel_get_surface_type(cl_mem_object_type type)
++ {
++   switch (type) {
++@@ -662,10 +652,10 @@
++ int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest)
++ {
++    switch( cl_address_mode ) {
++-   case CL_ADDRESS_NONE:
++-   case CL_ADDRESS_REPEAT:
+++   case CLK_ADDRESS_NONE:
+++   case CLK_ADDRESS_REPEAT:
++       return GEN_TEXCOORDMODE_WRAP;
++-   case CL_ADDRESS_CLAMP:
+++   case CLK_ADDRESS_CLAMP:
++       /* GL_CLAMP is the weird mode where coordinates are clamped to
++        * [0.0, 1.0], so linear filtering of coordinates outside of
++        * [0.0, 1.0] give you half edge texel value and half border
++@@ -679,9 +669,9 @@
++          return GEN_TEXCOORDMODE_CLAMP;
++       else
++          return GEN_TEXCOORDMODE_CLAMP_BORDER;
++-   case CL_ADDRESS_CLAMP_TO_EDGE:
+++   case CLK_ADDRESS_CLAMP_TO_EDGE:
++       return GEN_TEXCOORDMODE_CLAMP;
++-   case CL_ADDRESS_MIRRORED_REPEAT:
+++   case CLK_ADDRESS_MIRRORED_REPEAT:
++       return GEN_TEXCOORDMODE_MIRROR;
++    default:
++       return GEN_TEXCOORDMODE_WRAP;
++@@ -689,35 +679,33 @@
++ }
++ 
++ static void
++-intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t *curbe_index, cl_sampler cl_sampler)
+++intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler)
++ {
++-  int index;
++   int using_nearest = 0;
++   uint32_t wrap_mode;
++   gen7_sampler_state_t *sampler;
++ 
++-  index = intel_gpgpu_get_free_sampler_index(gpgpu);
++   sampler = (gen7_sampler_state_t *)gpgpu->sampler_state_b.bo->virtual + index;
++-  if (!cl_sampler->normalized_coords)
+++  if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE)
++     sampler->ss3.non_normalized_coord = 1;
++   else
++     sampler->ss3.non_normalized_coord = 0;
++ 
++-  switch (cl_sampler->filter) {
++-  case CL_FILTER_NEAREST:
+++  switch (clk_sampler & __CLK_FILTER_MASK) {
+++  case CLK_FILTER_NEAREST:
++     sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST;
++     sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
++     sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST;
++     using_nearest = 1;
++     break;
++-  case CL_FILTER_LINEAR:
+++  case CLK_FILTER_LINEAR:
++     sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR;
++     sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
++     sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR;
++     break;
++   }
++ 
++-  wrap_mode = translate_wrap_mode(cl_sampler->address, using_nearest);
+++  wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest);
++   sampler->ss3.r_wrap_mode = wrap_mode;
++   sampler->ss3.s_wrap_mode = wrap_mode;
++   sampler->ss3.t_wrap_mode = wrap_mode;
++@@ -738,7 +726,15 @@
++      sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG |
++                                    GEN_ADDRESS_ROUNDING_ENABLE_V_MAG |
++                                    GEN_ADDRESS_ROUNDING_ENABLE_R_MAG;
++-  *curbe_index = index;
+++}
+++
+++static void
+++intel_gpgpu_bind_sampler(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz)
+++{
+++  int index;
+++  assert(sampler_sz <= GEN_MAX_SAMPLERS);
+++  for(index = 0; index < sampler_sz; index++)
+++    intel_gpgpu_insert_sampler(gpgpu, index, samplers[index]);
++ }
++ 
++ static void
++@@ -815,6 +811,6 @@
++   cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end;
++   cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush;
++   cl_gpgpu_walker = (cl_gpgpu_walker_cb *) intel_gpgpu_walker;
++-  cl_gpgpu_insert_sampler = (cl_gpgpu_insert_sampler_cb *) intel_gpgpu_insert_sampler;
+++  cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler;
++ }
++ 
diff --cc debian/patches/0011-utests-Add-one-test-cases-for-sampler-support.patch
index 0000000,0000000..48a8ef6
new file mode 100644
--- /dev/null
+++ b/debian/patches/0011-utests-Add-one-test-cases-for-sampler-support.patch
@@@ -1,0 -1,0 +1,150 @@@
++From b4dc8376a24cc4e0bfacdb57ca3a1da87c8c90f4 Mon Sep 17 00:00:00 2001
++From: Zhigang Gong <zhigang.gong at linux.intel.com>
++Date: Mon, 13 May 2013 11:32:20 +0800
++Subject: [PATCH 11/15] utests: Add one test cases for sampler support.
++To: beignet at lists.freedesktop.org
++
++This new case tests samplers defined on the kernel side and samplers
++passed in as kernel arguments.
++
++Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
++---
++ kernels/test_copy_image1.cl     |   33 +++++++++++++++++
++ utests/CMakeLists.txt           |    1 +
++ utests/compiler_copy_image1.cpp |   77 +++++++++++++++++++++++++++++++++++++++
++ 3 files changed, 111 insertions(+)
++ create mode 100644 kernels/test_copy_image1.cl
++ create mode 100644 utests/compiler_copy_image1.cpp
++
++Index: beignet-0.1+git20130514+19e9c58/kernels/test_copy_image1.cl
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/kernels/test_copy_image1.cl	2013-05-14 20:09:21.802021741 +0200
++@@ -0,0 +1,33 @@
+++#define S(A,B,C) CLK_NORMALIZED_COORDS_##A | CLK_ADDRESS_##B | CLK_FILTER_##C
+++
+++#define COPY_IMAGE(_dst, _sampler, scoord, dcoord) \
+++  color = read_imagei(src, _sampler, scoord);\
+++  write_imagei(_dst, dcoord, color)
+++
+++__kernel void
+++test_copy_image1(__read_only image2d_t src,
+++                 __write_only image2d_t dst0,
+++                 sampler_t sampler0,
+++                 __write_only image2d_t dst1,
+++                 __write_only image2d_t dst2,
+++                 __write_only image2d_t dst3,
+++                 __write_only image2d_t dst4,
+++                 float w_inv, float h_inv)
+++{
+++  const sampler_t sampler1 = S(FALSE, REPEAT, NEAREST);
+++  const sampler_t sampler2 = S(FALSE, CLAMP, NEAREST);
+++  const sampler_t sampler3 = S(FALSE, MIRRORED_REPEAT, NEAREST);
+++  const sampler_t sampler4 = S(TRUE, REPEAT, NEAREST);
+++  int2 coord;
+++  float2 fcoord;
+++  int4 color;
+++  coord.x = (int)get_global_id(0);
+++  coord.y = (int)get_global_id(1);
+++  fcoord.x = coord.x * w_inv;
+++  fcoord.y = coord.y * h_inv;
+++  COPY_IMAGE(dst0, sampler0, coord, coord);
+++  COPY_IMAGE(dst1, sampler1, coord, coord);
+++  COPY_IMAGE(dst2, sampler2, coord, coord);
+++  COPY_IMAGE(dst3, sampler3, coord, coord);
+++  COPY_IMAGE(dst4, sampler4, fcoord, coord);
+++}
++Index: beignet-0.1+git20130514+19e9c58/utests/CMakeLists.txt
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/utests/CMakeLists.txt	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/utests/CMakeLists.txt	2013-05-14 20:09:21.802021741 +0200
++@@ -74,6 +74,7 @@
++   compiler_local_memory_barrier_wg64.cpp
++   compiler_movforphi_undef.cpp
++   compiler_volatile.cpp
+++  compiler_copy_image1.cpp
++   runtime_createcontext.cpp
++   utest_assert.cpp
++   utest.cpp
++Index: beignet-0.1+git20130514+19e9c58/utests/compiler_copy_image1.cpp
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/utests/compiler_copy_image1.cpp	2013-05-14 20:09:21.802021741 +0200
++@@ -0,0 +1,77 @@
+++#include "utest_helper.hpp"
+++
+++static void compiler_copy_image1(void)
+++{
+++  const size_t w = 512;
+++  const size_t h = 512;
+++  cl_image_format format;
+++  cl_image_desc desc;
+++  cl_sampler sampler;
+++
+++  // Setup kernel and images
+++  OCL_CREATE_KERNEL("test_copy_image1");
+++  buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h);
+++  for (uint32_t j = 0; j < h; ++j)
+++    for (uint32_t i = 0; i < w; i++)
+++      ((uint32_t*)buf_data[0])[j * w + i] = j * w + i;
+++
+++  format.image_channel_order = CL_RGBA;
+++  format.image_channel_data_type = CL_UNSIGNED_INT8;
+++  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+++  desc.image_width = w;
+++  desc.image_height = h;
+++  desc.image_row_pitch = w * sizeof(uint32_t);
+++  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+++  OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
+++
+++  desc.image_row_pitch = 0;
+++  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+++  OCL_CREATE_IMAGE(buf[2], 0, &format, &desc, NULL);
+++  OCL_CREATE_IMAGE(buf[3], 0, &format, &desc, NULL);
+++  OCL_CREATE_IMAGE(buf[4], 0, &format, &desc, NULL);
+++  OCL_CREATE_IMAGE(buf[5], 0, &format, &desc, NULL);
+++  free(buf_data[0]);
+++  buf_data[0] = NULL;
+++
+++  // Run the kernel
+++  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+++  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+++  OCL_SET_ARG(2, sizeof(sampler), &sampler);
+++  OCL_SET_ARG(3, sizeof(cl_mem), &buf[2]);
+++  OCL_SET_ARG(4, sizeof(cl_mem), &buf[3]);
+++  OCL_SET_ARG(5, sizeof(cl_mem), &buf[4]);
+++  OCL_SET_ARG(6, sizeof(cl_mem), &buf[5]);
+++  float w_inv = 1.0/w;
+++  float h_inv = 1.0/h;
+++  OCL_SET_ARG(7, sizeof(float), &w_inv);
+++  OCL_SET_ARG(8, sizeof(float), &h_inv);
+++
+++  globals[0] = w;
+++  globals[1] = h;
+++  locals[0] = 16;
+++  locals[1] = 16;
+++  OCL_NDRANGE(2);
+++
+++  // Check result
+++  OCL_MAP_BUFFER(0);
+++  OCL_MAP_BUFFER(1);
+++  OCL_MAP_BUFFER(2);
+++  OCL_MAP_BUFFER(3);
+++  OCL_MAP_BUFFER(4);
+++  OCL_MAP_BUFFER(5);
+++
+++  for(uint32_t k = 0; k < 5; k++)
+++  {
+++    for (uint32_t j = 0; j < h; ++j)
+++      for (uint32_t i = 0; i < w; i++)
+++        OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == ((uint32_t*)buf_data[1 + k])[j * w + i]);
+++  }
+++  OCL_UNMAP_BUFFER(0);
+++  OCL_UNMAP_BUFFER(1);
+++  OCL_UNMAP_BUFFER(2);
+++  OCL_UNMAP_BUFFER(3);
+++  OCL_UNMAP_BUFFER(4);
+++  OCL_UNMAP_BUFFER(5);
+++}
+++
+++MAKE_UTEST_FROM_FUNCTION(compiler_copy_image1);
diff --cc debian/patches/0012-GBE-remove-sampler-address-space.patch
index 0000000,0000000..899b141
new file mode 100644
--- /dev/null
+++ b/debian/patches/0012-GBE-remove-sampler-address-space.patch
@@@ -1,0 -1,0 +1,74 @@@
++From a87eca304c690096511c4db9b6cddf2544ab6d3f Mon Sep 17 00:00:00 2001
++From: Zhigang Gong <zhigang.gong at linux.intel.com>
++Date: Mon, 13 May 2013 11:32:21 +0800
++Subject: [PATCH 12/15] GBE: remove sampler address space.
++To: beignet at lists.freedesktop.org
++
++As now sampler_t is a normal integer data type, we don't
++need the sampler address space any more.
++
++Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
++---
++ backend/src/ir/instruction.cpp        |    1 -
++ backend/src/ir/instruction.hpp        |    1 -
++ backend/src/llvm/llvm_gen_backend.cpp |    3 ---
++ backend/src/ocl_stdlib.h              |    1 -
++ 4 files changed, 6 deletions(-)
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/instruction.cpp	2013-05-14 20:07:24.282026980 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.cpp	2013-05-14 20:09:23.482021666 +0200
++@@ -915,7 +915,6 @@
++       case MEM_CONSTANT: return out << "constant";
++       case MEM_PRIVATE: return out << "private";
++       case IMAGE: return out << "image";
++-      case SAMPLER: return out << "sampler";
++       case MEM_INVALID: return out << "invalid";
++     };
++     return out;
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/instruction.hpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.hpp	2013-05-14 20:09:23.482021666 +0200
++@@ -50,7 +50,6 @@
++     MEM_CONSTANT,   //!< Immutable global memory
++     MEM_PRIVATE,    //!< Per thread private memory
++     IMAGE,          //!< For texture image.
++-    SAMPLER,        //!< For sampler.
++     MEM_INVALID
++   };
++ 
++Index: beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:16.370021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:23.486021665 +0200
++@@ -256,7 +256,6 @@
++       case 2: return ir::MEM_CONSTANT;
++       case 3: return ir::MEM_LOCAL;
++       case 4: return ir::IMAGE;
++-      case 5: return ir::SAMPLER;
++     }
++     GBE_ASSERT(false);
++     return ir::MEM_GLOBAL;
++@@ -916,8 +915,6 @@
++               case ir::IMAGE:
++                 ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
++               break;
++-              case ir::SAMPLER:
++-                ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, ptrSize);
++               break;
++               default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
++             }
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:09:16.370021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:09:23.486021665 +0200
++@@ -76,7 +76,6 @@
++ typedef __texture struct _image2d_t* image2d_t;
++ struct _image3d_t;
++ typedef __texture struct _image3d_t* image3d_t;
++-//typedef __sampler const uint* sampler_t;
++ typedef uint sampler_t;
++ typedef size_t event_t;
++ 
diff --cc debian/patches/0013-GBE-add-scalar-register-support-in-loadImmInstructio.patch
index 0000000,0000000..8b04c61
new file mode 100644
--- /dev/null
+++ b/debian/patches/0013-GBE-add-scalar-register-support-in-loadImmInstructio.patch
@@@ -1,0 -1,0 +1,41 @@@
++From 08ee5ccdb12d2ffe6afe23532f557e758c2dd8ec Mon Sep 17 00:00:00 2001
++From: Zhigang Gong <zhigang.gong at linux.intel.com>
++Date: Mon, 13 May 2013 11:32:22 +0800
++Subject: [PATCH 13/15] GBE: add scalar register support in
++ loadImmInstruction.
++To: beignet at lists.freedesktop.org
++
++There is a slight possibility that the destination register
++is a scalar register. We need to check it here.
++
++Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
++---
++ backend/src/backend/gen_insn_selection.cpp |    8 ++++++++
++ 1 file changed, 8 insertions(+)
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_insn_selection.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_insn_selection.cpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_insn_selection.cpp	2013-05-14 20:09:25.126021592 +0200
++@@ -1546,6 +1546,13 @@
++       const Immediate imm = insn.getImmediate();
++       const GenRegister dst = sel.selReg(insn.getDst(0), type);
++ 
+++      sel.push();
+++      if (sel.isScalarOrBool(insn.getDst(0)) == true) {
+++        sel.curr.execWidth = 1;
+++        sel.curr.predicate = GEN_PREDICATE_NONE;
+++        sel.curr.noMask = 1;
+++      }
+++
++       switch (type) {
++         case TYPE_U32:
++         case TYPE_S32:
++@@ -1559,6 +1566,7 @@
++         case TYPE_S8:  sel.MOV(dst, GenRegister::immw(imm.data.s8)); break;
++         default: NOT_SUPPORTED;
++       }
+++      sel.pop();
++       return true;
++     }
++ 
diff --cc debian/patches/0014-GBE-concentrate-all-samplers-allocation-at-compile-t.patch
index 0000000,0000000..d5b838e
new file mode 100644
--- /dev/null
+++ b/debian/patches/0014-GBE-concentrate-all-samplers-allocation-at-compile-t.patch
@@@ -1,0 -1,0 +1,407 @@@
++From 832e548e52983eed1b84cb9b605f56492626e28b Mon Sep 17 00:00:00 2001
++From: Zhigang Gong <zhigang.gong at linux.intel.com>
++Date: Mon, 13 May 2013 11:32:23 +0800
++Subject: [PATCH 14/15] GBE: concentrate all samplers' allocation at compile
++ time.
++To: beignet at lists.freedesktop.org
++
++This is the first step towards doing image/sampler allocation fully
++at compile time. Thus we can determine all the sampler ids and image
++bti indices at compile time. This makes the following things
++easier or faster:
++
++1. After we finish both image and sampler, we can treat all image bti and sampler
++   ids as constants and can get their values when we encode the Sampler and TypedWrite
++   instructions. Then we don't need to compute the message header at runtime, which
++   costs 3 instructions per call.
++
++2. Getting image width/height/depth. As we know the surface bti at compile time,
++   we can put that data at a specified curbe entry and generate the correct indirect
++   register access to get that information at compile time.
++
++This is the first step, and it only finishes the sampler part. Now all the
++samplers, including those defined in kernel arguments, will be allocated
++at compile time. At runtime, it just needs to fill in the sampler value
++into the proper slot, which maps to the specified input argument. Then the
++driver will create and bind the sampler to the correct slot.
++
++Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
++---
++ backend/src/ir/function.hpp           |   17 +++++++++--
++ backend/src/ir/sampler.cpp            |   53 ++++++++++++++++++++++++++-------
++ backend/src/ir/sampler.hpp            |   25 +++++++++++-----
++ backend/src/llvm/llvm_gen_backend.cpp |    7 +----
++ backend/src/ocl_common_defines.h      |    9 ++++--
++ src/cl_command_queue.c                |    2 --
++ src/cl_command_queue_gen7.c           |    2 +-
++ src/cl_kernel.c                       |   14 ++++-----
++ src/cl_kernel.h                       |    5 ++--
++ src/cl_sampler.c                      |   22 ++++++++------
++ src/cl_sampler.h                      |    4 +--
++ src/intel/intel_gpgpu.c               |    2 +-
++ 12 files changed, 106 insertions(+), 56 deletions(-)
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.hpp	2013-05-14 20:09:16.366021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp	2013-05-14 20:09:26.818021517 +0200
++@@ -197,6 +197,18 @@
++       GBE_ASSERT(args[ID] != NULL);
++       return *args[ID];
++     }
+++
+++    /*! Get arg ID. */
+++    INLINE int32_t getArgID(FunctionArgument *requestArg) {
+++      for (uint32_t ID = 0; ID < args.size(); ID++)
+++      {
+++        if ( args[ID] == requestArg )
+++          return ID;
+++      }
+++      GBE_ASSERTM(0, "Failed to get a valid argument ID.");
+++      return -1;
+++    }
+++
++     /*! Get the number of pushed registers */
++     INLINE uint32_t pushedNum(void) const { return pushMap.size(); }
++     /*! Get the pushed data location for the given register */
++@@ -289,8 +301,7 @@
++     /*! Change the SLM config for the function */
++     INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; }
++     /*! Get sampler set in this function */
++-    SamplerSet* getSamplerSet(void) {return samplerSet; }
++-    //const SamplerSet& getSamplerSet(void) const {return samplerSet; }
+++    SamplerSet* getSamplerSet(void) const {return samplerSet; }
++   private:
++     friend class Context;           //!< Can freely modify a function
++     std::string name;               //!< Function name
++@@ -306,7 +317,7 @@
++     LocationMap locationMap;        //!< Pushed function arguments (loc->reg)
++     uint32_t simdWidth;             //!< 8 or 16 if forced, 0 otherwise
++     bool useSLM;                    //!< Is SLM required?
++-    SamplerSet *samplerSet;
+++    SamplerSet *samplerSet;          //!< samplers used in this function.
++     GBE_CLASS(Function);            //!< Use custom allocator
++   };
++ 
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/sampler.cpp	2013-05-14 20:09:16.366021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp	2013-05-14 20:09:26.818021517 +0200
++@@ -22,25 +22,58 @@
++  */
++ #include "sampler.hpp"
++ #include "context.hpp"
+++#include "ocl_common_defines.h"
++ 
++ namespace gbe {
++ namespace ir {
++ 
++-  Register SamplerSet::append(uint32_t samplerValue, Context *ctx)
+++  const uint32_t SamplerSet::getIdx(const Register reg) const
++   {
++-    int i = 0;
+++    auto it = regMap.find(reg);
+++    GBE_ASSERT(it != regMap.end());
+++    return it->second.slot;
+++  }
++ 
++-    for(auto it = regMap.begin();
++-        it != regMap.end(); ++it, ++i)
++-    {
++-      if (it->first == samplerValue)
++-        return it->second;
++-    }
+++  void SamplerSet::appendReg(const Register reg, uint32_t key, Context *ctx) {
+++    struct SamplerRegSlot samplerSlot;
+++    // This register is just used as a key.
+++    samplerSlot.reg = reg;
+++    samplerSlot.slot = samplerMap.size();
+++    samplerMap.insert(std::make_pair(key, samplerSlot));
+++    regMap.insert(std::make_pair(samplerSlot.reg, samplerSlot));
+++    ctx->LOADI(ir::TYPE_S32, samplerSlot.reg, ctx->newIntegerImmediate(samplerSlot.slot, ir::TYPE_S32));
+++  }
+++
+++  Register SamplerSet::append(uint32_t samplerValue, Context *ctx)
+++  {
+++    auto it = samplerMap.find(samplerValue);
+++    if (it != samplerMap.end())
+++        return it->second.reg;
++     Register reg = ctx->reg(FAMILY_DWORD);
++-    ctx->LOADI(ir::TYPE_S32, reg, ctx->newIntegerImmediate(i, ir::TYPE_S32));
++-    regMap.insert(std::make_pair(samplerValue, reg));
+++    appendReg(reg, samplerValue, ctx);
++     return reg;
++   }
++ 
+++#define SAMPLER_ID(id) ((id << __CLK_SAMPLER_ARG_BASE) | __CLK_SAMPLER_ARG_KEY_BIT)
+++  void SamplerSet::append(Register samplerReg, Context *ctx)
+++  {
+++    ir::FunctionArgument *arg =  ctx->getFunction().getArg(samplerReg);
+++    GBE_ASSERT(arg != NULL);
+++
+++    // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, we have to fix up the argument
+++    // type here. Once we switch to the LLVM and use the new data type sampler_t, we can remove this
+++    // work around.
+++    arg->type = ir::FunctionArgument::SAMPLER;
+++    int32_t id = ctx->getFunction().getArgID(arg);
+++    GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS));
+++
+++    auto it = samplerMap.find(SAMPLER_ID(id));
+++    if (it != samplerMap.end()) {
+++      GBE_ASSERT(it->second.reg == samplerReg);
+++      return;
+++    }
+++    appendReg(samplerReg, SAMPLER_ID(id), ctx);
+++  }
+++
++ } /* namespace ir */
++ } /* namespace gbe */
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/sampler.hpp	2013-05-14 20:09:16.366021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.hpp	2013-05-14 20:09:26.818021517 +0200
++@@ -27,7 +27,6 @@
++ #include "ir/register.hpp"
++ #include "sys/map.hpp"
++ 
++-
++ namespace gbe {
++ namespace ir {
++ 
++@@ -37,6 +36,11 @@
++    */
++   class Context;
++ 
+++  struct SamplerRegSlot {
+++    Register reg;
+++    uint32_t slot;
+++  };
+++
++   class SamplerSet
++   {
++   public:
++@@ -44,21 +48,28 @@
++      *  If the speficied sampler is exist, only return the previous offset and
++      *  don't append it again. Return -1, if failed.*/
++     Register append(uint32_t clkSamplerValue, Context *ctx);
++-    size_t getDataSize(void) { return regMap.size(); }
++-    size_t getDataSize(void) const { return regMap.size(); }
+++    /*! Append a sampler defined in kernel args. */
+++    void append(Register samplerArg, Context *ctx);
+++    /*! Get the sampler idx (actual location) */
+++    const uint32_t getIdx(const Register reg) const;
+++    size_t getDataSize(void) { return samplerMap.size(); }
+++    size_t getDataSize(void) const { return samplerMap.size(); }
++     void getData(uint32_t *samplers) const {
++-      for ( auto &it : regMap)
++-        *samplers++ = it.first;
+++      for(auto &it : samplerMap)
+++        samplers[it.second.slot] = it.first;
++     }
++ 
++     void operator = (const SamplerSet& other) {
++       regMap.insert(other.regMap.begin(), other.regMap.end());
+++      samplerMap.insert(other.samplerMap.begin(), other.samplerMap.end());
++     }
++ 
++-    SamplerSet(const SamplerSet& other) : regMap(other.regMap.begin(), other.regMap.end()) { }
+++    SamplerSet(const SamplerSet& other) : samplerMap(other.samplerMap.begin(), other.samplerMap.end()) { }
++     SamplerSet() {}
++   private:
++-    map<uint32_t, Register> regMap;
+++    void appendReg(const Register reg, uint32_t key, Context *ctx);
+++    map<uint32_t, SamplerRegSlot> samplerMap;
+++    map<Register, SamplerRegSlot> regMap;
++     GBE_CLASS(SamplerSet);
++   };
++ } /* namespace ir */
++Index: beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:23.486021665 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:26.818021517 +0200
++@@ -1998,13 +1998,8 @@
++                GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32, "Invalid sampler type");
++                sampler = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx);
++             } else {
++-              // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, we have to fix up the argument
++-              // type here. Once we switch to the LLVM and use the new data type sampler_t, we can remove this
++-              // work around.
++               sampler = this->getRegister(*AI);
++-              ir::FunctionArgument *arg =  ctx.getFunction().getArg(sampler);
++-              GBE_ASSERT(arg != NULL);
++-              arg->type = ir::FunctionArgument::SAMPLER;
+++              ctx.getFunction().getSamplerSet()->append(sampler, &ctx);
++             }
++             ++AI;
++ 
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_common_defines.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_common_defines.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_common_defines.h	2013-05-14 20:09:26.818021517 +0200
++@@ -111,9 +111,12 @@
++     __CLK_SAMPLER_MASK             = __CLK_MIP_MASK | __CLK_FILTER_MASK |
++                                      __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
++ 
++-    __CLK_ANISOTROPIC_RATIO_BITS   = 5,
++-    __CLK_ANISOTROPIC_RATIO_MASK   = (int) 0x80000000 >>
++-                                      (__CLK_ANISOTROPIC_RATIO_BITS-1)
+++    __CLK_SAMPLER_ARG_BASE         = __CLK_MIP_BASE + __CLK_SAMPLER_BITS,
+++    __CLK_SAMPLER_ARG_BITS         = 8,
+++    __CLK_SAMPLER_ARG_MASK         = ((1 << __CLK_SAMPLER_ARG_BITS) - 1) << __CLK_SAMPLER_ARG_BASE,
+++    __CLK_SAMPLER_ARG_KEY_BIT      = (1 << (__CLK_SAMPLER_ARG_BASE + __CLK_SAMPLER_ARG_BITS)),
+++    __CLK_SAMPLER_ARG_KEY_BITS     = 1,
+++
++ } clk_sampler_type;
++ 
++ // Memory synchronization
++Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue.c	2013-05-14 20:09:19.978021822 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c	2013-05-14 20:09:26.818021517 +0200
++@@ -377,8 +377,6 @@
++   else
++     FATAL ("Unknown Gen Device");
++ 
++-  k->arg_sampler_sz = 0;
++-
++ #if USE_FULSIM
++   if (run_it != NULL && strcmp(run_it, "1") == 0) {
++     TRY (cl_fulsim_dump_all_surfaces, queue, k);
++Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue_gen7.c	2013-05-14 20:09:19.978021822 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c	2013-05-14 20:09:26.822021517 +0200
++@@ -225,7 +225,7 @@
++   /* Bind user buffers */
++   cl_command_queue_bind_surface(queue, ker);
++   /* Bind all samplers */
++-  cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->arg_sampler_sz + ker->sampler_sz);
+++  cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->sampler_sz);
++ 
++   /* Bind a stack if needed */
++   cl_bind_stack(gpgpu, ker);
++Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.c	2013-05-14 20:09:19.978021822 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.c	2013-05-14 20:09:26.822021517 +0200
++@@ -136,15 +136,11 @@
++     memcpy(&sampler, value, sz);
++     if (UNLIKELY(sampler->magic != CL_MAGIC_SAMPLER_HEADER))
++       return CL_INVALID_KERNEL_ARGS;
++-    uint32_t slot;
++     k->args[index].local_sz = 0;
++     k->args[index].is_set = 1;
++     k->args[index].mem = NULL;
++     k->args[index].sampler = sampler;
++-    slot = cl_arg_sampler_insert(k, sampler);
++-    offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
++-    assert(offset + sz <= k->curbe_sz);
++-    memcpy(k->curbe + offset, &slot, sizeof(slot));
+++    cl_set_sampler_arg_slot(k, index, sampler);
++     return CL_SUCCESS;
++   }
++ 
++@@ -209,9 +205,9 @@
++ 
++   /* Get sampler data & size */
++   k->sampler_sz = gbe_kernel_get_sampler_size(k->opaque);
++-  k->arg_sampler_sz = 0;
++   assert(k->sampler_sz <= GEN_MAX_SAMPLERS);
++-  gbe_kernel_get_sampler_data(k->opaque, k->samplers);
+++  if (k->sampler_sz > 0)
+++    gbe_kernel_get_sampler_data(k->opaque, k->samplers);
++ }
++ 
++ LOCAL cl_kernel
++@@ -231,8 +227,8 @@
++   to->arg_n = from->arg_n;
++   to->curbe_sz = from->curbe_sz;
++   to->sampler_sz = from->sampler_sz;
++-  to->arg_sampler_sz = from->arg_sampler_sz;
++-  memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
+++  if (to->sampler_sz)
+++    memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
++   TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
++   if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
++ 
++Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.h	2013-05-14 20:09:19.978021822 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.h	2013-05-14 20:09:26.822021517 +0200
++@@ -52,10 +52,9 @@
++   gbe_kernel opaque;          /* (Opaque) compiler structure for the OCL kernel */
++   char *curbe;                /* One curbe per kernel */
++   size_t curbe_sz;            /* Size of it */
++-  uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel */
++-  size_t sampler_sz;          /* sampler size defined in kernel */
+++  uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel & kernel args */
+++  size_t sampler_sz;          /* sampler size defined in kernel & kernel args. */
++   cl_argument *args;          /* To track argument setting */
++-  size_t arg_sampler_sz;      /* sampler size defined in kernel args */
++   uint32_t arg_n:31;          /* Number of arguments */
++   uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
++ };
++Index: beignet-0.1+git20130514+19e9c58/src/cl_sampler.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_sampler.c	2013-05-14 20:09:19.978021822 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_sampler.c	2013-05-14 20:09:26.822021517 +0200
++@@ -52,18 +52,22 @@
++          | (clk_filter << __CLK_FILTER_BASE);
++ }
++ 
++-int cl_arg_sampler_insert(cl_kernel k, cl_sampler sampler)
+++#define IS_SAMPLER_ARG(v) (v & __CLK_SAMPLER_ARG_KEY_BIT)
+++#define SAMPLER_ARG_ID(v) ((v & __CLK_SAMPLER_ARG_MASK) >> __CLK_SAMPLER_ARG_BASE)
+++int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler)
++ {
++-  int i, slot_id;
++-  for(i = 0; i < k->sampler_sz; i++)
+++  int slot_id;
+++  for(slot_id = 0; slot_id < k->sampler_sz; slot_id++)
++   {
++-    if (k->samplers[i] == sampler->clkSamplerValue)
++-      return i;
+++    if (IS_SAMPLER_ARG(k->samplers[slot_id])) {
+++     if (SAMPLER_ARG_ID(k->samplers[slot_id]) == index) {
+++       k->samplers[slot_id] = (k->samplers[slot_id] & (~__CLK_SAMPLER_MASK))
+++                              | sampler->clkSamplerValue;
+++       return slot_id;
+++     }
+++    }
++   }
++-  slot_id = k->sampler_sz + k->arg_sampler_sz;
++-  k->samplers[slot_id] = sampler->clkSamplerValue;
++-  k->arg_sampler_sz++;
++-  return slot_id;
+++  assert(0);
++ }
++ 
++ LOCAL cl_sampler
++Index: beignet-0.1+git20130514+19e9c58/src/cl_sampler.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_sampler.h	2013-05-14 20:09:19.978021822 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_sampler.h	2013-05-14 20:09:26.822021517 +0200
++@@ -50,8 +50,8 @@
++ /* Add one more reference to this object */
++ extern void cl_sampler_add_ref(cl_sampler);
++ 
++-/* insert a new argument sampler */
++-int cl_arg_sampler_insert(cl_kernel k, cl_sampler sampler);
+++/* set a sampler kernel argument */
+++int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler);
++ 
++ #endif /* __CL_SAMPLER_H__ */
++ 
++Index: beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/intel/intel_gpgpu.c	2013-05-14 20:09:19.982021822 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c	2013-05-14 20:09:26.822021517 +0200
++@@ -734,7 +734,7 @@
++   int index;
++   assert(sampler_sz <= GEN_MAX_SAMPLERS);
++   for(index = 0; index < sampler_sz; index++)
++-    intel_gpgpu_insert_sampler(gpgpu, index, samplers[index]);
+++    intel_gpgpu_insert_sampler(gpgpu, index, samplers[index] & __CLK_SAMPLER_MASK);
++ }
++ 
++ static void
diff --cc debian/patches/0015-GBE-Runtime-Optimize-Sample-TypedWrite-instruction.patch
index 0000000,0000000..1378d51
new file mode 100644
--- /dev/null
+++ b/debian/patches/0015-GBE-Runtime-Optimize-Sample-TypedWrite-instruction.patch
@@@ -1,0 -1,0 +1,1027 @@@
++From 4a767da3faa8ed91a3edb61a8c42a1c8c0e8b7b8 Mon Sep 17 00:00:00 2001
++From: Zhigang Gong <zhigang.gong at linux.intel.com>
++Date: Mon, 13 May 2013 11:32:24 +0800
++Subject: [PATCH 15/15] GBE/Runtime: Optimize Sample/TypedWrite instruction.
++To: beignet at lists.freedesktop.org
++
++This commit does two major things as below:
++1. Allocate image surfaces at compile time, and add new gbe interfaces to let the runtime know
++how many image surfaces we have, and the image allocation information. Thus the runtime
++library knows how to bind those image surfaces.
++
++2. As now for both image and sampler, at compile time, we know the exact binding table
++index. We no longer need to get those indices from the input argument(curbe) and prepare
++the desc to the architecture register. We can use imm as the desc thus we can save
++4 out of 4 instructions for SampleInstruction and save 2 out of 12 instructions for
++the TypedWriteInstruction.
++
++This patch is also a major preparation for the get_image_width/height/... functions.
++
++Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
++---
++ backend/src/CMakeLists.txt                 |    2 +
++ backend/src/backend/gen_context.cpp        |   43 ++++++-----------
++ backend/src/backend/gen_encoder.cpp        |   66 ++++++++++++++++++++++----
++ backend/src/backend/gen_encoder.hpp        |   10 ++--
++ backend/src/backend/gen_insn_selection.cpp |   44 +++++++++++-------
++ backend/src/backend/program.cpp            |   33 ++++++++++++-
++ backend/src/backend/program.h              |   25 ++++++++++
++ backend/src/backend/program.hpp            |    9 ++++
++ backend/src/ir/function.cpp                |    1 +
++ backend/src/ir/function.hpp                |    4 ++
++ backend/src/ir/image.cpp                   |   69 ++++++++++++++++++++++++++++
++ backend/src/ir/image.hpp                   |   65 ++++++++++++++++++++++++++
++ backend/src/ir/instruction.hpp             |    7 +++
++ backend/src/ir/sampler.cpp                 |    3 +-
++ backend/src/llvm/llvm_gen_backend.cpp      |    1 +
++ src/cl_command_queue.c                     |   29 +++++++-----
++ src/cl_command_queue.h                     |    3 ++
++ src/cl_command_queue_gen7.c                |    2 +
++ src/cl_driver.h                            |   17 +++----
++ src/cl_kernel.c                            |   20 ++++++++
++ src/cl_kernel.h                            |    2 +
++ src/intel/intel_driver.c                   |    7 ++-
++ src/intel/intel_gpgpu.c                    |   27 ++---------
++ 23 files changed, 383 insertions(+), 106 deletions(-)
++ create mode 100644 backend/src/ir/image.cpp
++ create mode 100644 backend/src/ir/image.hpp
++
++Index: beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/CMakeLists.txt	2013-05-14 20:09:16.362021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt	2013-05-14 20:09:28.634021436 +0200
++@@ -63,6 +63,8 @@
++     ir/constant.hpp
++     ir/sampler.cpp
++     ir/sampler.hpp
+++    ir/image.cpp
+++    ir/image.hpp
++     ir/instruction.cpp
++     ir/instruction.hpp
++     ir/liveness.cpp
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_context.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_context.cpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_context.cpp	2013-05-14 20:09:28.634021436 +0200
++@@ -287,47 +287,36 @@
++   void GenContext::emitSampleInstruction(const SelectionInstruction &insn) {
++     const GenRegister dst = ra->genReg(insn.dst(0));
++     const GenRegister msgPayload = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_F);
++-    const GenRegister bti = ra->genReg(insn.src(4));
++-    const GenRegister sampler = ra->genReg(insn.src(5));
++-    const GenRegister ucoord = ra->genReg(insn.src(6));
++-    const GenRegister vcoord = ra->genReg(insn.src(7));
++-    const GenRegister wcoord = ra->genReg(insn.src(8));
++-    const GenRegister temp = GenRegister::ud1grf(msgPayload.nr, msgPayload.subnr/sizeof(float) + 4);
++-    const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
+++    const unsigned char bti = insn.extra.function;
+++    const unsigned char sampler = insn.extra.elem;
+++    const GenRegister ucoord = ra->genReg(insn.src(4));
+++    const GenRegister vcoord = ra->genReg(insn.src(5));
+++    const GenRegister wcoord = ra->genReg(insn.src(6));
++     uint32_t simdWidth = p->curr.execWidth;
++     p->push();
++     const uint32_t nr = msgPayload.nr;
++     // prepare mesg desc and move to a0.0.
++     // desc = bti | (sampler << 8) | (0 << 12) | (2 << 16) | (0 << 18) | (0 << 19) | (4 << 20) | (1 << 25) | (0 < 29) | (0 << 31)
++-    p->curr.execWidth = 1;
++-    p->MOV(a0_0, GenRegister::immud((GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE << 12) | (2 << 17)
++-                                    | ((4 * (simdWidth/8)) << 20)
++-                                    | ((2 * (simdWidth/8)) << 25)));
++-    p->SHL(temp, GenRegister::ud1grf(sampler.nr, sampler.subnr/sizeof(float)), GenRegister::immud(8));
++-    p->OR(a0_0, a0_0, temp);
++-    p->OR(a0_0, a0_0, GenRegister::ud1grf(bti.nr, bti.subnr/sizeof(float)));
++-    p->curr.execWidth = simdWidth;
++     /* Prepare message payload. */
++     p->MOV(GenRegister::f8grf(nr , 0), ucoord);
++     p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord);
++     if (insn.src(8).reg() != 0)
++       p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord);
++-    p->SAMPLE(dst, msgPayload, a0_0, -1, 0);
+++    p->SAMPLE(dst, msgPayload, false, bti, sampler, simdWidth, -1, 0);
++ 
++     p->pop();
++   }
++ 
++   void GenContext::emitTypedWriteInstruction(const SelectionInstruction &insn) {
++     const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_UD);
++-    const GenRegister bti = ra->genReg(insn.src(0 + insn.extra.elem));
++-    const GenRegister ucoord = ra->genReg(insn.src(1 + insn.extra.elem));
++-    const GenRegister vcoord = ra->genReg(insn.src(2 + insn.extra.elem));
++-    const GenRegister wcoord = ra->genReg(insn.src(3 + insn.extra.elem));
++-    const GenRegister R = ra->genReg(insn.src(4 + insn.extra.elem));
++-    const GenRegister G = ra->genReg(insn.src(5 + insn.extra.elem));
++-    const GenRegister B = ra->genReg(insn.src(6 + insn.extra.elem));
++-    const GenRegister A = ra->genReg(insn.src(7 + insn.extra.elem));
++-    const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
+++    const GenRegister ucoord = ra->genReg(insn.src(insn.extra.elem));
+++    const GenRegister vcoord = ra->genReg(insn.src(1 + insn.extra.elem));
+++    const GenRegister wcoord = ra->genReg(insn.src(2 + insn.extra.elem));
+++    const GenRegister R = ra->genReg(insn.src(3 + insn.extra.elem));
+++    const GenRegister G = ra->genReg(insn.src(4 + insn.extra.elem));
+++    const GenRegister B = ra->genReg(insn.src(5 + insn.extra.elem));
+++    const GenRegister A = ra->genReg(insn.src(6 + insn.extra.elem));
+++    const unsigned char bti = insn.extra.function;
++ 
++     p->push();
++     uint32_t simdWidth = p->curr.execWidth;
++@@ -339,8 +328,6 @@
++ 
++     // prepare mesg desc and move to a0.0.
++     // desc = bti | (msg_type << 14) | (header_present << 19))
++-    p->MOV(a0_0, GenRegister::immud((GEN_TYPED_WRITE << 14) | (1 << 19) | (9 << 25)));
++-    p->OR(a0_0, a0_0, GenRegister::ud1grf(bti.nr, bti.subnr/sizeof(float)));
++     // prepare header, we need to enable all the 8 planes.
++     p->MOV(GenRegister::ud8grf(nr, 7), GenRegister::immud(0xff));
++     // Typed write only support SIMD8.
++@@ -368,7 +355,7 @@
++       QUARTER_MOV1(nr + 7, B);
++       QUARTER_MOV1(nr + 8, A);
++ #undef QUARTER_MOV
++-      p->TYPED_WRITE(header, a0_0);
+++      p->TYPED_WRITE(header, true, bti);
++     }
++ 
++     p->pop();
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_encoder.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_encoder.cpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_encoder.cpp	2013-05-14 20:09:28.634021436 +0200
++@@ -166,6 +166,39 @@
++   }
++ #endif
++ 
+++  static void setSamplerMessage(GenEncoder *p,
+++                                GenInstruction *insn,
+++                                unsigned char bti,
+++                                unsigned char sampler,
+++                                uint32_t msg_type,
+++                                uint32_t response_length,
+++                                uint32_t msg_length,
+++                                bool header_present,
+++                                uint32_t simd_mode,
+++                                uint32_t return_format)
+++  {
+++     const GenMessageTarget sfid = GEN_SFID_SAMPLER;
+++     setMessageDescriptor(p, insn, sfid, msg_length, response_length);
+++     insn->bits3.sampler_gen7.bti = bti;
+++     insn->bits3.sampler_gen7.sampler = sampler;
+++     insn->bits3.sampler_gen7.msg_type = msg_type;
+++     insn->bits3.sampler_gen7.simd_mode = simd_mode;
+++  }
+++
+++
+++  static void setTypedWriteMessage(GenEncoder *p,
+++                                   GenInstruction *insn,
+++                                   unsigned char bti,
+++                                   unsigned char msg_type,
+++                                   uint32_t msg_length,
+++                                   bool header_present)
+++  {
+++     const GenMessageTarget sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+++     setMessageDescriptor(p, insn, sfid, msg_length, 0, header_present);
+++     insn->bits3.gen7_typed_rw.bti = bti;
+++     insn->bits3.gen7_typed_rw.msg_type = msg_type;
+++  }
+++
++   //////////////////////////////////////////////////////////////////////////
++   // Gen Emitter encoding class
++   //////////////////////////////////////////////////////////////////////////
++@@ -800,31 +833,44 @@
++   }
++ 
++   void GenEncoder::SAMPLE(GenRegister dest,
++-                          GenRegister src0,
++-                          GenRegister src1,
+++                          GenRegister msg,
+++                          bool header_present,
+++                          unsigned char bti,
+++                          unsigned char sampler,
+++                          uint32_t simdWidth,
++                           uint32_t writemask,
++                           uint32_t return_format)
++   {
++      if (writemask == 0) return;
++-
+++     uint32_t msg_type = (simdWidth == 16) ?
+++                            GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE : GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+++     uint32_t response_length = (4 * (simdWidth / 8));
+++     uint32_t msg_length = (2 * (simdWidth / 8));
+++     if (header_present)
+++       msg_length++;
+++     uint32_t simd_mode = (simdWidth == 16) ?
+++                            GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
++      GenInstruction *insn = this->next(GEN_OPCODE_SEND);
++      insn->header.predicate_control = 0; /* XXX */
++      this->setHeader(insn);
++      this->setDst(insn, dest);
++-     this->setSrc0(insn, src0);
++-     this->setSrc1(insn, src1);
++-     insn->header.destreg_or_condmod = GEN_SFID_SAMPLER;
+++     this->setSrc0(insn, msg);
+++     setSamplerMessage(this, insn, bti, sampler, msg_type,
+++                       response_length, msg_length,
+++                       header_present,
+++                       simd_mode, return_format);
++   }
++ 
++-  void GenEncoder::TYPED_WRITE(GenRegister header, GenRegister desc)
+++  void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti)
++   {
++      GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+++     uint32_t msg_type = GEN_TYPED_WRITE;
+++     uint32_t msg_length = header_present ? 9 : 8;
++      insn->header.predicate_control = 0; /* XXX */
++      this->setHeader(insn);
++      this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
++-     this->setSrc0(insn, header);
++-     this->setSrc1(insn, desc);
++-     insn->header.destreg_or_condmod = GEN6_SFID_DATAPORT_RENDER_CACHE;
+++     this->setSrc0(insn, msg);
+++     setTypedWriteMessage(this, insn, bti, msg_type, msg_length, header_present);
++   }
++ 
++   void GenEncoder::EOT(uint32_t msg) {
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_encoder.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_encoder.hpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_encoder.hpp	2013-05-14 20:09:28.634021436 +0200
++@@ -140,14 +140,18 @@
++     void BYTE_SCATTER(GenRegister src, uint32_t bti, uint32_t elemSize);
++     /*! Send instruction for the sampler */
++     void SAMPLE(GenRegister dest,
++-                GenRegister src0,
++-                GenRegister src1,
+++                GenRegister msg,
+++                bool header_present,
+++                unsigned char bti,
+++                unsigned char sampler,
+++                unsigned int simdWidth,
++                 uint32_t writemask,
++                 uint32_t return_format);
++ 
++     /*! TypedWrite instruction for texture */
++     void TYPED_WRITE(GenRegister header,
++-                     GenRegister desc);
+++                     bool header_present,
+++                     unsigned char bti);
++     /*! Extended math function (2 sources) */
++     void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1);
++     /*! Extended math function (1 source) */
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_insn_selection.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_insn_selection.cpp	2013-05-14 20:09:25.126021592 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_insn_selection.cpp	2013-05-14 20:09:28.638021436 +0200
++@@ -466,9 +466,9 @@
++     /*! Encode ternary instructions */
++     void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
++     /*! Encode sample instructions */
++-    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum);
+++    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler);
++     /*! Encode typed write instructions */
++-    void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum);
+++    void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum, uint32_t bti);
++     /*! Use custom allocators */
++     GBE_CLASS(Opaque);
++     friend class SelectionBlock;
++@@ -964,8 +964,11 @@
++       this->matchBasicBlock(insnNum);
++     });
++    }
++- /* XXX always 4 return values? */
++-  void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum) {
+++
+++  void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum,
+++                                 GenRegister *src, uint32_t srcNum,
+++                                 GenRegister *msgPayloads, uint32_t msgNum,
+++                                 uint32_t bti, uint32_t sampler) {
++     SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum, msgNum + srcNum);
++     SelectionVector *dstVector = this->appendVector();
++     SelectionVector *msgVector = this->appendVector();
++@@ -987,6 +990,9 @@
++     msgVector->regNum = msgNum;
++     msgVector->isSrc = 1;
++     msgVector->reg = &insn->src(0);
+++
+++    insn->extra.function = bti;
+++    insn->extra.elem = sampler;
++   }
++ 
++   ///////////////////////////////////////////////////////////////////////////
++@@ -999,7 +1005,8 @@
++   }
++ 
++   void Selection::Opaque::TYPED_WRITE(GenRegister *src, uint32_t srcNum,
++-                              GenRegister *msgs, uint32_t msgNum) {
+++                                      GenRegister *msgs, uint32_t msgNum,
+++                                      uint32_t bti) {
++     uint32_t elemID = 0;
++     uint32_t i;
++     SelectionInstruction *insn = this->appendInsn(SEL_OP_TYPED_WRITE, 0, msgNum + srcNum);
++@@ -1010,6 +1017,7 @@
++     for (i = 0; i < srcNum; ++i, ++elemID)
++       insn->src(elemID) = src[i];
++ 
+++    insn->extra.function = bti;
++     insn->extra.elem = msgNum;
++     // Sends require contiguous allocation
++     msgVector->regNum = msgNum;
++@@ -1965,7 +1973,7 @@
++     {
++       using namespace ir;
++       GenRegister msgPayloads[4];
++-      GenRegister dst[insn.getDstNum()], src[insn.getSrcNum()];
+++      GenRegister dst[insn.getDstNum()], src[insn.getSrcNum() - 2];
++ 
++       for( int i = 0; i < 4; ++i)
++         msgPayloads[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
++@@ -1973,10 +1981,15 @@
++       for (uint32_t valueID = 0; valueID < insn.getDstNum(); ++valueID)
++         dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
++ 
++-      for (uint32_t valueID = 0; valueID < insn.getSrcNum(); ++valueID)
++-        src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
+++      for (uint32_t valueID = 0; valueID < insn.getSrcNum() - 2; ++valueID)
+++        src[valueID] = sel.selReg(insn.getSrc(valueID + 2), insn.getSrcType());
+++
+++      uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx
+++                       (insn.getSrc(SampleInstruction::SURFACE_BTI));
+++      uint32_t sampler = sel.ctx.getFunction().getSamplerSet()->getIdx
+++                           (insn.getSrc(SampleInstruction::SAMPLER_BTI));
++ 
++-      sel.SAMPLE(dst, insn.getDstNum(), src, insn.getSrcNum(), msgPayloads, 4);
+++      sel.SAMPLE(dst, insn.getDstNum(), src, insn.getSrcNum() - 2, msgPayloads, 4, bti, sampler);
++       return true;
++     }
++     DECL_CTOR(SampleInstruction, 1, 1);
++@@ -1998,17 +2011,16 @@
++       for(uint32_t i = 0; i < msgNum; i++)
++         msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
++ 
++-      // bti always uses TYPE_U32.
++-      src[valueID] = sel.selReg(insn.getSrc(valueID), TYPE_U32);
++-      valueID++;
++       // u, v, w coords should use coord type.
++       for (; valueID < 1 + coordNum; ++valueID)
++-        src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getCoordType());
+++        src[valueID] = sel.selReg(insn.getSrc(valueID + 1), insn.getCoordType());
++ 
++-      for (; valueID < insn.getSrcNum(); ++valueID)
++-        src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
+++      for (; (valueID + 1) < insn.getSrcNum(); ++valueID)
+++        src[valueID] = sel.selReg(insn.getSrc(valueID + 1), insn.getSrcType());
++ 
++-      sel.TYPED_WRITE(src, insn.getSrcNum(), msgs, msgNum);
+++      uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx
+++                       (insn.getSrc(TypedWriteInstruction::SURFACE_BTI));
+++      sel.TYPED_WRITE(src, insn.getSrcNum() - 1, msgs, msgNum, bti);
++       return true;
++     }
++     DECL_CTOR(TypedWriteInstruction, 1, 1);
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.cpp	2013-05-14 20:09:16.366021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.cpp	2013-05-14 20:09:28.638021436 +0200
++@@ -49,11 +49,12 @@
++ namespace gbe {
++ 
++   Kernel::Kernel(const std::string &name) :
++-    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL), samplerSet(NULL)
+++    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL), samplerSet(NULL), imageSet(NULL)
++   {}
++   Kernel::~Kernel(void) {
++     if(ctx) GBE_DELETE(ctx);
++     if(samplerSet) GBE_DELETE(samplerSet);
+++    if(imageSet) GBE_DELETE(imageSet);
++     GBE_SAFE_DELETE_ARRAY(args);
++   }
++   int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
++@@ -92,6 +93,7 @@
++       const std::string &name = pair.first;
++       Kernel *kernel = this->compileKernel(unit, name);
++       kernel->setSamplerSet(pair.second->getSamplerSet());
+++      kernel->setImageSet(pair.second->getImageSet());
++       kernels.insert(std::make_pair(name, kernel));
++     }
++     return true;
++@@ -264,6 +266,27 @@
++     kernel->getSamplerData(samplers);
++   }
++ 
+++  static size_t kernelGetImageSize(gbe_kernel gbeKernel) {
+++    if (gbeKernel == NULL) return 0;
+++    const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel;
+++    return kernel->getImageSize();
+++  }
+++
+++  static void kernelGetImageData(gbe_kernel gbeKernel, ImageInfo *images) {
+++    if (gbeKernel == NULL) return;
+++    const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel;
+++    kernel->getImageData(images);
+++  }
+++
+++  static uint32_t gbeImageBaseIndex = 0;
+++  static void setImageBaseIndex(uint32_t baseIdx) {
+++     gbeImageBaseIndex = baseIdx;
+++  }
+++
+++  static uint32_t getImageBaseIndex() {
+++    return gbeImageBaseIndex;
+++  }
+++
++   static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
++     return 0u;
++   }
++@@ -293,6 +316,10 @@
++ GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
++ GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size = NULL;
++ GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data = NULL;
+++GBE_EXPORT_SYMBOL gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size = NULL;
+++GBE_EXPORT_SYMBOL gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data = NULL;
+++GBE_EXPORT_SYMBOL gbe_set_image_base_index_cb *gbe_set_image_base_index = NULL;
+++GBE_EXPORT_SYMBOL gbe_get_image_base_index_cb *gbe_get_image_base_index = NULL;
++ 
++ namespace gbe
++ {
++@@ -322,6 +349,10 @@
++       gbe_kernel_use_slm = gbe::kernelUseSLM;
++       gbe_kernel_get_sampler_size = gbe::kernelGetSamplerSize;
++       gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData;
+++      gbe_kernel_get_image_size = gbe::kernelGetImageSize;
+++      gbe_kernel_get_image_data = gbe::kernelGetImageData;
+++      gbe_get_image_base_index = gbe::getImageBaseIndex;
+++      gbe_set_image_base_index = gbe::setImageBaseIndex;
++       genSetupCallBacks();
++     }
++   };
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.h	2013-05-14 20:09:16.366021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.h	2013-05-14 20:09:28.638021436 +0200
++@@ -87,6 +87,31 @@
++   GBE_CONSTANT_BUFFER = 1 /* constant buffer argument location in curbe */
++ };
++ 
+++typedef struct ImageInfo {
+++    int32_t arg_idx;
+++    int32_t idx;
+++    int32_t wSlot;
+++    int32_t hSlot;
+++    int32_t depthSlot;
+++    int32_t dataTypeSlot;
+++    int32_t channelOrderSlot;
+++    int32_t dimOrderSlot;
+++} ImageInfo;
+++
+++typedef void (gbe_set_image_base_index_cb)(uint32_t base_idx);
+++extern gbe_set_image_base_index_cb *gbe_set_image_base_index;
+++
+++typedef uint32_t (gbe_get_image_base_index_cb)();
+++extern gbe_get_image_base_index_cb *gbe_get_image_base_index;
+++
+++/*! Get the size of defined images */
+++typedef size_t (gbe_kernel_get_image_size_cb)(gbe_kernel gbeKernel);
+++extern gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size;
+++
+++/*! Get the content of defined images */
+++typedef void (gbe_kernel_get_image_data_cb)(gbe_kernel gbeKernel, ImageInfo *images);
+++extern gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data;
+++
++ /*! Create a new program from the given source code (zero terminated string) */
++ typedef gbe_program (gbe_program_new_from_source_cb)(const char *source,
++                                                      size_t stringSize,
++Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.hpp	2013-05-14 20:09:16.366021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.hpp	2013-05-14 20:09:28.638021436 +0200
++@@ -118,6 +118,14 @@
++     size_t getSamplerSize(void) const { return samplerSet->getDataSize(); }
++     /*! Get defined sampler value array */
++     void getSamplerData(uint32_t *samplers) const { samplerSet->getData(samplers); }
+++    /*! Set image set. */
+++    void setImageSet(ir::ImageSet * from) {
+++      imageSet = from;
+++    }
+++    /*! Get defined image size */
+++    size_t getImageSize(void) const { return imageSet->getDataSize(); }
+++    /*! Get defined image value array */
+++    void getImageData(ImageInfo *images) const { imageSet->getData(images); }
++   protected:
++     friend class Context;      //!< Owns the kernels
++     const std::string name;    //!< Kernel name
++@@ -130,6 +138,7 @@
++     bool useSLM;               //!< SLM requires a special HW config
++     Context *ctx;              //!< Save context after compiler to alloc constant buffer curbe
++     ir::SamplerSet *samplerSet;//!< Copy from the corresponding function.
+++    ir::ImageSet *imageSet;    //!< Copy from the corresponding function.
++     GBE_CLASS(Kernel);         //!< Use custom allocators
++   };
++ 
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.cpp	2013-05-14 20:09:16.366021983 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.cpp	2013-05-14 20:09:28.638021436 +0200
++@@ -47,6 +47,7 @@
++   {
++     initProfile(*this);
++     samplerSet = GBE_NEW(SamplerSet);
+++    imageSet = GBE_NEW(ImageSet);
++   }
++ 
++   Function::~Function(void) {
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.hpp	2013-05-14 20:09:26.818021517 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp	2013-05-14 20:09:28.638021436 +0200
++@@ -29,6 +29,7 @@
++ #include "ir/instruction.hpp"
++ #include "ir/profile.hpp"
++ #include "ir/sampler.hpp"
+++#include "ir/image.hpp"
++ #include "sys/vector.hpp"
++ #include "sys/set.hpp"
++ #include "sys/map.hpp"
++@@ -302,6 +303,8 @@
++     INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; }
++     /*! Get sampler set in this function */
++     SamplerSet* getSamplerSet(void) const {return samplerSet; }
+++    /*! Get image set in this function */
+++    ImageSet* getImageSet(void) const {return imageSet; }
++   private:
++     friend class Context;           //!< Can freely modify a function
++     std::string name;               //!< Function name
++@@ -318,6 +321,7 @@
++     uint32_t simdWidth;             //!< 8 or 16 if forced, 0 otherwise
++     bool useSLM;                    //!< Is SLM required?
++     SamplerSet *samplerSet;          //!< samplers used in this function.
+++    ImageSet* imageSet;              //!< Image set in this function's arguments..
++     GBE_CLASS(Function);            //!< Use custom allocator
++   };
++ 
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/image.cpp
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/image.cpp	2013-05-14 20:09:28.638021436 +0200
++@@ -0,0 +1,69 @@
+++/*
+++ * Copyright © 2012 Intel Corporation
+++ *
+++ * This library is free software; you can redistribute it and/or
+++ * modify it under the terms of the GNU Lesser General Public
+++ * License as published by the Free Software Foundation; either
+++ * version 2 of the License, or (at your option) any later version.
+++ *
+++ * This library is distributed in the hope that it will be useful,
+++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+++ * Lesser General Public License for more details.
+++ *
+++ * You should have received a copy of the GNU Lesser General Public
+++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+++ *
+++ */
+++
+++/**
+++ * \file image.cpp
+++ *
+++ */
+++#include "image.hpp"
+++#include "context.hpp"
+++#include "ocl_common_defines.h"
+++#include "backend/program.h"
+++
+++namespace gbe {
+++namespace ir {
+++
+++  void ImageSet::append(Register imageReg, Context *ctx)
+++  {
+++    ir::FunctionArgument *arg =  ctx->getFunction().getArg(imageReg);
+++    GBE_ASSERTM(arg && arg->type == ir::FunctionArgument::IMAGE, "Append an invalid reg to image set.");
+++    GBE_ASSERTM(regMap.find(imageReg) == regMap.end(), "Append the same image reg twice.");
+++
+++    int32_t id = ctx->getFunction().getArgID(arg);
+++    struct ImageInfo *imageInfo = GBE_NEW(struct ImageInfo);
+++    imageInfo->arg_idx = id;
+++    imageInfo->idx = regMap.size() + gbe_get_image_base_index();
+++    imageInfo->wSlot = -1;
+++    imageInfo->hSlot = -1;
+++    imageInfo->depthSlot = -1;
+++    imageInfo->dataTypeSlot = -1;
+++    imageInfo->channelOrderSlot = -1;
+++    imageInfo->dimOrderSlot = -1;
+++
+++    regMap.insert(std::make_pair(imageReg, imageInfo));
+++  }
+++
+++  const uint32_t ImageSet::getIdx(const Register imageReg) const
+++  {
+++    auto it = regMap.find(imageReg);
+++    GBE_ASSERT(it != regMap.end());
+++    return it->second->idx;
+++  }
+++
+++  void ImageSet::getData(struct ImageInfo *imageInfos) const {
+++      for(auto &it : regMap)
+++        imageInfos[it.second->idx - gbe_get_image_base_index()] = *it.second;
+++  }
+++
+++  ImageSet::~ImageSet() {
+++    for(auto &it : regMap)
+++      GBE_DELETE(it.second);
+++  }
+++
+++} /* namespace ir */
+++} /* namespace gbe */
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/image.hpp
++===================================================================
++--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/image.hpp	2013-05-14 20:09:28.638021436 +0200
++@@ -0,0 +1,65 @@
+++/*
+++ * Copyright © 2012 Intel Corporation
+++ *
+++ * This library is free software; you can redistribute it and/or
+++ * modify it under the terms of the GNU Lesser General Public
+++ * License as published by the Free Software Foundation; either
+++ * version 2 of the License, or (at your option) any later version.
+++ *
+++ * This library is distributed in the hope that it will be useful,
+++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+++ * Lesser General Public License for more details.
+++ *
+++ * You should have received a copy of the GNU Lesser General Public
+++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+++ *
+++ */
+++
+++/**
+++ * \file image.hpp
+++ *
+++ */
+++#ifndef __GBE_IR_IMAGE_HPP__
+++#define __GBE_IR_IMAGE_HPP__
+++
+++#include "ir/register.hpp"
+++#include "sys/map.hpp"
+++
+++extern "C" {
+++  struct ImageInfo;
+++}
+++
+++namespace gbe {
+++namespace ir {
+++
+++  class Context;
+++  /*! An image set is a set of images which are defined in kernel args.
+++   *  We use this set to gather the images here and allocate a unique index
+++   *  for each individual image. And that individual image could be used
+++   *  at backend to identify this image's location.
+++   */
+++  class ImageSet
+++  {
+++  public:
+++    /*! Append an image argument. */
+++    void append(Register imageReg, Context *ctx);
+++    /*! Get the image's index(actual location). */
+++    const uint32_t getIdx(const Register imageReg) const;
+++    size_t getDataSize(void) { return regMap.size(); }
+++    size_t getDataSize(void) const { return regMap.size(); }
+++    void getData(struct ImageInfo *imageInfos) const;
+++    void operator = (const ImageSet& other) {
+++      regMap.insert(other.regMap.begin(), other.regMap.end());
+++    }
+++    ImageSet(const ImageSet& other) : regMap(other.regMap.begin(), other.regMap.end()) { }
+++    ImageSet() {}
+++    ~ImageSet();
+++  private:
+++    map<Register, struct ImageInfo *> regMap;
+++    GBE_CLASS(ImageSet);
+++  };
+++} /* namespace ir */
+++} /* namespace gbe */
+++
+++#endif /* __GBE_IR_IMAGE_HPP__ */
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.hpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/instruction.hpp	2013-05-14 20:09:23.482021666 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.hpp	2013-05-14 20:09:28.642021436 +0200
++@@ -297,6 +297,9 @@
++   /*! Store data in an texture */
++   class TypedWriteInstruction : public Instruction {
++   public:
+++    enum {
+++     SURFACE_BTI = 0
+++    };
++     /*! Return true if the given instruction is an instance of this class */
++     static bool isClassOf(const Instruction &insn);
++     Type getSrcType(void) const;
++@@ -306,6 +309,10 @@
++   /*! Load texels from a texture */
++   class SampleInstruction : public Instruction {
++   public:
+++    enum {
+++     SURFACE_BTI = 0,
+++     SAMPLER_BTI = 1
+++    };
++     /*! Return true if the given instruction is an instance of this class */
++     static bool isClassOf(const Instruction &insn);
++     Type getSrcType(void) const;
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/sampler.cpp	2013-05-14 20:09:26.818021517 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp	2013-05-14 20:09:28.642021436 +0200
++@@ -36,12 +36,10 @@
++ 
++   void SamplerSet::appendReg(const Register reg, uint32_t key, Context *ctx) {
++     struct SamplerRegSlot samplerSlot;
++-    // This register is just used as a key.
++     samplerSlot.reg = reg;
++     samplerSlot.slot = samplerMap.size();
++     samplerMap.insert(std::make_pair(key, samplerSlot));
++     regMap.insert(std::make_pair(samplerSlot.reg, samplerSlot));
++-    ctx->LOADI(ir::TYPE_S32, samplerSlot.reg, ctx->newIntegerImmediate(samplerSlot.slot, ir::TYPE_S32));
++   }
++ 
++   Register SamplerSet::append(uint32_t samplerValue, Context *ctx)
++@@ -49,6 +47,7 @@
++     auto it = samplerMap.find(samplerValue);
++     if (it != samplerMap.end())
++         return it->second.reg;
+++    // This register is just used as a key.
++     Register reg = ctx->reg(FAMILY_DWORD);
++     appendReg(reg, samplerValue, ctx);
++     return reg;
++Index: beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:26.818021517 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:28.642021436 +0200
++@@ -914,6 +914,7 @@
++               break;
++               case ir::IMAGE:
++                 ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
+++                ctx.getFunction().getImageSet()->append(reg, &ctx);
++               break;
++               break;
++               default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
++Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue.c	2013-05-14 20:09:26.818021517 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c	2013-05-14 20:09:28.642021436 +0200
++@@ -99,6 +99,21 @@
++ }
++ 
++ LOCAL cl_int
+++cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
+++{
+++  uint32_t i;
+++  for (i = 0; i < k->image_sz; i++) {
+++    int id = k->images[i].arg_idx;
+++    assert(gbe_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE);
+++    cl_gpgpu_bind_image(queue->gpgpu, k->images[i].idx, k->args[id].mem->bo,
+++                        k->args[id].mem->intel_fmt, k->args[id].mem->type,
+++                        k->args[id].mem->w, k->args[id].mem->h,
+++                        k->args[id].mem->pitch, k->args[id].mem->tiling);
+++  }
+++  return CL_SUCCESS;
+++}
+++
+++LOCAL cl_int
++ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
++ {
++   /* Bind all user buffers (given by clSetKernelArg) */
++@@ -107,20 +122,10 @@
++   for (i = 0; i < k->arg_n; ++i) {
++     uint32_t offset; // location of the address in the curbe
++     arg_type = gbe_kernel_get_arg_type(k->opaque, i);
++-    if (arg_type != GBE_ARG_GLOBAL_PTR &&
++-        arg_type != GBE_ARG_IMAGE &&
++-        arg_type != GBE_ARG_SAMPLER)
+++    if (arg_type != GBE_ARG_GLOBAL_PTR)
++       continue;
++     offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
++-    if (arg_type == GBE_ARG_IMAGE) {
++-      uint32_t *curbe_index = (uint32_t*)(k->curbe + offset);
++-      cl_gpgpu_bind_image(queue->gpgpu, curbe_index, k->args[i].mem->bo,
++-                          k->args[i].mem->intel_fmt, k->args[i].mem->type,
++-                          k->args[i].mem->w, k->args[i].mem->h,
++-                          k->args[i].mem->pitch, k->args[i].mem->tiling);
++-    } else if (arg_type == GBE_ARG_SAMPLER) {
++-    } else
++-      cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
+++    cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
++   }
++ 
++   return CL_SUCCESS;
++Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue.h	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue.h	2013-05-14 20:09:28.642021436 +0200
++@@ -70,6 +70,9 @@
++ /* Bind all the surfaces in the GPGPU state */
++ extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
++ 
+++/* Bind all the image surfaces in the GPGPU state */
+++extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel);
+++
++ /*update constant buffer to final curbe */
++ extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst);
++ #endif /* __CL_COMMAND_QUEUE_H__ */
++Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue_gen7.c	2013-05-14 20:09:26.822021517 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c	2013-05-14 20:09:28.642021436 +0200
++@@ -224,6 +224,8 @@
++ 
++   /* Bind user buffers */
++   cl_command_queue_bind_surface(queue, ker);
+++  /* Bind user images */
+++  cl_command_queue_bind_image(queue, ker);
++   /* Bind all samplers */
++   cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->sampler_sz);
++ 
++Index: beignet-0.1+git20130514+19e9c58/src/cl_driver.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_driver.h	2013-05-14 20:09:19.978021822 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_driver.h	2013-05-14 20:09:28.646021435 +0200
++@@ -116,14 +116,15 @@
++ 
++ /* Set a 2d texture */
++ typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state,
++-                                        uint32_t *curbe_index,
++-                                        cl_buffer obj_bo,
++-                                        uint32_t format,
++-                                        uint32_t type,
++-                                        int32_t w,
++-                                        int32_t h,
++-                                        int pitch,
++-                                        cl_gpgpu_tiling tiling);
+++                                      uint32_t id,
+++                                      cl_buffer obj_bo,
+++                                      uint32_t format,
+++                                      uint32_t type,
+++                                      int32_t w,
+++                                      int32_t h,
+++                                      int pitch,
+++                                      cl_gpgpu_tiling tiling);
+++
++ extern cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image;
++ 
++ /* Setup a stack */
++Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.c	2013-05-14 20:09:26.822021517 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.c	2013-05-14 20:09:28.646021435 +0200
++@@ -56,6 +56,8 @@
++         cl_mem_delete(k->args[i].mem);
++     cl_free(k->args);
++   }
+++  if (k->image_sz)
+++    cl_free(k->images);
++   k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
++   cl_free(k);
++ }
++@@ -208,6 +210,18 @@
++   assert(k->sampler_sz <= GEN_MAX_SAMPLERS);
++   if (k->sampler_sz > 0)
++     gbe_kernel_get_sampler_data(k->opaque, k->samplers);
+++  /* Get image data & size */
+++  k->image_sz = gbe_kernel_get_image_size(k->opaque);
+++  assert(k->sampler_sz <= GEN_MAX_SURFACES);
+++  if (k->image_sz > 0) {
+++    TRY_ALLOC_NO_ERR(k->images, cl_calloc(k->image_sz, sizeof(k->images[0])));
+++    gbe_kernel_get_image_data(k->opaque, k->images);
+++  } else
+++    k->images = NULL;
+++  return;
+++error:
+++  cl_buffer_unreference(k->bo);
+++  k->bo = NULL;
++ }
++ 
++ LOCAL cl_kernel
++@@ -227,8 +241,14 @@
++   to->arg_n = from->arg_n;
++   to->curbe_sz = from->curbe_sz;
++   to->sampler_sz = from->sampler_sz;
+++  to->image_sz = from->image_sz;
++   if (to->sampler_sz)
++     memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
+++  if (to->image_sz) {
+++    TRY_ALLOC_NO_ERR(to->images, cl_calloc(to->image_sz, sizeof(to->images[0])));
+++    memcpy(to->images, from->images, to->image_sz * sizeof(to->images[0]));
+++  } else
+++    to->images = NULL;
++   TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
++   if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
++ 
++Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.h
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.h	2013-05-14 20:09:26.822021517 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.h	2013-05-14 20:09:28.646021435 +0200
++@@ -54,6 +54,8 @@
++   size_t curbe_sz;            /* Size of it */
++   uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel & kernel args */
++   size_t sampler_sz;          /* sampler size defined in kernel & kernel args. */
+++  struct ImageInfo *images;   /* images defined in kernel args */
+++  size_t image_sz;            /* image count in kernel args */
++   cl_argument *args;          /* To track argument setting */
++   uint32_t arg_n:31;          /* Number of arguments */
++   uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
++Index: beignet-0.1+git20130514+19e9c58/src/intel/intel_driver.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/intel/intel_driver.c	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/intel/intel_driver.c	2013-05-14 20:09:28.646021435 +0200
++@@ -369,14 +369,17 @@
++   intel_driver_terminate(driver);
++   intel_driver_delete(driver);
++ }
++-
+++#include "program.h"
++ static intel_driver_t*
++ cl_intel_driver_new(cl_context_prop props)
++ {
++   intel_driver_t *driver = NULL;
++   TRY_ALLOC_NO_ERR (driver, intel_driver_new());
++   intel_driver_open(driver, props);
++-
+++  /* We use the first 2 slots(0,1) for all the bufs.
+++   * Notify the gbe this base index, thus gbe can avoid conflicts
+++   * when it allocates slots for images*/
+++  gbe_set_image_base_index(2);
++ exit:
++   return driver;
++ error:
++Index: beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c
++===================================================================
++--- beignet-0.1+git20130514+19e9c58.orig/src/intel/intel_gpgpu.c	2013-05-14 20:09:26.822021517 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c	2013-05-14 20:09:28.646021435 +0200
++@@ -451,25 +451,6 @@
++   heap->binding_table[1] = sizeof(gen7_surface_state_t) + offsetof(surface_heap_t, surface);
++ }
++ 
++-static inline unsigned long
++-__fls(unsigned long x)
++-{
++-        asm("bsf %1,%0"
++-            : "=r" (x)
++-            : "rm" (x));
++-        return x;
++-}
++-
++-static int
++-intel_gpgpu_get_free_img_index(intel_gpgpu_t *gpgpu)
++-{
++-  int slot;
++-  assert(~gpgpu->img_bitmap != 0);
++-  slot = __fls(~gpgpu->img_bitmap);
++-  gpgpu->img_bitmap |= (1 << slot);
++-  return slot + gpgpu->img_index_base;
++-}
++-
++ static int
++ intel_get_surface_type(cl_mem_object_type type)
++ {
++@@ -490,7 +471,7 @@
++ 
++ static void
++ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
++-                              uint32_t *curbe_index,
+++                              uint32_t index,
++                               dri_bo* obj_bo,
++                               uint32_t format,
++                               cl_mem_object_type type,
++@@ -499,7 +480,6 @@
++                               int32_t pitch,
++                               int32_t tiling)
++ {
++-  int32_t index = intel_gpgpu_get_free_img_index(gpgpu);
++   surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual;
++   gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
++ 
++@@ -521,7 +501,6 @@
++   }
++   ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
++   intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo);
++-  *curbe_index = index;
++   gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
++ }
++ 
++@@ -544,7 +523,7 @@
++ 
++ static void
++ intel_gpgpu_bind_image(intel_gpgpu_t *gpgpu,
++-                       uint32_t *index,
+++                       uint32_t index,
++                        cl_buffer *obj_bo,
++                        uint32_t format,
++                        cl_mem_object_type type,
++@@ -554,7 +533,7 @@
++                        cl_gpgpu_tiling tiling)
++ {
++   intel_gpgpu_bind_image_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, format, type, w, h, pitch, tiling);
++-  assert(*index < GEN_MAX_SURFACES);
+++  assert(index < GEN_MAX_SURFACES);
++ }
++ 
++ static void
diff --cc debian/patches/const64
index 5ce8d29,0000000..79d9524
mode 100644,000000..100644
--- a/debian/patches/const64
+++ b/debian/patches/const64
@@@ -1,24 -1,0 +1,24 @@@
- Index: beignet-0.1+git20130419+9c11c18/backend/src/ir/instruction.cpp
++Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.cpp
 +===================================================================
- --- beignet-0.1+git20130419+9c11c18.orig/backend/src/ir/instruction.cpp	2013-04-19 10:28:49.000000000 +0200
- +++ beignet-0.1+git20130419+9c11c18/backend/src/ir/instruction.cpp	2013-04-19 19:17:43.329603279 +0200
- @@ -591,17 +591,17 @@
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/instruction.cpp	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.cpp	2013-05-14 20:07:24.282026980 +0200
++@@ -593,17 +593,17 @@
 +     static const Type madType[] = {TYPE_FLOAT};
 +     static const uint32_t madTypeNum = ARRAY_ELEM_NUM(madType);
 + 
 +-    // TODO add support for 64 bits values
 +     static const Type allButBool[] = {TYPE_S8,  TYPE_U8,
 +                                       TYPE_S16, TYPE_U16,
 +                                       TYPE_S32, TYPE_U32,
 ++                                      TYPE_S64, TYPE_U64,
 +                                       TYPE_FLOAT, TYPE_DOUBLE};
 +     static const uint32_t allButBoolNum = ARRAY_ELEM_NUM(allButBool);
 + 
 +-    // TODO add support for 64 bits values
 +     static const Type logicalType[] = {TYPE_S8,  TYPE_U8,
 +                                        TYPE_S16, TYPE_U16,
 +                                        TYPE_S32, TYPE_U32,
 ++                                       TYPE_S64, TYPE_U64,
 +                                        TYPE_BOOL};
 +     static const uint32_t logicalTypeNum = ARRAY_ELEM_NUM(logicalType);
 + 
diff --cc debian/patches/khronos
index a191933,0000000..37ae544
mode 100644,000000..100644
--- a/debian/patches/khronos
+++ b/debian/patches/khronos
@@@ -1,3100 -1,0 +1,3100 @@@
 +Description: Use Khronos Group headers
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-04-01
 +
- Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_ext.h
++Index: beignet-0.1+git20130514+19e9c58/include/CL/cl_ext.h
 +===================================================================
- --- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl_ext.h	2013-04-12 08:13:48.000000000 +0200
- +++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_ext.h	2013-04-15 18:25:01.036323041 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl_ext.h	2013-05-14 20:04:50.338033844 +0200
+++++ beignet-0.1+git20130514+19e9c58/include/CL/cl_ext.h	2013-05-14 20:04:53.670033695 +0200
 +@@ -1,251 +1 @@
 +-/*******************************************************************************
 +- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- ******************************************************************************/
 +-
 +-/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
 +-
 +-/* cl_ext.h contains OpenCL extensions which don't have external */
 +-/* (OpenGL, D3D) dependencies.                                   */
 +-
 +-#ifndef __CL_EXT_H
 +-#define __CL_EXT_H
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-#ifdef __APPLE__
 +-	#include <OpenCL/cl.h>
 +-    #include <AvailabilityMacros.h>
 +-#else
 +-	#include <CL/cl.h>
 +-#endif
 +-
 +-/* cl_khr_fp16 extension - no extension #define since it has no functions  */
 +-#define CL_DEVICE_HALF_FP_CONFIG                    0x1033
 +-
 +-/* Memory object destruction
 +- *
 +- * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
 +- *
 +- * Registers a user callback function that will be called when the memory object is deleted and its resources 
 +- * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback 
 +- * stack associated with memobj. The registered user callback functions are called in the reverse order in 
 +- * which they were registered. The user callback functions are called and then the memory object is deleted 
 +- * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be 
 +- * notified when the memory referenced by host_ptr, specified when the memory object is created and used as 
 +- * the storage bits for the memory object, can be reused or freed.
 +- *
 +- * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
 +- *
 +- * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
 +- * before using.
 +- */
 +-#define cl_APPLE_SetMemObjectDestructor 1
 +-cl_int	CL_API_ENTRY clSetMemObjectDestructorAPPLE(  cl_mem /* memobj */, 
 +-                                        void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
 +-                                        void * /*user_data */ )             CL_EXT_SUFFIX__VERSION_1_0;  
 +-
 +-
 +-/* Context Logging Functions
 +- *
 +- * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
 +- * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
 +- * before using.
 +- *
 +- * clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger 
 +- */
 +-#define cl_APPLE_ContextLoggingFunctions 1
 +-extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE(  const char * /* errstr */, 
 +-                                            const void * /* private_info */, 
 +-                                            size_t       /* cb */, 
 +-                                            void *       /* user_data */ )  CL_EXT_SUFFIX__VERSION_1_0;
 +-
 +-/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
 +-extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE(   const char * /* errstr */, 
 +-                                          const void * /* private_info */, 
 +-                                          size_t       /* cb */, 
 +-                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
 +-
 +-/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
 +-extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(   const char * /* errstr */, 
 +-                                          const void * /* private_info */, 
 +-                                          size_t       /* cb */, 
 +-                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
 +-
 +-
 +-/************************ 
 +-* cl_khr_icd extension *                                                  
 +-************************/
 +-#define cl_khr_icd 1
 +-
 +-/* cl_platform_info                                                        */
 +-#define CL_PLATFORM_ICD_SUFFIX_KHR                  0x0920
 +-
 +-/* Additional Error Codes                                                  */
 +-#define CL_PLATFORM_NOT_FOUND_KHR                   -1001
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
 +-                       cl_platform_id * /* platforms */,
 +-                       cl_uint *        /* num_platforms */);
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
 +-    cl_uint          /* num_entries */,
 +-    cl_platform_id * /* platforms */,
 +-    cl_uint *        /* num_platforms */);
 +-
 +-
 +-/* Extension: cl_khr_image2D_buffer
 +- *
 +- * This extension allows a 2D image to be created from a cl_mem buffer without a copy.
 +- * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
 +- * Both the sampler and sampler-less read_image built-in functions are supported for 2D images
 +- * and 2D images created from a buffer.  Similarly, the write_image built-ins are also supported
 +- * for 2D images created from a buffer.
 +- *
 +- * When the 2D image from buffer is created, the client must specify the width,
 +- * height, image format (i.e. channel order and channel data type) and optionally the row pitch
 +- *
 +- * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
 +- * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
 +- */
 +-    
 +-/*************************************
 +- * cl_khr_initalize_memory extension *
 +- *************************************/
 +-    
 +-#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x200E
 +-    
 +-    
 +-/**************************************
 +- * cl_khr_terminate_context extension *
 +- **************************************/
 +-    
 +-#define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x200F
 +-#define CL_CONTEXT_TERMINATE_KHR                    0x2010
 +-
 +-#define cl_khr_terminate_context 1
 +-extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
 +-    
 +-    
 +-/*
 +- * Extension: cl_khr_spir
 +- *
 +- * This extension adds support to create an OpenCL program object from a 
 +- * Standard Portable Intermediate Representation (SPIR) instance
 +- */
 +-
 +-/******************************************
 +-* cl_nv_device_attribute_query extension *
 +-******************************************/
 +-/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
 +-#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
 +-#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
 +-#define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
 +-#define CL_DEVICE_WARP_SIZE_NV                      0x4003
 +-#define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
 +-#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
 +-#define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006
 +-
 +-
 +-/*********************************
 +-* cl_amd_device_attribute_query *
 +-*********************************/
 +-#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
 +-
 +-#ifdef CL_VERSION_1_1
 +-   /***********************************
 +-    * cl_ext_device_fission extension *
 +-    ***********************************/
 +-    #define cl_ext_device_fission   1
 +-    
 +-    extern CL_API_ENTRY cl_int CL_API_CALL
 +-    clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
 +-    
 +-    typedef CL_API_ENTRY cl_int 
 +-    (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 +-
 +-    extern CL_API_ENTRY cl_int CL_API_CALL
 +-    clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
 +-    
 +-    typedef CL_API_ENTRY cl_int 
 +-    (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 +-
 +-    typedef cl_ulong  cl_device_partition_property_ext;
 +-    extern CL_API_ENTRY cl_int CL_API_CALL
 +-    clCreateSubDevicesEXT(  cl_device_id /*in_device*/,
 +-                            const cl_device_partition_property_ext * /* properties */,
 +-                            cl_uint /*num_entries*/,
 +-                            cl_device_id * /*out_devices*/,
 +-                            cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 +-
 +-    typedef CL_API_ENTRY cl_int 
 +-    ( CL_API_CALL * clCreateSubDevicesEXT_fn)(  cl_device_id /*in_device*/,
 +-                                                const cl_device_partition_property_ext * /* properties */,
 +-                                                cl_uint /*num_entries*/,
 +-                                                cl_device_id * /*out_devices*/,
 +-                                                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 +-
 +-    /* cl_device_partition_property_ext */
 +-    #define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
 +-    #define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
 +-    #define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
 +-    #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
 +-    
 +-    /* clDeviceGetInfo selectors */
 +-    #define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
 +-    #define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
 +-    #define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
 +-    #define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
 +-    #define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
 +-    
 +-    /* error codes */
 +-    #define CL_DEVICE_PARTITION_FAILED_EXT              -1057
 +-    #define CL_INVALID_PARTITION_COUNT_EXT              -1058
 +-    #define CL_INVALID_PARTITION_NAME_EXT               -1059
 +-    
 +-    /* CL_AFFINITY_DOMAINs */
 +-    #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
 +-    #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
 +-    #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
 +-    #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
 +-    #define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
 +-    #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
 +-    
 +-    /* cl_device_partition_property_ext list terminators */
 +-    #define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
 +-    #define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
 +-    #define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
 +-
 +-
 +-
 +-#endif /* CL_VERSION_1_1 */
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-
 +-#endif /* __CL_EXT_H */
 ++#include_next <CL/cl_ext.h>
- Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/opencl.h
++Index: beignet-0.1+git20130514+19e9c58/include/CL/opencl.h
 +===================================================================
- --- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/opencl.h	2013-04-12 08:13:48.000000000 +0200
- +++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/opencl.h	2013-04-15 18:25:01.036323041 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/include/CL/opencl.h	2013-05-14 20:04:50.338033844 +0200
+++++ beignet-0.1+git20130514+19e9c58/include/CL/opencl.h	2013-05-14 20:04:53.674033695 +0200
 +@@ -1,54 +1 @@
 +-/*******************************************************************************
 +- * Copyright (c) 2008-2012 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- ******************************************************************************/
 +-
 +-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
 +-
 +-#ifndef __OPENCL_H
 +-#define __OPENCL_H
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-#ifdef __APPLE__
 +-
 +-#include <OpenCL/cl.h>
 +-#include <OpenCL/cl_gl.h>
 +-#include <OpenCL/cl_gl_ext.h>
 +-#include <OpenCL/cl_ext.h>
 +-
 +-#else
 +-
 +-#include <CL/cl.h>
 +-#include <CL/cl_gl.h>
 +-#include <CL/cl_gl_ext.h>
 +-#include <CL/cl_ext.h>
 +-
 +-#endif
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  /* __OPENCL_H   */
 +-
 ++#include_next <CL/opencl.h>
- Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_d3d10.h
++Index: beignet-0.1+git20130514+19e9c58/include/CL/cl_d3d10.h
 +===================================================================
- --- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl_d3d10.h	2013-04-12 08:13:48.000000000 +0200
- +++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_d3d10.h	2013-04-15 18:25:01.036323041 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl_d3d10.h	2013-05-14 20:04:50.338033844 +0200
+++++ beignet-0.1+git20130514+19e9c58/include/CL/cl_d3d10.h	2013-05-14 20:04:53.674033695 +0200
 +@@ -1,126 +1 @@
 +-/**********************************************************************************
 +- * Copyright (c) 2008-2012 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- **********************************************************************************/
 +-
 +-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
 +-
 +-#ifndef __OPENCL_CL_D3D10_H
 +-#define __OPENCL_CL_D3D10_H
 +-
 +-#include <d3d10.h>
 +-#include <CL/cl.h>
 +-#include <CL/cl_platform.h>
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-/******************************************************************************
 +- * cl_khr_d3d10_sharing                                                       */
 +-#define cl_khr_d3d10_sharing 1
 +-
 +-typedef cl_uint cl_d3d10_device_source_khr;
 +-typedef cl_uint cl_d3d10_device_set_khr;
 +-
 +-/******************************************************************************/
 +-
 +-// Error Codes
 +-#define CL_INVALID_D3D10_DEVICE_KHR                  -1002
 +-#define CL_INVALID_D3D10_RESOURCE_KHR                -1003
 +-#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR       -1004
 +-#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR           -1005
 +-
 +-// cl_d3d10_device_source_nv
 +-#define CL_D3D10_DEVICE_KHR                          0x4010
 +-#define CL_D3D10_DXGI_ADAPTER_KHR                    0x4011
 +-
 +-// cl_d3d10_device_set_nv
 +-#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR           0x4012
 +-#define CL_ALL_DEVICES_FOR_D3D10_KHR                 0x4013
 +-
 +-// cl_context_info
 +-#define CL_CONTEXT_D3D10_DEVICE_KHR                  0x4014
 +-#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
 +-
 +-// cl_mem_info
 +-#define CL_MEM_D3D10_RESOURCE_KHR                    0x4015
 +-
 +-// cl_image_info
 +-#define CL_IMAGE_D3D10_SUBRESOURCE_KHR               0x4016
 +-
 +-// cl_command_type
 +-#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR         0x4017
 +-#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR         0x4018
 +-
 +-/******************************************************************************/
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
 +-    cl_platform_id             platform,
 +-    cl_d3d10_device_source_khr d3d_device_source,
 +-    void *                     d3d_object,
 +-    cl_d3d10_device_set_khr    d3d_device_set,
 +-    cl_uint                    num_entries,
 +-    cl_device_id *             devices,
 +-    cl_uint *                  num_devices) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
 +-    cl_context     context,
 +-    cl_mem_flags   flags,
 +-    ID3D10Buffer * resource,
 +-    cl_int *       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
 +-    cl_context        context,
 +-    cl_mem_flags      flags,
 +-    ID3D10Texture2D * resource,
 +-    UINT              subresource,
 +-    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
 +-    cl_context        context,
 +-    cl_mem_flags      flags,
 +-    ID3D10Texture3D * resource,
 +-    UINT              subresource,
 +-    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
 +-    cl_command_queue command_queue,
 +-    cl_uint          num_objects,
 +-    const cl_mem *   mem_objects,
 +-    cl_uint          num_events_in_wait_list,
 +-    const cl_event * event_wait_list,
 +-    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
 +-    cl_command_queue command_queue,
 +-    cl_uint          num_objects,
 +-    const cl_mem *   mem_objects,
 +-    cl_uint          num_events_in_wait_list,
 +-    const cl_event * event_wait_list,
 +-    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  // __OPENCL_CL_D3D10_H
 +-
 ++#include_next <CL/cl_d3d10.h>
- Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl.h
++Index: beignet-0.1+git20130514+19e9c58/include/CL/cl.h
 +===================================================================
- --- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl.h	2013-04-12 08:13:48.000000000 +0200
- +++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl.h	2013-04-15 18:25:01.036323041 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl.h	2013-05-14 20:04:50.338033844 +0200
+++++ beignet-0.1+git20130514+19e9c58/include/CL/cl.h	2013-05-14 20:04:53.674033695 +0200
 +@@ -1,1214 +1 @@
 +-/*******************************************************************************
 +- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- ******************************************************************************/
 +-
 +-#ifndef __OPENCL_CL_H
 +-#define __OPENCL_CL_H
 +-
 +-#ifdef __APPLE__
 +-#include <OpenCL/cl_platform.h>
 +-#else
 +-#include <CL/cl_platform.h>
 +-#endif	
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-/******************************************************************************/
 +-
 +-typedef struct _cl_platform_id *    cl_platform_id;
 +-typedef struct _cl_device_id *      cl_device_id;
 +-typedef struct _cl_context *        cl_context;
 +-typedef struct _cl_command_queue *  cl_command_queue;
 +-typedef struct _cl_mem *            cl_mem;
 +-typedef struct _cl_program *        cl_program;
 +-typedef struct _cl_kernel *         cl_kernel;
 +-typedef struct _cl_event *          cl_event;
 +-typedef struct _cl_sampler *        cl_sampler;
 +-
 +-typedef cl_uint             cl_bool;                     /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ 
 +-typedef cl_ulong            cl_bitfield;
 +-typedef cl_bitfield         cl_device_type;
 +-typedef cl_uint             cl_platform_info;
 +-typedef cl_uint             cl_device_info;
 +-typedef cl_bitfield         cl_device_fp_config;
 +-typedef cl_uint             cl_device_mem_cache_type;
 +-typedef cl_uint             cl_device_local_mem_type;
 +-typedef cl_bitfield         cl_device_exec_capabilities;
 +-typedef cl_bitfield         cl_command_queue_properties;
 +-typedef intptr_t            cl_device_partition_property;
 +-typedef cl_bitfield         cl_device_affinity_domain;
 +-
 +-typedef intptr_t            cl_context_properties;
 +-typedef cl_uint             cl_context_info;
 +-typedef cl_uint             cl_command_queue_info;
 +-typedef cl_uint             cl_channel_order;
 +-typedef cl_uint             cl_channel_type;
 +-typedef cl_bitfield         cl_mem_flags;
 +-typedef cl_uint             cl_mem_object_type;
 +-typedef cl_uint             cl_mem_info;
 +-typedef cl_bitfield         cl_mem_migration_flags;
 +-typedef cl_uint             cl_image_info;
 +-typedef cl_uint             cl_buffer_create_type;
 +-typedef cl_uint             cl_addressing_mode;
 +-typedef cl_uint             cl_filter_mode;
 +-typedef cl_uint             cl_sampler_info;
 +-typedef cl_bitfield         cl_map_flags;
 +-typedef cl_uint             cl_program_info;
 +-typedef cl_uint             cl_program_build_info;
 +-typedef cl_uint             cl_program_binary_type;
 +-typedef cl_int              cl_build_status;
 +-typedef cl_uint             cl_kernel_info;
 +-typedef cl_uint             cl_kernel_arg_info;
 +-typedef cl_uint             cl_kernel_arg_address_qualifier;
 +-typedef cl_uint             cl_kernel_arg_access_qualifier;
 +-typedef cl_bitfield         cl_kernel_arg_type_qualifier;
 +-typedef cl_uint             cl_kernel_work_group_info;
 +-typedef cl_uint             cl_event_info;
 +-typedef cl_uint             cl_command_type;
 +-typedef cl_uint             cl_profiling_info;
 +-
 +-
 +-typedef struct _cl_image_format {
 +-    cl_channel_order        image_channel_order;
 +-    cl_channel_type         image_channel_data_type;
 +-} cl_image_format;
 +-
 +-typedef struct _cl_image_desc {
 +-    cl_mem_object_type      image_type;
 +-    size_t                  image_width;
 +-    size_t                  image_height;
 +-    size_t                  image_depth;
 +-    size_t                  image_array_size;
 +-    size_t                  image_row_pitch;
 +-    size_t                  image_slice_pitch;
 +-    cl_uint                 num_mip_levels;
 +-    cl_uint                 num_samples;
 +-    cl_mem                  buffer;
 +-} cl_image_desc;
 +-
 +-typedef struct _cl_buffer_region {
 +-    size_t                  origin;
 +-    size_t                  size;
 +-} cl_buffer_region;
 +-
 +-
 +-/******************************************************************************/
 +-
 +-/* Error Codes */
 +-#define CL_SUCCESS                                  0
 +-#define CL_DEVICE_NOT_FOUND                         -1
 +-#define CL_DEVICE_NOT_AVAILABLE                     -2
 +-#define CL_COMPILER_NOT_AVAILABLE                   -3
 +-#define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
 +-#define CL_OUT_OF_RESOURCES                         -5
 +-#define CL_OUT_OF_HOST_MEMORY                       -6
 +-#define CL_PROFILING_INFO_NOT_AVAILABLE             -7
 +-#define CL_MEM_COPY_OVERLAP                         -8
 +-#define CL_IMAGE_FORMAT_MISMATCH                    -9
 +-#define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
 +-#define CL_BUILD_PROGRAM_FAILURE                    -11
 +-#define CL_MAP_FAILURE                              -12
 +-#define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
 +-#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
 +-#define CL_COMPILE_PROGRAM_FAILURE                  -15
 +-#define CL_LINKER_NOT_AVAILABLE                     -16
 +-#define CL_LINK_PROGRAM_FAILURE                     -17
 +-#define CL_DEVICE_PARTITION_FAILED                  -18
 +-#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
 +-
 +-#define CL_INVALID_VALUE                            -30
 +-#define CL_INVALID_DEVICE_TYPE                      -31
 +-#define CL_INVALID_PLATFORM                         -32
 +-#define CL_INVALID_DEVICE                           -33
 +-#define CL_INVALID_CONTEXT                          -34
 +-#define CL_INVALID_QUEUE_PROPERTIES                 -35
 +-#define CL_INVALID_COMMAND_QUEUE                    -36
 +-#define CL_INVALID_HOST_PTR                         -37
 +-#define CL_INVALID_MEM_OBJECT                       -38
 +-#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR          -39
 +-#define CL_INVALID_IMAGE_SIZE                       -40
 +-#define CL_INVALID_SAMPLER                          -41
 +-#define CL_INVALID_BINARY                           -42
 +-#define CL_INVALID_BUILD_OPTIONS                    -43
 +-#define CL_INVALID_PROGRAM                          -44
 +-#define CL_INVALID_PROGRAM_EXECUTABLE               -45
 +-#define CL_INVALID_KERNEL_NAME                      -46
 +-#define CL_INVALID_KERNEL_DEFINITION                -47
 +-#define CL_INVALID_KERNEL                           -48
 +-#define CL_INVALID_ARG_INDEX                        -49
 +-#define CL_INVALID_ARG_VALUE                        -50
 +-#define CL_INVALID_ARG_SIZE                         -51
 +-#define CL_INVALID_KERNEL_ARGS                      -52
 +-#define CL_INVALID_WORK_DIMENSION                   -53
 +-#define CL_INVALID_WORK_GROUP_SIZE                  -54
 +-#define CL_INVALID_WORK_ITEM_SIZE                   -55
 +-#define CL_INVALID_GLOBAL_OFFSET                    -56
 +-#define CL_INVALID_EVENT_WAIT_LIST                  -57
 +-#define CL_INVALID_EVENT                            -58
 +-#define CL_INVALID_OPERATION                        -59
 +-#define CL_INVALID_GL_OBJECT                        -60
 +-#define CL_INVALID_BUFFER_SIZE                      -61
 +-#define CL_INVALID_MIP_LEVEL                        -62
 +-#define CL_INVALID_GLOBAL_WORK_SIZE                 -63
 +-#define CL_INVALID_PROPERTY                         -64
 +-#define CL_INVALID_IMAGE_DESCRIPTOR                 -65
 +-#define CL_INVALID_COMPILER_OPTIONS                 -66
 +-#define CL_INVALID_LINKER_OPTIONS                   -67
 +-#define CL_INVALID_DEVICE_PARTITION_COUNT           -68
 +-
 +-/* OpenCL Version */
 +-#define CL_VERSION_1_0                              1
 +-#define CL_VERSION_1_1                              1
 +-#define CL_VERSION_1_2                              1
 +-
 +-/* cl_bool */
 +-#define CL_FALSE                                    0
 +-#define CL_TRUE                                     1
 +-#define CL_BLOCKING                                 CL_TRUE
 +-#define CL_NON_BLOCKING                             CL_FALSE
 +-
 +-/* cl_platform_info */
 +-#define CL_PLATFORM_PROFILE                         0x0900
 +-#define CL_PLATFORM_VERSION                         0x0901
 +-#define CL_PLATFORM_NAME                            0x0902
 +-#define CL_PLATFORM_VENDOR                          0x0903
 +-#define CL_PLATFORM_EXTENSIONS                      0x0904
 +-
 +-/* cl_device_type - bitfield */
 +-#define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
 +-#define CL_DEVICE_TYPE_CPU                          (1 << 1)
 +-#define CL_DEVICE_TYPE_GPU                          (1 << 2)
 +-#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
 +-#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
 +-#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
 +-
 +-/* cl_device_info */
 +-#define CL_DEVICE_TYPE                              0x1000
 +-#define CL_DEVICE_VENDOR_ID                         0x1001
 +-#define CL_DEVICE_MAX_COMPUTE_UNITS                 0x1002
 +-#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS          0x1003
 +-#define CL_DEVICE_MAX_WORK_GROUP_SIZE               0x1004
 +-#define CL_DEVICE_MAX_WORK_ITEM_SIZES               0x1005
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR       0x1006
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT      0x1007
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT        0x1008
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG       0x1009
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT      0x100A
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE     0x100B
 +-#define CL_DEVICE_MAX_CLOCK_FREQUENCY               0x100C
 +-#define CL_DEVICE_ADDRESS_BITS                      0x100D
 +-#define CL_DEVICE_MAX_READ_IMAGE_ARGS               0x100E
 +-#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS              0x100F
 +-#define CL_DEVICE_MAX_MEM_ALLOC_SIZE                0x1010
 +-#define CL_DEVICE_IMAGE2D_MAX_WIDTH                 0x1011
 +-#define CL_DEVICE_IMAGE2D_MAX_HEIGHT                0x1012
 +-#define CL_DEVICE_IMAGE3D_MAX_WIDTH                 0x1013
 +-#define CL_DEVICE_IMAGE3D_MAX_HEIGHT                0x1014
 +-#define CL_DEVICE_IMAGE3D_MAX_DEPTH                 0x1015
 +-#define CL_DEVICE_IMAGE_SUPPORT                     0x1016
 +-#define CL_DEVICE_MAX_PARAMETER_SIZE                0x1017
 +-#define CL_DEVICE_MAX_SAMPLERS                      0x1018
 +-#define CL_DEVICE_MEM_BASE_ADDR_ALIGN               0x1019
 +-#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE          0x101A
 +-#define CL_DEVICE_SINGLE_FP_CONFIG                  0x101B
 +-#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE             0x101C
 +-#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE         0x101D
 +-#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE             0x101E
 +-#define CL_DEVICE_GLOBAL_MEM_SIZE                   0x101F
 +-#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE          0x1020
 +-#define CL_DEVICE_MAX_CONSTANT_ARGS                 0x1021
 +-#define CL_DEVICE_LOCAL_MEM_TYPE                    0x1022
 +-#define CL_DEVICE_LOCAL_MEM_SIZE                    0x1023
 +-#define CL_DEVICE_ERROR_CORRECTION_SUPPORT          0x1024
 +-#define CL_DEVICE_PROFILING_TIMER_RESOLUTION        0x1025
 +-#define CL_DEVICE_ENDIAN_LITTLE                     0x1026
 +-#define CL_DEVICE_AVAILABLE                         0x1027
 +-#define CL_DEVICE_COMPILER_AVAILABLE                0x1028
 +-#define CL_DEVICE_EXECUTION_CAPABILITIES            0x1029
 +-#define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
 +-#define CL_DEVICE_NAME                              0x102B
 +-#define CL_DEVICE_VENDOR                            0x102C
 +-#define CL_DRIVER_VERSION                           0x102D
 +-#define CL_DEVICE_PROFILE                           0x102E
 +-#define CL_DEVICE_VERSION                           0x102F
 +-#define CL_DEVICE_EXTENSIONS                        0x1030
 +-#define CL_DEVICE_PLATFORM                          0x1031
 +-#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
 +-/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
 +-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
 +-#define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR          0x1036
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT         0x1037
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT           0x1038
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG          0x1039
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT         0x103A
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
 +-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
 +-#define CL_DEVICE_OPENCL_C_VERSION                  0x103D
 +-#define CL_DEVICE_LINKER_AVAILABLE                  0x103E
 +-#define CL_DEVICE_BUILT_IN_KERNELS                  0x103F
 +-#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE             0x1040
 +-#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE              0x1041
 +-#define CL_DEVICE_PARENT_DEVICE                     0x1042
 +-#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES         0x1043
 +-#define CL_DEVICE_PARTITION_PROPERTIES              0x1044
 +-#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN         0x1045
 +-#define CL_DEVICE_PARTITION_TYPE                    0x1046
 +-#define CL_DEVICE_REFERENCE_COUNT                   0x1047
 +-#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC       0x1048
 +-#define CL_DEVICE_PRINTF_BUFFER_SIZE                0x1049
 +-#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT             0x104A
 +-#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT      0x104B
 +-
 +-/* cl_device_fp_config - bitfield */
 +-#define CL_FP_DENORM                                (1 << 0)
 +-#define CL_FP_INF_NAN                               (1 << 1)
 +-#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
 +-#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
 +-#define CL_FP_ROUND_TO_INF                          (1 << 4)
 +-#define CL_FP_FMA                                   (1 << 5)
 +-#define CL_FP_SOFT_FLOAT                            (1 << 6)
 +-#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
 +-
 +-/* cl_device_mem_cache_type */
 +-#define CL_NONE                                     0x0
 +-#define CL_READ_ONLY_CACHE                          0x1
 +-#define CL_READ_WRITE_CACHE                         0x2
 +-
 +-/* cl_device_local_mem_type */
 +-#define CL_LOCAL                                    0x1
 +-#define CL_GLOBAL                                   0x2
 +-
 +-/* cl_device_exec_capabilities - bitfield */
 +-#define CL_EXEC_KERNEL                              (1 << 0)
 +-#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
 +-
 +-/* cl_command_queue_properties - bitfield */
 +-#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
 +-#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
 +-
 +-/* cl_context_info  */
 +-#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
 +-#define CL_CONTEXT_DEVICES                          0x1081
 +-#define CL_CONTEXT_PROPERTIES                       0x1082
 +-#define CL_CONTEXT_NUM_DEVICES                      0x1083
 +-
 +-/* cl_context_properties */
 +-#define CL_CONTEXT_PLATFORM                         0x1084
 +-#define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
 +-    
 +-/* cl_device_partition_property */
 +-#define CL_DEVICE_PARTITION_EQUALLY                 0x1086
 +-#define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
 +-#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END      0x0
 +-#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN      0x1088
 +-    
 +-/* cl_device_affinity_domain */
 +-#define CL_DEVICE_AFFINITY_DOMAIN_NUMA                     (1 << 0)
 +-#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE                 (1 << 1)
 +-#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE                 (1 << 2)
 +-#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE                 (1 << 3)
 +-#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE                 (1 << 4)
 +-#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE       (1 << 5)
 +-
 +-/* cl_command_queue_info */
 +-#define CL_QUEUE_CONTEXT                            0x1090
 +-#define CL_QUEUE_DEVICE                             0x1091
 +-#define CL_QUEUE_REFERENCE_COUNT                    0x1092
 +-#define CL_QUEUE_PROPERTIES                         0x1093
 +-
 +-/* cl_mem_flags - bitfield */
 +-#define CL_MEM_READ_WRITE                           (1 << 0)
 +-#define CL_MEM_WRITE_ONLY                           (1 << 1)
 +-#define CL_MEM_READ_ONLY                            (1 << 2)
 +-#define CL_MEM_USE_HOST_PTR                         (1 << 3)
 +-#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
 +-#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
 +-// reserved                                         (1 << 6)    
 +-#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
 +-#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
 +-#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
 +-
 +-/* cl_mem_migration_flags - bitfield */
 +-#define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
 +-#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED     (1 << 1)
 +-
 +-/* cl_channel_order */
 +-#define CL_R                                        0x10B0
 +-#define CL_A                                        0x10B1
 +-#define CL_RG                                       0x10B2
 +-#define CL_RA                                       0x10B3
 +-#define CL_RGB                                      0x10B4
 +-#define CL_RGBA                                     0x10B5
 +-#define CL_BGRA                                     0x10B6
 +-#define CL_ARGB                                     0x10B7
 +-#define CL_INTENSITY                                0x10B8
 +-#define CL_LUMINANCE                                0x10B9
 +-#define CL_Rx                                       0x10BA
 +-#define CL_RGx                                      0x10BB
 +-#define CL_RGBx                                     0x10BC
 +-#define CL_DEPTH                                    0x10BD
 +-#define CL_DEPTH_STENCIL                            0x10BE
 +-
 +-/* cl_channel_type */
 +-#define CL_SNORM_INT8                               0x10D0
 +-#define CL_SNORM_INT16                              0x10D1
 +-#define CL_UNORM_INT8                               0x10D2
 +-#define CL_UNORM_INT16                              0x10D3
 +-#define CL_UNORM_SHORT_565                          0x10D4
 +-#define CL_UNORM_SHORT_555                          0x10D5
 +-#define CL_UNORM_INT_101010                         0x10D6
 +-#define CL_SIGNED_INT8                              0x10D7
 +-#define CL_SIGNED_INT16                             0x10D8
 +-#define CL_SIGNED_INT32                             0x10D9
 +-#define CL_UNSIGNED_INT8                            0x10DA
 +-#define CL_UNSIGNED_INT16                           0x10DB
 +-#define CL_UNSIGNED_INT32                           0x10DC
 +-#define CL_HALF_FLOAT                               0x10DD
 +-#define CL_FLOAT                                    0x10DE
 +-#define CL_UNORM_INT24                              0x10DF
 +-
 +-/* cl_mem_object_type */
 +-#define CL_MEM_OBJECT_BUFFER                        0x10F0
 +-#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
 +-#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
 +-#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
 +-#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
 +-#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
 +-#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
 +-
 +-/* cl_mem_info */
 +-#define CL_MEM_TYPE                                 0x1100
 +-#define CL_MEM_FLAGS                                0x1101
 +-#define CL_MEM_SIZE                                 0x1102
 +-#define CL_MEM_HOST_PTR                             0x1103
 +-#define CL_MEM_MAP_COUNT                            0x1104
 +-#define CL_MEM_REFERENCE_COUNT                      0x1105
 +-#define CL_MEM_CONTEXT                              0x1106
 +-#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
 +-#define CL_MEM_OFFSET                               0x1108
 +-
 +-/* cl_image_info */
 +-#define CL_IMAGE_FORMAT                             0x1110
 +-#define CL_IMAGE_ELEMENT_SIZE                       0x1111
 +-#define CL_IMAGE_ROW_PITCH                          0x1112
 +-#define CL_IMAGE_SLICE_PITCH                        0x1113
 +-#define CL_IMAGE_WIDTH                              0x1114
 +-#define CL_IMAGE_HEIGHT                             0x1115
 +-#define CL_IMAGE_DEPTH                              0x1116
 +-#define CL_IMAGE_ARRAY_SIZE                         0x1117
 +-#define CL_IMAGE_BUFFER                             0x1118
 +-#define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
 +-#define CL_IMAGE_NUM_SAMPLES                        0x111A
 +-
 +-/* cl_addressing_mode */
 +-#define CL_ADDRESS_NONE                             0x1130
 +-#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
 +-#define CL_ADDRESS_CLAMP                            0x1132
 +-#define CL_ADDRESS_REPEAT                           0x1133
 +-#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134
 +-
 +-/* cl_filter_mode */
 +-#define CL_FILTER_NEAREST                           0x1140
 +-#define CL_FILTER_LINEAR                            0x1141
 +-
 +-/* cl_sampler_info */
 +-#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
 +-#define CL_SAMPLER_CONTEXT                          0x1151
 +-#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
 +-#define CL_SAMPLER_ADDRESSING_MODE                  0x1153
 +-#define CL_SAMPLER_FILTER_MODE                      0x1154
 +-
 +-/* cl_map_flags - bitfield */
 +-#define CL_MAP_READ                                 (1 << 0)
 +-#define CL_MAP_WRITE                                (1 << 1)
 +-#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
 +-
 +-/* cl_program_info */
 +-#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
 +-#define CL_PROGRAM_CONTEXT                          0x1161
 +-#define CL_PROGRAM_NUM_DEVICES                      0x1162
 +-#define CL_PROGRAM_DEVICES                          0x1163
 +-#define CL_PROGRAM_SOURCE                           0x1164
 +-#define CL_PROGRAM_BINARY_SIZES                     0x1165
 +-#define CL_PROGRAM_BINARIES                         0x1166
 +-#define CL_PROGRAM_NUM_KERNELS                      0x1167
 +-#define CL_PROGRAM_KERNEL_NAMES                     0x1168
 +-
 +-/* cl_program_build_info */
 +-#define CL_PROGRAM_BUILD_STATUS                     0x1181
 +-#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
 +-#define CL_PROGRAM_BUILD_LOG                        0x1183
 +-#define CL_PROGRAM_BINARY_TYPE                      0x1184
 +-    
 +-/* cl_program_binary_type */
 +-#define CL_PROGRAM_BINARY_TYPE_NONE                 0x0
 +-#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT      0x1
 +-#define CL_PROGRAM_BINARY_TYPE_LIBRARY              0x2
 +-#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE           0x4
 +-
 +-/* cl_build_status */
 +-#define CL_BUILD_SUCCESS                            0
 +-#define CL_BUILD_NONE                               -1
 +-#define CL_BUILD_ERROR                              -2
 +-#define CL_BUILD_IN_PROGRESS                        -3
 +-
 +-/* cl_kernel_info */
 +-#define CL_KERNEL_FUNCTION_NAME                     0x1190
 +-#define CL_KERNEL_NUM_ARGS                          0x1191
 +-#define CL_KERNEL_REFERENCE_COUNT                   0x1192
 +-#define CL_KERNEL_CONTEXT                           0x1193
 +-#define CL_KERNEL_PROGRAM                           0x1194
 +-#define CL_KERNEL_ATTRIBUTES                        0x1195
 +-
 +-/* cl_kernel_arg_info */
 +-#define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
 +-#define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
 +-#define CL_KERNEL_ARG_TYPE_NAME                     0x1198
 +-#define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
 +-#define CL_KERNEL_ARG_NAME                          0x119A
 +-
 +-/* cl_kernel_arg_address_qualifier */
 +-#define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
 +-#define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
 +-#define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
 +-#define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
 +-
 +-/* cl_kernel_arg_access_qualifier */
 +-#define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
 +-#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
 +-#define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
 +-#define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
 +-    
 +-/* cl_kernel_arg_type_qualifer */
 +-#define CL_KERNEL_ARG_TYPE_NONE                     0
 +-#define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
 +-#define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
 +-#define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
 +-
 +-/* cl_kernel_work_group_info */
 +-#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
 +-#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
 +-#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
 +-#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
 +-#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
 +-#define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
 +-
 +-/* cl_event_info  */
 +-#define CL_EVENT_COMMAND_QUEUE                      0x11D0
 +-#define CL_EVENT_COMMAND_TYPE                       0x11D1
 +-#define CL_EVENT_REFERENCE_COUNT                    0x11D2
 +-#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
 +-#define CL_EVENT_CONTEXT                            0x11D4
 +-
 +-/* cl_command_type */
 +-#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
 +-#define CL_COMMAND_TASK                             0x11F1
 +-#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
 +-#define CL_COMMAND_READ_BUFFER                      0x11F3
 +-#define CL_COMMAND_WRITE_BUFFER                     0x11F4
 +-#define CL_COMMAND_COPY_BUFFER                      0x11F5
 +-#define CL_COMMAND_READ_IMAGE                       0x11F6
 +-#define CL_COMMAND_WRITE_IMAGE                      0x11F7
 +-#define CL_COMMAND_COPY_IMAGE                       0x11F8
 +-#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
 +-#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
 +-#define CL_COMMAND_MAP_BUFFER                       0x11FB
 +-#define CL_COMMAND_MAP_IMAGE                        0x11FC
 +-#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
 +-#define CL_COMMAND_MARKER                           0x11FE
 +-#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
 +-#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
 +-#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
 +-#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
 +-#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
 +-#define CL_COMMAND_USER                             0x1204
 +-#define CL_COMMAND_BARRIER                          0x1205
 +-#define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
 +-#define CL_COMMAND_FILL_BUFFER                      0x1207
 +-#define CL_COMMAND_FILL_IMAGE                       0x1208
 +-
 +-/* command execution status */
 +-#define CL_COMPLETE                                 0x0
 +-#define CL_RUNNING                                  0x1
 +-#define CL_SUBMITTED                                0x2
 +-#define CL_QUEUED                                   0x3
 +-
 +-/* cl_buffer_create_type  */
 +-#define CL_BUFFER_CREATE_TYPE_REGION                0x1220
 +-
 +-/* cl_profiling_info  */
 +-#define CL_PROFILING_COMMAND_QUEUED                 0x1280
 +-#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
 +-#define CL_PROFILING_COMMAND_START                  0x1282
 +-#define CL_PROFILING_COMMAND_END                    0x1283
 +-
 +-/********************************************************************************************************/
 +-
 +-/* Platform API */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetPlatformIDs(cl_uint          /* num_entries */,
 +-                 cl_platform_id * /* platforms */,
 +-                 cl_uint *        /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL 
 +-clGetPlatformInfo(cl_platform_id   /* platform */, 
 +-                  cl_platform_info /* param_name */,
 +-                  size_t           /* param_value_size */, 
 +-                  void *           /* param_value */,
 +-                  size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Device APIs */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetDeviceIDs(cl_platform_id   /* platform */,
 +-               cl_device_type   /* device_type */, 
 +-               cl_uint          /* num_entries */, 
 +-               cl_device_id *   /* devices */, 
 +-               cl_uint *        /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetDeviceInfo(cl_device_id    /* device */,
 +-                cl_device_info  /* param_name */, 
 +-                size_t          /* param_value_size */, 
 +-                void *          /* param_value */,
 +-                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-    
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clCreateSubDevices(cl_device_id                         /* in_device */,
 +-                   const cl_device_partition_property * /* properties */,
 +-                   cl_uint                              /* num_devices */,
 +-                   cl_device_id *                       /* out_devices */,
 +-                   cl_uint *                            /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
 +-    
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
 +-    
 +-/* Context APIs  */
 +-extern CL_API_ENTRY cl_context CL_API_CALL
 +-clCreateContext(const cl_context_properties * /* properties */,
 +-                cl_uint                 /* num_devices */,
 +-                const cl_device_id *    /* devices */,
 +-                void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
 +-                void *                  /* user_data */,
 +-                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_context CL_API_CALL
 +-clCreateContextFromType(const cl_context_properties * /* properties */,
 +-                        cl_device_type          /* device_type */,
 +-                        void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
 +-                        void *                  /* user_data */,
 +-                        cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetContextInfo(cl_context         /* context */, 
 +-                 cl_context_info    /* param_name */, 
 +-                 size_t             /* param_value_size */, 
 +-                 void *             /* param_value */, 
 +-                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Command Queue APIs */
 +-extern CL_API_ENTRY cl_command_queue CL_API_CALL
 +-clCreateCommandQueue(cl_context                     /* context */, 
 +-                     cl_device_id                   /* device */, 
 +-                     cl_command_queue_properties    /* properties */,
 +-                     cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetCommandQueueInfo(cl_command_queue      /* command_queue */,
 +-                      cl_command_queue_info /* param_name */,
 +-                      size_t                /* param_value_size */,
 +-                      void *                /* param_value */,
 +-                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Memory Object APIs */
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateBuffer(cl_context   /* context */,
 +-               cl_mem_flags /* flags */,
 +-               size_t       /* size */,
 +-               void *       /* host_ptr */,
 +-               cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateSubBuffer(cl_mem                   /* buffer */,
 +-                  cl_mem_flags             /* flags */,
 +-                  cl_buffer_create_type    /* buffer_create_type */,
 +-                  const void *             /* buffer_create_info */,
 +-                  cl_int *                 /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
 +-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateImage(cl_context              /* context */,
 +-              cl_mem_flags            /* flags */,
 +-              const cl_image_format * /* image_format */,
 +-              const cl_image_desc *   /* image_desc */, 
 +-              void *                  /* host_ptr */,
 +-              cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
 +-                        
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetSupportedImageFormats(cl_context           /* context */,
 +-                           cl_mem_flags         /* flags */,
 +-                           cl_mem_object_type   /* image_type */,
 +-                           cl_uint              /* num_entries */,
 +-                           cl_image_format *    /* image_formats */,
 +-                           cl_uint *            /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
 +-                                    
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetMemObjectInfo(cl_mem           /* memobj */,
 +-                   cl_mem_info      /* param_name */, 
 +-                   size_t           /* param_value_size */,
 +-                   void *           /* param_value */,
 +-                   size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetImageInfo(cl_mem           /* image */,
 +-               cl_image_info    /* param_name */, 
 +-               size_t           /* param_value_size */,
 +-               void *           /* param_value */,
 +-               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clSetMemObjectDestructorCallback(  cl_mem /* memobj */, 
 +-                                    void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
 +-                                    void * /*user_data */ )             CL_API_SUFFIX__VERSION_1_1;  
 +-
 +-/* Sampler APIs */
 +-extern CL_API_ENTRY cl_sampler CL_API_CALL
 +-clCreateSampler(cl_context          /* context */,
 +-                cl_bool             /* normalized_coords */, 
 +-                cl_addressing_mode  /* addressing_mode */, 
 +-                cl_filter_mode      /* filter_mode */,
 +-                cl_int *            /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetSamplerInfo(cl_sampler         /* sampler */,
 +-                 cl_sampler_info    /* param_name */,
 +-                 size_t             /* param_value_size */,
 +-                 void *             /* param_value */,
 +-                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-/* Program Object APIs  */
 +-extern CL_API_ENTRY cl_program CL_API_CALL
 +-clCreateProgramWithSource(cl_context        /* context */,
 +-                          cl_uint           /* count */,
 +-                          const char **     /* strings */,
 +-                          const size_t *    /* lengths */,
 +-                          cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_program CL_API_CALL
 +-clCreateProgramWithBinary(cl_context                     /* context */,
 +-                          cl_uint                        /* num_devices */,
 +-                          const cl_device_id *           /* device_list */,
 +-                          const size_t *                 /* lengths */,
 +-                          const unsigned char **         /* binaries */,
 +-                          cl_int *                       /* binary_status */,
 +-                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_program CL_API_CALL
 +-clCreateProgramWithBuiltInKernels(cl_context            /* context */,
 +-                                  cl_uint               /* num_devices */,
 +-                                  const cl_device_id *  /* device_list */,
 +-                                  const char *          /* kernel_names */,
 +-                                  cl_int *              /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clBuildProgram(cl_program           /* program */,
 +-               cl_uint              /* num_devices */,
 +-               const cl_device_id * /* device_list */,
 +-               const char *         /* options */, 
 +-               void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
 +-               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clCompileProgram(cl_program           /* program */,
 +-                 cl_uint              /* num_devices */,
 +-                 const cl_device_id * /* device_list */,
 +-                 const char *         /* options */, 
 +-                 cl_uint              /* num_input_headers */,
 +-                 const cl_program *   /* input_headers */,
 +-                 const char **        /* header_include_names */,
 +-                 void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
 +-                 void *               /* user_data */) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-extern CL_API_ENTRY cl_program CL_API_CALL
 +-clLinkProgram(cl_context           /* context */,
 +-              cl_uint              /* num_devices */,
 +-              const cl_device_id * /* device_list */,
 +-              const char *         /* options */, 
 +-              cl_uint              /* num_input_programs */,
 +-              const cl_program *   /* input_programs */,
 +-              void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
 +-              void *               /* user_data */,
 +-              cl_int *             /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetProgramInfo(cl_program         /* program */,
 +-                 cl_program_info    /* param_name */,
 +-                 size_t             /* param_value_size */,
 +-                 void *             /* param_value */,
 +-                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetProgramBuildInfo(cl_program            /* program */,
 +-                      cl_device_id          /* device */,
 +-                      cl_program_build_info /* param_name */,
 +-                      size_t                /* param_value_size */,
 +-                      void *                /* param_value */,
 +-                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-/* Kernel Object APIs */
 +-extern CL_API_ENTRY cl_kernel CL_API_CALL
 +-clCreateKernel(cl_program      /* program */,
 +-               const char *    /* kernel_name */,
 +-               cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clCreateKernelsInProgram(cl_program     /* program */,
 +-                         cl_uint        /* num_kernels */,
 +-                         cl_kernel *    /* kernels */,
 +-                         cl_uint *      /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainKernel(cl_kernel    /* kernel */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clSetKernelArg(cl_kernel    /* kernel */,
 +-               cl_uint      /* arg_index */,
 +-               size_t       /* arg_size */,
 +-               const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetKernelInfo(cl_kernel       /* kernel */,
 +-                cl_kernel_info  /* param_name */,
 +-                size_t          /* param_value_size */,
 +-                void *          /* param_value */,
 +-                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetKernelArgInfo(cl_kernel       /* kernel */,
 +-                   cl_uint         /* arg_indx */,
 +-                   cl_kernel_arg_info  /* param_name */,
 +-                   size_t          /* param_value_size */,
 +-                   void *          /* param_value */,
 +-                   size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetKernelWorkGroupInfo(cl_kernel                  /* kernel */,
 +-                         cl_device_id               /* device */,
 +-                         cl_kernel_work_group_info  /* param_name */,
 +-                         size_t                     /* param_value_size */,
 +-                         void *                     /* param_value */,
 +-                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Event Object APIs */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clWaitForEvents(cl_uint             /* num_events */,
 +-                const cl_event *    /* event_list */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetEventInfo(cl_event         /* event */,
 +-               cl_event_info    /* param_name */,
 +-               size_t           /* param_value_size */,
 +-               void *           /* param_value */,
 +-               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-extern CL_API_ENTRY cl_event CL_API_CALL
 +-clCreateUserEvent(cl_context    /* context */,
 +-                  cl_int *      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;               
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clSetUserEventStatus(cl_event   /* event */,
 +-                     cl_int     /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
 +-                     
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clSetEventCallback( cl_event    /* event */,
 +-                    cl_int      /* command_exec_callback_type */,
 +-                    void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
 +-                    void *      /* user_data */) CL_API_SUFFIX__VERSION_1_1;
 +-
 +-/* Profiling APIs */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetEventProfilingInfo(cl_event            /* event */,
 +-                        cl_profiling_info   /* param_name */,
 +-                        size_t              /* param_value_size */,
 +-                        void *              /* param_value */,
 +-                        size_t *            /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-                                
 +-/* Flush and Finish APIs */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-/* Enqueued Commands APIs */
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueReadBuffer(cl_command_queue    /* command_queue */,
 +-                    cl_mem              /* buffer */,
 +-                    cl_bool             /* blocking_read */,
 +-                    size_t              /* offset */,
 +-                    size_t              /* size */, 
 +-                    void *              /* ptr */,
 +-                    cl_uint             /* num_events_in_wait_list */,
 +-                    const cl_event *    /* event_wait_list */,
 +-                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueReadBufferRect(cl_command_queue    /* command_queue */,
 +-                        cl_mem              /* buffer */,
 +-                        cl_bool             /* blocking_read */,
 +-                        const size_t *      /* buffer_offset */,
 +-                        const size_t *      /* host_offset */, 
 +-                        const size_t *      /* region */,
 +-                        size_t              /* buffer_row_pitch */,
 +-                        size_t              /* buffer_slice_pitch */,
 +-                        size_t              /* host_row_pitch */,
 +-                        size_t              /* host_slice_pitch */,                        
 +-                        void *              /* ptr */,
 +-                        cl_uint             /* num_events_in_wait_list */,
 +-                        const cl_event *    /* event_wait_list */,
 +-                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueWriteBuffer(cl_command_queue   /* command_queue */, 
 +-                     cl_mem             /* buffer */, 
 +-                     cl_bool            /* blocking_write */, 
 +-                     size_t             /* offset */, 
 +-                     size_t             /* size */, 
 +-                     const void *       /* ptr */, 
 +-                     cl_uint            /* num_events_in_wait_list */, 
 +-                     const cl_event *   /* event_wait_list */, 
 +-                     cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueWriteBufferRect(cl_command_queue    /* command_queue */,
 +-                         cl_mem              /* buffer */,
 +-                         cl_bool             /* blocking_write */,
 +-                         const size_t *      /* buffer_offset */,
 +-                         const size_t *      /* host_offset */, 
 +-                         const size_t *      /* region */,
 +-                         size_t              /* buffer_row_pitch */,
 +-                         size_t              /* buffer_slice_pitch */,
 +-                         size_t              /* host_row_pitch */,
 +-                         size_t              /* host_slice_pitch */,                        
 +-                         const void *        /* ptr */,
 +-                         cl_uint             /* num_events_in_wait_list */,
 +-                         const cl_event *    /* event_wait_list */,
 +-                         cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueFillBuffer(cl_command_queue   /* command_queue */,
 +-                    cl_mem             /* buffer */, 
 +-                    const void *       /* pattern */, 
 +-                    size_t             /* pattern_size */, 
 +-                    size_t             /* offset */, 
 +-                    size_t             /* size */, 
 +-                    cl_uint            /* num_events_in_wait_list */, 
 +-                    const cl_event *   /* event_wait_list */, 
 +-                    cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyBuffer(cl_command_queue    /* command_queue */, 
 +-                    cl_mem              /* src_buffer */,
 +-                    cl_mem              /* dst_buffer */, 
 +-                    size_t              /* src_offset */,
 +-                    size_t              /* dst_offset */,
 +-                    size_t              /* size */, 
 +-                    cl_uint             /* num_events_in_wait_list */,
 +-                    const cl_event *    /* event_wait_list */,
 +-                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyBufferRect(cl_command_queue    /* command_queue */, 
 +-                        cl_mem              /* src_buffer */,
 +-                        cl_mem              /* dst_buffer */, 
 +-                        const size_t *      /* src_origin */,
 +-                        const size_t *      /* dst_origin */,
 +-                        const size_t *      /* region */, 
 +-                        size_t              /* src_row_pitch */,
 +-                        size_t              /* src_slice_pitch */,
 +-                        size_t              /* dst_row_pitch */,
 +-                        size_t              /* dst_slice_pitch */,
 +-                        cl_uint             /* num_events_in_wait_list */,
 +-                        const cl_event *    /* event_wait_list */,
 +-                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueReadImage(cl_command_queue     /* command_queue */,
 +-                   cl_mem               /* image */,
 +-                   cl_bool              /* blocking_read */, 
 +-                   const size_t *       /* origin[3] */,
 +-                   const size_t *       /* region[3] */,
 +-                   size_t               /* row_pitch */,
 +-                   size_t               /* slice_pitch */, 
 +-                   void *               /* ptr */,
 +-                   cl_uint              /* num_events_in_wait_list */,
 +-                   const cl_event *     /* event_wait_list */,
 +-                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueWriteImage(cl_command_queue    /* command_queue */,
 +-                    cl_mem              /* image */,
 +-                    cl_bool             /* blocking_write */, 
 +-                    const size_t *      /* origin[3] */,
 +-                    const size_t *      /* region[3] */,
 +-                    size_t              /* input_row_pitch */,
 +-                    size_t              /* input_slice_pitch */, 
 +-                    const void *        /* ptr */,
 +-                    cl_uint             /* num_events_in_wait_list */,
 +-                    const cl_event *    /* event_wait_list */,
 +-                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueFillImage(cl_command_queue   /* command_queue */,
 +-                   cl_mem             /* image */, 
 +-                   const void *       /* fill_color */, 
 +-                   const size_t *     /* origin[3] */, 
 +-                   const size_t *     /* region[3] */, 
 +-                   cl_uint            /* num_events_in_wait_list */, 
 +-                   const cl_event *   /* event_wait_list */, 
 +-                   cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
 +-                            
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyImage(cl_command_queue     /* command_queue */,
 +-                   cl_mem               /* src_image */,
 +-                   cl_mem               /* dst_image */, 
 +-                   const size_t *       /* src_origin[3] */,
 +-                   const size_t *       /* dst_origin[3] */,
 +-                   const size_t *       /* region[3] */, 
 +-                   cl_uint              /* num_events_in_wait_list */,
 +-                   const cl_event *     /* event_wait_list */,
 +-                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
 +-                           cl_mem           /* src_image */,
 +-                           cl_mem           /* dst_buffer */, 
 +-                           const size_t *   /* src_origin[3] */,
 +-                           const size_t *   /* region[3] */, 
 +-                           size_t           /* dst_offset */,
 +-                           cl_uint          /* num_events_in_wait_list */,
 +-                           const cl_event * /* event_wait_list */,
 +-                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
 +-                           cl_mem           /* src_buffer */,
 +-                           cl_mem           /* dst_image */, 
 +-                           size_t           /* src_offset */,
 +-                           const size_t *   /* dst_origin[3] */,
 +-                           const size_t *   /* region[3] */, 
 +-                           cl_uint          /* num_events_in_wait_list */,
 +-                           const cl_event * /* event_wait_list */,
 +-                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY void * CL_API_CALL
 +-clEnqueueMapBuffer(cl_command_queue /* command_queue */,
 +-                   cl_mem           /* buffer */,
 +-                   cl_bool          /* blocking_map */, 
 +-                   cl_map_flags     /* map_flags */,
 +-                   size_t           /* offset */,
 +-                   size_t           /* size */,
 +-                   cl_uint          /* num_events_in_wait_list */,
 +-                   const cl_event * /* event_wait_list */,
 +-                   cl_event *       /* event */,
 +-                   cl_int *         /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY void * CL_API_CALL
 +-clEnqueueMapImage(cl_command_queue  /* command_queue */,
 +-                  cl_mem            /* image */, 
 +-                  cl_bool           /* blocking_map */, 
 +-                  cl_map_flags      /* map_flags */, 
 +-                  const size_t *    /* origin[3] */,
 +-                  const size_t *    /* region[3] */,
 +-                  size_t *          /* image_row_pitch */,
 +-                  size_t *          /* image_slice_pitch */,
 +-                  cl_uint           /* num_events_in_wait_list */,
 +-                  const cl_event *  /* event_wait_list */,
 +-                  cl_event *        /* event */,
 +-                  cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
 +-                        cl_mem           /* memobj */,
 +-                        void *           /* mapped_ptr */,
 +-                        cl_uint          /* num_events_in_wait_list */,
 +-                        const cl_event *  /* event_wait_list */,
 +-                        cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueMigrateMemObjects(cl_command_queue       /* command_queue */,
 +-                           cl_uint                /* num_mem_objects */,
 +-                           const cl_mem *         /* mem_objects */,
 +-                           cl_mem_migration_flags /* flags */,
 +-                           cl_uint                /* num_events_in_wait_list */,
 +-                           const cl_event *       /* event_wait_list */,
 +-                           cl_event *             /* event */) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
 +-                       cl_kernel        /* kernel */,
 +-                       cl_uint          /* work_dim */,
 +-                       const size_t *   /* global_work_offset */,
 +-                       const size_t *   /* global_work_size */,
 +-                       const size_t *   /* local_work_size */,
 +-                       cl_uint          /* num_events_in_wait_list */,
 +-                       const cl_event * /* event_wait_list */,
 +-                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueTask(cl_command_queue  /* command_queue */,
 +-              cl_kernel         /* kernel */,
 +-              cl_uint           /* num_events_in_wait_list */,
 +-              const cl_event *  /* event_wait_list */,
 +-              cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueNativeKernel(cl_command_queue  /* command_queue */,
 +-					  void (CL_CALLBACK * /*user_func*/)(void *), 
 +-                      void *            /* args */,
 +-                      size_t            /* cb_args */, 
 +-                      cl_uint           /* num_mem_objects */,
 +-                      const cl_mem *    /* mem_list */,
 +-                      const void **     /* args_mem_loc */,
 +-                      cl_uint           /* num_events_in_wait_list */,
 +-                      const cl_event *  /* event_wait_list */,
 +-                      cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
 +-                            cl_uint           /* num_events_in_wait_list */,
 +-                            const cl_event *  /* event_wait_list */,
 +-                            cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
 +-                             cl_uint           /* num_events_in_wait_list */,
 +-                             const cl_event *  /* event_wait_list */,
 +-                             cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
 +-
 +-
 +-/* Extension function access
 +- *
 +- * Returns the extension function address for the given function name,
 +- * or NULL if a valid function can not be found.  The client must
 +- * check to make sure the address is not NULL, before using or 
 +- * calling the returned function address.
 +- */
 +-extern CL_API_ENTRY void * CL_API_CALL 
 +-clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
 +-                                         const char *   /* func_name */) CL_API_SUFFIX__VERSION_1_2;
 +-    
 +-
 +-// Deprecated OpenCL 1.1 APIs
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
 +-clCreateImage2D(cl_context              /* context */,
 +-                cl_mem_flags            /* flags */,
 +-                const cl_image_format * /* image_format */,
 +-                size_t                  /* image_width */,
 +-                size_t                  /* image_height */,
 +-                size_t                  /* image_row_pitch */, 
 +-                void *                  /* host_ptr */,
 +-                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-    
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
 +-clCreateImage3D(cl_context              /* context */,
 +-                cl_mem_flags            /* flags */,
 +-                const cl_image_format * /* image_format */,
 +-                size_t                  /* image_width */, 
 +-                size_t                  /* image_height */,
 +-                size_t                  /* image_depth */, 
 +-                size_t                  /* image_row_pitch */, 
 +-                size_t                  /* image_slice_pitch */, 
 +-                void *                  /* host_ptr */,
 +-                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-    
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
 +-clEnqueueMarker(cl_command_queue    /* command_queue */,
 +-                cl_event *          /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-    
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
 +-clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
 +-                        cl_uint          /* num_events */,
 +-                        const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-    
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
 +-clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
 +-clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-    
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
 +-clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  /* __OPENCL_CL_H */
 +-
 ++#include_next <CL/cl.h>
- Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_platform.h
++Index: beignet-0.1+git20130514+19e9c58/include/CL/cl_platform.h
 +===================================================================
- --- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl_platform.h	2013-04-12 08:13:48.000000000 +0200
- +++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_platform.h	2013-04-15 18:25:01.036323041 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl_platform.h	2013-05-14 20:04:50.338033844 +0200
+++++ beignet-0.1+git20130514+19e9c58/include/CL/cl_platform.h	2013-05-14 20:04:53.678033695 +0200
 +@@ -1,1254 +1 @@
 +-/**********************************************************************************
 +- * Copyright (c) 2008-2012 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- **********************************************************************************/
 +-
 +-/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */
 +-
 +-#ifndef __CL_PLATFORM_H
 +-#define __CL_PLATFORM_H
 +-
 +-#ifdef __APPLE__
 +-    /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
 +-    #include <AvailabilityMacros.h>
 +-#endif
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-#if defined(_WIN32)
 +-    #define CL_API_ENTRY
 +-    #define CL_API_CALL     __stdcall
 +-    #define CL_CALLBACK     __stdcall
 +-#else
 +-    #define CL_API_ENTRY
 +-    #define CL_API_CALL
 +-    #define CL_CALLBACK
 +-#endif
 +-
 +-#ifdef __APPLE__
 +-    #define CL_EXTENSION_WEAK_LINK       __attribute__((weak_import))
 +-    #define CL_API_SUFFIX__VERSION_1_0                  AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
 +-    #define CL_EXT_SUFFIX__VERSION_1_0                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
 +-    #define CL_API_SUFFIX__VERSION_1_1                  AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
 +-    #define GCL_API_SUFFIX__VERSION_1_1                 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
 +-    #define CL_EXT_SUFFIX__VERSION_1_1                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
 +-    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED       CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
 +-    
 +-    #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
 +-        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
 +-        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
 +-        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
 +-        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
 +-        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
 +-    #else
 +-        #warning  This path should never happen outside of internal operating system development.  AvailabilityMacros do not function correctly here!
 +-        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
 +-        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
 +-        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
 +-        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
 +-    #endif
 +-#else
 +-    #define CL_EXTENSION_WEAK_LINK  
 +-    #define CL_API_SUFFIX__VERSION_1_0
 +-    #define CL_EXT_SUFFIX__VERSION_1_0
 +-    #define CL_API_SUFFIX__VERSION_1_1
 +-    #define CL_EXT_SUFFIX__VERSION_1_1
 +-    #define CL_API_SUFFIX__VERSION_1_2
 +-    #define CL_EXT_SUFFIX__VERSION_1_2
 +-    
 +-    #ifdef __GNUC__
 +-        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
 +-            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
 +-            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
 +-        #else
 +-            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
 +-            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
 +-        #endif
 +-    
 +-        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
 +-            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED    
 +-            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
 +-        #else
 +-            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
 +-            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
 +-        #endif
 +-    #elif _WIN32
 +-        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
 +-            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED    
 +-            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
 +-        #else
 +-            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED 
 +-            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)     
 +-        #endif
 +-    
 +-        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
 +-            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
 +-            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
 +-        #else
 +-            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED 
 +-            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)     
 +-        #endif
 +-    #else
 +-        #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
 +-        #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
 +-    
 +-        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
 +-        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
 +-    #endif
 +-#endif
 +-
 +-#if (defined (_WIN32) && defined(_MSC_VER))
 +-
 +-/* scalar types  */
 +-typedef signed   __int8         cl_char;
 +-typedef unsigned __int8         cl_uchar;
 +-typedef signed   __int16        cl_short;
 +-typedef unsigned __int16        cl_ushort;
 +-typedef signed   __int32        cl_int;
 +-typedef unsigned __int32        cl_uint;
 +-typedef signed   __int64        cl_long;
 +-typedef unsigned __int64        cl_ulong;
 +-
 +-typedef unsigned __int16        cl_half;
 +-typedef float                   cl_float;
 +-typedef double                  cl_double;
 +-
 +-/* Macro names and corresponding values defined by OpenCL */
 +-#define CL_CHAR_BIT         8
 +-#define CL_SCHAR_MAX        127
 +-#define CL_SCHAR_MIN        (-127-1)
 +-#define CL_CHAR_MAX         CL_SCHAR_MAX
 +-#define CL_CHAR_MIN         CL_SCHAR_MIN
 +-#define CL_UCHAR_MAX        255
 +-#define CL_SHRT_MAX         32767
 +-#define CL_SHRT_MIN         (-32767-1)
 +-#define CL_USHRT_MAX        65535
 +-#define CL_INT_MAX          2147483647
 +-#define CL_INT_MIN          (-2147483647-1)
 +-#define CL_UINT_MAX         0xffffffffU
 +-#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
 +-#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
 +-#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
 +-
 +-#define CL_FLT_DIG          6
 +-#define CL_FLT_MANT_DIG     24
 +-#define CL_FLT_MAX_10_EXP   +38
 +-#define CL_FLT_MAX_EXP      +128
 +-#define CL_FLT_MIN_10_EXP   -37
 +-#define CL_FLT_MIN_EXP      -125
 +-#define CL_FLT_RADIX        2
 +-#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
 +-#define CL_FLT_MIN          1.175494350822287507969e-38f
 +-#define CL_FLT_EPSILON      0x1.0p-23f
 +-
 +-#define CL_DBL_DIG          15
 +-#define CL_DBL_MANT_DIG     53
 +-#define CL_DBL_MAX_10_EXP   +308
 +-#define CL_DBL_MAX_EXP      +1024
 +-#define CL_DBL_MIN_10_EXP   -307
 +-#define CL_DBL_MIN_EXP      -1021
 +-#define CL_DBL_RADIX        2
 +-#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
 +-#define CL_DBL_MIN          2.225073858507201383090e-308
 +-#define CL_DBL_EPSILON      2.220446049250313080847e-16
 +-
 +-#define  CL_M_E             2.718281828459045090796
 +-#define  CL_M_LOG2E         1.442695040888963387005
 +-#define  CL_M_LOG10E        0.434294481903251816668
 +-#define  CL_M_LN2           0.693147180559945286227
 +-#define  CL_M_LN10          2.302585092994045901094
 +-#define  CL_M_PI            3.141592653589793115998
 +-#define  CL_M_PI_2          1.570796326794896557999
 +-#define  CL_M_PI_4          0.785398163397448278999
 +-#define  CL_M_1_PI          0.318309886183790691216
 +-#define  CL_M_2_PI          0.636619772367581382433
 +-#define  CL_M_2_SQRTPI      1.128379167095512558561
 +-#define  CL_M_SQRT2         1.414213562373095145475
 +-#define  CL_M_SQRT1_2       0.707106781186547572737
 +-
 +-#define  CL_M_E_F           2.71828174591064f
 +-#define  CL_M_LOG2E_F       1.44269502162933f
 +-#define  CL_M_LOG10E_F      0.43429449200630f
 +-#define  CL_M_LN2_F         0.69314718246460f
 +-#define  CL_M_LN10_F        2.30258512496948f
 +-#define  CL_M_PI_F          3.14159274101257f
 +-#define  CL_M_PI_2_F        1.57079637050629f
 +-#define  CL_M_PI_4_F        0.78539818525314f
 +-#define  CL_M_1_PI_F        0.31830987334251f
 +-#define  CL_M_2_PI_F        0.63661974668503f
 +-#define  CL_M_2_SQRTPI_F    1.12837922573090f
 +-#define  CL_M_SQRT2_F       1.41421353816986f
 +-#define  CL_M_SQRT1_2_F     0.70710676908493f
 +-
 +-#define CL_NAN              (CL_INFINITY - CL_INFINITY)
 +-#define CL_HUGE_VALF        ((cl_float) 1e50)
 +-#define CL_HUGE_VAL         ((cl_double) 1e500)
 +-#define CL_MAXFLOAT         CL_FLT_MAX
 +-#define CL_INFINITY         CL_HUGE_VALF
 +-
 +-#else
 +-
 +-#include <stdint.h>
 +-
 +-/* scalar types  */
 +-typedef int8_t          cl_char;
 +-typedef uint8_t         cl_uchar;
 +-typedef int16_t         cl_short    __attribute__((aligned(2)));
 +-typedef uint16_t        cl_ushort   __attribute__((aligned(2)));
 +-typedef int32_t         cl_int      __attribute__((aligned(4)));
 +-typedef uint32_t        cl_uint     __attribute__((aligned(4)));
 +-typedef int64_t         cl_long     __attribute__((aligned(8)));
 +-typedef uint64_t        cl_ulong    __attribute__((aligned(8)));
 +-
 +-typedef uint16_t        cl_half     __attribute__((aligned(2)));
 +-typedef float           cl_float    __attribute__((aligned(4)));
 +-typedef double          cl_double   __attribute__((aligned(8)));
 +-
 +-/* Macro names and corresponding values defined by OpenCL */
 +-#define CL_CHAR_BIT         8
 +-#define CL_SCHAR_MAX        127
 +-#define CL_SCHAR_MIN        (-127-1)
 +-#define CL_CHAR_MAX         CL_SCHAR_MAX
 +-#define CL_CHAR_MIN         CL_SCHAR_MIN
 +-#define CL_UCHAR_MAX        255
 +-#define CL_SHRT_MAX         32767
 +-#define CL_SHRT_MIN         (-32767-1)
 +-#define CL_USHRT_MAX        65535
 +-#define CL_INT_MAX          2147483647
 +-#define CL_INT_MIN          (-2147483647-1)
 +-#define CL_UINT_MAX         0xffffffffU
 +-#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
 +-#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
 +-#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
 +-
 +-#define CL_FLT_DIG          6
 +-#define CL_FLT_MANT_DIG     24
 +-#define CL_FLT_MAX_10_EXP   +38
 +-#define CL_FLT_MAX_EXP      +128
 +-#define CL_FLT_MIN_10_EXP   -37
 +-#define CL_FLT_MIN_EXP      -125
 +-#define CL_FLT_RADIX        2
 +-#define CL_FLT_MAX          0x1.fffffep127f
 +-#define CL_FLT_MIN          0x1.0p-126f
 +-#define CL_FLT_EPSILON      0x1.0p-23f
 +-
 +-#define CL_DBL_DIG          15
 +-#define CL_DBL_MANT_DIG     53
 +-#define CL_DBL_MAX_10_EXP   +308
 +-#define CL_DBL_MAX_EXP      +1024
 +-#define CL_DBL_MIN_10_EXP   -307
 +-#define CL_DBL_MIN_EXP      -1021
 +-#define CL_DBL_RADIX        2
 +-#define CL_DBL_MAX          0x1.fffffffffffffp1023
 +-#define CL_DBL_MIN          0x1.0p-1022
 +-#define CL_DBL_EPSILON      0x1.0p-52
 +-
 +-#define  CL_M_E             2.718281828459045090796
 +-#define  CL_M_LOG2E         1.442695040888963387005
 +-#define  CL_M_LOG10E        0.434294481903251816668
 +-#define  CL_M_LN2           0.693147180559945286227
 +-#define  CL_M_LN10          2.302585092994045901094
 +-#define  CL_M_PI            3.141592653589793115998
 +-#define  CL_M_PI_2          1.570796326794896557999
 +-#define  CL_M_PI_4          0.785398163397448278999
 +-#define  CL_M_1_PI          0.318309886183790691216
 +-#define  CL_M_2_PI          0.636619772367581382433
 +-#define  CL_M_2_SQRTPI      1.128379167095512558561
 +-#define  CL_M_SQRT2         1.414213562373095145475
 +-#define  CL_M_SQRT1_2       0.707106781186547572737
 +-
 +-#define  CL_M_E_F           2.71828174591064f
 +-#define  CL_M_LOG2E_F       1.44269502162933f
 +-#define  CL_M_LOG10E_F      0.43429449200630f
 +-#define  CL_M_LN2_F         0.69314718246460f
 +-#define  CL_M_LN10_F        2.30258512496948f
 +-#define  CL_M_PI_F          3.14159274101257f
 +-#define  CL_M_PI_2_F        1.57079637050629f
 +-#define  CL_M_PI_4_F        0.78539818525314f
 +-#define  CL_M_1_PI_F        0.31830987334251f
 +-#define  CL_M_2_PI_F        0.63661974668503f
 +-#define  CL_M_2_SQRTPI_F    1.12837922573090f
 +-#define  CL_M_SQRT2_F       1.41421353816986f
 +-#define  CL_M_SQRT1_2_F     0.70710676908493f
 +-
 +-#if defined( __GNUC__ )
 +-   #define CL_HUGE_VALF     __builtin_huge_valf()
 +-   #define CL_HUGE_VAL      __builtin_huge_val()
 +-   #define CL_NAN           __builtin_nanf( "" )
 +-#else
 +-   #define CL_HUGE_VALF     ((cl_float) 1e50)
 +-   #define CL_HUGE_VAL      ((cl_double) 1e500)
 +-   float nanf( const char * );
 +-   #define CL_NAN           nanf( "" )  
 +-#endif
 +-#define CL_MAXFLOAT         CL_FLT_MAX
 +-#define CL_INFINITY         CL_HUGE_VALF
 +-
 +-#endif
 +-
 +-#include <stddef.h>
 +-
 +-/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
 +-typedef unsigned int cl_GLuint;
 +-typedef int          cl_GLint;
 +-typedef unsigned int cl_GLenum;
 +-
 +-/*
 +- * Vector types 
 +- *
 +- *  Note:   OpenCL requires that all types be naturally aligned. 
 +- *          This means that vector types must be naturally aligned.
 +- *          For example, a vector of four floats must be aligned to
 +- *          a 16 byte boundary (calculated as 4 * the natural 4-byte 
 +- *          alignment of the float).  The alignment qualifiers here
 +- *          will only function properly if your compiler supports them
 +- *          and if you don't actively work to defeat them.  For example,
 +- *          in order for a cl_float4 to be 16 byte aligned in a struct,
 +- *          the start of the struct must itself be 16-byte aligned. 
 +- *
 +- *          Maintaining proper alignment is the user's responsibility.
 +- */
 +-
 +-/* Define basic vector types */
 +-#if defined( __VEC__ )
 +-   #include <altivec.h>   /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
 +-   typedef vector unsigned char     __cl_uchar16;
 +-   typedef vector signed char       __cl_char16;
 +-   typedef vector unsigned short    __cl_ushort8;
 +-   typedef vector signed short      __cl_short8;
 +-   typedef vector unsigned int      __cl_uint4;
 +-   typedef vector signed int        __cl_int4;
 +-   typedef vector float             __cl_float4;
 +-   #define  __CL_UCHAR16__  1
 +-   #define  __CL_CHAR16__   1
 +-   #define  __CL_USHORT8__  1
 +-   #define  __CL_SHORT8__   1
 +-   #define  __CL_UINT4__    1
 +-   #define  __CL_INT4__     1
 +-   #define  __CL_FLOAT4__   1
 +-#endif
 +-
 +-#if defined( __SSE__ )
 +-    #if defined( __MINGW64__ )
 +-        #include <intrin.h>
 +-    #else
 +-        #include <xmmintrin.h>
 +-    #endif
 +-    #if defined( __GNUC__ )
 +-        typedef float __cl_float4   __attribute__((vector_size(16)));
 +-    #else
 +-        typedef __m128 __cl_float4;
 +-    #endif
 +-    #define __CL_FLOAT4__   1
 +-#endif
 +-
 +-#if defined( __SSE2__ )
 +-    #if defined( __MINGW64__ )
 +-        #include <intrin.h>
 +-    #else
 +-        #include <emmintrin.h>
 +-    #endif
 +-    #if defined( __GNUC__ )
 +-        typedef cl_uchar    __cl_uchar16    __attribute__((vector_size(16)));
 +-        typedef cl_char     __cl_char16     __attribute__((vector_size(16)));
 +-        typedef cl_ushort   __cl_ushort8    __attribute__((vector_size(16)));
 +-        typedef cl_short    __cl_short8     __attribute__((vector_size(16)));
 +-        typedef cl_uint     __cl_uint4      __attribute__((vector_size(16)));
 +-        typedef cl_int      __cl_int4       __attribute__((vector_size(16)));
 +-        typedef cl_ulong    __cl_ulong2     __attribute__((vector_size(16)));
 +-        typedef cl_long     __cl_long2      __attribute__((vector_size(16)));
 +-        typedef cl_double   __cl_double2    __attribute__((vector_size(16)));
 +-    #else
 +-        typedef __m128i __cl_uchar16;
 +-        typedef __m128i __cl_char16;
 +-        typedef __m128i __cl_ushort8;
 +-        typedef __m128i __cl_short8;
 +-        typedef __m128i __cl_uint4;
 +-        typedef __m128i __cl_int4;
 +-        typedef __m128i __cl_ulong2;
 +-        typedef __m128i __cl_long2;
 +-        typedef __m128d __cl_double2;
 +-    #endif
 +-    #define __CL_UCHAR16__  1
 +-    #define __CL_CHAR16__   1
 +-    #define __CL_USHORT8__  1
 +-    #define __CL_SHORT8__   1
 +-    #define __CL_INT4__     1
 +-    #define __CL_UINT4__    1
 +-    #define __CL_ULONG2__   1
 +-    #define __CL_LONG2__    1
 +-    #define __CL_DOUBLE2__  1
 +-#endif
 +-
 +-#if defined( __MMX__ )
 +-    #include <mmintrin.h>
 +-    #if defined( __GNUC__ )
 +-        typedef cl_uchar    __cl_uchar8     __attribute__((vector_size(8)));
 +-        typedef cl_char     __cl_char8      __attribute__((vector_size(8)));
 +-        typedef cl_ushort   __cl_ushort4    __attribute__((vector_size(8)));
 +-        typedef cl_short    __cl_short4     __attribute__((vector_size(8)));
 +-        typedef cl_uint     __cl_uint2      __attribute__((vector_size(8)));
 +-        typedef cl_int      __cl_int2       __attribute__((vector_size(8)));
 +-        typedef cl_ulong    __cl_ulong1     __attribute__((vector_size(8)));
 +-        typedef cl_long     __cl_long1      __attribute__((vector_size(8)));
 +-        typedef cl_float    __cl_float2     __attribute__((vector_size(8)));
 +-    #else
 +-        typedef __m64       __cl_uchar8;
 +-        typedef __m64       __cl_char8;
 +-        typedef __m64       __cl_ushort4;
 +-        typedef __m64       __cl_short4;
 +-        typedef __m64       __cl_uint2;
 +-        typedef __m64       __cl_int2;
 +-        typedef __m64       __cl_ulong1;
 +-        typedef __m64       __cl_long1;
 +-        typedef __m64       __cl_float2;
 +-    #endif
 +-    #define __CL_UCHAR8__   1
 +-    #define __CL_CHAR8__    1
 +-    #define __CL_USHORT4__  1
 +-    #define __CL_SHORT4__   1
 +-    #define __CL_INT2__     1
 +-    #define __CL_UINT2__    1
 +-    #define __CL_ULONG1__   1
 +-    #define __CL_LONG1__    1
 +-    #define __CL_FLOAT2__   1
 +-#endif
 +-
 +-#if defined( __AVX__ )
 +-    #if defined( __MINGW64__ )
 +-        #include <intrin.h>
 +-    #else
 +-        #include <immintrin.h> 
 +-    #endif
 +-    #if defined( __GNUC__ )
 +-        typedef cl_float    __cl_float8     __attribute__((vector_size(32)));
 +-        typedef cl_double   __cl_double4    __attribute__((vector_size(32)));
 +-    #else
 +-        typedef __m256      __cl_float8;
 +-        typedef __m256d     __cl_double4;
 +-    #endif
 +-    #define __CL_FLOAT8__   1
 +-    #define __CL_DOUBLE4__  1
 +-#endif
 +-
 +-/* Define alignment keys */
 +-#if defined( __GNUC__ )
 +-    #define CL_ALIGNED(_x)          __attribute__ ((aligned(_x)))
 +-#elif defined( _WIN32) && (_MSC_VER)
 +-    /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements     */
 +-    /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx                                                 */
 +-    /* #include <crtdefs.h>                                                                                             */
 +-    /* #define CL_ALIGNED(_x)          _CRT_ALIGN(_x)                                                                   */
 +-    #define CL_ALIGNED(_x)
 +-#else
 +-   #warning  Need to implement some method to align data here
 +-   #define  CL_ALIGNED(_x)
 +-#endif
 +-
 +-/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-    /* .xyzw and .s0123...{f|F} are supported */
 +-    #define CL_HAS_NAMED_VECTOR_FIELDS 1
 +-    /* .hi and .lo are supported */
 +-    #define CL_HAS_HI_LO_VECTOR_FIELDS 1
 +-#endif
 +-
 +-/* Define cl_vector types */
 +-
 +-/* ---- cl_charn ---- */
 +-typedef union
 +-{
 +-    cl_char  CL_ALIGNED(2) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_char  x, y; };
 +-   __extension__ struct{ cl_char  s0, s1; };
 +-   __extension__ struct{ cl_char  lo, hi; };
 +-#endif
 +-#if defined( __CL_CHAR2__) 
 +-    __cl_char2     v2;
 +-#endif
 +-}cl_char2;
 +-
 +-typedef union
 +-{
 +-    cl_char  CL_ALIGNED(4) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_char  x, y, z, w; };
 +-   __extension__ struct{ cl_char  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_char2 lo, hi; };
 +-#endif
 +-#if defined( __CL_CHAR2__) 
 +-    __cl_char2     v2[2];
 +-#endif
 +-#if defined( __CL_CHAR4__) 
 +-    __cl_char4     v4;
 +-#endif
 +-}cl_char4;
 +-
 +-/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
 +-typedef  cl_char4  cl_char3;
 +-
 +-typedef union
 +-{
 +-    cl_char   CL_ALIGNED(8) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_char  x, y, z, w; };
 +-   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_char4 lo, hi; };
 +-#endif
 +-#if defined( __CL_CHAR2__) 
 +-    __cl_char2     v2[4];
 +-#endif
 +-#if defined( __CL_CHAR4__) 
 +-    __cl_char4     v4[2];
 +-#endif
 +-#if defined( __CL_CHAR8__ )
 +-    __cl_char8     v8;
 +-#endif
 +-}cl_char8;
 +-
 +-typedef union
 +-{
 +-    cl_char  CL_ALIGNED(16) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_char  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_char8 lo, hi; };
 +-#endif
 +-#if defined( __CL_CHAR2__) 
 +-    __cl_char2     v2[8];
 +-#endif
 +-#if defined( __CL_CHAR4__) 
 +-    __cl_char4     v4[4];
 +-#endif
 +-#if defined( __CL_CHAR8__ )
 +-    __cl_char8     v8[2];
 +-#endif
 +-#if defined( __CL_CHAR16__ )
 +-    __cl_char16    v16;
 +-#endif
 +-}cl_char16;
 +-
 +-
 +-/* ---- cl_ucharn ---- */
 +-typedef union
 +-{
 +-    cl_uchar  CL_ALIGNED(2) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uchar  x, y; };
 +-   __extension__ struct{ cl_uchar  s0, s1; };
 +-   __extension__ struct{ cl_uchar  lo, hi; };
 +-#endif
 +-#if defined( __cl_uchar2__) 
 +-    __cl_uchar2     v2;
 +-#endif
 +-}cl_uchar2;
 +-
 +-typedef union
 +-{
 +-    cl_uchar  CL_ALIGNED(4) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uchar  x, y, z, w; };
 +-   __extension__ struct{ cl_uchar  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_uchar2 lo, hi; };
 +-#endif
 +-#if defined( __CL_UCHAR2__) 
 +-    __cl_uchar2     v2[2];
 +-#endif
 +-#if defined( __CL_UCHAR4__) 
 +-    __cl_uchar4     v4;
 +-#endif
 +-}cl_uchar4;
 +-
 +-/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
 +-typedef  cl_uchar4  cl_uchar3;
 +-
 +-typedef union
 +-{
 +-    cl_uchar   CL_ALIGNED(8) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uchar  x, y, z, w; };
 +-   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_uchar4 lo, hi; };
 +-#endif
 +-#if defined( __CL_UCHAR2__) 
 +-    __cl_uchar2     v2[4];
 +-#endif
 +-#if defined( __CL_UCHAR4__) 
 +-    __cl_uchar4     v4[2];
 +-#endif
 +-#if defined( __CL_UCHAR8__ )
 +-    __cl_uchar8     v8;
 +-#endif
 +-}cl_uchar8;
 +-
 +-typedef union
 +-{
 +-    cl_uchar  CL_ALIGNED(16) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uchar  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_uchar8 lo, hi; };
 +-#endif
 +-#if defined( __CL_UCHAR2__) 
 +-    __cl_uchar2     v2[8];
 +-#endif
 +-#if defined( __CL_UCHAR4__) 
 +-    __cl_uchar4     v4[4];
 +-#endif
 +-#if defined( __CL_UCHAR8__ )
 +-    __cl_uchar8     v8[2];
 +-#endif
 +-#if defined( __CL_UCHAR16__ )
 +-    __cl_uchar16    v16;
 +-#endif
 +-}cl_uchar16;
 +-
 +-
 +-/* ---- cl_shortn ---- */
 +-typedef union
 +-{
 +-    cl_short  CL_ALIGNED(4) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_short  x, y; };
 +-   __extension__ struct{ cl_short  s0, s1; };
 +-   __extension__ struct{ cl_short  lo, hi; };
 +-#endif
 +-#if defined( __CL_SHORT2__) 
 +-    __cl_short2     v2;
 +-#endif
 +-}cl_short2;
 +-
 +-typedef union
 +-{
 +-    cl_short  CL_ALIGNED(8) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_short  x, y, z, w; };
 +-   __extension__ struct{ cl_short  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_short2 lo, hi; };
 +-#endif
 +-#if defined( __CL_SHORT2__) 
 +-    __cl_short2     v2[2];
 +-#endif
 +-#if defined( __CL_SHORT4__) 
 +-    __cl_short4     v4;
 +-#endif
 +-}cl_short4;
 +-
 +-/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
 +-typedef  cl_short4  cl_short3;
 +-
 +-typedef union
 +-{
 +-    cl_short   CL_ALIGNED(16) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_short  x, y, z, w; };
 +-   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_short4 lo, hi; };
 +-#endif
 +-#if defined( __CL_SHORT2__) 
 +-    __cl_short2     v2[4];
 +-#endif
 +-#if defined( __CL_SHORT4__) 
 +-    __cl_short4     v4[2];
 +-#endif
 +-#if defined( __CL_SHORT8__ )
 +-    __cl_short8     v8;
 +-#endif
 +-}cl_short8;
 +-
 +-typedef union
 +-{
 +-    cl_short  CL_ALIGNED(32) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_short8 lo, hi; };
 +-#endif
 +-#if defined( __CL_SHORT2__) 
 +-    __cl_short2     v2[8];
 +-#endif
 +-#if defined( __CL_SHORT4__) 
 +-    __cl_short4     v4[4];
 +-#endif
 +-#if defined( __CL_SHORT8__ )
 +-    __cl_short8     v8[2];
 +-#endif
 +-#if defined( __CL_SHORT16__ )
 +-    __cl_short16    v16;
 +-#endif
 +-}cl_short16;
 +-
 +-
 +-/* ---- cl_ushortn ---- */
 +-typedef union
 +-{
 +-    cl_ushort  CL_ALIGNED(4) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ushort  x, y; };
 +-   __extension__ struct{ cl_ushort  s0, s1; };
 +-   __extension__ struct{ cl_ushort  lo, hi; };
 +-#endif
 +-#if defined( __CL_USHORT2__) 
 +-    __cl_ushort2     v2;
 +-#endif
 +-}cl_ushort2;
 +-
 +-typedef union
 +-{
 +-    cl_ushort  CL_ALIGNED(8) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ushort  x, y, z, w; };
 +-   __extension__ struct{ cl_ushort  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_ushort2 lo, hi; };
 +-#endif
 +-#if defined( __CL_USHORT2__) 
 +-    __cl_ushort2     v2[2];
 +-#endif
 +-#if defined( __CL_USHORT4__) 
 +-    __cl_ushort4     v4;
 +-#endif
 +-}cl_ushort4;
 +-
 +-/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
 +-typedef  cl_ushort4  cl_ushort3;
 +-
 +-typedef union
 +-{
 +-    cl_ushort   CL_ALIGNED(16) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ushort  x, y, z, w; };
 +-   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_ushort4 lo, hi; };
 +-#endif
 +-#if defined( __CL_USHORT2__) 
 +-    __cl_ushort2     v2[4];
 +-#endif
 +-#if defined( __CL_USHORT4__) 
 +-    __cl_ushort4     v4[2];
 +-#endif
 +-#if defined( __CL_USHORT8__ )
 +-    __cl_ushort8     v8;
 +-#endif
 +-}cl_ushort8;
 +-
 +-typedef union
 +-{
 +-    cl_ushort  CL_ALIGNED(32) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_ushort8 lo, hi; };
 +-#endif
 +-#if defined( __CL_USHORT2__) 
 +-    __cl_ushort2     v2[8];
 +-#endif
 +-#if defined( __CL_USHORT4__) 
 +-    __cl_ushort4     v4[4];
 +-#endif
 +-#if defined( __CL_USHORT8__ )
 +-    __cl_ushort8     v8[2];
 +-#endif
 +-#if defined( __CL_USHORT16__ )
 +-    __cl_ushort16    v16;
 +-#endif
 +-}cl_ushort16;
 +-
 +-/* ---- cl_intn ---- */
 +-typedef union
 +-{
 +-    cl_int  CL_ALIGNED(8) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_int  x, y; };
 +-   __extension__ struct{ cl_int  s0, s1; };
 +-   __extension__ struct{ cl_int  lo, hi; };
 +-#endif
 +-#if defined( __CL_INT2__) 
 +-    __cl_int2     v2;
 +-#endif
 +-}cl_int2;
 +-
 +-typedef union
 +-{
 +-    cl_int  CL_ALIGNED(16) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_int  x, y, z, w; };
 +-   __extension__ struct{ cl_int  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_int2 lo, hi; };
 +-#endif
 +-#if defined( __CL_INT2__) 
 +-    __cl_int2     v2[2];
 +-#endif
 +-#if defined( __CL_INT4__) 
 +-    __cl_int4     v4;
 +-#endif
 +-}cl_int4;
 +-
 +-/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
 +-typedef  cl_int4  cl_int3;
 +-
 +-typedef union
 +-{
 +-    cl_int   CL_ALIGNED(32) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_int  x, y, z, w; };
 +-   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_int4 lo, hi; };
 +-#endif
 +-#if defined( __CL_INT2__) 
 +-    __cl_int2     v2[4];
 +-#endif
 +-#if defined( __CL_INT4__) 
 +-    __cl_int4     v4[2];
 +-#endif
 +-#if defined( __CL_INT8__ )
 +-    __cl_int8     v8;
 +-#endif
 +-}cl_int8;
 +-
 +-typedef union
 +-{
 +-    cl_int  CL_ALIGNED(64) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_int8 lo, hi; };
 +-#endif
 +-#if defined( __CL_INT2__) 
 +-    __cl_int2     v2[8];
 +-#endif
 +-#if defined( __CL_INT4__) 
 +-    __cl_int4     v4[4];
 +-#endif
 +-#if defined( __CL_INT8__ )
 +-    __cl_int8     v8[2];
 +-#endif
 +-#if defined( __CL_INT16__ )
 +-    __cl_int16    v16;
 +-#endif
 +-}cl_int16;
 +-
 +-
 +-/* ---- cl_uintn ---- */
 +-typedef union
 +-{
 +-    cl_uint  CL_ALIGNED(8) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uint  x, y; };
 +-   __extension__ struct{ cl_uint  s0, s1; };
 +-   __extension__ struct{ cl_uint  lo, hi; };
 +-#endif
 +-#if defined( __CL_UINT2__) 
 +-    __cl_uint2     v2;
 +-#endif
 +-}cl_uint2;
 +-
 +-typedef union
 +-{
 +-    cl_uint  CL_ALIGNED(16) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uint  x, y, z, w; };
 +-   __extension__ struct{ cl_uint  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_uint2 lo, hi; };
 +-#endif
 +-#if defined( __CL_UINT2__) 
 +-    __cl_uint2     v2[2];
 +-#endif
 +-#if defined( __CL_UINT4__) 
 +-    __cl_uint4     v4;
 +-#endif
 +-}cl_uint4;
 +-
 +-/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
 +-typedef  cl_uint4  cl_uint3;
 +-
 +-typedef union
 +-{
 +-    cl_uint   CL_ALIGNED(32) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uint  x, y, z, w; };
 +-   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_uint4 lo, hi; };
 +-#endif
 +-#if defined( __CL_UINT2__) 
 +-    __cl_uint2     v2[4];
 +-#endif
 +-#if defined( __CL_UINT4__) 
 +-    __cl_uint4     v4[2];
 +-#endif
 +-#if defined( __CL_UINT8__ )
 +-    __cl_uint8     v8;
 +-#endif
 +-}cl_uint8;
 +-
 +-typedef union
 +-{
 +-    cl_uint  CL_ALIGNED(64) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_uint8 lo, hi; };
 +-#endif
 +-#if defined( __CL_UINT2__) 
 +-    __cl_uint2     v2[8];
 +-#endif
 +-#if defined( __CL_UINT4__) 
 +-    __cl_uint4     v4[4];
 +-#endif
 +-#if defined( __CL_UINT8__ )
 +-    __cl_uint8     v8[2];
 +-#endif
 +-#if defined( __CL_UINT16__ )
 +-    __cl_uint16    v16;
 +-#endif
 +-}cl_uint16;
 +-
 +-/* ---- cl_longn ---- */
 +-typedef union
 +-{
 +-    cl_long  CL_ALIGNED(16) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_long  x, y; };
 +-   __extension__ struct{ cl_long  s0, s1; };
 +-   __extension__ struct{ cl_long  lo, hi; };
 +-#endif
 +-#if defined( __CL_LONG2__) 
 +-    __cl_long2     v2;
 +-#endif
 +-}cl_long2;
 +-
 +-typedef union
 +-{
 +-    cl_long  CL_ALIGNED(32) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_long  x, y, z, w; };
 +-   __extension__ struct{ cl_long  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_long2 lo, hi; };
 +-#endif
 +-#if defined( __CL_LONG2__) 
 +-    __cl_long2     v2[2];
 +-#endif
 +-#if defined( __CL_LONG4__) 
 +-    __cl_long4     v4;
 +-#endif
 +-}cl_long4;
 +-
 +-/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
 +-typedef  cl_long4  cl_long3;
 +-
 +-typedef union
 +-{
 +-    cl_long   CL_ALIGNED(64) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_long  x, y, z, w; };
 +-   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_long4 lo, hi; };
 +-#endif
 +-#if defined( __CL_LONG2__) 
 +-    __cl_long2     v2[4];
 +-#endif
 +-#if defined( __CL_LONG4__) 
 +-    __cl_long4     v4[2];
 +-#endif
 +-#if defined( __CL_LONG8__ )
 +-    __cl_long8     v8;
 +-#endif
 +-}cl_long8;
 +-
 +-typedef union
 +-{
 +-    cl_long  CL_ALIGNED(128) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_long8 lo, hi; };
 +-#endif
 +-#if defined( __CL_LONG2__) 
 +-    __cl_long2     v2[8];
 +-#endif
 +-#if defined( __CL_LONG4__) 
 +-    __cl_long4     v4[4];
 +-#endif
 +-#if defined( __CL_LONG8__ )
 +-    __cl_long8     v8[2];
 +-#endif
 +-#if defined( __CL_LONG16__ )
 +-    __cl_long16    v16;
 +-#endif
 +-}cl_long16;
 +-
 +-
 +-/* ---- cl_ulongn ---- */
 +-typedef union
 +-{
 +-    cl_ulong  CL_ALIGNED(16) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ulong  x, y; };
 +-   __extension__ struct{ cl_ulong  s0, s1; };
 +-   __extension__ struct{ cl_ulong  lo, hi; };
 +-#endif
 +-#if defined( __CL_ULONG2__) 
 +-    __cl_ulong2     v2;
 +-#endif
 +-}cl_ulong2;
 +-
 +-typedef union
 +-{
 +-    cl_ulong  CL_ALIGNED(32) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ulong  x, y, z, w; };
 +-   __extension__ struct{ cl_ulong  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_ulong2 lo, hi; };
 +-#endif
 +-#if defined( __CL_ULONG2__) 
 +-    __cl_ulong2     v2[2];
 +-#endif
 +-#if defined( __CL_ULONG4__) 
 +-    __cl_ulong4     v4;
 +-#endif
 +-}cl_ulong4;
 +-
 +-/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
 +-typedef  cl_ulong4  cl_ulong3;
 +-
 +-typedef union
 +-{
 +-    cl_ulong   CL_ALIGNED(64) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ulong  x, y, z, w; };
 +-   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_ulong4 lo, hi; };
 +-#endif
 +-#if defined( __CL_ULONG2__) 
 +-    __cl_ulong2     v2[4];
 +-#endif
 +-#if defined( __CL_ULONG4__) 
 +-    __cl_ulong4     v4[2];
 +-#endif
 +-#if defined( __CL_ULONG8__ )
 +-    __cl_ulong8     v8;
 +-#endif
 +-}cl_ulong8;
 +-
 +-typedef union
 +-{
 +-    cl_ulong  CL_ALIGNED(128) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_ulong8 lo, hi; };
 +-#endif
 +-#if defined( __CL_ULONG2__) 
 +-    __cl_ulong2     v2[8];
 +-#endif
 +-#if defined( __CL_ULONG4__) 
 +-    __cl_ulong4     v4[4];
 +-#endif
 +-#if defined( __CL_ULONG8__ )
 +-    __cl_ulong8     v8[2];
 +-#endif
 +-#if defined( __CL_ULONG16__ )
 +-    __cl_ulong16    v16;
 +-#endif
 +-}cl_ulong16;
 +-
 +-
 +-/* --- cl_floatn ---- */
 +-
 +-typedef union
 +-{
 +-    cl_float  CL_ALIGNED(8) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_float  x, y; };
 +-   __extension__ struct{ cl_float  s0, s1; };
 +-   __extension__ struct{ cl_float  lo, hi; };
 +-#endif
 +-#if defined( __CL_FLOAT2__) 
 +-    __cl_float2     v2;
 +-#endif
 +-}cl_float2;
 +-
 +-typedef union
 +-{
 +-    cl_float  CL_ALIGNED(16) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_float   x, y, z, w; };
 +-   __extension__ struct{ cl_float   s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_float2  lo, hi; };
 +-#endif
 +-#if defined( __CL_FLOAT2__) 
 +-    __cl_float2     v2[2];
 +-#endif
 +-#if defined( __CL_FLOAT4__) 
 +-    __cl_float4     v4;
 +-#endif
 +-}cl_float4;
 +-
 +-/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
 +-typedef  cl_float4  cl_float3;
 +-
 +-typedef union
 +-{
 +-    cl_float   CL_ALIGNED(32) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_float   x, y, z, w; };
 +-   __extension__ struct{ cl_float   s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_float4  lo, hi; };
 +-#endif
 +-#if defined( __CL_FLOAT2__) 
 +-    __cl_float2     v2[4];
 +-#endif
 +-#if defined( __CL_FLOAT4__) 
 +-    __cl_float4     v4[2];
 +-#endif
 +-#if defined( __CL_FLOAT8__ )
 +-    __cl_float8     v8;
 +-#endif
 +-}cl_float8;
 +-
 +-typedef union
 +-{
 +-    cl_float  CL_ALIGNED(64) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_float  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_float8 lo, hi; };
 +-#endif
 +-#if defined( __CL_FLOAT2__) 
 +-    __cl_float2     v2[8];
 +-#endif
 +-#if defined( __CL_FLOAT4__) 
 +-    __cl_float4     v4[4];
 +-#endif
 +-#if defined( __CL_FLOAT8__ )
 +-    __cl_float8     v8[2];
 +-#endif
 +-#if defined( __CL_FLOAT16__ )
 +-    __cl_float16    v16;
 +-#endif
 +-}cl_float16;
 +-
 +-/* --- cl_doublen ---- */
 +-
 +-typedef union
 +-{
 +-    cl_double  CL_ALIGNED(16) s[2];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_double  x, y; };
 +-   __extension__ struct{ cl_double s0, s1; };
 +-   __extension__ struct{ cl_double lo, hi; };
 +-#endif
 +-#if defined( __CL_DOUBLE2__) 
 +-    __cl_double2     v2;
 +-#endif
 +-}cl_double2;
 +-
 +-typedef union
 +-{
 +-    cl_double  CL_ALIGNED(32) s[4];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_double  x, y, z, w; };
 +-   __extension__ struct{ cl_double  s0, s1, s2, s3; };
 +-   __extension__ struct{ cl_double2 lo, hi; };
 +-#endif
 +-#if defined( __CL_DOUBLE2__) 
 +-    __cl_double2     v2[2];
 +-#endif
 +-#if defined( __CL_DOUBLE4__) 
 +-    __cl_double4     v4;
 +-#endif
 +-}cl_double4;
 +-
 +-/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
 +-typedef  cl_double4  cl_double3;
 +-
 +-typedef union
 +-{
 +-    cl_double   CL_ALIGNED(64) s[8];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_double  x, y, z, w; };
 +-   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7; };
 +-   __extension__ struct{ cl_double4 lo, hi; };
 +-#endif
 +-#if defined( __CL_DOUBLE2__) 
 +-    __cl_double2     v2[4];
 +-#endif
 +-#if defined( __CL_DOUBLE4__) 
 +-    __cl_double4     v4[2];
 +-#endif
 +-#if defined( __CL_DOUBLE8__ )
 +-    __cl_double8     v8;
 +-#endif
 +-}cl_double8;
 +-
 +-typedef union
 +-{
 +-    cl_double  CL_ALIGNED(128) s[16];
 +-#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
 +-   __extension__ struct{ cl_double  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
 +-   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
 +-   __extension__ struct{ cl_double8 lo, hi; };
 +-#endif
 +-#if defined( __CL_DOUBLE2__) 
 +-    __cl_double2     v2[8];
 +-#endif
 +-#if defined( __CL_DOUBLE4__) 
 +-    __cl_double4     v4[4];
 +-#endif
 +-#if defined( __CL_DOUBLE8__ )
 +-    __cl_double8     v8[2];
 +-#endif
 +-#if defined( __CL_DOUBLE16__ )
 +-    __cl_double16    v16;
 +-#endif
 +-}cl_double16;
 +-
 +-/* Macro to facilitate debugging 
 +- * Usage:
 +- *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. 
 +- *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO \"
 +- *   Each line thereafter of OpenCL C source must end with: \n\
 +- *   The last line ends in ";
 +- *
 +- *   Example:
 +- *
 +- *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
 +- *   kernel void foo( int a, float * b )             \n\
 +- *   {                                               \n\
 +- *      // my comment                                \n\
 +- *      *b[ get_global_id(0)] = a;                   \n\
 +- *   }                                               \n\
 +- *   ";
 +- *
 +- * This should correctly set up the line, (column) and file information for your source 
 +- * string so you can do source level debugging.
 +- */
 +-#define  __CL_STRINGIFY( _x )               # _x
 +-#define  _CL_STRINGIFY( _x )                __CL_STRINGIFY( _x )
 +-#define  CL_PROGRAM_STRING_DEBUG_INFO       "#line "  _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" 
 +-  
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  /* __CL_PLATFORM_H  */
 ++#include_next <CL/cl_platform.h>
- Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_gl.h
++Index: beignet-0.1+git20130514+19e9c58/include/CL/cl_gl.h
 +===================================================================
- --- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl_gl.h	2013-04-12 08:13:48.000000000 +0200
- +++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_gl.h	2013-04-15 18:25:01.036323041 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl_gl.h	2013-05-14 20:04:50.338033844 +0200
+++++ beignet-0.1+git20130514+19e9c58/include/CL/cl_gl.h	2013-05-14 20:04:53.678033695 +0200
 +@@ -1,161 +1 @@
 +-/**********************************************************************************
 +- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
 +- *
 +- * Permission is hereby granted, free of charge, to any person obtaining a
 +- * copy of this software and/or associated documentation files (the
 +- * "Materials"), to deal in the Materials without restriction, including
 +- * without limitation the rights to use, copy, modify, merge, publish,
 +- * distribute, sublicense, and/or sell copies of the Materials, and to
 +- * permit persons to whom the Materials are furnished to do so, subject to
 +- * the following conditions:
 +- *
 +- * The above copyright notice and this permission notice shall be included
 +- * in all copies or substantial portions of the Materials.
 +- *
 +- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 +- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 +- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 +- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 +- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 +- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 +- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 +- **********************************************************************************/
 +-
 +-#ifndef __OPENCL_CL_GL_H
 +-#define __OPENCL_CL_GL_H
 +-
 +-#ifdef __APPLE__
 +-#include <OpenCL/cl.h>
 +-#else
 +-#include <CL/cl.h>
 +-#endif	
 +-
 +-#ifdef __cplusplus
 +-extern "C" {
 +-#endif
 +-
 +-typedef cl_uint     cl_gl_object_type;
 +-typedef cl_uint     cl_gl_texture_info;
 +-typedef cl_uint     cl_gl_platform_info;
 +-typedef struct __GLsync *cl_GLsync;
 +-
 +-/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken           */
 +-#define CL_GL_OBJECT_BUFFER                     0x2000
 +-#define CL_GL_OBJECT_TEXTURE2D                  0x2001
 +-#define CL_GL_OBJECT_TEXTURE3D                  0x2002
 +-#define CL_GL_OBJECT_RENDERBUFFER               0x2003
 +-#define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
 +-#define CL_GL_OBJECT_TEXTURE1D                  0x200F
 +-#define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
 +-#define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
 +-
 +-/* cl_gl_texture_info           */
 +-#define CL_GL_TEXTURE_TARGET                    0x2004
 +-#define CL_GL_MIPMAP_LEVEL                      0x2005
 +-
 +-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateFromGLBuffer(cl_context     /* context */,
 +-                     cl_mem_flags   /* flags */,
 +-                     cl_GLuint      /* bufobj */,
 +-                     int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateFromGLTexture(cl_context      /* context */,
 +-                      cl_mem_flags    /* flags */,
 +-                      cl_GLenum       /* target */,
 +-                      cl_GLint        /* miplevel */,
 +-                      cl_GLuint       /* texture */,
 +-                      cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
 +-    
 +-extern CL_API_ENTRY cl_mem CL_API_CALL
 +-clCreateFromGLRenderbuffer(cl_context   /* context */,
 +-                           cl_mem_flags /* flags */,
 +-                           cl_GLuint    /* renderbuffer */,
 +-                           cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetGLObjectInfo(cl_mem                /* memobj */,
 +-                  cl_gl_object_type *   /* gl_object_type */,
 +-                  cl_GLuint *           /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
 +-                  
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetGLTextureInfo(cl_mem               /* memobj */,
 +-                   cl_gl_texture_info   /* param_name */,
 +-                   size_t               /* param_value_size */,
 +-                   void *               /* param_value */,
 +-                   size_t *             /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueAcquireGLObjects(cl_command_queue      /* command_queue */,
 +-                          cl_uint               /* num_objects */,
 +-                          const cl_mem *        /* mem_objects */,
 +-                          cl_uint               /* num_events_in_wait_list */,
 +-                          const cl_event *      /* event_wait_list */,
 +-                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clEnqueueReleaseGLObjects(cl_command_queue      /* command_queue */,
 +-                          cl_uint               /* num_objects */,
 +-                          const cl_mem *        /* mem_objects */,
 +-                          cl_uint               /* num_events_in_wait_list */,
 +-                          const cl_event *      /* event_wait_list */,
 +-                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
 +-
 +-
 +-// Deprecated OpenCL 1.1 APIs
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
 +-clCreateFromGLTexture2D(cl_context      /* context */,
 +-                        cl_mem_flags    /* flags */,
 +-                        cl_GLenum       /* target */,
 +-                        cl_GLint        /* miplevel */,
 +-                        cl_GLuint       /* texture */,
 +-                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-    
 +-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
 +-clCreateFromGLTexture3D(cl_context      /* context */,
 +-                        cl_mem_flags    /* flags */,
 +-                        cl_GLenum       /* target */,
 +-                        cl_GLint        /* miplevel */,
 +-                        cl_GLuint       /* texture */,
 +-                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 +-    
 +-/* cl_khr_gl_sharing extension  */
 +-    
 +-#define cl_khr_gl_sharing 1
 +-    
 +-typedef cl_uint     cl_gl_context_info;
 +-    
 +-/* Additional Error Codes  */
 +-#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
 +-    
 +-/* cl_gl_context_info  */
 +-#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
 +-#define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
 +-    
 +-/* Additional cl_context_properties  */
 +-#define CL_GL_CONTEXT_KHR                       0x2008
 +-#define CL_EGL_DISPLAY_KHR                      0x2009
 +-#define CL_GLX_DISPLAY_KHR                      0x200A
 +-#define CL_WGL_HDC_KHR                          0x200B
 +-#define CL_CGL_SHAREGROUP_KHR                   0x200C
 +-    
 +-extern CL_API_ENTRY cl_int CL_API_CALL
 +-clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
 +-                      cl_gl_context_info            /* param_name */,
 +-                      size_t                        /* param_value_size */,
 +-                      void *                        /* param_value */,
 +-                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 +-    
 +-typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
 +-    const cl_context_properties * properties,
 +-    cl_gl_context_info            param_name,
 +-    size_t                        param_value_size,
 +-    void *                        param_value,
 +-    size_t *                      param_value_size_ret);
 +-
 +-#ifdef __cplusplus
 +-}
 +-#endif
 +-
 +-#endif  /* __OPENCL_CL_GL_H */
 ++#include_next <CL/cl_gl.h>
diff --cc debian/patches/missing-header
index dab28b7,0000000..09276d5
mode 100644,000000..100644
--- a/debian/patches/missing-header
+++ b/debian/patches/missing-header
@@@ -1,16 -1,0 +1,16 @@@
 +Description: Fix missing include
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-04-03
 +
- Index: beignet-0.1+git20130418+0546d2e/backend/src/sys/alloc.hpp
++Index: beignet-0.1+git20130514+19e9c58/backend/src/sys/alloc.hpp
 +===================================================================
- --- beignet-0.1+git20130418+0546d2e.orig/backend/src/sys/alloc.hpp	2013-04-18 05:21:35.000000000 +0200
- +++ beignet-0.1+git20130418+0546d2e/backend/src/sys/alloc.hpp	2013-04-18 11:53:00.793430809 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/sys/alloc.hpp	2013-05-14 20:04:48.810033912 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/sys/alloc.hpp	2013-05-14 20:05:43.974031452 +0200
 +@@ -27,6 +27,7 @@
 + #include "sys/platform.hpp"
 + #include "sys/assert.hpp"
 + #include <algorithm>
 ++#include <limits>
 + 
 + namespace gbe
 + {
diff --cc debian/patches/respect-flags
index c7c84ff,0000000..1b17e6c
mode 100644,000000..100644
--- a/debian/patches/respect-flags
+++ b/debian/patches/respect-flags
@@@ -1,105 -1,0 +1,94 @@@
 +Description: Respect CFLAGS/CXXFLAGS from Debian
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-04-16
 +
- Index: beignet-0.1+git20130419+9c11c18/CMakeLists.txt
++Index: beignet-0.1+git20130514+19e9c58/CMakeLists.txt
 +===================================================================
- --- beignet-0.1+git20130419+9c11c18.orig/CMakeLists.txt	2013-04-19 10:28:49.000000000 +0200
- +++ beignet-0.1+git20130419+9c11c18/CMakeLists.txt	2013-04-19 19:05:35.000000000 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/CMakeLists.txt	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/CMakeLists.txt	2013-05-14 20:06:02.014030648 +0200
 +@@ -18,7 +18,6 @@
 + 
 + INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
 + 
 +-SET(CMAKE_VERBOSE_MAKEFILE "false")
 + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/CMake/")
 + SET(EMULATE_IVB false CACHE BOOL "To emulate IVB")
 + SET(EMULATE_SNB false CACHE BOOL "To emulate SNB")
- @@ -55,8 +54,8 @@
-    ADD_DEFINITIONS(-DUSE_FULSIM=0)
-  ENDIF (USE_FULSIM)
-  
- -SET(CMAKE_CXX_FLAGS "-Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ")
- -SET(CMAKE_C_FLAGS "-Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1")
- +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ")
- +SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1")
-  
-  # Front end stuff we need
-  #INCLUDE(CMake/FindLLVM.cmake)
- Index: beignet-0.1+git20130419+9c11c18/backend/CMakeLists.txt
++Index: beignet-0.1+git20130514+19e9c58/backend/CMakeLists.txt
 +===================================================================
- --- beignet-0.1+git20130419+9c11c18.orig/backend/CMakeLists.txt	2013-04-19 10:28:49.000000000 +0200
- +++ beignet-0.1+git20130419+9c11c18/backend/CMakeLists.txt	2013-04-19 19:09:27.509625385 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/backend/CMakeLists.txt	2013-05-08 11:55:52.000000000 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/CMakeLists.txt	2013-05-14 20:06:02.014030648 +0200
 +@@ -45,39 +45,39 @@
 + if (COMPILER STREQUAL "GCC")
 +   set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -funroll-loops -Wstrict-aliasing=2 -fstrict-aliasing -msse2 -msse3 -mssse3 -msse4.1 -fPIC -Wall")
 +   set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS}  ${LLVM_CFLAGS}")
 +-  set (CMAKE_CXX_FLAGS "${CMAKE_C_CXX_FLAGS}  -Wno-invalid-offsetof -fno-rtti -std=c++0x")
 ++  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS}  -Wno-invalid-offsetof -fno-rtti -std=c++0x")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-E")
 +-  set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-undefined ${LLVM_LFLAGS}")
 +-  set (CMAKE_CXX_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_CXX_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_C_FLAGS "${CMAKE_C_CXX_FLAGS}")
 ++  set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined ${LLVM_LFLAGS}")
 ++  set (CMAKE_CXX_FLAGS_DEBUG          "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_CXX_FLAGS_RELEASE        "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_CXX_FLAGS}")
 +   set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
 +   set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
 +   set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,-E")
 +-  set (CMAKE_C_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
 +-  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
 +-  set (CMAKE_C_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_C_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS_DEBUG          "-DGBE_DEBUG=1")
 ++  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_C_FLAGS_MINSIZEREL     "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS_RELEASE        "-DNDEBUG -DGBE_DEBUG=0")
 + elseif (COMPILER STREQUAL "CLANG")
 +   set (CMAKE_C_COMPILER             "clang")
 +   set (CMAKE_C_FLAGS                "-Wall -std=c99")
 +-  set (CMAKE_C_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
 +-  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
 +-  set (CMAKE_C_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_C_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS_DEBUG          "-DGBE_DEBUG=1")
 ++  set (CMAKE_C_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_C_FLAGS_MINSIZEREL     "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_C_FLAGS_RELEASE        "-DNDEBUG -DGBE_DEBUG=0")
 +   set (CMAKE_CXX_COMPILER             "clang++")
 +   set (CMAKE_CXX_FLAGS "-fstrict-aliasing -msse2 -fPIC -Wall -Wno-format-security -Wno-invalid-offsetof -std=c++0x")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG}")
 +-  set (CMAKE_CXX_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
 +-  set (CMAKE_CXX_FLAGS_RELEASE        "-O2 -DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_CXX_FLAGS_DEBUG          "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_MINSIZEREL     "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CMAKE_CXX_FLAGS_RELEASE        "-DNDEBUG -DGBE_DEBUG=0")
 +   set (CMAKE_AR      "/usr/bin/llvm-ar")
 +   set (CMAKE_LINKER  "/usr/bin/llvm-ld")
 +   set (CMAKE_NM      "/usr/bin/llvm-nm")
 +@@ -91,10 +91,10 @@
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG} -Wl,-E")
 +   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MODE_FLAG}")
 +-  set (CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DGBE_DEBUG=1")
 +-  set (CCMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O2 -DGBE_DEBUG=1")
 +-  set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O2 -DGBE_DEBUG=0")
 +-  set (CCMAKE_CXX_FLAGS_MINSIZEREL "-Os -DGBE_DEBUG=0")
 ++  set (CMAKE_CXX_FLAGS_DEBUG "-DGBE_DEBUG=1")
 ++  set (CCMAKE_CXX_FLAGS_RELWITHDEBINFO "-DGBE_DEBUG=1")
 ++  set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -DGBE_DEBUG=0")
 ++  set (CCMAKE_CXX_FLAGS_MINSIZEREL "-DGBE_DEBUG=0")
 +   set (CMAKE_EXE_LINKER_FLAGS "")
 + endif ()
 + 
diff --cc debian/patches/series
index d1dc3a1,0000000..2d64b61
mode 100644,000000..100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@@ -1,8 -1,0 +1,21 @@@
 +khronos
 +verbose
- implement-gefa
 +missing-header
 +soname
 +respect-flags
- clang-3.0
 +const64
++0001-Generate-all-supported-as_-functions.patch
++0002-Define-all-convert_-functions.patch
++0003-Add-long-and-ulong-types-to-conversions.patch
++0004-Make-libgbm-optional-without-EGL-support.patch
++0005-Define-clamp-value-lower-upper.patch
++0006-Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch
++0007-Correct-type-of-device-properties.patch
++0008-Update-gitignore-files.patch
++0009-GBE-refine-the-sampler-implementation-to-comply-with.patch
++0010-CL-Support-kernel-side-defined-samplers.patch
++0011-utests-Add-one-test-cases-for-sampler-support.patch
++0012-GBE-remove-sampler-address-space.patch
++0013-GBE-add-scalar-register-support-in-loadImmInstructio.patch
++0014-GBE-concentrate-all-samplers-allocation-at-compile-t.patch
++0015-GBE-Runtime-Optimize-Sample-TypedWrite-instruction.patch
diff --cc debian/patches/soname
index 32597fa,0000000..2cca305
mode 100644,000000..100644
--- a/debian/patches/soname
+++ b/debian/patches/soname
@@@ -1,33 -1,0 +1,33 @@@
 +Description: Use proper SONAME
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-04-15
 +
- Index: beignet-0.1+git20130418+0546d2e/backend/src/CMakeLists.txt
++Index: beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt
 +===================================================================
- --- beignet-0.1+git20130418+0546d2e.orig/backend/src/CMakeLists.txt	2013-04-18 05:21:35.000000000 +0200
- +++ beignet-0.1+git20130418+0546d2e/backend/src/CMakeLists.txt	2013-04-18 11:53:59.985434244 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/backend/src/CMakeLists.txt	2013-05-14 20:04:47.974033949 +0200
+++++ beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt	2013-05-14 20:05:52.618031067 +0200
 +@@ -114,7 +114,10 @@
 +                       ${LLVM_MODULE_LIBS}
 +                       ${CMAKE_THREAD_LIBS_INIT}
 +                       ${CMAKE_DL_LIBS})
 +-
 ++set_target_properties(gbe
 ++                        PROPERTIES
 ++                        VERSION 0.1
 ++                        SOVERSION 0)
 + install (TARGETS gbe LIBRARY DESTINATION lib)
 + install (FILES backend/program.h DESTINATION include/gen)
 + 
- Index: beignet-0.1+git20130418+0546d2e/src/CMakeLists.txt
++Index: beignet-0.1+git20130514+19e9c58/src/CMakeLists.txt
 +===================================================================
- --- beignet-0.1+git20130418+0546d2e.orig/src/CMakeLists.txt	2013-04-18 05:21:35.000000000 +0200
- +++ beignet-0.1+git20130418+0546d2e/src/CMakeLists.txt	2013-04-18 11:53:59.985434244 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/src/CMakeLists.txt	2013-05-14 20:04:47.974033949 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/CMakeLists.txt	2013-05-14 20:05:52.618031067 +0200
 +@@ -58,4 +58,8 @@
 +                       ${OPENGL_LIBRARIES}
 +                       ${OPTIONAL_EGL_LIBRARY}
 +                       ${GBM_LIBRARY})
 ++set_target_properties(cl
 ++                        PROPERTIES
 ++                        VERSION 0.1
 ++                        SOVERSION 0)
 + install (TARGETS cl LIBRARY DESTINATION lib)
diff --cc debian/patches/verbose
index 033a2be,0000000..ce1a8fc
mode 100644,000000..100644
--- a/debian/patches/verbose
+++ b/debian/patches/verbose
@@@ -1,16 -1,0 +1,16 @@@
 +Description: More verbose errors
 +Author: Simon Richter <sjr at debian.org>
 +Last-Update: 2013-04-01
 +
- Index: beignet-0.0.0+git2013.04.11+e6b503e/src/cl_utils.h
++Index: beignet-0.1+git20130514+19e9c58/src/cl_utils.h
 +===================================================================
- --- beignet-0.0.0+git2013.04.11+e6b503e.orig/src/cl_utils.h	2013-04-12 08:13:48.000000000 +0200
- +++ beignet-0.0.0+git2013.04.11+e6b503e/src/cl_utils.h	2013-04-15 18:28:17.640334450 +0200
++--- beignet-0.1+git20130514+19e9c58.orig/src/cl_utils.h	2013-05-14 20:04:49.846033866 +0200
+++++ beignet-0.1+git20130514+19e9c58/src/cl_utils.h	2013-05-14 20:05:00.822033376 +0200
 +@@ -80,6 +80,7 @@
 + 
 + #define FATAL(...)                                          \
 + do {                                                        \
 ++  fprintf(stderr, "in function %s:\n", __FUNCTION__);       \
 +   fprintf(stderr, "error: ");                               \
 +   fprintf(stderr, __VA_ARGS__);                             \
 +   fprintf(stderr, "\n");                                    \

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git



More information about the Pkg-opencl-devel mailing list