[Pkg-opencl-devel] [beignet] 14/47: Imported Debian patch 0.1+git20130514+19e9c58-1

Fri Oct 31 21:45:47 UTC 2014

This is an automated email from the git hooks/post-receive script.

anbe pushed a commit to branch master
in repository beignet.

commit e18100e41fde531ede9016f360134c6879977c8d
Author: Simon Richter <sjr at debian.org>
Date:   Tue May 14 20:04:29 2013 +0200

    Imported Debian patch 0.1+git20130514+19e9c58-1
---
 debian/changelog                                   |    7 +
 debian/control                                     |    6 +-
 ...0001-Generate-all-supported-as_-functions.patch | 1283 +++++++++++++++
 .../0002-Define-all-convert_-functions.patch       |  946 +++++++++++
 ...3-Add-long-and-ulong-types-to-conversions.patch | 1719 ++++++++++++++++++++
 ...-Make-libgbm-optional-without-EGL-support.patch |   45 +
 .../0005-Define-clamp-value-lower-upper.patch      |  131 ++
 ...Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch |   58 +
 .../0007-Correct-type-of-device-properties.patch   |   34 +
 debian/patches/0008-Update-gitignore-files.patch   |   65 +
 ...the-sampler-implementation-to-comply-with.patch |  418 +++++
 ...0-CL-Support-kernel-side-defined-samplers.patch |  378 +++++
 ...ts-Add-one-test-cases-for-sampler-support.patch |  150 ++
 .../0012-GBE-remove-sampler-address-space.patch    |   74 +
 ...lar-register-support-in-loadImmInstructio.patch |   41 +
 ...rate-all-samplers-allocation-at-compile-t.patch |  407 +++++
 ...me-Optimize-Sample-TypedWrite-instruction.patch | 1027 ++++++++++++
 debian/patches/clang-3.0                           |   20 -
 debian/patches/const64                             |    8 +-
 debian/patches/implement-gefa                      |   21 -
 debian/patches/khronos                             |   36 +-
 debian/patches/missing-header                      |    6 +-
 debian/patches/respect-flags                       |   23 +-
 debian/patches/series                              |   17 +-
 debian/patches/soname                              |   12 +-
 debian/patches/verbose                             |    6 +-
 26 files changed, 6841 insertions(+), 97 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index d845e58..0c025f5 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+beignet (0.1+git20130514+19e9c58-1) experimental; urgency=low
+
+  * New upstream release
+  * Added a number of tentative patches
+
+ -- Simon Richter <sjr at debian.org>  Tue, 14 May 2013 20:04:29 +0200
+
 beignet (0.1+git20130502+63e60ed-1) experimental; urgency=low
 
   * New upstream release
diff --git a/debian/control b/debian/control
index d75231b..c3c69b7 100644
--- a/debian/control
+++ b/debian/control
@@ -1,8 +1,8 @@
 Source: beignet
 Priority: extra
 Maintainer: Simon Richter <sjr at debian.org>
-Build-Depends: debhelper (>= 9), cmake, pkg-config, ocl-icd-dev, ocl-icd-opencl-dev, libdrm-dev, libgbm-dev, libxfixes-dev, libxext-dev, llvm-3.2-dev | llvm-dev (>= 1:3.2)
-Build-Conflicts: libegl1-mesa-dev (<< 9)
+Build-Depends: debhelper (>= 9), cmake, pkg-config, ocl-icd-dev, ocl-icd-opencl-dev, libdrm-dev, libxfixes-dev, libxext-dev, llvm-3.2-dev | llvm-dev (>= 1:3.2)
+Build-Conflicts: libegl1-mesa-dev (<< 9), libgbm-dev
 Standards-Version: 3.9.4
 Section: libs
 Homepage: http://cgit.freedesktop.org/beignet/
@@ -22,7 +22,7 @@ Description: Intel OpenCL library
 Package: beignet0.0.1
 Section: libs
 Architecture: i386 amd64 kfreebsd-i386 kfreebsd-amd64
-Depends: ${shlibs:Depends}, ${misc:Depends}, clang (<< 1:3.1)
+Depends: ${shlibs:Depends}, ${misc:Depends}, clang (>= 1:3.1)
 Provides: opencl-icd
 Description: Intel OpenCL library
  OpenCL (Open Computing Language) is a multivendor open standard for
diff --git a/debian/patches/0001-Generate-all-supported-as_-functions.patch b/debian/patches/0001-Generate-all-supported-as_-functions.patch
new file mode 100644
index 0000000..86ad1e7
--- /dev/null
+++ b/debian/patches/0001-Generate-all-supported-as_-functions.patch
@@ -0,0 +1,1283 @@
+From 5cee017bb0148bd253ba1b4b6f986f4e0571e3ac Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Tue, 7 May 2013 15:41:45 +0200
+Subject: [PATCH 01/15] Generate all supported as_* functions
+To: beignet at lists.freedesktop.org
+
+This adds support for all type conversions currently possible.
+
+The conversion functions can be updated by invoking the
+update_conversions.sh script.
+---
+ backend/src/gen_conversions.sh    |   89 +++
+ backend/src/ocl_stdlib.h          | 1138 ++++++++++++++++++++++++++++++++++++-
+ backend/src/update_conversions.sh |   11 +
+ 3 files changed, 1231 insertions(+), 7 deletions(-)
+ create mode 100755 backend/src/gen_conversions.sh
+ create mode 100755 backend/src/update_conversions.sh
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh	2013-05-14 20:08:14.714024732 +0200
+@@ -0,0 +1,89 @@
++#! /bin/sh -e
++
++# Supported base types and their lengths
++TYPES="int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
++
++# Supported vector lengths
++VECTOR_LENGTHS="1 2 3 4 8 16"
++
++## No user serviceable parts below here
++
++# Generate list of union sizes
++for type in $TYPES; do
++        size=`IFS=:; set -- dummy $type; echo $3`
++        for vector_length in $VECTOR_LENGTHS; do
++                union_sizes="$union_sizes `expr $vector_length \* $size`"
++        done
++done
++union_sizes="`echo $union_sizes | tr ' ' '\n' | sort -n | uniq`"
++
++# For each union size
++for union_size in $union_sizes; do
++
++        # Define an union that contains all vector types that have the same size as the union
++        unionname="union _type_cast_${union_size}_b"
++        echo "$unionname {"
++        for type in $TYPES; do
++                basetype=`IFS=:; set -- dummy $type; echo $2`
++                basesize=`IFS=:; set -- dummy $type; echo $3`
++                for vector_length in $VECTOR_LENGTHS; do
++                        vector_size_in_union="`expr $vector_length \* $basesize`"
++                        if test $union_size -ne $vector_size_in_union; then
++                                continue
++                        fi
++                        if test $vector_length -eq 1; then
++                                vectortype=$basetype
++                        else
++                                vectortype=$basetype$vector_length
++                        fi
++                        echo "  $vectortype _$vectortype;"
++                done
++                
++        done
++        echo "};"
++        echo
++
++        # For each tuple of vector types that has the same size as the current union size,
++        # define an as_* function that converts types without changing binary representation.
++        for ftype in $TYPES; do
++                fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
++                fbasesize=`IFS=:; set -- dummy $ftype; echo $3`
++                for fvector_length in $VECTOR_LENGTHS; do
++                        fvector_size_in_union="`expr $fvector_length \* $fbasesize`"
++                        if test $union_size -ne $fvector_size_in_union; then
++                                continue
++                        fi
++                        if test $fvector_length -eq 1; then
++                                fvectortype=$fbasetype
++                        else
++                                fvectortype=$fbasetype$fvector_length
++                        fi
++                        for ttype in $TYPES; do
++                                tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
++                                tbasesize=`IFS=:; set -- dummy $ttype; echo $3`
++                                if test $fbasetype = $tbasetype; then
++                                        continue
++                                fi
++                                for tvector_length in $VECTOR_LENGTHS; do
++                                        tvector_size_in_union="`expr $tvector_length \* $tbasesize`"
++                                        if test $union_size -ne $tvector_size_in_union; then
++                                                continue
++                                        fi
++                                        if test $tvector_length -eq 1; then
++                                                tvectortype=$tbasetype
++                                        else
++                                                tvectortype=$tbasetype$tvector_length
++                                        fi
++                                        echo "INLINE OVERLOADABLE $tvectortype as_$tvectortype($fvectortype v) {"
++                                        echo "  $unionname u;"
++                                        echo "  u._$fvectortype = v;"
++                                        echo "  return u._$tvectortype;"
++                                        echo "}"
++                                        echo
++                                done
++                        done
++                done
++                
++        done
++
++done
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:08:14.718024731 +0200
+@@ -79,18 +79,1142 @@
+ typedef __texture struct _image3d_t* image3d_t;
+ typedef __sampler uint* sampler_t;
+ typedef size_t event_t;
++
+ /////////////////////////////////////////////////////////////////////////////
+ // OpenCL conversions & type casting
+ /////////////////////////////////////////////////////////////////////////////
+-union type_cast_4_b {
+-  float f;
+-  uchar4 u4;
+-};
+-uchar4 INLINE_OVERLOADABLE as_uchar4(float f) {
+-    union type_cast_4_b u;
+-    u.f = f;
+-    return u.u4;
++
++// ##BEGIN_CONVERSIONS##
++union _type_cast_1_b {
++  char _char;
++  uchar _uchar;
++};
++
++INLINE OVERLOADABLE uchar as_uchar(char v) {
++  union _type_cast_1_b u;
++  u._char = v;
++  return u._uchar;
++}
++
++INLINE OVERLOADABLE char as_char(uchar v) {
++  union _type_cast_1_b u;
++  u._uchar = v;
++  return u._char;
++}
++
++union _type_cast_2_b {
++  short _short;
++  ushort _ushort;
++  char2 _char2;
++  uchar2 _uchar2;
++};
++
++INLINE OVERLOADABLE ushort as_ushort(short v) {
++  union _type_cast_2_b u;
++  u._short = v;
++  return u._ushort;
++}
++
++INLINE OVERLOADABLE char2 as_char2(short v) {
++  union _type_cast_2_b u;
++  u._short = v;
++  return u._char2;
++}
++
++INLINE OVERLOADABLE uchar2 as_uchar2(short v) {
++  union _type_cast_2_b u;
++  u._short = v;
++  return u._uchar2;
++}
++
++INLINE OVERLOADABLE short as_short(ushort v) {
++  union _type_cast_2_b u;
++  u._ushort = v;
++  return u._short;
++}
++
++INLINE OVERLOADABLE char2 as_char2(ushort v) {
++  union _type_cast_2_b u;
++  u._ushort = v;
++  return u._char2;
++}
++
++INLINE OVERLOADABLE uchar2 as_uchar2(ushort v) {
++  union _type_cast_2_b u;
++  u._ushort = v;
++  return u._uchar2;
++}
++
++INLINE OVERLOADABLE short as_short(char2 v) {
++  union _type_cast_2_b u;
++  u._char2 = v;
++  return u._short;
++}
++
++INLINE OVERLOADABLE ushort as_ushort(char2 v) {
++  union _type_cast_2_b u;
++  u._char2 = v;
++  return u._ushort;
++}
++
++INLINE OVERLOADABLE uchar2 as_uchar2(char2 v) {
++  union _type_cast_2_b u;
++  u._char2 = v;
++  return u._uchar2;
++}
++
++INLINE OVERLOADABLE short as_short(uchar2 v) {
++  union _type_cast_2_b u;
++  u._uchar2 = v;
++  return u._short;
++}
++
++INLINE OVERLOADABLE ushort as_ushort(uchar2 v) {
++  union _type_cast_2_b u;
++  u._uchar2 = v;
++  return u._ushort;
++}
++
++INLINE OVERLOADABLE char2 as_char2(uchar2 v) {
++  union _type_cast_2_b u;
++  u._uchar2 = v;
++  return u._char2;
++}
++
++union _type_cast_3_b {
++  char3 _char3;
++  uchar3 _uchar3;
++};
++
++INLINE OVERLOADABLE uchar3 as_uchar3(char3 v) {
++  union _type_cast_3_b u;
++  u._char3 = v;
++  return u._uchar3;
++}
++
++INLINE OVERLOADABLE char3 as_char3(uchar3 v) {
++  union _type_cast_3_b u;
++  u._uchar3 = v;
++  return u._char3;
++}
++
++union _type_cast_4_b {
++  int _int;
++  uint _uint;
++  short2 _short2;
++  ushort2 _ushort2;
++  char4 _char4;
++  uchar4 _uchar4;
++  float _float;
++};
++
++INLINE OVERLOADABLE uint as_uint(int v) {
++  union _type_cast_4_b u;
++  u._int = v;
++  return u._uint;
++}
++
++INLINE OVERLOADABLE short2 as_short2(int v) {
++  union _type_cast_4_b u;
++  u._int = v;
++  return u._short2;
++}
++
++INLINE OVERLOADABLE ushort2 as_ushort2(int v) {
++  union _type_cast_4_b u;
++  u._int = v;
++  return u._ushort2;
++}
++
++INLINE OVERLOADABLE char4 as_char4(int v) {
++  union _type_cast_4_b u;
++  u._int = v;
++  return u._char4;
++}
++
++INLINE OVERLOADABLE uchar4 as_uchar4(int v) {
++  union _type_cast_4_b u;
++  u._int = v;
++  return u._uchar4;
++}
++
++INLINE OVERLOADABLE float as_float(int v) {
++  union _type_cast_4_b u;
++  u._int = v;
++  return u._float;
++}
++
++INLINE OVERLOADABLE int as_int(uint v) {
++  union _type_cast_4_b u;
++  u._uint = v;
++  return u._int;
++}
++
++INLINE OVERLOADABLE short2 as_short2(uint v) {
++  union _type_cast_4_b u;
++  u._uint = v;
++  return u._short2;
++}
++
++INLINE OVERLOADABLE ushort2 as_ushort2(uint v) {
++  union _type_cast_4_b u;
++  u._uint = v;
++  return u._ushort2;
++}
++
++INLINE OVERLOADABLE char4 as_char4(uint v) {
++  union _type_cast_4_b u;
++  u._uint = v;
++  return u._char4;
++}
++
++INLINE OVERLOADABLE uchar4 as_uchar4(uint v) {
++  union _type_cast_4_b u;
++  u._uint = v;
++  return u._uchar4;
++}
++
++INLINE OVERLOADABLE float as_float(uint v) {
++  union _type_cast_4_b u;
++  u._uint = v;
++  return u._float;
++}
++
++INLINE OVERLOADABLE int as_int(short2 v) {
++  union _type_cast_4_b u;
++  u._short2 = v;
++  return u._int;
++}
++
++INLINE OVERLOADABLE uint as_uint(short2 v) {
++  union _type_cast_4_b u;
++  u._short2 = v;
++  return u._uint;
++}
++
++INLINE OVERLOADABLE ushort2 as_ushort2(short2 v) {
++  union _type_cast_4_b u;
++  u._short2 = v;
++  return u._ushort2;
++}
++
++INLINE OVERLOADABLE char4 as_char4(short2 v) {
++  union _type_cast_4_b u;
++  u._short2 = v;
++  return u._char4;
++}
++
++INLINE OVERLOADABLE uchar4 as_uchar4(short2 v) {
++  union _type_cast_4_b u;
++  u._short2 = v;
++  return u._uchar4;
++}
++
++INLINE OVERLOADABLE float as_float(short2 v) {
++  union _type_cast_4_b u;
++  u._short2 = v;
++  return u._float;
++}
++
++INLINE OVERLOADABLE int as_int(ushort2 v) {
++  union _type_cast_4_b u;
++  u._ushort2 = v;
++  return u._int;
++}
++
++INLINE OVERLOADABLE uint as_uint(ushort2 v) {
++  union _type_cast_4_b u;
++  u._ushort2 = v;
++  return u._uint;
++}
++
++INLINE OVERLOADABLE short2 as_short2(ushort2 v) {
++  union _type_cast_4_b u;
++  u._ushort2 = v;
++  return u._short2;
++}
++
++INLINE OVERLOADABLE char4 as_char4(ushort2 v) {
++  union _type_cast_4_b u;
++  u._ushort2 = v;
++  return u._char4;
++}
++
++INLINE OVERLOADABLE uchar4 as_uchar4(ushort2 v) {
++  union _type_cast_4_b u;
++  u._ushort2 = v;
++  return u._uchar4;
++}
++
++INLINE OVERLOADABLE float as_float(ushort2 v) {
++  union _type_cast_4_b u;
++  u._ushort2 = v;
++  return u._float;
++}
++
++INLINE OVERLOADABLE int as_int(char4 v) {
++  union _type_cast_4_b u;
++  u._char4 = v;
++  return u._int;
++}
++
++INLINE OVERLOADABLE uint as_uint(char4 v) {
++  union _type_cast_4_b u;
++  u._char4 = v;
++  return u._uint;
++}
++
++INLINE OVERLOADABLE short2 as_short2(char4 v) {
++  union _type_cast_4_b u;
++  u._char4 = v;
++  return u._short2;
++}
++
++INLINE OVERLOADABLE ushort2 as_ushort2(char4 v) {
++  union _type_cast_4_b u;
++  u._char4 = v;
++  return u._ushort2;
++}
++
++INLINE OVERLOADABLE uchar4 as_uchar4(char4 v) {
++  union _type_cast_4_b u;
++  u._char4 = v;
++  return u._uchar4;
++}
++
++INLINE OVERLOADABLE float as_float(char4 v) {
++  union _type_cast_4_b u;
++  u._char4 = v;
++  return u._float;
++}
++
++INLINE OVERLOADABLE int as_int(uchar4 v) {
++  union _type_cast_4_b u;
++  u._uchar4 = v;
++  return u._int;
++}
++
++INLINE OVERLOADABLE uint as_uint(uchar4 v) {
++  union _type_cast_4_b u;
++  u._uchar4 = v;
++  return u._uint;
++}
++
++INLINE OVERLOADABLE short2 as_short2(uchar4 v) {
++  union _type_cast_4_b u;
++  u._uchar4 = v;
++  return u._short2;
++}
++
++INLINE OVERLOADABLE ushort2 as_ushort2(uchar4 v) {
++  union _type_cast_4_b u;
++  u._uchar4 = v;
++  return u._ushort2;
++}
++
++INLINE OVERLOADABLE char4 as_char4(uchar4 v) {
++  union _type_cast_4_b u;
++  u._uchar4 = v;
++  return u._char4;
++}
++
++INLINE OVERLOADABLE float as_float(uchar4 v) {
++  union _type_cast_4_b u;
++  u._uchar4 = v;
++  return u._float;
++}
++
++INLINE OVERLOADABLE int as_int(float v) {
++  union _type_cast_4_b u;
++  u._float = v;
++  return u._int;
++}
++
++INLINE OVERLOADABLE uint as_uint(float v) {
++  union _type_cast_4_b u;
++  u._float = v;
++  return u._uint;
++}
++
++INLINE OVERLOADABLE short2 as_short2(float v) {
++  union _type_cast_4_b u;
++  u._float = v;
++  return u._short2;
++}
++
++INLINE OVERLOADABLE ushort2 as_ushort2(float v) {
++  union _type_cast_4_b u;
++  u._float = v;
++  return u._ushort2;
++}
++
++INLINE OVERLOADABLE char4 as_char4(float v) {
++  union _type_cast_4_b u;
++  u._float = v;
++  return u._char4;
++}
++
++INLINE OVERLOADABLE uchar4 as_uchar4(float v) {
++  union _type_cast_4_b u;
++  u._float = v;
++  return u._uchar4;
++}
++
++union _type_cast_6_b {
++  short3 _short3;
++  ushort3 _ushort3;
++};
++
++INLINE OVERLOADABLE ushort3 as_ushort3(short3 v) {
++  union _type_cast_6_b u;
++  u._short3 = v;
++  return u._ushort3;
++}
++
++INLINE OVERLOADABLE short3 as_short3(ushort3 v) {
++  union _type_cast_6_b u;
++  u._ushort3 = v;
++  return u._short3;
++}
++
++union _type_cast_8_b {
++  int2 _int2;
++  uint2 _uint2;
++  short4 _short4;
++  ushort4 _ushort4;
++  char8 _char8;
++  uchar8 _uchar8;
++  float2 _float2;
++};
++
++INLINE OVERLOADABLE uint2 as_uint2(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._uchar8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._float2;
++}
++
++INLINE OVERLOADABLE int2 as_int2(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._uchar8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._float2;
++}
++
++INLINE OVERLOADABLE int2 as_int2(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE uint2 as_uint2(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._uchar8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._float2;
++}
++
++INLINE OVERLOADABLE int2 as_int2(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE uint2 as_uint2(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._uchar8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._float2;
++}
++
++INLINE OVERLOADABLE int2 as_int2(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE uint2 as_uint2(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._uchar8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._float2;
++}
++
++INLINE OVERLOADABLE int2 as_int2(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE uint2 as_uint2(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._float2;
++}
++
++INLINE OVERLOADABLE int2 as_int2(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE uint2 as_uint2(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._uchar8;
++}
++
++union _type_cast_12_b {
++  int3 _int3;
++  uint3 _uint3;
++  float3 _float3;
++};
++
++INLINE OVERLOADABLE uint3 as_uint3(int3 v) {
++  union _type_cast_12_b u;
++  u._int3 = v;
++  return u._uint3;
++}
++
++INLINE OVERLOADABLE float3 as_float3(int3 v) {
++  union _type_cast_12_b u;
++  u._int3 = v;
++  return u._float3;
++}
++
++INLINE OVERLOADABLE int3 as_int3(uint3 v) {
++  union _type_cast_12_b u;
++  u._uint3 = v;
++  return u._int3;
++}
++
++INLINE OVERLOADABLE float3 as_float3(uint3 v) {
++  union _type_cast_12_b u;
++  u._uint3 = v;
++  return u._float3;
++}
++
++INLINE OVERLOADABLE int3 as_int3(float3 v) {
++  union _type_cast_12_b u;
++  u._float3 = v;
++  return u._int3;
++}
++
++INLINE OVERLOADABLE uint3 as_uint3(float3 v) {
++  union _type_cast_12_b u;
++  u._float3 = v;
++  return u._uint3;
++}
++
++union _type_cast_16_b {
++  int4 _int4;
++  uint4 _uint4;
++  short8 _short8;
++  ushort8 _ushort8;
++  char16 _char16;
++  uchar16 _uchar16;
++  float4 _float4;
++};
++
++INLINE OVERLOADABLE uint4 as_uint4(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._uchar16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._float4;
++}
++
++INLINE OVERLOADABLE int4 as_int4(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._uchar16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._float4;
++}
++
++INLINE OVERLOADABLE int4 as_int4(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE uint4 as_uint4(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._uchar16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._float4;
++}
++
++INLINE OVERLOADABLE int4 as_int4(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE uint4 as_uint4(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._uchar16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._float4;
++}
++
++INLINE OVERLOADABLE int4 as_int4(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE uint4 as_uint4(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._uchar16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._float4;
++}
++
++INLINE OVERLOADABLE int4 as_int4(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE uint4 as_uint4(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._float4;
++}
++
++INLINE OVERLOADABLE int4 as_int4(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE uint4 as_uint4(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._uchar16;
++}
++
++union _type_cast_32_b {
++  int8 _int8;
++  uint8 _uint8;
++  short16 _short16;
++  ushort16 _ushort16;
++  float8 _float8;
++};
++
++INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._uint8;
++}
++
++INLINE OVERLOADABLE short16 as_short16(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._short16;
++}
++
++INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._ushort16;
++}
++
++INLINE OVERLOADABLE float8 as_float8(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._float8;
++}
++
++INLINE OVERLOADABLE int8 as_int8(uint8 v) {
++  union _type_cast_32_b u;
++  u._uint8 = v;
++  return u._int8;
++}
++
++INLINE OVERLOADABLE short16 as_short16(uint8 v) {
++  union _type_cast_32_b u;
++  u._uint8 = v;
++  return u._short16;
++}
++
++INLINE OVERLOADABLE ushort16 as_ushort16(uint8 v) {
++  union _type_cast_32_b u;
++  u._uint8 = v;
++  return u._ushort16;
++}
++
++INLINE OVERLOADABLE float8 as_float8(uint8 v) {
++  union _type_cast_32_b u;
++  u._uint8 = v;
++  return u._float8;
++}
++
++INLINE OVERLOADABLE int8 as_int8(short16 v) {
++  union _type_cast_32_b u;
++  u._short16 = v;
++  return u._int8;
++}
++
++INLINE OVERLOADABLE uint8 as_uint8(short16 v) {
++  union _type_cast_32_b u;
++  u._short16 = v;
++  return u._uint8;
++}
++
++INLINE OVERLOADABLE ushort16 as_ushort16(short16 v) {
++  union _type_cast_32_b u;
++  u._short16 = v;
++  return u._ushort16;
++}
++
++INLINE OVERLOADABLE float8 as_float8(short16 v) {
++  union _type_cast_32_b u;
++  u._short16 = v;
++  return u._float8;
++}
++
++INLINE OVERLOADABLE int8 as_int8(ushort16 v) {
++  union _type_cast_32_b u;
++  u._ushort16 = v;
++  return u._int8;
++}
++
++INLINE OVERLOADABLE uint8 as_uint8(ushort16 v) {
++  union _type_cast_32_b u;
++  u._ushort16 = v;
++  return u._uint8;
++}
++
++INLINE OVERLOADABLE short16 as_short16(ushort16 v) {
++  union _type_cast_32_b u;
++  u._ushort16 = v;
++  return u._short16;
++}
++
++INLINE OVERLOADABLE float8 as_float8(ushort16 v) {
++  union _type_cast_32_b u;
++  u._ushort16 = v;
++  return u._float8;
++}
++
++INLINE OVERLOADABLE int8 as_int8(float8 v) {
++  union _type_cast_32_b u;
++  u._float8 = v;
++  return u._int8;
++}
++
++INLINE OVERLOADABLE uint8 as_uint8(float8 v) {
++  union _type_cast_32_b u;
++  u._float8 = v;
++  return u._uint8;
++}
++
++INLINE OVERLOADABLE short16 as_short16(float8 v) {
++  union _type_cast_32_b u;
++  u._float8 = v;
++  return u._short16;
++}
++
++INLINE OVERLOADABLE ushort16 as_ushort16(float8 v) {
++  union _type_cast_32_b u;
++  u._float8 = v;
++  return u._ushort16;
++}
++
++union _type_cast_64_b {
++  int16 _int16;
++  uint16 _uint16;
++  float16 _float16;
++};
++
++INLINE OVERLOADABLE uint16 as_uint16(int16 v) {
++  union _type_cast_64_b u;
++  u._int16 = v;
++  return u._uint16;
++}
++
++INLINE OVERLOADABLE float16 as_float16(int16 v) {
++  union _type_cast_64_b u;
++  u._int16 = v;
++  return u._float16;
++}
++
++INLINE OVERLOADABLE int16 as_int16(uint16 v) {
++  union _type_cast_64_b u;
++  u._uint16 = v;
++  return u._int16;
++}
++
++INLINE OVERLOADABLE float16 as_float16(uint16 v) {
++  union _type_cast_64_b u;
++  u._uint16 = v;
++  return u._float16;
++}
++
++INLINE OVERLOADABLE int16 as_int16(float16 v) {
++  union _type_cast_64_b u;
++  u._float16 = v;
++  return u._int16;
++}
++
++INLINE OVERLOADABLE uint16 as_uint16(float16 v) {
++  union _type_cast_64_b u;
++  u._float16 = v;
++  return u._uint16;
+ }
++
++// ##END_CONVERSIONS##
++
+ #define DEF(type, n, type2) type##n INLINE_OVERLOADABLE convert_##type##n(type2##n d) { \
+     return (type##n)((type)(d.s0), (type)(d.s1), (type)(d.s2), (type)(d.s3)); \
+  }
+Index: beignet-0.1+git20130514+19e9c58/backend/src/update_conversions.sh
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/backend/src/update_conversions.sh	2013-05-14 20:08:14.718024731 +0200
+@@ -0,0 +1,11 @@
++#! /bin/sh -e
++
++STDLIB_HEADER=ocl_stdlib.h
++
++exec >$STDLIB_HEADER.tmp
++sed -n -e '1,/##BEGIN_CONVERSIONS##/p' $STDLIB_HEADER
++./gen_conversions.sh
++sed -n -e '/##END_CONVERSIONS##/,$p' $STDLIB_HEADER
++exec >&2
++
++mv $STDLIB_HEADER.tmp $STDLIB_HEADER
diff --git a/debian/patches/0002-Define-all-convert_-functions.patch b/debian/patches/0002-Define-all-convert_-functions.patch
new file mode 100644
index 0000000..06586ec
--- /dev/null
+++ b/debian/patches/0002-Define-all-convert_-functions.patch
@@ -0,0 +1,946 @@
+From b3a5ab4df6690230feae128a26d9fa7a4cefdd2b Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Tue, 14 May 2013 16:45:46 +0200
+Subject: [PATCH 02/15] Define all convert_* functions.
+To: beignet at lists.freedesktop.org
+
+These functions convert between vectors of the same length by casting each
+member in turn.
+---
+ backend/src/gen_conversions.sh |   49 +++
+ backend/src/ocl_stdlib.h       |  863 ++++++++++++++++++++++++++++++++++++++--
+ 2 files changed, 889 insertions(+), 23 deletions(-)
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/gen_conversions.sh	2013-05-14 20:08:14.714024732 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh	2013-05-14 20:08:21.594024425 +0200
+@@ -87,3 +87,52 @@
+         done
+ 
+ done
++
++# For all vector lengths and types, generate conversion functions
++for vector_length in $VECTOR_LENGTHS; do
++        if test $vector_length -eq 1; then
++                continue;
++        fi
++        for ftype in $TYPES; do
++                fbasetype=`IFS=:; set -- dummy $ftype; echo $2`
++                for ttype in $TYPES; do
++                        tbasetype=`IFS=:; set -- dummy $ttype; echo $2`
++                        if test $fbasetype = $tbasetype; then
++                                continue
++                        fi
++                        fvectortype=$fbasetype$vector_length
++                        tvectortype=$tbasetype$vector_length
++                        construct="($tbasetype)(v.s0)"
++                        if test $vector_length -gt 1; then
++                                construct="$construct, ($tbasetype)(v.s1)"
++                        fi
++                        if test $vector_length -gt 2; then
++                                construct="$construct, ($tbasetype)(v.s2)"
++                        fi
++                        if test $vector_length -gt 3; then
++                                construct="$construct, ($tbasetype)(v.s3)"
++                        fi
++                        if test $vector_length -gt 4; then
++                                construct="$construct, ($tbasetype)(v.s4)"
++                                construct="$construct, ($tbasetype)(v.s5)"
++                                construct="$construct, ($tbasetype)(v.s6)"
++                                construct="$construct, ($tbasetype)(v.s7)"
++                        fi
++                        if test $vector_length -gt 8; then
++                                construct="$construct, ($tbasetype)(v.s8)"
++                                construct="$construct, ($tbasetype)(v.s9)"
++                                construct="$construct, ($tbasetype)(v.sA)"
++                                construct="$construct, ($tbasetype)(v.sB)"
++                                construct="$construct, ($tbasetype)(v.sC)"
++                                construct="$construct, ($tbasetype)(v.sD)"
++                                construct="$construct, ($tbasetype)(v.sE)"
++                                construct="$construct, ($tbasetype)(v.sF)"
++                        fi
++                        
++                        echo "INLINE OVERLOADABLE $tvectortype convert_$tvectortype($fvectortype v) {"
++                        echo "  return ($tvectortype)($construct);"
++                        echo "}"
++                        echo
++                done
++        done
++done
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:08:14.718024731 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:08:21.598024425 +0200
+@@ -1213,31 +1213,848 @@
+   return u._uint16;
+ }
+ 
++INLINE OVERLOADABLE uint2 convert_uint2(int2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(int2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(int2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(int2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(int2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(int2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(uint2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(uint2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(uint2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(uint2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(uint2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(uint2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(short2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE uint2 convert_uint2(short2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(short2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(short2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(short2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(short2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(ushort2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE uint2 convert_uint2(ushort2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(ushort2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(ushort2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(ushort2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(ushort2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(char2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE uint2 convert_uint2(char2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(char2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(char2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(char2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(char2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(uchar2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE uint2 convert_uint2(uchar2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(uchar2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(uchar2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(uchar2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(uchar2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(float2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE uint2 convert_uint2(float2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(float2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(float2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(int3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(int3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(int3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(int3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(int3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++}
++
++INLINE OVERLOADABLE float3 convert_float3(int3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(uint3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(uint3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(uint3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(uint3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(uint3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++}
++
++INLINE OVERLOADABLE float3 convert_float3(uint3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(short3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(short3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(short3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(short3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(short3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++}
++
++INLINE OVERLOADABLE float3 convert_float3(short3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(ushort3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(ushort3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(ushort3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(ushort3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(ushort3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++}
++
++INLINE OVERLOADABLE float3 convert_float3(ushort3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(char3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(char3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(char3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(char3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(char3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++}
++
++INLINE OVERLOADABLE float3 convert_float3(char3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(uchar3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(uchar3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(uchar3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(uchar3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(uchar3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE float3 convert_float3(uchar3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(float3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(float3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(float3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(float3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(float3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(float3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(int4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(int4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(int4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(int4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(int4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(int4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(uint4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(uint4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(uint4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(uint4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(uint4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(uint4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(short4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(short4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(short4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(short4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(short4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(short4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(ushort4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(ushort4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(ushort4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(ushort4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(ushort4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(ushort4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(char4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(char4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(char4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(char4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(char4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(char4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(uchar4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(uchar4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(uchar4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(uchar4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(uchar4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(uchar4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(float4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(float4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(float4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(float4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(float4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(float4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(int8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(int8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(int8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(int8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(int8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(int8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(uint8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(uint8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(uint8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(uint8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(uint8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(uint8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(short8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(short8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(short8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(short8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(short8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(short8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(ushort8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(ushort8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(ushort8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(ushort8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(ushort8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(ushort8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(char8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(char8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(char8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(char8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(char8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(char8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(uchar8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(uchar8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(uchar8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(uchar8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(uchar8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(uchar8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(float8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(float8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(float8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(float8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(float8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(float8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(int16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(int16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(int16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(int16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(int16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(int16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(uint16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(uint16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(uint16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(uint16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(uint16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(uint16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(short16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(short16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(short16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(short16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(short16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(short16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(ushort16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(ushort16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(ushort16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(ushort16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(ushort16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(ushort16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(char16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(char16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(char16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(char16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(char16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(char16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(uchar16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(uchar16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(uchar16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(uchar16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(uchar16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(uchar16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(float16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(float16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(float16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(float16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(float16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(float16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
+ // ##END_CONVERSIONS##
+ 
+-#define DEF(type, n, type2) type##n INLINE_OVERLOADABLE convert_##type##n(type2##n d) { \
+-    return (type##n)((type)(d.s0), (type)(d.s1), (type)(d.s2), (type)(d.s3)); \
+- }
+-#define DEF2(type) DEF(type, 4, char); \
+-                   DEF(type, 4, uchar); \
+-                   DEF(type, 4, short); \
+-                   DEF(type, 4, ushort); \
+-                   DEF(type, 4, int); \
+-                   DEF(type, 4, uint); \
+-                   DEF(type, 4, long); \
+-                   DEF(type, 4, ulong); \
+-                   DEF(type, 4, float);
+-DEF2(char);
+-DEF2(uchar);
+-DEF2(short);
+-DEF2(ushort);
+-DEF2(int);
+-DEF2(uint);
+-DEF2(long);
+-DEF2(ulong);
+-DEF2(float);
+-#undef DEF2
+-#undef DEF
+ /////////////////////////////////////////////////////////////////////////////
+ // OpenCL preprocessor directives & macros
+ /////////////////////////////////////////////////////////////////////////////
diff --git a/debian/patches/0003-Add-long-and-ulong-types-to-conversions.patch b/debian/patches/0003-Add-long-and-ulong-types-to-conversions.patch
new file mode 100644
index 0000000..22a7945
--- /dev/null
+++ b/debian/patches/0003-Add-long-and-ulong-types-to-conversions.patch
@@ -0,0 +1,1719 @@
+From a10c3357283b8e2714a41b2d31f0f9831073202a Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Tue, 14 May 2013 17:00:45 +0200
+Subject: [PATCH 03/15] Add long and ulong types to conversions.
+To: beignet at lists.freedesktop.org
+
+This enables as_* and convert_* for the long and ulong data types.
+---
+ backend/src/gen_conversions.sh |    2 +-
+ backend/src/ocl_stdlib.h       | 1248 +++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 1234 insertions(+), 16 deletions(-)
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/gen_conversions.sh	2013-05-14 20:08:21.594024425 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/gen_conversions.sh	2013-05-14 20:08:23.422024343 +0200
+@@ -1,7 +1,7 @@
+ #! /bin/sh -e
+ 
+ # Supported base types and their lengths
+-TYPES="int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
++TYPES="long:8 ulong:8 int:4 uint:4 short:2 ushort:2 char:1 uchar:1 float:4"
+ 
+ # Supported vector lengths
+ VECTOR_LENGTHS="1 2 3 4 8 16"
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:08:21.598024425 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:08:23.426024343 +0200
+@@ -478,6 +478,8 @@
+ }
+ 
+ union _type_cast_8_b {
++  long _long;
++  ulong _ulong;
+   int2 _int2;
+   uint2 _uint2;
+   short4 _short4;
+@@ -487,6 +489,114 @@
+   float2 _float2;
+ };
+ 
++INLINE OVERLOADABLE ulong as_ulong(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._ulong;
++}
++
++INLINE OVERLOADABLE int2 as_int2(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE uint2 as_uint2(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._uchar8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(long v) {
++  union _type_cast_8_b u;
++  u._long = v;
++  return u._float2;
++}
++
++INLINE OVERLOADABLE long as_long(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE int2 as_int2(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._int2;
++}
++
++INLINE OVERLOADABLE uint2 as_uint2(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._uint2;
++}
++
++INLINE OVERLOADABLE short4 as_short4(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._short4;
++}
++
++INLINE OVERLOADABLE ushort4 as_ushort4(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._ushort4;
++}
++
++INLINE OVERLOADABLE char8 as_char8(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._char8;
++}
++
++INLINE OVERLOADABLE uchar8 as_uchar8(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._uchar8;
++}
++
++INLINE OVERLOADABLE float2 as_float2(ulong v) {
++  union _type_cast_8_b u;
++  u._ulong = v;
++  return u._float2;
++}
++
++INLINE OVERLOADABLE long as_long(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE ulong as_ulong(int2 v) {
++  union _type_cast_8_b u;
++  u._int2 = v;
++  return u._ulong;
++}
++
+ INLINE OVERLOADABLE uint2 as_uint2(int2 v) {
+   union _type_cast_8_b u;
+   u._int2 = v;
+@@ -523,6 +633,18 @@
+   return u._float2;
+ }
+ 
++INLINE OVERLOADABLE long as_long(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE ulong as_ulong(uint2 v) {
++  union _type_cast_8_b u;
++  u._uint2 = v;
++  return u._ulong;
++}
++
+ INLINE OVERLOADABLE int2 as_int2(uint2 v) {
+   union _type_cast_8_b u;
+   u._uint2 = v;
+@@ -559,6 +681,18 @@
+   return u._float2;
+ }
+ 
++INLINE OVERLOADABLE long as_long(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE ulong as_ulong(short4 v) {
++  union _type_cast_8_b u;
++  u._short4 = v;
++  return u._ulong;
++}
++
+ INLINE OVERLOADABLE int2 as_int2(short4 v) {
+   union _type_cast_8_b u;
+   u._short4 = v;
+@@ -595,6 +729,18 @@
+   return u._float2;
+ }
+ 
++INLINE OVERLOADABLE long as_long(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE ulong as_ulong(ushort4 v) {
++  union _type_cast_8_b u;
++  u._ushort4 = v;
++  return u._ulong;
++}
++
+ INLINE OVERLOADABLE int2 as_int2(ushort4 v) {
+   union _type_cast_8_b u;
+   u._ushort4 = v;
+@@ -631,6 +777,18 @@
+   return u._float2;
+ }
+ 
++INLINE OVERLOADABLE long as_long(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE ulong as_ulong(char8 v) {
++  union _type_cast_8_b u;
++  u._char8 = v;
++  return u._ulong;
++}
++
+ INLINE OVERLOADABLE int2 as_int2(char8 v) {
+   union _type_cast_8_b u;
+   u._char8 = v;
+@@ -667,6 +825,18 @@
+   return u._float2;
+ }
+ 
++INLINE OVERLOADABLE long as_long(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE ulong as_ulong(uchar8 v) {
++  union _type_cast_8_b u;
++  u._uchar8 = v;
++  return u._ulong;
++}
++
+ INLINE OVERLOADABLE int2 as_int2(uchar8 v) {
+   union _type_cast_8_b u;
+   u._uchar8 = v;
+@@ -703,6 +873,18 @@
+   return u._float2;
+ }
+ 
++INLINE OVERLOADABLE long as_long(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._long;
++}
++
++INLINE OVERLOADABLE ulong as_ulong(float2 v) {
++  union _type_cast_8_b u;
++  u._float2 = v;
++  return u._ulong;
++}
++
+ INLINE OVERLOADABLE int2 as_int2(float2 v) {
+   union _type_cast_8_b u;
+   u._float2 = v;
+@@ -782,6 +964,8 @@
+ }
+ 
+ union _type_cast_16_b {
++  long2 _long2;
++  ulong2 _ulong2;
+   int4 _int4;
+   uint4 _uint4;
+   short8 _short8;
+@@ -791,6 +975,114 @@
+   float4 _float4;
+ };
+ 
++INLINE OVERLOADABLE ulong2 as_ulong2(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._ulong2;
++}
++
++INLINE OVERLOADABLE int4 as_int4(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE uint4 as_uint4(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._uchar16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(long2 v) {
++  union _type_cast_16_b u;
++  u._long2 = v;
++  return u._float4;
++}
++
++INLINE OVERLOADABLE long2 as_long2(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE int4 as_int4(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._int4;
++}
++
++INLINE OVERLOADABLE uint4 as_uint4(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._uint4;
++}
++
++INLINE OVERLOADABLE short8 as_short8(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._short8;
++}
++
++INLINE OVERLOADABLE ushort8 as_ushort8(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._ushort8;
++}
++
++INLINE OVERLOADABLE char16 as_char16(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._char16;
++}
++
++INLINE OVERLOADABLE uchar16 as_uchar16(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._uchar16;
++}
++
++INLINE OVERLOADABLE float4 as_float4(ulong2 v) {
++  union _type_cast_16_b u;
++  u._ulong2 = v;
++  return u._float4;
++}
++
++INLINE OVERLOADABLE long2 as_long2(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE ulong2 as_ulong2(int4 v) {
++  union _type_cast_16_b u;
++  u._int4 = v;
++  return u._ulong2;
++}
++
+ INLINE OVERLOADABLE uint4 as_uint4(int4 v) {
+   union _type_cast_16_b u;
+   u._int4 = v;
+@@ -827,6 +1119,18 @@
+   return u._float4;
+ }
+ 
++INLINE OVERLOADABLE long2 as_long2(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE ulong2 as_ulong2(uint4 v) {
++  union _type_cast_16_b u;
++  u._uint4 = v;
++  return u._ulong2;
++}
++
+ INLINE OVERLOADABLE int4 as_int4(uint4 v) {
+   union _type_cast_16_b u;
+   u._uint4 = v;
+@@ -863,6 +1167,18 @@
+   return u._float4;
+ }
+ 
++INLINE OVERLOADABLE long2 as_long2(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE ulong2 as_ulong2(short8 v) {
++  union _type_cast_16_b u;
++  u._short8 = v;
++  return u._ulong2;
++}
++
+ INLINE OVERLOADABLE int4 as_int4(short8 v) {
+   union _type_cast_16_b u;
+   u._short8 = v;
+@@ -899,6 +1215,18 @@
+   return u._float4;
+ }
+ 
++INLINE OVERLOADABLE long2 as_long2(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE ulong2 as_ulong2(ushort8 v) {
++  union _type_cast_16_b u;
++  u._ushort8 = v;
++  return u._ulong2;
++}
++
+ INLINE OVERLOADABLE int4 as_int4(ushort8 v) {
+   union _type_cast_16_b u;
+   u._ushort8 = v;
+@@ -935,6 +1263,18 @@
+   return u._float4;
+ }
+ 
++INLINE OVERLOADABLE long2 as_long2(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE ulong2 as_ulong2(char16 v) {
++  union _type_cast_16_b u;
++  u._char16 = v;
++  return u._ulong2;
++}
++
+ INLINE OVERLOADABLE int4 as_int4(char16 v) {
+   union _type_cast_16_b u;
+   u._char16 = v;
+@@ -971,6 +1311,18 @@
+   return u._float4;
+ }
+ 
++INLINE OVERLOADABLE long2 as_long2(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE ulong2 as_ulong2(uchar16 v) {
++  union _type_cast_16_b u;
++  u._uchar16 = v;
++  return u._ulong2;
++}
++
+ INLINE OVERLOADABLE int4 as_int4(uchar16 v) {
+   union _type_cast_16_b u;
+   u._uchar16 = v;
+@@ -1007,6 +1359,18 @@
+   return u._float4;
+ }
+ 
++INLINE OVERLOADABLE long2 as_long2(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._long2;
++}
++
++INLINE OVERLOADABLE ulong2 as_ulong2(float4 v) {
++  union _type_cast_16_b u;
++  u._float4 = v;
++  return u._ulong2;
++}
++
+ INLINE OVERLOADABLE int4 as_int4(float4 v) {
+   union _type_cast_16_b u;
+   u._float4 = v;
+@@ -1043,7 +1407,26 @@
+   return u._uchar16;
+ }
+ 
++union _type_cast_24_b {
++  long3 _long3;
++  ulong3 _ulong3;
++};
++
++INLINE OVERLOADABLE ulong3 as_ulong3(long3 v) {
++  union _type_cast_24_b u;
++  u._long3 = v;
++  return u._ulong3;
++}
++
++INLINE OVERLOADABLE long3 as_long3(ulong3 v) {
++  union _type_cast_24_b u;
++  u._ulong3 = v;
++  return u._long3;
++}
++
+ union _type_cast_32_b {
++  long4 _long4;
++  ulong4 _ulong4;
+   int8 _int8;
+   uint8 _uint8;
+   short16 _short16;
+@@ -1051,30 +1434,126 @@
+   float8 _float8;
+ };
+ 
+-INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
++INLINE OVERLOADABLE ulong4 as_ulong4(long4 v) {
+   union _type_cast_32_b u;
+-  u._int8 = v;
++  u._long4 = v;
++  return u._ulong4;
++}
++
++INLINE OVERLOADABLE int8 as_int8(long4 v) {
++  union _type_cast_32_b u;
++  u._long4 = v;
++  return u._int8;
++}
++
++INLINE OVERLOADABLE uint8 as_uint8(long4 v) {
++  union _type_cast_32_b u;
++  u._long4 = v;
+   return u._uint8;
+ }
+ 
+-INLINE OVERLOADABLE short16 as_short16(int8 v) {
++INLINE OVERLOADABLE short16 as_short16(long4 v) {
+   union _type_cast_32_b u;
+-  u._int8 = v;
++  u._long4 = v;
+   return u._short16;
+ }
+ 
+-INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
++INLINE OVERLOADABLE ushort16 as_ushort16(long4 v) {
+   union _type_cast_32_b u;
+-  u._int8 = v;
++  u._long4 = v;
+   return u._ushort16;
+ }
+ 
+-INLINE OVERLOADABLE float8 as_float8(int8 v) {
++INLINE OVERLOADABLE float8 as_float8(long4 v) {
++  union _type_cast_32_b u;
++  u._long4 = v;
++  return u._float8;
++}
++
++INLINE OVERLOADABLE long4 as_long4(ulong4 v) {
++  union _type_cast_32_b u;
++  u._ulong4 = v;
++  return u._long4;
++}
++
++INLINE OVERLOADABLE int8 as_int8(ulong4 v) {
++  union _type_cast_32_b u;
++  u._ulong4 = v;
++  return u._int8;
++}
++
++INLINE OVERLOADABLE uint8 as_uint8(ulong4 v) {
++  union _type_cast_32_b u;
++  u._ulong4 = v;
++  return u._uint8;
++}
++
++INLINE OVERLOADABLE short16 as_short16(ulong4 v) {
++  union _type_cast_32_b u;
++  u._ulong4 = v;
++  return u._short16;
++}
++
++INLINE OVERLOADABLE ushort16 as_ushort16(ulong4 v) {
++  union _type_cast_32_b u;
++  u._ulong4 = v;
++  return u._ushort16;
++}
++
++INLINE OVERLOADABLE float8 as_float8(ulong4 v) {
++  union _type_cast_32_b u;
++  u._ulong4 = v;
++  return u._float8;
++}
++
++INLINE OVERLOADABLE long4 as_long4(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._long4;
++}
++
++INLINE OVERLOADABLE ulong4 as_ulong4(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._ulong4;
++}
++
++INLINE OVERLOADABLE uint8 as_uint8(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._uint8;
++}
++
++INLINE OVERLOADABLE short16 as_short16(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._short16;
++}
++
++INLINE OVERLOADABLE ushort16 as_ushort16(int8 v) {
++  union _type_cast_32_b u;
++  u._int8 = v;
++  return u._ushort16;
++}
++
++INLINE OVERLOADABLE float8 as_float8(int8 v) {
+   union _type_cast_32_b u;
+   u._int8 = v;
+   return u._float8;
+ }
+ 
++INLINE OVERLOADABLE long4 as_long4(uint8 v) {
++  union _type_cast_32_b u;
++  u._uint8 = v;
++  return u._long4;
++}
++
++INLINE OVERLOADABLE ulong4 as_ulong4(uint8 v) {
++  union _type_cast_32_b u;
++  u._uint8 = v;
++  return u._ulong4;
++}
++
+ INLINE OVERLOADABLE int8 as_int8(uint8 v) {
+   union _type_cast_32_b u;
+   u._uint8 = v;
+@@ -1099,6 +1578,18 @@
+   return u._float8;
+ }
+ 
++INLINE OVERLOADABLE long4 as_long4(short16 v) {
++  union _type_cast_32_b u;
++  u._short16 = v;
++  return u._long4;
++}
++
++INLINE OVERLOADABLE ulong4 as_ulong4(short16 v) {
++  union _type_cast_32_b u;
++  u._short16 = v;
++  return u._ulong4;
++}
++
+ INLINE OVERLOADABLE int8 as_int8(short16 v) {
+   union _type_cast_32_b u;
+   u._short16 = v;
+@@ -1123,6 +1614,18 @@
+   return u._float8;
+ }
+ 
++INLINE OVERLOADABLE long4 as_long4(ushort16 v) {
++  union _type_cast_32_b u;
++  u._ushort16 = v;
++  return u._long4;
++}
++
++INLINE OVERLOADABLE ulong4 as_ulong4(ushort16 v) {
++  union _type_cast_32_b u;
++  u._ushort16 = v;
++  return u._ulong4;
++}
++
+ INLINE OVERLOADABLE int8 as_int8(ushort16 v) {
+   union _type_cast_32_b u;
+   u._ushort16 = v;
+@@ -1147,6 +1650,18 @@
+   return u._float8;
+ }
+ 
++INLINE OVERLOADABLE long4 as_long4(float8 v) {
++  union _type_cast_32_b u;
++  u._float8 = v;
++  return u._long4;
++}
++
++INLINE OVERLOADABLE ulong4 as_ulong4(float8 v) {
++  union _type_cast_32_b u;
++  u._float8 = v;
++  return u._ulong4;
++}
++
+ INLINE OVERLOADABLE int8 as_int8(float8 v) {
+   union _type_cast_32_b u;
+   u._float8 = v;
+@@ -1172,11 +1687,73 @@
+ }
+ 
+ union _type_cast_64_b {
++  long8 _long8;
++  ulong8 _ulong8;
+   int16 _int16;
+   uint16 _uint16;
+   float16 _float16;
+ };
+ 
++INLINE OVERLOADABLE ulong8 as_ulong8(long8 v) {
++  union _type_cast_64_b u;
++  u._long8 = v;
++  return u._ulong8;
++}
++
++INLINE OVERLOADABLE int16 as_int16(long8 v) {
++  union _type_cast_64_b u;
++  u._long8 = v;
++  return u._int16;
++}
++
++INLINE OVERLOADABLE uint16 as_uint16(long8 v) {
++  union _type_cast_64_b u;
++  u._long8 = v;
++  return u._uint16;
++}
++
++INLINE OVERLOADABLE float16 as_float16(long8 v) {
++  union _type_cast_64_b u;
++  u._long8 = v;
++  return u._float16;
++}
++
++INLINE OVERLOADABLE long8 as_long8(ulong8 v) {
++  union _type_cast_64_b u;
++  u._ulong8 = v;
++  return u._long8;
++}
++
++INLINE OVERLOADABLE int16 as_int16(ulong8 v) {
++  union _type_cast_64_b u;
++  u._ulong8 = v;
++  return u._int16;
++}
++
++INLINE OVERLOADABLE uint16 as_uint16(ulong8 v) {
++  union _type_cast_64_b u;
++  u._ulong8 = v;
++  return u._uint16;
++}
++
++INLINE OVERLOADABLE float16 as_float16(ulong8 v) {
++  union _type_cast_64_b u;
++  u._ulong8 = v;
++  return u._float16;
++}
++
++INLINE OVERLOADABLE long8 as_long8(int16 v) {
++  union _type_cast_64_b u;
++  u._int16 = v;
++  return u._long8;
++}
++
++INLINE OVERLOADABLE ulong8 as_ulong8(int16 v) {
++  union _type_cast_64_b u;
++  u._int16 = v;
++  return u._ulong8;
++}
++
+ INLINE OVERLOADABLE uint16 as_uint16(int16 v) {
+   union _type_cast_64_b u;
+   u._int16 = v;
+@@ -1189,6 +1766,18 @@
+   return u._float16;
+ }
+ 
++INLINE OVERLOADABLE long8 as_long8(uint16 v) {
++  union _type_cast_64_b u;
++  u._uint16 = v;
++  return u._long8;
++}
++
++INLINE OVERLOADABLE ulong8 as_ulong8(uint16 v) {
++  union _type_cast_64_b u;
++  u._uint16 = v;
++  return u._ulong8;
++}
++
+ INLINE OVERLOADABLE int16 as_int16(uint16 v) {
+   union _type_cast_64_b u;
+   u._uint16 = v;
+@@ -1201,6 +1790,18 @@
+   return u._float16;
+ }
+ 
++INLINE OVERLOADABLE long8 as_long8(float16 v) {
++  union _type_cast_64_b u;
++  u._float16 = v;
++  return u._long8;
++}
++
++INLINE OVERLOADABLE ulong8 as_ulong8(float16 v) {
++  union _type_cast_64_b u;
++  u._float16 = v;
++  return u._ulong8;
++}
++
+ INLINE OVERLOADABLE int16 as_int16(float16 v) {
+   union _type_cast_64_b u;
+   u._float16 = v;
+@@ -1213,6 +1814,95 @@
+   return u._uint16;
+ }
+ 
++union _type_cast_128_b {
++  long16 _long16;
++  ulong16 _ulong16;
++};
++
++INLINE OVERLOADABLE ulong16 as_ulong16(long16 v) {
++  union _type_cast_128_b u;
++  u._long16 = v;
++  return u._ulong16;
++}
++
++INLINE OVERLOADABLE long16 as_long16(ulong16 v) {
++  union _type_cast_128_b u;
++  u._ulong16 = v;
++  return u._long16;
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(long2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(long2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE uint2 convert_uint2(long2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(long2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(long2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(long2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(long2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(long2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
++INLINE OVERLOADABLE long2 convert_long2(ulong2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE int2 convert_int2(ulong2 v) {
++  return (int2)((int)(v.s0), (int)(v.s1));
++}
++
++INLINE OVERLOADABLE uint2 convert_uint2(ulong2 v) {
++  return (uint2)((uint)(v.s0), (uint)(v.s1));
++}
++
++INLINE OVERLOADABLE short2 convert_short2(ulong2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(ulong2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(ulong2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(ulong2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE float2 convert_float2(ulong2 v) {
++  return (float2)((float)(v.s0), (float)(v.s1));
++}
++
++INLINE OVERLOADABLE long2 convert_long2(int2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(int2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
+ INLINE OVERLOADABLE uint2 convert_uint2(int2 v) {
+   return (uint2)((uint)(v.s0), (uint)(v.s1));
+ }
+@@ -1237,6 +1927,14 @@
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE long2 convert_long2(uint2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(uint2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
+ INLINE OVERLOADABLE int2 convert_int2(uint2 v) {
+   return (int2)((int)(v.s0), (int)(v.s1));
+ }
+@@ -1261,6 +1959,14 @@
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE long2 convert_long2(short2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(short2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
+ INLINE OVERLOADABLE int2 convert_int2(short2 v) {
+   return (int2)((int)(v.s0), (int)(v.s1));
+ }
+@@ -1285,6 +1991,14 @@
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE long2 convert_long2(ushort2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(ushort2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
+ INLINE OVERLOADABLE int2 convert_int2(ushort2 v) {
+   return (int2)((int)(v.s0), (int)(v.s1));
+ }
+@@ -1309,6 +2023,14 @@
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE long2 convert_long2(char2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(char2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
+ INLINE OVERLOADABLE int2 convert_int2(char2 v) {
+   return (int2)((int)(v.s0), (int)(v.s1));
+ }
+@@ -1333,6 +2055,14 @@
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE long2 convert_long2(uchar2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(uchar2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
+ INLINE OVERLOADABLE int2 convert_int2(uchar2 v) {
+   return (int2)((int)(v.s0), (int)(v.s1));
+ }
+@@ -1357,6 +2087,14 @@
+   return (float2)((float)(v.s0), (float)(v.s1));
+ }
+ 
++INLINE OVERLOADABLE long2 convert_long2(float2 v) {
++  return (long2)((long)(v.s0), (long)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong2 convert_ulong2(float2 v) {
++  return (ulong2)((ulong)(v.s0), (ulong)(v.s1));
++}
++
+ INLINE OVERLOADABLE int2 convert_int2(float2 v) {
+   return (int2)((int)(v.s0), (int)(v.s1));
+ }
+@@ -1365,20 +2103,92 @@
+   return (uint2)((uint)(v.s0), (uint)(v.s1));
+ }
+ 
+-INLINE OVERLOADABLE short2 convert_short2(float2 v) {
+-  return (short2)((short)(v.s0), (short)(v.s1));
++INLINE OVERLOADABLE short2 convert_short2(float2 v) {
++  return (short2)((short)(v.s0), (short)(v.s1));
++}
++
++INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
++  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++}
++
++INLINE OVERLOADABLE char2 convert_char2(float2 v) {
++  return (char2)((char)(v.s0), (char)(v.s1));
++}
++
++INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
++  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++}
++
++INLINE OVERLOADABLE ulong3 convert_ulong3(long3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(long3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(long3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(long3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(long3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(long3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(long3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
++}
++
++INLINE OVERLOADABLE float3 convert_float3(long3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
++}
++
++INLINE OVERLOADABLE long3 convert_long3(ulong3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++}
++
++INLINE OVERLOADABLE int3 convert_int3(ulong3 v) {
++  return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
++}
++
++INLINE OVERLOADABLE uint3 convert_uint3(ulong3 v) {
++  return (uint3)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2));
++}
++
++INLINE OVERLOADABLE short3 convert_short3(ulong3 v) {
++  return (short3)((short)(v.s0), (short)(v.s1), (short)(v.s2));
++}
++
++INLINE OVERLOADABLE ushort3 convert_ushort3(ulong3 v) {
++  return (ushort3)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2));
++}
++
++INLINE OVERLOADABLE char3 convert_char3(ulong3 v) {
++  return (char3)((char)(v.s0), (char)(v.s1), (char)(v.s2));
++}
++
++INLINE OVERLOADABLE uchar3 convert_uchar3(ulong3 v) {
++  return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
+-INLINE OVERLOADABLE ushort2 convert_ushort2(float2 v) {
+-  return (ushort2)((ushort)(v.s0), (ushort)(v.s1));
++INLINE OVERLOADABLE float3 convert_float3(ulong3 v) {
++  return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+ 
+-INLINE OVERLOADABLE char2 convert_char2(float2 v) {
+-  return (char2)((char)(v.s0), (char)(v.s1));
++INLINE OVERLOADABLE long3 convert_long3(int3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
+ }
+ 
+-INLINE OVERLOADABLE uchar2 convert_uchar2(float2 v) {
+-  return (uchar2)((uchar)(v.s0), (uchar)(v.s1));
++INLINE OVERLOADABLE ulong3 convert_ulong3(int3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
+ }
+ 
+ INLINE OVERLOADABLE uint3 convert_uint3(int3 v) {
+@@ -1405,6 +2215,14 @@
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE long3 convert_long3(uint3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++}
++
++INLINE OVERLOADABLE ulong3 convert_ulong3(uint3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++}
++
+ INLINE OVERLOADABLE int3 convert_int3(uint3 v) {
+   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+ }
+@@ -1429,6 +2247,14 @@
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE long3 convert_long3(short3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++}
++
++INLINE OVERLOADABLE ulong3 convert_ulong3(short3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++}
++
+ INLINE OVERLOADABLE int3 convert_int3(short3 v) {
+   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+ }
+@@ -1453,6 +2279,14 @@
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE long3 convert_long3(ushort3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++}
++
++INLINE OVERLOADABLE ulong3 convert_ulong3(ushort3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++}
++
+ INLINE OVERLOADABLE int3 convert_int3(ushort3 v) {
+   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+ }
+@@ -1477,6 +2311,14 @@
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE long3 convert_long3(char3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++}
++
++INLINE OVERLOADABLE ulong3 convert_ulong3(char3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++}
++
+ INLINE OVERLOADABLE int3 convert_int3(char3 v) {
+   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+ }
+@@ -1501,6 +2343,14 @@
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE long3 convert_long3(uchar3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++}
++
++INLINE OVERLOADABLE ulong3 convert_ulong3(uchar3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++}
++
+ INLINE OVERLOADABLE int3 convert_int3(uchar3 v) {
+   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+ }
+@@ -1525,6 +2375,14 @@
+   return (float3)((float)(v.s0), (float)(v.s1), (float)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE long3 convert_long3(float3 v) {
++  return (long3)((long)(v.s0), (long)(v.s1), (long)(v.s2));
++}
++
++INLINE OVERLOADABLE ulong3 convert_ulong3(float3 v) {
++  return (ulong3)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2));
++}
++
+ INLINE OVERLOADABLE int3 convert_int3(float3 v) {
+   return (int3)((int)(v.s0), (int)(v.s1), (int)(v.s2));
+ }
+@@ -1549,6 +2407,78 @@
+   return (uchar3)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2));
+ }
+ 
++INLINE OVERLOADABLE ulong4 convert_ulong4(long4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(long4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(long4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(long4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(long4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(long4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(long4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(long4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
++INLINE OVERLOADABLE long4 convert_long4(ulong4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE int4 convert_int4(ulong4 v) {
++  return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
++}
++
++INLINE OVERLOADABLE uint4 convert_uint4(ulong4 v) {
++  return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
++}
++
++INLINE OVERLOADABLE short4 convert_short4(ulong4 v) {
++  return (short4)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3));
++}
++
++INLINE OVERLOADABLE ushort4 convert_ushort4(ulong4 v) {
++  return (ushort4)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3));
++}
++
++INLINE OVERLOADABLE char4 convert_char4(ulong4 v) {
++  return (char4)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3));
++}
++
++INLINE OVERLOADABLE uchar4 convert_uchar4(ulong4 v) {
++  return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
++}
++
++INLINE OVERLOADABLE float4 convert_float4(ulong4 v) {
++  return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
++}
++
++INLINE OVERLOADABLE long4 convert_long4(int4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE ulong4 convert_ulong4(int4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
+ INLINE OVERLOADABLE uint4 convert_uint4(int4 v) {
+   return (uint4)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3));
+ }
+@@ -1573,6 +2503,14 @@
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE long4 convert_long4(uint4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE ulong4 convert_ulong4(uint4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
+ INLINE OVERLOADABLE int4 convert_int4(uint4 v) {
+   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+ }
+@@ -1597,6 +2535,14 @@
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE long4 convert_long4(short4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE ulong4 convert_ulong4(short4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
+ INLINE OVERLOADABLE int4 convert_int4(short4 v) {
+   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+ }
+@@ -1621,6 +2567,14 @@
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE long4 convert_long4(ushort4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE ulong4 convert_ulong4(ushort4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
+ INLINE OVERLOADABLE int4 convert_int4(ushort4 v) {
+   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+ }
+@@ -1645,6 +2599,14 @@
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE long4 convert_long4(char4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE ulong4 convert_ulong4(char4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
+ INLINE OVERLOADABLE int4 convert_int4(char4 v) {
+   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+ }
+@@ -1669,6 +2631,14 @@
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE long4 convert_long4(uchar4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE ulong4 convert_ulong4(uchar4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
+ INLINE OVERLOADABLE int4 convert_int4(uchar4 v) {
+   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+ }
+@@ -1693,6 +2663,14 @@
+   return (float4)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE long4 convert_long4(float4 v) {
++  return (long4)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3));
++}
++
++INLINE OVERLOADABLE ulong4 convert_ulong4(float4 v) {
++  return (ulong4)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3));
++}
++
+ INLINE OVERLOADABLE int4 convert_int4(float4 v) {
+   return (int4)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3));
+ }
+@@ -1717,6 +2695,78 @@
+   return (uchar4)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3));
+ }
+ 
++INLINE OVERLOADABLE ulong8 convert_ulong8(long8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(long8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(long8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(long8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(long8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(long8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(long8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(long8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
++INLINE OVERLOADABLE long8 convert_long8(ulong8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE int8 convert_int8(ulong8 v) {
++  return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
++}
++
++INLINE OVERLOADABLE uint8 convert_uint8(ulong8 v) {
++  return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
++}
++
++INLINE OVERLOADABLE short8 convert_short8(ulong8 v) {
++  return (short8)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7));
++}
++
++INLINE OVERLOADABLE ushort8 convert_ushort8(ulong8 v) {
++  return (ushort8)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7));
++}
++
++INLINE OVERLOADABLE char8 convert_char8(ulong8 v) {
++  return (char8)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7));
++}
++
++INLINE OVERLOADABLE uchar8 convert_uchar8(ulong8 v) {
++  return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
++}
++
++INLINE OVERLOADABLE float8 convert_float8(ulong8 v) {
++  return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
++}
++
++INLINE OVERLOADABLE long8 convert_long8(int8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE ulong8 convert_ulong8(int8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
+ INLINE OVERLOADABLE uint8 convert_uint8(int8 v) {
+   return (uint8)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7));
+ }
+@@ -1741,6 +2791,14 @@
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE long8 convert_long8(uint8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE ulong8 convert_ulong8(uint8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
+ INLINE OVERLOADABLE int8 convert_int8(uint8 v) {
+   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+ }
+@@ -1765,6 +2823,14 @@
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE long8 convert_long8(short8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE ulong8 convert_ulong8(short8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
+ INLINE OVERLOADABLE int8 convert_int8(short8 v) {
+   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+ }
+@@ -1789,6 +2855,14 @@
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE long8 convert_long8(ushort8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE ulong8 convert_ulong8(ushort8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
+ INLINE OVERLOADABLE int8 convert_int8(ushort8 v) {
+   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+ }
+@@ -1813,6 +2887,14 @@
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE long8 convert_long8(char8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE ulong8 convert_ulong8(char8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
+ INLINE OVERLOADABLE int8 convert_int8(char8 v) {
+   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+ }
+@@ -1837,6 +2919,14 @@
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE long8 convert_long8(uchar8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE ulong8 convert_ulong8(uchar8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
+ INLINE OVERLOADABLE int8 convert_int8(uchar8 v) {
+   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+ }
+@@ -1861,6 +2951,14 @@
+   return (float8)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE long8 convert_long8(float8 v) {
++  return (long8)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7));
++}
++
++INLINE OVERLOADABLE ulong8 convert_ulong8(float8 v) {
++  return (ulong8)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7));
++}
++
+ INLINE OVERLOADABLE int8 convert_int8(float8 v) {
+   return (int8)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7));
+ }
+@@ -1885,6 +2983,78 @@
+   return (uchar8)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7));
+ }
+ 
++INLINE OVERLOADABLE ulong16 convert_ulong16(long16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(long16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(long16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(long16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(long16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(long16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(long16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(long16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
++INLINE OVERLOADABLE long16 convert_long16(ulong16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE int16 convert_int16(ulong16 v) {
++  return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
++}
++
++INLINE OVERLOADABLE uint16 convert_uint16(ulong16 v) {
++  return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
++}
++
++INLINE OVERLOADABLE short16 convert_short16(ulong16 v) {
++  return (short16)((short)(v.s0), (short)(v.s1), (short)(v.s2), (short)(v.s3), (short)(v.s4), (short)(v.s5), (short)(v.s6), (short)(v.s7), (short)(v.s8), (short)(v.s9), (short)(v.sA), (short)(v.sB), (short)(v.sC), (short)(v.sD), (short)(v.sE), (short)(v.sF));
++}
++
++INLINE OVERLOADABLE ushort16 convert_ushort16(ulong16 v) {
++  return (ushort16)((ushort)(v.s0), (ushort)(v.s1), (ushort)(v.s2), (ushort)(v.s3), (ushort)(v.s4), (ushort)(v.s5), (ushort)(v.s6), (ushort)(v.s7), (ushort)(v.s8), (ushort)(v.s9), (ushort)(v.sA), (ushort)(v.sB), (ushort)(v.sC), (ushort)(v.sD), (ushort)(v.sE), (ushort)(v.sF));
++}
++
++INLINE OVERLOADABLE char16 convert_char16(ulong16 v) {
++  return (char16)((char)(v.s0), (char)(v.s1), (char)(v.s2), (char)(v.s3), (char)(v.s4), (char)(v.s5), (char)(v.s6), (char)(v.s7), (char)(v.s8), (char)(v.s9), (char)(v.sA), (char)(v.sB), (char)(v.sC), (char)(v.sD), (char)(v.sE), (char)(v.sF));
++}
++
++INLINE OVERLOADABLE uchar16 convert_uchar16(ulong16 v) {
++  return (uchar16)((uchar)(v.s0), (uchar)(v.s1), (uchar)(v.s2), (uchar)(v.s3), (uchar)(v.s4), (uchar)(v.s5), (uchar)(v.s6), (uchar)(v.s7), (uchar)(v.s8), (uchar)(v.s9), (uchar)(v.sA), (uchar)(v.sB), (uchar)(v.sC), (uchar)(v.sD), (uchar)(v.sE), (uchar)(v.sF));
++}
++
++INLINE OVERLOADABLE float16 convert_float16(ulong16 v) {
++  return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
++}
++
++INLINE OVERLOADABLE long16 convert_long16(int16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE ulong16 convert_ulong16(int16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
+ INLINE OVERLOADABLE uint16 convert_uint16(int16 v) {
+   return (uint16)((uint)(v.s0), (uint)(v.s1), (uint)(v.s2), (uint)(v.s3), (uint)(v.s4), (uint)(v.s5), (uint)(v.s6), (uint)(v.s7), (uint)(v.s8), (uint)(v.s9), (uint)(v.sA), (uint)(v.sB), (uint)(v.sC), (uint)(v.sD), (uint)(v.sE), (uint)(v.sF));
+ }
+@@ -1909,6 +3079,14 @@
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE long16 convert_long16(uint16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE ulong16 convert_ulong16(uint16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
+ INLINE OVERLOADABLE int16 convert_int16(uint16 v) {
+   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+ }
+@@ -1933,6 +3111,14 @@
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE long16 convert_long16(short16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE ulong16 convert_ulong16(short16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
+ INLINE OVERLOADABLE int16 convert_int16(short16 v) {
+   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+ }
+@@ -1957,6 +3143,14 @@
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE long16 convert_long16(ushort16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE ulong16 convert_ulong16(ushort16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
+ INLINE OVERLOADABLE int16 convert_int16(ushort16 v) {
+   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+ }
+@@ -1981,6 +3175,14 @@
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE long16 convert_long16(char16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE ulong16 convert_ulong16(char16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
+ INLINE OVERLOADABLE int16 convert_int16(char16 v) {
+   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+ }
+@@ -2005,6 +3207,14 @@
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE long16 convert_long16(uchar16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE ulong16 convert_ulong16(uchar16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
+ INLINE OVERLOADABLE int16 convert_int16(uchar16 v) {
+   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+ }
+@@ -2029,6 +3239,14 @@
+   return (float16)((float)(v.s0), (float)(v.s1), (float)(v.s2), (float)(v.s3), (float)(v.s4), (float)(v.s5), (float)(v.s6), (float)(v.s7), (float)(v.s8), (float)(v.s9), (float)(v.sA), (float)(v.sB), (float)(v.sC), (float)(v.sD), (float)(v.sE), (float)(v.sF));
+ }
+ 
++INLINE OVERLOADABLE long16 convert_long16(float16 v) {
++  return (long16)((long)(v.s0), (long)(v.s1), (long)(v.s2), (long)(v.s3), (long)(v.s4), (long)(v.s5), (long)(v.s6), (long)(v.s7), (long)(v.s8), (long)(v.s9), (long)(v.sA), (long)(v.sB), (long)(v.sC), (long)(v.sD), (long)(v.sE), (long)(v.sF));
++}
++
++INLINE OVERLOADABLE ulong16 convert_ulong16(float16 v) {
++  return (ulong16)((ulong)(v.s0), (ulong)(v.s1), (ulong)(v.s2), (ulong)(v.s3), (ulong)(v.s4), (ulong)(v.s5), (ulong)(v.s6), (ulong)(v.s7), (ulong)(v.s8), (ulong)(v.s9), (ulong)(v.sA), (ulong)(v.sB), (ulong)(v.sC), (ulong)(v.sD), (ulong)(v.sE), (ulong)(v.sF));
++}
++
+ INLINE OVERLOADABLE int16 convert_int16(float16 v) {
+   return (int16)((int)(v.s0), (int)(v.s1), (int)(v.s2), (int)(v.s3), (int)(v.s4), (int)(v.s5), (int)(v.s6), (int)(v.s7), (int)(v.s8), (int)(v.s9), (int)(v.sA), (int)(v.sB), (int)(v.sC), (int)(v.sD), (int)(v.sE), (int)(v.sF));
+ }
diff --git a/debian/patches/0004-Make-libgbm-optional-without-EGL-support.patch b/debian/patches/0004-Make-libgbm-optional-without-EGL-support.patch
new file mode 100644
index 0000000..274623b
--- /dev/null
+++ b/debian/patches/0004-Make-libgbm-optional-without-EGL-support.patch
@@ -0,0 +1,45 @@
+From 499ec4e50734039504af732964985a3e094434ee Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Mon, 13 May 2013 23:02:16 +0200
+Subject: [PATCH 04/15] Make libgbm optional without EGL support
+To: beignet at lists.freedesktop.org
+
+If EGL or GBM cannot be found, the EGL support is disabled, and then
+neither library is required.
+---
+ src/CMakeLists.txt |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+Index: beignet-0.1+git20130514+19e9c58/src/CMakeLists.txt
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/CMakeLists.txt	2013-05-14 20:05:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/CMakeLists.txt	2013-05-14 20:08:57.006022846 +0200
+@@ -28,14 +28,16 @@
+     x11/dricommon.c 
+     x11/va_dri2.c)
+ 
+-if (EGL_FOUND)
++if (EGL_FOUND AND GBM_FOUND)
+ set (OPENCL_SRC ${OPENCL_SRC} cl_mem_gl.c cl_gl_api.c x11/gbm_dri2_x11_platform.c)
+ SET(CMAKE_CXX_FLAGS "-DHAS_EGL ${CMAKE_CXX_FLAGS}")
+ SET(CMAKE_C_FLAGS "-DHAS_EGL ${CMAKE_C_FLAGS}")
+ SET(OPTIONAL_EGL_LIBRARY "${EGL_LIBRARY}")
+-else(EGL_FOUND)
++SET(OPTIONAL_GBM_LIBRARY "${GBM_LIBRARY}")
++else(EGL_FOUND AND GBM_FOUND)
+ SET(OPTIONAL_EGL_LIBRARY "")
+-endif (EGL_FOUND)
++SET(OPTIONAL_GBM_LIBRARY "")
++endif (EGL_FOUND AND GBM_FOUND)
+ 
+ if (OCLIcd_FOUND)
+ set (OPENCL_SRC ${OPENCL_SRC} cl_khr_icd.c)
+@@ -57,7 +59,7 @@
+                       ${DRM_LIBRARY}
+                       ${OPENGL_LIBRARIES}
+                       ${OPTIONAL_EGL_LIBRARY}
+-                      ${GBM_LIBRARY})
++                      ${OPTIONAL_GBM_LIBRARY})
+ set_target_properties(cl
+                         PROPERTIES
+                         VERSION 0.1
diff --git a/debian/patches/0005-Define-clamp-value-lower-upper.patch b/debian/patches/0005-Define-clamp-value-lower-upper.patch
new file mode 100644
index 0000000..976fdbf
--- /dev/null
+++ b/debian/patches/0005-Define-clamp-value-lower-upper.patch
@@ -0,0 +1,131 @@
+From 780dcc213bd8c37c297c5d7a089ad355cb31649d Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Mon, 13 May 2013 09:09:11 +0200
+Subject: [PATCH 05/15] Define clamp(value, lower, upper)
+To: beignet at lists.freedesktop.org
+
+The clamp(value, lower, upper) function is part of the standard library.
+
+ - Define the function, using min() and max() on the lower level
+ - Remove private definitions from kernels
+---
+ backend/src/ocl_stdlib.h                    |   23 +++++++++++++----------
+ kernels/compiler_julia.cl                   |    2 --
+ kernels/compiler_julia_no_break.cl          |    2 --
+ kernels/compiler_menger_sponge.cl           |    2 --
+ kernels/compiler_menger_sponge_no_shadow.cl |    2 --
+ kernels/compiler_nautilus.cl                |    4 +---
+ 6 files changed, 14 insertions(+), 21 deletions(-)
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:08:23.426024343 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:09:02.546022599 +0200
+@@ -3643,21 +3643,24 @@
+ /////////////////////////////////////////////////////////////////////////////
+ // Common Functions (see 6.11.4 of OCL 1.1 spec)
+ /////////////////////////////////////////////////////////////////////////////
+-#define DECL_MIN_MAX(TYPE) \
++#define DECL_MIN_MAX_CLAMP(TYPE) \
+ INLINE OVERLOADABLE TYPE max(TYPE a, TYPE b) { \
+   return a > b ? a : b; \
+ } \
+ INLINE OVERLOADABLE TYPE min(TYPE a, TYPE b) { \
+   return a < b ? a : b; \
++} \
++INLINE OVERLOADABLE TYPE clamp(TYPE v, TYPE l, TYPE u) { \
++  return max(min(v, u), l); \
+ }
+-DECL_MIN_MAX(float)
+-DECL_MIN_MAX(int)
+-DECL_MIN_MAX(short)
+-DECL_MIN_MAX(char)
+-DECL_MIN_MAX(uint)
+-DECL_MIN_MAX(unsigned short)
+-DECL_MIN_MAX(unsigned char)
+-#undef DECL_MIN_MAX
++DECL_MIN_MAX_CLAMP(float)
++DECL_MIN_MAX_CLAMP(int)
++DECL_MIN_MAX_CLAMP(short)
++DECL_MIN_MAX_CLAMP(char)
++DECL_MIN_MAX_CLAMP(uint)
++DECL_MIN_MAX_CLAMP(unsigned short)
++DECL_MIN_MAX_CLAMP(unsigned char)
++#undef DECL_MIN_MAX_CLAMP
+ 
+ INLINE OVERLOADABLE float __gen_ocl_internal_fmax(float a, float b) { return max(a,b); }
+ INLINE OVERLOADABLE float __gen_ocl_internal_fmin(float a, float b) { return min(a,b); }
+Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_julia.cl
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_julia.cl	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_julia.cl	2013-05-14 20:09:02.546022599 +0200
+@@ -14,8 +14,6 @@
+   return I - 2.0f * dot(N, I) * N;
+ }
+ 
+-inline float clamp(x,m,M) { return max(min(x,M),m); }
+-
+ inline uint pack_fp4(float4 u4) {
+   uint u;
+   u = (((uint) u4.x)) |
+Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_julia_no_break.cl
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_julia_no_break.cl	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_julia_no_break.cl	2013-05-14 20:09:02.546022599 +0200
+@@ -14,8 +14,6 @@
+   return I - 2.0f * dot(N, I) * N;
+ }
+ 
+-inline float clamp(x,m,M) { return max(min(x,M),m); }
+-
+ inline uint pack_fp4(float4 u4) {
+   uint u;
+   u = (((uint) u4.x)) |
+Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_menger_sponge.cl
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_menger_sponge.cl	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_menger_sponge.cl	2013-05-14 20:09:02.546022599 +0200
+@@ -25,8 +25,6 @@
+   return I - 2.0f * dot(N, I) * N;
+ }
+ 
+-inline float clamp(x,m,M) { return max(min(x,M),m); }
+-
+ inline uint pack_fp4(float4 u4) {
+   uint u;
+   u = (((uint) u4.x)) |
+Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_menger_sponge_no_shadow.cl
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_menger_sponge_no_shadow.cl	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_menger_sponge_no_shadow.cl	2013-05-14 20:09:02.546022599 +0200
+@@ -25,8 +25,6 @@
+   return I - 2.0f * dot(N, I) * N;
+ }
+ 
+-inline float clamp(x,m,M) { return max(min(x,M),m); }
+-
+ inline uint pack_fp4(float4 u4) {
+   uint u;
+   u = (((uint) u4.x)) |
+Index: beignet-0.1+git20130514+19e9c58/kernels/compiler_nautilus.cl
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/kernels/compiler_nautilus.cl	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/kernels/compiler_nautilus.cl	2013-05-14 20:09:02.546022599 +0200
+@@ -14,8 +14,6 @@
+   return I - 2.0f * dot(N, I) * N;
+ }
+ 
+-inline float clamp(x,m,M) { return max(min(x,M),m); }
+-
+ inline uint pack_fp4(float4 u4) {
+   uint u;
+   u = (((uint) u4.x)) |
+@@ -59,7 +57,7 @@
+   for(int q=0;q<100;q++)
+   {
+      float l = e(o+0.5f*(vec3)(cos(1.1f*(float)(q)),cos(1.6f*(float)(q)),cos(1.4f*(float)(q))))-m;
+-     a+=clamp(4.0f*l,0.0f,1.0f);
++     a+=floor(clamp(4.0f*l,0.0f,1.0f));
+   }
+   v*=a/100.0f;
+   vec4 gl_FragColor=(vec4)(v,1.0f);
diff --git a/debian/patches/0006-Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch b/debian/patches/0006-Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch
new file mode 100644
index 0000000..4984913
--- /dev/null
+++ b/debian/patches/0006-Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch
@@ -0,0 +1,58 @@
+From d7c7354c4857ed9934086e3bdaf2b07e55f46bd1 Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Mon, 13 May 2013 12:43:42 +0200
+Subject: [PATCH 06/15] Add clGetDeviceInfo(..., CL_BUILT_IN_KERNELS, ...)
+To: beignet at lists.freedesktop.org
+
+Currently, there are no built-in kernels, so this function returns an empty
+string.
+---
+ src/cl_device_id.c |    1 +
+ src/cl_device_id.h |    2 ++
+ src/cl_gt_device.h |    1 +
+ 3 files changed, 4 insertions(+)
+
+Index: beignet-0.1+git20130514+19e9c58/src/cl_device_id.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_device_id.c	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_device_id.c	2013-05-14 20:09:06.922022404 +0200
+@@ -230,6 +230,7 @@
+     DECL_STRING_FIELD(PROFILE, profile)
+     DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version)
+     DECL_STRING_FIELD(EXTENSIONS, extensions);
++    DECL_STRING_FIELD(BUILT_IN_KERNELS, built_in_kernels)
+ 
+     case CL_DRIVER_VERSION:
+       if (param_value_size_ret) {
+Index: beignet-0.1+git20130514+19e9c58/src/cl_device_id.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_device_id.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_device_id.h	2013-05-14 20:09:06.922022404 +0200
+@@ -84,6 +84,7 @@
+   const char *opencl_c_version;
+   const char *extensions;
+   const char *driver_version;
++  const char *built_in_kernels;
+   size_t name_sz;
+   size_t vendor_sz;
+   size_t version_sz;
+@@ -91,6 +92,7 @@
+   size_t opencl_c_version_sz;
+   size_t extensions_sz;
+   size_t driver_version_sz;
++  size_t built_in_kernels_sz;
+   /* Kernel specific info that we're assigning statically */
+   size_t wg_sz;
+   size_t compile_wg_sz[3];
+Index: beignet-0.1+git20130514+19e9c58/src/cl_gt_device.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_gt_device.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_gt_device.h	2013-05-14 20:09:06.922022404 +0200
+@@ -72,6 +72,7 @@
+ DECL_INFO_STRING(profile, "FULL_PROFILE")
+ DECL_INFO_STRING(opencl_c_version, "OpenCL 1.10")
+ DECL_INFO_STRING(extensions, "")
++DECL_INFO_STRING(built_in_kernels, "")
+ DECL_INFO_STRING(driver_version, LIBCL_VERSION_STRING)
+ #undef DECL_INFO_STRING
+ 
diff --git a/debian/patches/0007-Correct-type-of-device-properties.patch b/debian/patches/0007-Correct-type-of-device-properties.patch
new file mode 100644
index 0000000..577bd5b
--- /dev/null
+++ b/debian/patches/0007-Correct-type-of-device-properties.patch
@@ -0,0 +1,34 @@
+From 5f6ccf7410d17c9e775ce30e8deb5036d6b79ab4 Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Mon, 13 May 2013 12:43:57 +0200
+Subject: [PATCH 07/15] Correct type of device properties
+To: beignet at lists.freedesktop.org
+
+ - CL_DEVICE_MAX_PARAMETER_SIZE is of type size_t
+ - CL_DEVICE_MAX_WORK_GROUP_SIZE is of type size_t
+---
+ src/cl_device_id.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+Index: beignet-0.1+git20130514+19e9c58/src/cl_device_id.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_device_id.h	2013-05-14 20:09:06.922022404 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_device_id.h	2013-05-14 20:09:10.686022236 +0200
+@@ -29,7 +29,7 @@
+   cl_uint  max_thread_per_unit;
+   cl_uint  max_work_item_dimensions;
+   size_t   max_work_item_sizes[3];
+-  cl_uint  max_work_group_size;
++  size_t   max_work_group_size;
+   cl_uint  preferred_vector_width_char;
+   cl_uint  preferred_vector_width_short;
+   cl_uint  preferred_vector_width_int;
+@@ -56,7 +56,7 @@
+   size_t   image3d_max_height;
+   size_t   image3d_max_depth;
+   cl_uint  max_samplers;
+-  cl_uint  max_parameter_size;
++  size_t   max_parameter_size;
+   cl_uint  mem_base_addr_align;
+   cl_uint  min_data_type_align_size;
+   cl_device_fp_config single_fp_config;
diff --git a/debian/patches/0008-Update-gitignore-files.patch b/debian/patches/0008-Update-gitignore-files.patch
new file mode 100644
index 0000000..1cdfdec
--- /dev/null
+++ b/debian/patches/0008-Update-gitignore-files.patch
@@ -0,0 +1,65 @@
+From 9555f0c6417c0af7f4a72084f28c40d63b1a93dc Mon Sep 17 00:00:00 2001
+From: Simon Richter <Simon.Richter at hogyros.de>
+Date: Mon, 13 May 2013 20:01:08 +0200
+Subject: [PATCH 08/15] Update gitignore files
+To: beignet at lists.freedesktop.org
+
+ - Ignore CMake built files
+ - Ignore .so files only in the subdirectory
+ - Ignore generated .bmp files
+ - Ignore generated config headers
+ - Ignore generated source for OCL
+---
+ .gitignore             |    5 ++++-
+ backend/src/.gitignore |    3 +++
+ src/.gitignore         |    2 ++
+ utests/.gitignore      |   13 +++++++++++++
+ 4 files changed, 22 insertions(+), 1 deletion(-)
+ create mode 100644 src/.gitignore
+ create mode 100644 utests/.gitignore
+
+Index: beignet-0.1+git20130514+19e9c58/.gitignore
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/.gitignore	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/.gitignore	2013-05-14 20:09:13.630022105 +0200
+@@ -1,2 +1,5 @@
+ *.o
+-*.so*
++CMakeCache.txt
++CMakeFiles/
++Makefile
++cmake_install.cmake
+Index: beignet-0.1+git20130514+19e9c58/backend/src/.gitignore
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/.gitignore	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/.gitignore	2013-05-14 20:09:13.630022105 +0200
+@@ -1 +1,4 @@
++GBEConfig.h
++libgbe.so
++ocl_common_defines_str.cpp
+ ocl_stdlib_str.cpp
+Index: beignet-0.1+git20130514+19e9c58/src/.gitignore
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/src/.gitignore	2013-05-14 20:09:13.630022105 +0200
+@@ -0,0 +1,2 @@
++OCLConfig.h
++libcl.so
+Index: beignet-0.1+git20130514+19e9c58/utests/.gitignore
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/utests/.gitignore	2013-05-14 20:09:13.630022105 +0200
+@@ -0,0 +1,13 @@
++compiler_box_blur.bmp
++compiler_box_blur_float.bmp
++compiler_clod.bmp
++compiler_julia.bmp
++compiler_julia_no_break.bmp
++compiler_mandelbrot.bmp
++compiler_mandelbrot_alternate.bmp
++compiler_menger_sponge_no_shadow.bmp
++compiler_nautilus.bmp
++compiler_ribbon.bmp
++flat_address_space
++libutests.so
++utest_run
diff --git a/debian/patches/0009-GBE-refine-the-sampler-implementation-to-comply-with.patch b/debian/patches/0009-GBE-refine-the-sampler-implementation-to-comply-with.patch
new file mode 100644
index 0000000..361bc64
--- /dev/null
+++ b/debian/patches/0009-GBE-refine-the-sampler-implementation-to-comply-with.patch
@@ -0,0 +1,418 @@
+From 951d7db26d4ae1919d52219677fcf36242b8330a Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at linux.intel.com>
+Date: Mon, 13 May 2013 11:32:18 +0800
+Subject: [PATCH 09/15] GBE: refine the sampler implementation to comply with
+ spec.
+To: beignet at lists.freedesktop.org
+
+The previous implementation is to use a new address space pointer to
+represent a sampler. The reason is that there is no specified data
+type for sampler_t in LLVM front end thus we can't determine the
+sampler argument type if we use a normal integer to represent the
+sampler. But that breaks the OCL spec, the spec allows the kernel
+to define and initialize sampler variables in kernel side.
+
+Now I use a little tricky way to fix this problem. First, I decide
+to use normal unsigned integer to represent sampler_t in kernel side.
+Then at compile time, I check read_imagexxx function's sampler
+arguments. If the argument is a constant value, then it should be a
+kernel side defined sampler, then I insert the sampler type into a
+global sampler set for the current kernel function. If the argument
+is not a constant value, then I will check whether it's a kernel
+argument, if it is, then I fix up the corresponding kernel arg type
+to SAMPLER there.
+
+To unify the kernel side defined sampler and kernel argument sampler,
+I add two new gbe APIs, which export all the kernel side defined sampler
+data and size to the runtime library. Later, the runtime library
+can use this information to append new samplers to the unified sampler
+buffer and bind all the samplers at one time.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ backend/src/CMakeLists.txt            |    2 +
+ backend/src/backend/program.cpp       |   20 +++++++++-
+ backend/src/backend/program.h         |    8 ++++
+ backend/src/backend/program.hpp       |   11 ++++++
+ backend/src/ir/function.cpp           |    1 +
+ backend/src/ir/function.hpp           |   11 ++++++
+ backend/src/ir/sampler.cpp            |   46 ++++++++++++++++++++++
+ backend/src/ir/sampler.hpp            |   67 +++++++++++++++++++++++++++++++++
+ backend/src/llvm/llvm_gen_backend.cpp |   22 ++++++++++-
+ backend/src/ocl_stdlib.h              |    6 +--
+ 10 files changed, 189 insertions(+), 5 deletions(-)
+ create mode 100644 backend/src/ir/sampler.cpp
+ create mode 100644 backend/src/ir/sampler.hpp
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/CMakeLists.txt	2013-05-14 20:05:52.618031067 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt	2013-05-14 20:09:16.362021983 +0200
+@@ -61,6 +61,8 @@
+     ir/unit.hpp
+     ir/constant.cpp
+     ir/constant.hpp
++    ir/sampler.cpp
++    ir/sampler.hpp
+     ir/instruction.cpp
+     ir/instruction.hpp
+     ir/liveness.cpp
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.cpp	2013-05-14 20:07:18.782027225 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.cpp	2013-05-14 20:09:16.366021983 +0200
+@@ -49,10 +49,11 @@
+ namespace gbe {
+ 
+   Kernel::Kernel(const std::string &name) :
+-    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL)
++    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL), samplerSet(NULL)
+   {}
+   Kernel::~Kernel(void) {
+     if(ctx) GBE_DELETE(ctx);
++    if(samplerSet) GBE_DELETE(samplerSet);
+     GBE_SAFE_DELETE_ARRAY(args);
+   }
+   int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
+@@ -90,6 +91,7 @@
+     for (const auto &pair : set) {
+       const std::string &name = pair.first;
+       Kernel *kernel = this->compileKernel(unit, name);
++      kernel->setSamplerSet(pair.second->getSamplerSet());
+       kernels.insert(std::make_pair(name, kernel));
+     }
+     return true;
+@@ -250,6 +252,18 @@
+     return kernel->setConstBufSize(argID, sz);
+   }
+ 
++  static size_t kernelGetSamplerSize(gbe_kernel gbeKernel) {
++    if (gbeKernel == NULL) return 0;
++    const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel;
++    return kernel->getSamplerSize();
++  }
++
++  static void kernelGetSamplerData(gbe_kernel gbeKernel, uint32_t *samplers) {
++    if (gbeKernel == NULL) return;
++    const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel;
++    kernel->getSamplerData(samplers);
++  }
++
+   static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
+     return 0u;
+   }
+@@ -277,6 +291,8 @@
+ GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL;
+ GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
+ GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
++GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size = NULL;
++GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data = NULL;
+ 
+ namespace gbe
+ {
+@@ -304,6 +320,8 @@
+       gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize;
+       gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
+       gbe_kernel_use_slm = gbe::kernelUseSLM;
++      gbe_kernel_get_sampler_size = gbe::kernelGetSamplerSize;
++      gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData;
+       genSetupCallBacks();
+     }
+   };
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.h	2013-05-14 20:09:16.366021983 +0200
+@@ -114,6 +114,14 @@
+ typedef void (gbe_program_get_global_constant_data_cb)(gbe_program gbeProgram, char *mem);
+ extern gbe_program_get_global_constant_data_cb *gbe_program_get_global_constant_data;
+ 
++/*! Get the size of defined samplers */
++typedef size_t (gbe_kernel_get_sampler_size_cb)(gbe_kernel gbeKernel);
++extern gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size;
++
++/*! Get the content of defined samplers */
++typedef void (gbe_kernel_get_sampler_data_cb)(gbe_kernel gbeKernel, uint32_t *samplers);
++extern gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data;
++
+ /*! Destroy and deallocate the given program */
+ typedef void (gbe_program_delete_cb)(gbe_program);
+ extern gbe_program_delete_cb *gbe_program_delete;
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.hpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.hpp	2013-05-14 20:09:16.366021983 +0200
+@@ -29,6 +29,8 @@
+ #include "backend/context.hpp"
+ #include "ir/constant.hpp"
+ #include "ir/unit.hpp"
++#include "ir/function.hpp"
++#include "ir/sampler.hpp"
+ #include "sys/hash_map.hpp"
+ #include "sys/vector.hpp"
+ #include <string>
+@@ -108,6 +110,14 @@
+       }
+       return -1;
+     }
++    /*! Set sampler set. */
++    void setSamplerSet(ir::SamplerSet *from) {
++      samplerSet = from;
++    }
++    /*! Get defined sampler size */
++    size_t getSamplerSize(void) const { return samplerSet->getDataSize(); }
++    /*! Get defined sampler value array */
++    void getSamplerData(uint32_t *samplers) const { samplerSet->getData(samplers); }
+   protected:
+     friend class Context;      //!< Owns the kernels
+     const std::string name;    //!< Kernel name
+@@ -119,6 +129,7 @@
+     uint32_t stackSize;        //!< Stack size (may be 0 if unused)
+     bool useSLM;               //!< SLM requires a special HW config
+     Context *ctx;              //!< Save context after compiler to alloc constant buffer curbe
++    ir::SamplerSet *samplerSet;//!< Copy from the corresponding function.
+     GBE_CLASS(Kernel);         //!< Use custom allocators
+   };
+ 
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.cpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.cpp	2013-05-14 20:09:16.366021983 +0200
+@@ -46,6 +46,7 @@
+     name(name), unit(unit), profile(profile), simdWidth(0), useSLM(false)
+   {
+     initProfile(*this);
++    samplerSet = GBE_NEW(SamplerSet);
+   }
+ 
+   Function::~Function(void) {
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.hpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp	2013-05-14 20:09:16.366021983 +0200
+@@ -28,6 +28,7 @@
+ #include "ir/register.hpp"
+ #include "ir/instruction.hpp"
+ #include "ir/profile.hpp"
++#include "ir/sampler.hpp"
+ #include "sys/vector.hpp"
+ #include "sys/set.hpp"
+ #include "sys/map.hpp"
+@@ -217,6 +218,12 @@
+       for (auto arg : args) if (arg->reg == reg) return arg;
+       return NULL;
+     }
++
++    INLINE FunctionArgument *getArg(const Register &reg) {
++      for (auto arg : args) if (arg->reg == reg) return arg;
++      return NULL;
++    }
++
+     /*! Get output register */
+     INLINE Register getOutput(uint32_t ID) const { return outputs[ID]; }
+     /*! Get the argument location for the pushed register */
+@@ -281,6 +288,9 @@
+     INLINE bool getUseSLM(void) const { return this->useSLM; }
+     /*! Change the SLM config for the function */
+     INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; }
++    /*! Get sampler set in this function */
++    SamplerSet* getSamplerSet(void) {return samplerSet; }
++    //const SamplerSet& getSamplerSet(void) const {return samplerSet; }
+   private:
+     friend class Context;           //!< Can freely modify a function
+     std::string name;               //!< Function name
+@@ -296,6 +306,7 @@
+     LocationMap locationMap;        //!< Pushed function arguments (loc->reg)
+     uint32_t simdWidth;             //!< 8 or 16 if forced, 0 otherwise
+     bool useSLM;                    //!< Is SLM required?
++    SamplerSet *samplerSet;
+     GBE_CLASS(Function);            //!< Use custom allocator
+   };
+ 
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp	2013-05-14 20:09:16.366021983 +0200
+@@ -0,0 +1,46 @@
++/*
++ * Copyright © 2012 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++
++/**
++ * \file sampler.cpp
++ *
++ */
++#include "sampler.hpp"
++#include "context.hpp"
++
++namespace gbe {
++namespace ir {
++
++  Register SamplerSet::append(uint32_t samplerValue, Context *ctx)
++  {
++    int i = 0;
++
++    for(auto it = regMap.begin();
++        it != regMap.end(); ++it, ++i)
++    {
++      if (it->first == samplerValue)
++        return it->second;
++    }
++    Register reg = ctx->reg(FAMILY_DWORD);
++    ctx->LOADI(ir::TYPE_S32, reg, ctx->newIntegerImmediate(i, ir::TYPE_S32));
++    regMap.insert(std::make_pair(samplerValue, reg));
++    return reg;
++  }
++
++} /* namespace ir */
++} /* namespace gbe */
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.hpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.hpp	2013-05-14 20:09:16.366021983 +0200
+@@ -0,0 +1,67 @@
++/*
++ * Copyright © 2012 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++
++/**
++ * \file sampler.hpp
++ *
++ * \author Benjamin Segovia <benjamin.segovia at intel.com>
++ */
++#ifndef __GBE_IR_SAMPLER_HPP__
++#define __GBE_IR_SAMPLER_HPP__
++
++#include "ir/register.hpp"
++#include "sys/map.hpp"
++
++
++namespace gbe {
++namespace ir {
++
++  /*! A sampler set is a set of global samplers which are defined as constant global
++   * sampler or defined in the outermost kernel scope variables. According to the spec
++   * all the variable should have a initialized integer value and can't be modified.
++   */
++  class Context;
++
++  class SamplerSet
++  {
++  public:
++    /*! Append the specified sampler and return the allocated offset.
++     *  If the speficied sampler is exist, only return the previous offset and
++     *  don't append it again. Return -1, if failed.*/
++    Register append(uint32_t clkSamplerValue, Context *ctx);
++    size_t getDataSize(void) { return regMap.size(); }
++    size_t getDataSize(void) const { return regMap.size(); }
++    void getData(uint32_t *samplers) const {
++      for ( auto &it : regMap)
++        *samplers++ = it.first;
++    }
++
++    void operator = (const SamplerSet& other) {
++      regMap.insert(other.regMap.begin(), other.regMap.end());
++    }
++
++    SamplerSet(const SamplerSet& other) : regMap(other.regMap.begin(), other.regMap.end()) { }
++    SamplerSet() {}
++  private:
++    map<uint32_t, Register> regMap;
++    GBE_CLASS(SamplerSet);
++  };
++} /* namespace ir */
++} /* namespace gbe */
++
++#endif /* __GBE_IR_SAMPLER_HPP__ */
+Index: beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/llvm/llvm_gen_backend.cpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:16.370021983 +0200
+@@ -1990,7 +1990,27 @@
+           case GEN_OCL_READ_IMAGE15:
+           {
+             GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
+-            GBE_ASSERT(AI != AE); const ir::Register sampler = this->getRegister(*AI); ++AI;
++            GBE_ASSERT(AI != AE);
++            Constant *CPV = dyn_cast<Constant>(*AI);
++            ir::Register sampler;
++            if (CPV != NULL)
++            {
++              // This is not a kernel argument sampler, we need to append it to sampler set,
++              // and allocate a sampler slot for it.
++               auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
++               GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32, "Invalid sampler type");
++               sampler = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx);
++            } else {
++              // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, we have to fix up the argument
++              // type here. Once we switch to the LLVM and use the new data type sampler_t, we can remove this
++              // work around.
++              sampler = this->getRegister(*AI);
++              ir::FunctionArgument *arg =  ctx.getFunction().getArg(sampler);
++              GBE_ASSERT(arg != NULL);
++              arg->type = ir::FunctionArgument::SAMPLER;
++            }
++            ++AI;
++
+             GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
+             GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+             ir::Register wcoord;
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:09:02.546022599 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:09:16.370021983 +0200
+@@ -46,7 +46,6 @@
+ #define __constant __attribute__((address_space(2)))
+ #define __local __attribute__((address_space(3)))
+ #define __texture __attribute__((address_space(4)))
+-#define __sampler __attribute__((address_space(5)))
+ #define global __global
+ //#define local __local
+ #define constant __constant
+@@ -77,7 +76,8 @@
+ typedef __texture struct _image2d_t* image2d_t;
+ struct _image3d_t;
+ typedef __texture struct _image3d_t* image3d_t;
+-typedef __sampler uint* sampler_t;
++//typedef __sampler const uint* sampler_t;
++typedef uint sampler_t;
+ typedef size_t event_t;
+ 
+ /////////////////////////////////////////////////////////////////////////////
+@@ -3966,7 +3966,7 @@
+   INLINE_OVERLOADABLE type read_image ##suffix(image2d_t cl_image, sampler_t sampler, coord_type coord) \
+   {\
+     GET_IMAGE(cl_image, surface_id);\
+-    return __gen_ocl_read_image ##suffix(surface_id, (uint)sampler, coord.s0, coord.s1);\
++    return __gen_ocl_read_image ##suffix(surface_id, sampler, coord.s0, coord.s1);\
+   }
+ 
+ #define DECL_WRITE_IMAGE(type, suffix, coord_type) \
diff --git a/debian/patches/0010-CL-Support-kernel-side-defined-samplers.patch b/debian/patches/0010-CL-Support-kernel-side-defined-samplers.patch
new file mode 100644
index 0000000..302980d
--- /dev/null
+++ b/debian/patches/0010-CL-Support-kernel-side-defined-samplers.patch
@@ -0,0 +1,378 @@
+From 63a49c9c392dc31802167d88a3b309f646977903 Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at linux.intel.com>
+Date: Mon, 13 May 2013 11:32:19 +0800
+Subject: [PATCH 10/15] CL: Support kernel side defined samplers.
+To: beignet at lists.freedesktop.org
+
+We changed the way samplers are handled. We now gather all the kernel-side
+defined samplers and the samplers passed as kernel arguments into one sampler
+array, instead of allocating a single sampler each time.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ src/cl_command_queue.c      |    4 ++--
+ src/cl_command_queue_gen7.c |    2 ++
+ src/cl_driver.h             |    6 +++---
+ src/cl_driver_defs.c        |    2 +-
+ src/cl_kernel.c             |   36 ++++++++++++++++++++++-----------
+ src/cl_kernel.h             |    3 +++
+ src/cl_sampler.c            |   43 ++++++++++++++++++++++++++++++++++++++++
+ src/cl_sampler.h            |    5 +++++
+ src/intel/intel_gpgpu.c     |   46 ++++++++++++++++++++-----------------------
+ 9 files changed, 104 insertions(+), 43 deletions(-)
+
+Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue.c	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c	2013-05-14 20:09:19.978021822 +0200
+@@ -119,8 +119,6 @@
+                           k->args[i].mem->w, k->args[i].mem->h,
+                           k->args[i].mem->pitch, k->args[i].mem->tiling);
+     } else if (arg_type == GBE_ARG_SAMPLER) {
+-      uint32_t *curbe_index = (uint32_t*)(k->curbe + offset);
+-      cl_gpgpu_insert_sampler(queue->gpgpu, curbe_index, k->args[i].sampler);
+     } else
+       cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
+   }
+@@ -379,6 +377,8 @@
+   else
+     FATAL ("Unknown Gen Device");
+ 
++  k->arg_sampler_sz = 0;
++
+ #if USE_FULSIM
+   if (run_it != NULL && strcmp(run_it, "1") == 0) {
+     TRY (cl_fulsim_dump_all_surfaces, queue, k);
+Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue_gen7.c	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c	2013-05-14 20:09:19.978021822 +0200
+@@ -224,6 +224,8 @@
+ 
+   /* Bind user buffers */
+   cl_command_queue_bind_surface(queue, ker);
++  /* Bind all samplers */
++  cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->arg_sampler_sz + ker->sampler_sz);
+ 
+   /* Bind a stack if needed */
+   cl_bind_stack(gpgpu, ker);
+Index: beignet-0.1+git20130514+19e9c58/src/cl_driver.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_driver.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_driver.h	2013-05-14 20:09:19.978021822 +0200
+@@ -110,9 +110,9 @@
+ typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t cchint);
+ extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;
+ 
+-/* Insert a sampler */
+-typedef void (cl_gpgpu_insert_sampler_cb)(cl_gpgpu, uint32_t *curbe_index, cl_sampler sampler);
+-extern cl_gpgpu_insert_sampler_cb *cl_gpgpu_insert_sampler;
++/* bind samplers defined in both kernel and kernel args. */
++typedef void (cl_gpgpu_bind_sampler_cb)(cl_gpgpu, uint32_t *samplers, size_t sampler_sz);
++extern cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler;
+ 
+ /* Set a 2d texture */
+ typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state,
+Index: beignet-0.1+git20130514+19e9c58/src/cl_driver_defs.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_driver_defs.c	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_driver_defs.c	2013-05-14 20:09:19.978021822 +0200
+@@ -59,5 +59,5 @@
+ LOCAL cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end = NULL;
+ LOCAL cl_gpgpu_flush_cb *cl_gpgpu_flush = NULL;
+ LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL;
+-LOCAL cl_gpgpu_insert_sampler_cb *cl_gpgpu_insert_sampler = NULL;
++LOCAL cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler = NULL;
+ 
+Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.c	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.c	2013-05-14 20:09:19.978021822 +0200
+@@ -110,6 +110,7 @@
+   if (arg_type == GBE_ARG_VALUE) {
+     if (UNLIKELY(value == NULL))
+       return CL_INVALID_KERNEL_ARGS;
++
+     offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+     assert(offset + sz <= k->curbe_sz);
+     memcpy(k->curbe + offset, value, sz);
+@@ -129,20 +130,22 @@
+     return CL_SUCCESS;
+   }
+ 
+-  /* For a sampler*/
++  /* Is it a sampler*/
+   if (arg_type == GBE_ARG_SAMPLER) {
+-     cl_sampler sampler;
+-     if (UNLIKELY(value == NULL))
++    cl_sampler sampler;
++    memcpy(&sampler, value, sz);
++    if (UNLIKELY(sampler->magic != CL_MAGIC_SAMPLER_HEADER))
+       return CL_INVALID_KERNEL_ARGS;
+-     sampler = *(cl_sampler*)value;
+-
+-     if (UNLIKELY(sampler->magic != CL_MAGIC_SAMPLER_HEADER))
+-       return CL_INVALID_ARG_VALUE;
+-     k->args[index].local_sz = 0;
+-     k->args[index].is_set = 1;
+-     k->args[index].mem = NULL;
+-     k->args[index].sampler = sampler;
+-     return CL_SUCCESS;
++    uint32_t slot;
++    k->args[index].local_sz = 0;
++    k->args[index].is_set = 1;
++    k->args[index].mem = NULL;
++    k->args[index].sampler = sampler;
++    slot = cl_arg_sampler_insert(k, sampler);
++    offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
++    assert(offset + sz <= k->curbe_sz);
++    memcpy(k->curbe + offset, &slot, sizeof(slot));
++    return CL_SUCCESS;
+   }
+ 
+   /* Otherwise, we just need to check that this is a buffer */
+@@ -203,6 +206,12 @@
+ 
+   /* Create the curbe */
+   k->curbe_sz = gbe_kernel_get_curbe_size(k->opaque);
++
++  /* Get sampler data & size */
++  k->sampler_sz = gbe_kernel_get_sampler_size(k->opaque);
++  k->arg_sampler_sz = 0;
++  assert(k->sampler_sz <= GEN_MAX_SAMPLERS);
++  gbe_kernel_get_sampler_data(k->opaque, k->samplers);
+ }
+ 
+ LOCAL cl_kernel
+@@ -221,6 +230,9 @@
+   to->program = from->program;
+   to->arg_n = from->arg_n;
+   to->curbe_sz = from->curbe_sz;
++  to->sampler_sz = from->sampler_sz;
++  to->arg_sampler_sz = from->arg_sampler_sz;
++  memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
+   TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
+   if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
+ 
+Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.h	2013-05-14 20:09:19.978021822 +0200
+@@ -52,7 +52,10 @@
+   gbe_kernel opaque;          /* (Opaque) compiler structure for the OCL kernel */
+   char *curbe;                /* One curbe per kernel */
+   size_t curbe_sz;            /* Size of it */
++  uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel */
++  size_t sampler_sz;          /* sampler size defined in kernel */
+   cl_argument *args;          /* To track argument setting */
++  size_t arg_sampler_sz;      /* sampler size defined in kernel args */
+   uint32_t arg_n:31;          /* Number of arguments */
+   uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
+ };
+Index: beignet-0.1+git20130514+19e9c58/src/cl_sampler.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_sampler.c	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_sampler.c	2013-05-14 20:09:19.978021822 +0200
+@@ -22,9 +22,50 @@
+ #include "cl_utils.h"
+ #include "cl_alloc.h"
+ #include "cl_khr_icd.h"
++#include "cl_kernel.h"
+ 
+ #include <assert.h>
+ 
++uint32_t cl_to_clk(cl_bool normalized_coords,
++                   cl_addressing_mode address,
++                   cl_filter_mode filter)
++{
++  int clk_address;
++  int clk_filter;
++  switch (address) {
++  case CL_ADDRESS_NONE: clk_address = CLK_ADDRESS_NONE; break;
++  case CL_ADDRESS_CLAMP: clk_address = CLK_ADDRESS_CLAMP; break;
++  case CL_ADDRESS_CLAMP_TO_EDGE: clk_address = CLK_ADDRESS_CLAMP_TO_EDGE; break;
++  case CL_ADDRESS_REPEAT: clk_address = CLK_ADDRESS_REPEAT; break;
++  case CL_ADDRESS_MIRRORED_REPEAT: clk_address = CLK_ADDRESS_MIRRORED_REPEAT; break;
++  default:
++    assert(0);
++  }
++  switch(filter) {
++  case CL_FILTER_NEAREST: clk_filter = CLK_FILTER_NEAREST; break;
++  case CL_FILTER_LINEAR: clk_filter = CLK_FILTER_LINEAR; break;
++  default:
++    assert(0);
++  }
++  return (clk_address << __CLK_ADDRESS_BASE)
++         | (normalized_coords << __CLK_NORMALIZED_BASE)
++         | (clk_filter << __CLK_FILTER_BASE);
++}
++
++int cl_arg_sampler_insert(cl_kernel k, cl_sampler sampler)
++{
++  int i, slot_id;
++  for(i = 0; i < k->sampler_sz; i++)
++  {
++    if (k->samplers[i] == sampler->clkSamplerValue)
++      return i;
++  }
++  slot_id = k->sampler_sz + k->arg_sampler_sz;
++  k->samplers[slot_id] = sampler->clkSamplerValue;
++  k->arg_sampler_sz++;
++  return slot_id;
++}
++
+ LOCAL cl_sampler
+ cl_sampler_new(cl_context ctx,
+                cl_bool normalized_coords,
+@@ -54,6 +95,8 @@
+   sampler->ctx = ctx;
+   cl_context_add_ref(ctx);
+ 
++  sampler->clkSamplerValue = cl_to_clk(normalized_coords, address, filter);
++
+ exit:
+   if (errcode_ret)
+     *errcode_ret = err;
+Index: beignet-0.1+git20130514+19e9c58/src/cl_sampler.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_sampler.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_sampler.h	2013-05-14 20:09:19.978021822 +0200
+@@ -21,6 +21,7 @@
+ #define __CL_SAMPLER_H__
+ 
+ #include "CL/cl.h"
++#include "../backend/src/ocl_common_defines.h"
+ #include <stdint.h>
+ 
+ /* How to access images */
+@@ -33,6 +34,7 @@
+   cl_bool normalized_coords; /* Are coordinates normalized? */
+   cl_addressing_mode address;/* CLAMP / REPEAT and so on... */
+   cl_filter_mode filter;     /* LINEAR / NEAREST mostly */
++  uint32_t clkSamplerValue;
+ };
+ 
+ /* Create a new sampler object */
+@@ -48,5 +50,8 @@
+ /* Add one more reference to this object */
+ extern void cl_sampler_add_ref(cl_sampler);
+ 
++/* insert a new argument sampler */
++int cl_arg_sampler_insert(cl_kernel k, cl_sampler sampler);
++
+ #endif /* __CL_SAMPLER_H__ */
+ 
+Index: beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/intel/intel_gpgpu.c	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c	2013-05-14 20:09:19.982021822 +0200
+@@ -471,16 +471,6 @@
+ }
+ 
+ static int
+-intel_gpgpu_get_free_sampler_index(intel_gpgpu_t *gpgpu)
+-{
+-  int slot;
+-  assert(~gpgpu->sampler_bitmap != 0);
+-  slot = __fls(~gpgpu->sampler_bitmap);
+-  gpgpu->sampler_bitmap |= (1 << slot);
+-  return slot;
+-}
+-
+-static int
+ intel_get_surface_type(cl_mem_object_type type)
+ {
+   switch (type) {
+@@ -662,10 +652,10 @@
+ int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest)
+ {
+    switch( cl_address_mode ) {
+-   case CL_ADDRESS_NONE:
+-   case CL_ADDRESS_REPEAT:
++   case CLK_ADDRESS_NONE:
++   case CLK_ADDRESS_REPEAT:
+       return GEN_TEXCOORDMODE_WRAP;
+-   case CL_ADDRESS_CLAMP:
++   case CLK_ADDRESS_CLAMP:
+       /* GL_CLAMP is the weird mode where coordinates are clamped to
+        * [0.0, 1.0], so linear filtering of coordinates outside of
+        * [0.0, 1.0] give you half edge texel value and half border
+@@ -679,9 +669,9 @@
+          return GEN_TEXCOORDMODE_CLAMP;
+       else
+          return GEN_TEXCOORDMODE_CLAMP_BORDER;
+-   case CL_ADDRESS_CLAMP_TO_EDGE:
++   case CLK_ADDRESS_CLAMP_TO_EDGE:
+       return GEN_TEXCOORDMODE_CLAMP;
+-   case CL_ADDRESS_MIRRORED_REPEAT:
++   case CLK_ADDRESS_MIRRORED_REPEAT:
+       return GEN_TEXCOORDMODE_MIRROR;
+    default:
+       return GEN_TEXCOORDMODE_WRAP;
+@@ -689,35 +679,33 @@
+ }
+ 
+ static void
+-intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t *curbe_index, cl_sampler cl_sampler)
++intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler)
+ {
+-  int index;
+   int using_nearest = 0;
+   uint32_t wrap_mode;
+   gen7_sampler_state_t *sampler;
+ 
+-  index = intel_gpgpu_get_free_sampler_index(gpgpu);
+   sampler = (gen7_sampler_state_t *)gpgpu->sampler_state_b.bo->virtual + index;
+-  if (!cl_sampler->normalized_coords)
++  if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE)
+     sampler->ss3.non_normalized_coord = 1;
+   else
+     sampler->ss3.non_normalized_coord = 0;
+ 
+-  switch (cl_sampler->filter) {
+-  case CL_FILTER_NEAREST:
++  switch (clk_sampler & __CLK_FILTER_MASK) {
++  case CLK_FILTER_NEAREST:
+     sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST;
+     sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
+     sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST;
+     using_nearest = 1;
+     break;
+-  case CL_FILTER_LINEAR:
++  case CLK_FILTER_LINEAR:
+     sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR;
+     sampler->ss0.mip_filter = GEN_MIPFILTER_NONE;
+     sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR;
+     break;
+   }
+ 
+-  wrap_mode = translate_wrap_mode(cl_sampler->address, using_nearest);
++  wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest);
+   sampler->ss3.r_wrap_mode = wrap_mode;
+   sampler->ss3.s_wrap_mode = wrap_mode;
+   sampler->ss3.t_wrap_mode = wrap_mode;
+@@ -738,7 +726,15 @@
+      sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG |
+                                    GEN_ADDRESS_ROUNDING_ENABLE_V_MAG |
+                                    GEN_ADDRESS_ROUNDING_ENABLE_R_MAG;
+-  *curbe_index = index;
++}
++
++static void
++intel_gpgpu_bind_sampler(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz)
++{
++  int index;
++  assert(sampler_sz <= GEN_MAX_SAMPLERS);
++  for(index = 0; index < sampler_sz; index++)
++    intel_gpgpu_insert_sampler(gpgpu, index, samplers[index]);
+ }
+ 
+ static void
+@@ -815,6 +811,6 @@
+   cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end;
+   cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush;
+   cl_gpgpu_walker = (cl_gpgpu_walker_cb *) intel_gpgpu_walker;
+-  cl_gpgpu_insert_sampler = (cl_gpgpu_insert_sampler_cb *) intel_gpgpu_insert_sampler;
++  cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler;
+ }
+ 
diff --git a/debian/patches/0011-utests-Add-one-test-cases-for-sampler-support.patch b/debian/patches/0011-utests-Add-one-test-cases-for-sampler-support.patch
new file mode 100644
index 0000000..48a8ef6
--- /dev/null
+++ b/debian/patches/0011-utests-Add-one-test-cases-for-sampler-support.patch
@@ -0,0 +1,150 @@
+From b4dc8376a24cc4e0bfacdb57ca3a1da87c8c90f4 Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at linux.intel.com>
+Date: Mon, 13 May 2013 11:32:20 +0800
+Subject: [PATCH 11/15] utests: Add one test cases for sampler support.
+To: beignet at lists.freedesktop.org
+
+This new case tests samplers defined on the kernel side as well as
+samplers passed in as kernel arguments.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ kernels/test_copy_image1.cl     |   33 +++++++++++++++++
+ utests/CMakeLists.txt           |    1 +
+ utests/compiler_copy_image1.cpp |   77 +++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 111 insertions(+)
+ create mode 100644 kernels/test_copy_image1.cl
+ create mode 100644 utests/compiler_copy_image1.cpp
+
+Index: beignet-0.1+git20130514+19e9c58/kernels/test_copy_image1.cl
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/kernels/test_copy_image1.cl	2013-05-14 20:09:21.802021741 +0200
+@@ -0,0 +1,33 @@
++#define S(A,B,C) CLK_NORMALIZED_COORDS_##A | CLK_ADDRESS_##B | CLK_FILTER_##C
++
++#define COPY_IMAGE(_dst, _sampler, scoord, dcoord) \
++  color = read_imagei(src, _sampler, scoord);\
++  write_imagei(_dst, dcoord, color)
++
++__kernel void
++test_copy_image1(__read_only image2d_t src,
++                 __write_only image2d_t dst0,
++                 sampler_t sampler0,
++                 __write_only image2d_t dst1,
++                 __write_only image2d_t dst2,
++                 __write_only image2d_t dst3,
++                 __write_only image2d_t dst4,
++                 float w_inv, float h_inv)
++{
++  const sampler_t sampler1 = S(FALSE, REPEAT, NEAREST);
++  const sampler_t sampler2 = S(FALSE, CLAMP, NEAREST);
++  const sampler_t sampler3 = S(FALSE, MIRRORED_REPEAT, NEAREST);
++  const sampler_t sampler4 = S(TRUE, REPEAT, NEAREST);
++  int2 coord;
++  float2 fcoord;
++  int4 color;
++  coord.x = (int)get_global_id(0);
++  coord.y = (int)get_global_id(1);
++  fcoord.x = coord.x * w_inv;
++  fcoord.y = coord.y * h_inv;
++  COPY_IMAGE(dst0, sampler0, coord, coord);
++  COPY_IMAGE(dst1, sampler1, coord, coord);
++  COPY_IMAGE(dst2, sampler2, coord, coord);
++  COPY_IMAGE(dst3, sampler3, coord, coord);
++  COPY_IMAGE(dst4, sampler4, fcoord, coord);
++}
+Index: beignet-0.1+git20130514+19e9c58/utests/CMakeLists.txt
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/utests/CMakeLists.txt	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/utests/CMakeLists.txt	2013-05-14 20:09:21.802021741 +0200
+@@ -74,6 +74,7 @@
+   compiler_local_memory_barrier_wg64.cpp
+   compiler_movforphi_undef.cpp
+   compiler_volatile.cpp
++  compiler_copy_image1.cpp
+   runtime_createcontext.cpp
+   utest_assert.cpp
+   utest.cpp
+Index: beignet-0.1+git20130514+19e9c58/utests/compiler_copy_image1.cpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/utests/compiler_copy_image1.cpp	2013-05-14 20:09:21.802021741 +0200
+@@ -0,0 +1,77 @@
++#include "utest_helper.hpp"
++
++static void compiler_copy_image1(void)
++{
++  const size_t w = 512;
++  const size_t h = 512;
++  cl_image_format format;
++  cl_image_desc desc;
++  cl_sampler sampler;
++
++  // Setup kernel and images
++  OCL_CREATE_KERNEL("test_copy_image1");
++  buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h);
++  for (uint32_t j = 0; j < h; ++j)
++    for (uint32_t i = 0; i < w; i++)
++      ((uint32_t*)buf_data[0])[j * w + i] = j * w + i;
++
++  format.image_channel_order = CL_RGBA;
++  format.image_channel_data_type = CL_UNSIGNED_INT8;
++  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
++  desc.image_width = w;
++  desc.image_height = h;
++  desc.image_row_pitch = w * sizeof(uint32_t);
++  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
++  OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
++
++  desc.image_row_pitch = 0;
++  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
++  OCL_CREATE_IMAGE(buf[2], 0, &format, &desc, NULL);
++  OCL_CREATE_IMAGE(buf[3], 0, &format, &desc, NULL);
++  OCL_CREATE_IMAGE(buf[4], 0, &format, &desc, NULL);
++  OCL_CREATE_IMAGE(buf[5], 0, &format, &desc, NULL);
++  free(buf_data[0]);
++  buf_data[0] = NULL;
++
++  // Run the kernel
++  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
++  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
++  OCL_SET_ARG(2, sizeof(sampler), &sampler);
++  OCL_SET_ARG(3, sizeof(cl_mem), &buf[2]);
++  OCL_SET_ARG(4, sizeof(cl_mem), &buf[3]);
++  OCL_SET_ARG(5, sizeof(cl_mem), &buf[4]);
++  OCL_SET_ARG(6, sizeof(cl_mem), &buf[5]);
++  float w_inv = 1.0/w;
++  float h_inv = 1.0/h;
++  OCL_SET_ARG(7, sizeof(float), &w_inv);
++  OCL_SET_ARG(8, sizeof(float), &h_inv);
++
++  globals[0] = w;
++  globals[1] = h;
++  locals[0] = 16;
++  locals[1] = 16;
++  OCL_NDRANGE(2);
++
++  // Check result
++  OCL_MAP_BUFFER(0);
++  OCL_MAP_BUFFER(1);
++  OCL_MAP_BUFFER(2);
++  OCL_MAP_BUFFER(3);
++  OCL_MAP_BUFFER(4);
++  OCL_MAP_BUFFER(5);
++
++  for(uint32_t k = 0; k < 5; k++)
++  {
++    for (uint32_t j = 0; j < h; ++j)
++      for (uint32_t i = 0; i < w; i++)
++        OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == ((uint32_t*)buf_data[1 + k])[j * w + i]);
++  }
++  OCL_UNMAP_BUFFER(0);
++  OCL_UNMAP_BUFFER(1);
++  OCL_UNMAP_BUFFER(2);
++  OCL_UNMAP_BUFFER(3);
++  OCL_UNMAP_BUFFER(4);
++  OCL_UNMAP_BUFFER(5);
++}
++
++MAKE_UTEST_FROM_FUNCTION(compiler_copy_image1);
diff --git a/debian/patches/0012-GBE-remove-sampler-address-space.patch b/debian/patches/0012-GBE-remove-sampler-address-space.patch
new file mode 100644
index 0000000..899b141
--- /dev/null
+++ b/debian/patches/0012-GBE-remove-sampler-address-space.patch
@@ -0,0 +1,74 @@
+From a87eca304c690096511c4db9b6cddf2544ab6d3f Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at linux.intel.com>
+Date: Mon, 13 May 2013 11:32:21 +0800
+Subject: [PATCH 12/15] GBE: remove sampler address space.
+To: beignet at lists.freedesktop.org
+
+As sampler_t is now a normal integer data type, we no longer
+need the sampler address space.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ backend/src/ir/instruction.cpp        |    1 -
+ backend/src/ir/instruction.hpp        |    1 -
+ backend/src/llvm/llvm_gen_backend.cpp |    3 ---
+ backend/src/ocl_stdlib.h              |    1 -
+ 4 files changed, 6 deletions(-)
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/instruction.cpp	2013-05-14 20:07:24.282026980 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.cpp	2013-05-14 20:09:23.482021666 +0200
+@@ -915,7 +915,6 @@
+       case MEM_CONSTANT: return out << "constant";
+       case MEM_PRIVATE: return out << "private";
+       case IMAGE: return out << "image";
+-      case SAMPLER: return out << "sampler";
+       case MEM_INVALID: return out << "invalid";
+     };
+     return out;
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/instruction.hpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.hpp	2013-05-14 20:09:23.482021666 +0200
+@@ -50,7 +50,6 @@
+     MEM_CONSTANT,   //!< Immutable global memory
+     MEM_PRIVATE,    //!< Per thread private memory
+     IMAGE,          //!< For texture image.
+-    SAMPLER,        //!< For sampler.
+     MEM_INVALID
+   };
+ 
+Index: beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:16.370021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:23.486021665 +0200
+@@ -256,7 +256,6 @@
+       case 2: return ir::MEM_CONSTANT;
+       case 3: return ir::MEM_LOCAL;
+       case 4: return ir::IMAGE;
+-      case 5: return ir::SAMPLER;
+     }
+     GBE_ASSERT(false);
+     return ir::MEM_GLOBAL;
+@@ -916,8 +915,6 @@
+               case ir::IMAGE:
+                 ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
+               break;
+-              case ir::SAMPLER:
+-                ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, ptrSize);
+               break;
+               default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
+             }
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_stdlib.h	2013-05-14 20:09:16.370021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_stdlib.h	2013-05-14 20:09:23.486021665 +0200
+@@ -76,7 +76,6 @@
+ typedef __texture struct _image2d_t* image2d_t;
+ struct _image3d_t;
+ typedef __texture struct _image3d_t* image3d_t;
+-//typedef __sampler const uint* sampler_t;
+ typedef uint sampler_t;
+ typedef size_t event_t;
+ 
diff --git a/debian/patches/0013-GBE-add-scalar-register-support-in-loadImmInstructio.patch b/debian/patches/0013-GBE-add-scalar-register-support-in-loadImmInstructio.patch
new file mode 100644
index 0000000..8b04c61
--- /dev/null
+++ b/debian/patches/0013-GBE-add-scalar-register-support-in-loadImmInstructio.patch
@@ -0,0 +1,41 @@
+From 08ee5ccdb12d2ffe6afe23532f557e758c2dd8ec Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at linux.intel.com>
+Date: Mon, 13 May 2013 11:32:22 +0800
+Subject: [PATCH 13/15] GBE: add scalar register support in
+ loadImmInstruction.
+To: beignet at lists.freedesktop.org
+
+There is a slight possibility that the destination register
+is a scalar register. We need to check it here.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ backend/src/backend/gen_insn_selection.cpp |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_insn_selection.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_insn_selection.cpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_insn_selection.cpp	2013-05-14 20:09:25.126021592 +0200
+@@ -1546,6 +1546,13 @@
+       const Immediate imm = insn.getImmediate();
+       const GenRegister dst = sel.selReg(insn.getDst(0), type);
+ 
++      sel.push();
++      if (sel.isScalarOrBool(insn.getDst(0)) == true) {
++        sel.curr.execWidth = 1;
++        sel.curr.predicate = GEN_PREDICATE_NONE;
++        sel.curr.noMask = 1;
++      }
++
+       switch (type) {
+         case TYPE_U32:
+         case TYPE_S32:
+@@ -1559,6 +1566,7 @@
+         case TYPE_S8:  sel.MOV(dst, GenRegister::immw(imm.data.s8)); break;
+         default: NOT_SUPPORTED;
+       }
++      sel.pop();
+       return true;
+     }
+ 
diff --git a/debian/patches/0014-GBE-concentrate-all-samplers-allocation-at-compile-t.patch b/debian/patches/0014-GBE-concentrate-all-samplers-allocation-at-compile-t.patch
new file mode 100644
index 0000000..d5b838e
--- /dev/null
+++ b/debian/patches/0014-GBE-concentrate-all-samplers-allocation-at-compile-t.patch
@@ -0,0 +1,407 @@
+From 832e548e52983eed1b84cb9b605f56492626e28b Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at linux.intel.com>
+Date: Mon, 13 May 2013 11:32:23 +0800
+Subject: [PATCH 14/15] GBE: concentrate all samplers' allocation at compile
+ time.
+To: beignet at lists.freedesktop.org
+
+This is the first step to do image/sampler allocation fully
+at compile time. Thus we can determine all the sampler id and image
+bti index at compile time. So it can make the following things
+easier or faster:
+
+1. After we finish both image/sampler, we can treat all image bti and sampler
+   as constant and can get their value when we encode the Sampler and TypedWrite
+   instructions. Then we don't need to compute the message header at runtime which
+   cost 3 instructions each call.
+
+2. get image width/height/depth. As we know the surface bti at compile time,
+   we can put those data at specified curbe entry and generate correct indirect
+   register access to get those information at compile time.
+
+This is the first step. And just finish the sampler part. Now all the
+samplers including those defined in kernel arguments will be allocated
+at compile time. At runtime, it just needs to fill in the sampler value
+into the proper slot which maps to the specified input argument. Then the
+driver will create and bind the sampler to the correct slot.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ backend/src/ir/function.hpp           |   17 +++++++++--
+ backend/src/ir/sampler.cpp            |   53 ++++++++++++++++++++++++++-------
+ backend/src/ir/sampler.hpp            |   25 +++++++++++-----
+ backend/src/llvm/llvm_gen_backend.cpp |    7 +----
+ backend/src/ocl_common_defines.h      |    9 ++++--
+ src/cl_command_queue.c                |    2 --
+ src/cl_command_queue_gen7.c           |    2 +-
+ src/cl_kernel.c                       |   14 ++++-----
+ src/cl_kernel.h                       |    5 ++--
+ src/cl_sampler.c                      |   22 ++++++++------
+ src/cl_sampler.h                      |    4 +--
+ src/intel/intel_gpgpu.c               |    2 +-
+ 12 files changed, 106 insertions(+), 56 deletions(-)
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.hpp	2013-05-14 20:09:16.366021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp	2013-05-14 20:09:26.818021517 +0200
+@@ -197,6 +197,18 @@
+       GBE_ASSERT(args[ID] != NULL);
+       return *args[ID];
+     }
++
++    /*! Get arg ID. */
++    INLINE int32_t getArgID(FunctionArgument *requestArg) {
++      for (uint32_t ID = 0; ID < args.size(); ID++)
++      {
++        if ( args[ID] == requestArg )
++          return ID;
++      }
++      GBE_ASSERTM(0, "Failed to get a valid argument ID.");
++      return -1;
++    }
++
+     /*! Get the number of pushed registers */
+     INLINE uint32_t pushedNum(void) const { return pushMap.size(); }
+     /*! Get the pushed data location for the given register */
+@@ -289,8 +301,7 @@
+     /*! Change the SLM config for the function */
+     INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; }
+     /*! Get sampler set in this function */
+-    SamplerSet* getSamplerSet(void) {return samplerSet; }
+-    //const SamplerSet& getSamplerSet(void) const {return samplerSet; }
++    SamplerSet* getSamplerSet(void) const {return samplerSet; }
+   private:
+     friend class Context;           //!< Can freely modify a function
+     std::string name;               //!< Function name
+@@ -306,7 +317,7 @@
+     LocationMap locationMap;        //!< Pushed function arguments (loc->reg)
+     uint32_t simdWidth;             //!< 8 or 16 if forced, 0 otherwise
+     bool useSLM;                    //!< Is SLM required?
+-    SamplerSet *samplerSet;
++    SamplerSet *samplerSet;          //!< samplers used in this function.
+     GBE_CLASS(Function);            //!< Use custom allocator
+   };
+ 
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/sampler.cpp	2013-05-14 20:09:16.366021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp	2013-05-14 20:09:26.818021517 +0200
+@@ -22,25 +22,58 @@
+  */
+ #include "sampler.hpp"
+ #include "context.hpp"
++#include "ocl_common_defines.h"
+ 
+ namespace gbe {
+ namespace ir {
+ 
+-  Register SamplerSet::append(uint32_t samplerValue, Context *ctx)
++  const uint32_t SamplerSet::getIdx(const Register reg) const
+   {
+-    int i = 0;
++    auto it = regMap.find(reg);
++    GBE_ASSERT(it != regMap.end());
++    return it->second.slot;
++  }
+ 
+-    for(auto it = regMap.begin();
+-        it != regMap.end(); ++it, ++i)
+-    {
+-      if (it->first == samplerValue)
+-        return it->second;
+-    }
++  void SamplerSet::appendReg(const Register reg, uint32_t key, Context *ctx) {
++    struct SamplerRegSlot samplerSlot;
++    // This register is just used as a key.
++    samplerSlot.reg = reg;
++    samplerSlot.slot = samplerMap.size();
++    samplerMap.insert(std::make_pair(key, samplerSlot));
++    regMap.insert(std::make_pair(samplerSlot.reg, samplerSlot));
++    ctx->LOADI(ir::TYPE_S32, samplerSlot.reg, ctx->newIntegerImmediate(samplerSlot.slot, ir::TYPE_S32));
++  }
++
++  Register SamplerSet::append(uint32_t samplerValue, Context *ctx)
++  {
++    auto it = samplerMap.find(samplerValue);
++    if (it != samplerMap.end())
++        return it->second.reg;
+     Register reg = ctx->reg(FAMILY_DWORD);
+-    ctx->LOADI(ir::TYPE_S32, reg, ctx->newIntegerImmediate(i, ir::TYPE_S32));
+-    regMap.insert(std::make_pair(samplerValue, reg));
++    appendReg(reg, samplerValue, ctx);
+     return reg;
+   }
+ 
++#define SAMPLER_ID(id) ((id << __CLK_SAMPLER_ARG_BASE) | __CLK_SAMPLER_ARG_KEY_BIT)
++  void SamplerSet::append(Register samplerReg, Context *ctx)
++  {
++    ir::FunctionArgument *arg =  ctx->getFunction().getArg(samplerReg);
++    GBE_ASSERT(arg != NULL);
++
++    // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, we have to fix up the argument
++    // type here. Once we switch to the LLVM and use the new data type sampler_t, we can remove this
++    // work around.
++    arg->type = ir::FunctionArgument::SAMPLER;
++    int32_t id = ctx->getFunction().getArgID(arg);
++    GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS));
++
++    auto it = samplerMap.find(SAMPLER_ID(id));
++    if (it != samplerMap.end()) {
++      GBE_ASSERT(it->second.reg == samplerReg);
++      return;
++    }
++    appendReg(samplerReg, SAMPLER_ID(id), ctx);
++  }
++
+ } /* namespace ir */
+ } /* namespace gbe */
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/sampler.hpp	2013-05-14 20:09:16.366021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.hpp	2013-05-14 20:09:26.818021517 +0200
+@@ -27,7 +27,6 @@
+ #include "ir/register.hpp"
+ #include "sys/map.hpp"
+ 
+-
+ namespace gbe {
+ namespace ir {
+ 
+@@ -37,6 +36,11 @@
+    */
+   class Context;
+ 
++  struct SamplerRegSlot {
++    Register reg;
++    uint32_t slot;
++  };
++
+   class SamplerSet
+   {
+   public:
+@@ -44,21 +48,28 @@
+      *  If the speficied sampler is exist, only return the previous offset and
+      *  don't append it again. Return -1, if failed.*/
+     Register append(uint32_t clkSamplerValue, Context *ctx);
+-    size_t getDataSize(void) { return regMap.size(); }
+-    size_t getDataSize(void) const { return regMap.size(); }
++    /*! Append a sampler defined in kernel args. */
++    void append(Register samplerArg, Context *ctx);
++    /*! Get the sampler idx (actual location) */
++    const uint32_t getIdx(const Register reg) const;
++    size_t getDataSize(void) { return samplerMap.size(); }
++    size_t getDataSize(void) const { return samplerMap.size(); }
+     void getData(uint32_t *samplers) const {
+-      for ( auto &it : regMap)
+-        *samplers++ = it.first;
++      for(auto &it : samplerMap)
++        samplers[it.second.slot] = it.first;
+     }
+ 
+     void operator = (const SamplerSet& other) {
+       regMap.insert(other.regMap.begin(), other.regMap.end());
++      samplerMap.insert(other.samplerMap.begin(), other.samplerMap.end());
+     }
+ 
+-    SamplerSet(const SamplerSet& other) : regMap(other.regMap.begin(), other.regMap.end()) { }
++    SamplerSet(const SamplerSet& other) : samplerMap(other.samplerMap.begin(), other.samplerMap.end()) { }
+     SamplerSet() {}
+   private:
+-    map<uint32_t, Register> regMap;
++    void appendReg(const Register reg, uint32_t key, Context *ctx);
++    map<uint32_t, SamplerRegSlot> samplerMap;
++    map<Register, SamplerRegSlot> regMap;
+     GBE_CLASS(SamplerSet);
+   };
+ } /* namespace ir */
+Index: beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:23.486021665 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:26.818021517 +0200
+@@ -1998,13 +1998,8 @@
+                GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32, "Invalid sampler type");
+                sampler = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx);
+             } else {
+-              // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, we have to fix up the argument
+-              // type here. Once we switch to the LLVM and use the new data type sampler_t, we can remove this
+-              // work around.
+               sampler = this->getRegister(*AI);
+-              ir::FunctionArgument *arg =  ctx.getFunction().getArg(sampler);
+-              GBE_ASSERT(arg != NULL);
+-              arg->type = ir::FunctionArgument::SAMPLER;
++              ctx.getFunction().getSamplerSet()->append(sampler, &ctx);
+             }
+             ++AI;
+ 
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ocl_common_defines.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ocl_common_defines.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ocl_common_defines.h	2013-05-14 20:09:26.818021517 +0200
+@@ -111,9 +111,12 @@
+     __CLK_SAMPLER_MASK             = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+                                      __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+ 
+-    __CLK_ANISOTROPIC_RATIO_BITS   = 5,
+-    __CLK_ANISOTROPIC_RATIO_MASK   = (int) 0x80000000 >>
+-                                      (__CLK_ANISOTROPIC_RATIO_BITS-1)
++    __CLK_SAMPLER_ARG_BASE         = __CLK_MIP_BASE + __CLK_SAMPLER_BITS,
++    __CLK_SAMPLER_ARG_BITS         = 8,
++    __CLK_SAMPLER_ARG_MASK         = ((1 << __CLK_SAMPLER_ARG_BITS) - 1) << __CLK_SAMPLER_ARG_BASE,
++    __CLK_SAMPLER_ARG_KEY_BIT      = (1 << (__CLK_SAMPLER_ARG_BASE + __CLK_SAMPLER_ARG_BITS)),
++    __CLK_SAMPLER_ARG_KEY_BITS     = 1,
++
+ } clk_sampler_type;
+ 
+ // Memory synchronization
+Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue.c	2013-05-14 20:09:19.978021822 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c	2013-05-14 20:09:26.818021517 +0200
+@@ -377,8 +377,6 @@
+   else
+     FATAL ("Unknown Gen Device");
+ 
+-  k->arg_sampler_sz = 0;
+-
+ #if USE_FULSIM
+   if (run_it != NULL && strcmp(run_it, "1") == 0) {
+     TRY (cl_fulsim_dump_all_surfaces, queue, k);
+Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue_gen7.c	2013-05-14 20:09:19.978021822 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c	2013-05-14 20:09:26.822021517 +0200
+@@ -225,7 +225,7 @@
+   /* Bind user buffers */
+   cl_command_queue_bind_surface(queue, ker);
+   /* Bind all samplers */
+-  cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->arg_sampler_sz + ker->sampler_sz);
++  cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->sampler_sz);
+ 
+   /* Bind a stack if needed */
+   cl_bind_stack(gpgpu, ker);
+Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.c	2013-05-14 20:09:19.978021822 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.c	2013-05-14 20:09:26.822021517 +0200
+@@ -136,15 +136,11 @@
+     memcpy(&sampler, value, sz);
+     if (UNLIKELY(sampler->magic != CL_MAGIC_SAMPLER_HEADER))
+       return CL_INVALID_KERNEL_ARGS;
+-    uint32_t slot;
+     k->args[index].local_sz = 0;
+     k->args[index].is_set = 1;
+     k->args[index].mem = NULL;
+     k->args[index].sampler = sampler;
+-    slot = cl_arg_sampler_insert(k, sampler);
+-    offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+-    assert(offset + sz <= k->curbe_sz);
+-    memcpy(k->curbe + offset, &slot, sizeof(slot));
++    cl_set_sampler_arg_slot(k, index, sampler);
+     return CL_SUCCESS;
+   }
+ 
+@@ -209,9 +205,9 @@
+ 
+   /* Get sampler data & size */
+   k->sampler_sz = gbe_kernel_get_sampler_size(k->opaque);
+-  k->arg_sampler_sz = 0;
+   assert(k->sampler_sz <= GEN_MAX_SAMPLERS);
+-  gbe_kernel_get_sampler_data(k->opaque, k->samplers);
++  if (k->sampler_sz > 0)
++    gbe_kernel_get_sampler_data(k->opaque, k->samplers);
+ }
+ 
+ LOCAL cl_kernel
+@@ -231,8 +227,8 @@
+   to->arg_n = from->arg_n;
+   to->curbe_sz = from->curbe_sz;
+   to->sampler_sz = from->sampler_sz;
+-  to->arg_sampler_sz = from->arg_sampler_sz;
+-  memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
++  if (to->sampler_sz)
++    memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
+   TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
+   if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
+ 
+Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.h	2013-05-14 20:09:19.978021822 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.h	2013-05-14 20:09:26.822021517 +0200
+@@ -52,10 +52,9 @@
+   gbe_kernel opaque;          /* (Opaque) compiler structure for the OCL kernel */
+   char *curbe;                /* One curbe per kernel */
+   size_t curbe_sz;            /* Size of it */
+-  uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel */
+-  size_t sampler_sz;          /* sampler size defined in kernel */
++  uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel & kernel args */
++  size_t sampler_sz;          /* sampler size defined in kernel & kernel args. */
+   cl_argument *args;          /* To track argument setting */
+-  size_t arg_sampler_sz;      /* sampler size defined in kernel args */
+   uint32_t arg_n:31;          /* Number of arguments */
+   uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
+ };
+Index: beignet-0.1+git20130514+19e9c58/src/cl_sampler.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_sampler.c	2013-05-14 20:09:19.978021822 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_sampler.c	2013-05-14 20:09:26.822021517 +0200
+@@ -52,18 +52,22 @@
+          | (clk_filter << __CLK_FILTER_BASE);
+ }
+ 
+-int cl_arg_sampler_insert(cl_kernel k, cl_sampler sampler)
++#define IS_SAMPLER_ARG(v) (v & __CLK_SAMPLER_ARG_KEY_BIT)
++#define SAMPLER_ARG_ID(v) ((v & __CLK_SAMPLER_ARG_MASK) >> __CLK_SAMPLER_ARG_BASE)
++int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler)
+ {
+-  int i, slot_id;
+-  for(i = 0; i < k->sampler_sz; i++)
++  int slot_id;
++  for(slot_id = 0; slot_id < k->sampler_sz; slot_id++)
+   {
+-    if (k->samplers[i] == sampler->clkSamplerValue)
+-      return i;
++    if (IS_SAMPLER_ARG(k->samplers[slot_id])) {
++     if (SAMPLER_ARG_ID(k->samplers[slot_id]) == index) {
++       k->samplers[slot_id] = (k->samplers[slot_id] & (~__CLK_SAMPLER_MASK))
++                              | sampler->clkSamplerValue;
++       return slot_id;
++     }
++    }
+   }
+-  slot_id = k->sampler_sz + k->arg_sampler_sz;
+-  k->samplers[slot_id] = sampler->clkSamplerValue;
+-  k->arg_sampler_sz++;
+-  return slot_id;
++  assert(0);
+ }
+ 
+ LOCAL cl_sampler
+Index: beignet-0.1+git20130514+19e9c58/src/cl_sampler.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_sampler.h	2013-05-14 20:09:19.978021822 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_sampler.h	2013-05-14 20:09:26.822021517 +0200
+@@ -50,8 +50,8 @@
+ /* Add one more reference to this object */
+ extern void cl_sampler_add_ref(cl_sampler);
+ 
+-/* insert a new argument sampler */
+-int cl_arg_sampler_insert(cl_kernel k, cl_sampler sampler);
++/* set a sampler kernel argument */
++int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler);
+ 
+ #endif /* __CL_SAMPLER_H__ */
+ 
+Index: beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/intel/intel_gpgpu.c	2013-05-14 20:09:19.982021822 +0200
++++ beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c	2013-05-14 20:09:26.822021517 +0200
+@@ -734,7 +734,7 @@
+   int index;
+   assert(sampler_sz <= GEN_MAX_SAMPLERS);
+   for(index = 0; index < sampler_sz; index++)
+-    intel_gpgpu_insert_sampler(gpgpu, index, samplers[index]);
++    intel_gpgpu_insert_sampler(gpgpu, index, samplers[index] & __CLK_SAMPLER_MASK);
+ }
+ 
+ static void
diff --git a/debian/patches/0015-GBE-Runtime-Optimize-Sample-TypedWrite-instruction.patch b/debian/patches/0015-GBE-Runtime-Optimize-Sample-TypedWrite-instruction.patch
new file mode 100644
index 0000000..1378d51
--- /dev/null
+++ b/debian/patches/0015-GBE-Runtime-Optimize-Sample-TypedWrite-instruction.patch
@@ -0,0 +1,1027 @@
+From 4a767da3faa8ed91a3edb61a8c42a1c8c0e8b7b8 Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at linux.intel.com>
+Date: Mon, 13 May 2013 11:32:24 +0800
+Subject: [PATCH 15/15] GBE/Runtime: Optimize Sample/TypedWrite instruction.
+To: beignet at lists.freedesktop.org
+
+This commit does two major things as below:
+1. Allocate image surface at compile time, and add new gbe interfaces to let runtime know
+how many image surfaces we have, and the image allocation information. Thus the runtime
+library knows how to bind those image surfaces.
+
+2. As now for both image and sampler, at compile time, we know the exact binding table
+index. We no longer need to get those indices from the input argument (curbe) and prepare
+the desc to the architecture register. We can use imm as the desc thus we can save
+4 out of 4 instructions for SampleInstruction and save 2 out of 12 instructions for
+the TypedWriteInstruction.
+
+This patch is also a major preparation for the get_image_width/height/... functions.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ backend/src/CMakeLists.txt                 |    2 +
+ backend/src/backend/gen_context.cpp        |   43 ++++++-----------
+ backend/src/backend/gen_encoder.cpp        |   66 ++++++++++++++++++++++----
+ backend/src/backend/gen_encoder.hpp        |   10 ++--
+ backend/src/backend/gen_insn_selection.cpp |   44 +++++++++++-------
+ backend/src/backend/program.cpp            |   33 ++++++++++++-
+ backend/src/backend/program.h              |   25 ++++++++++
+ backend/src/backend/program.hpp            |    9 ++++
+ backend/src/ir/function.cpp                |    1 +
+ backend/src/ir/function.hpp                |    4 ++
+ backend/src/ir/image.cpp                   |   69 ++++++++++++++++++++++++++++
+ backend/src/ir/image.hpp                   |   65 ++++++++++++++++++++++++++
+ backend/src/ir/instruction.hpp             |    7 +++
+ backend/src/ir/sampler.cpp                 |    3 +-
+ backend/src/llvm/llvm_gen_backend.cpp      |    1 +
+ src/cl_command_queue.c                     |   29 +++++++-----
+ src/cl_command_queue.h                     |    3 ++
+ src/cl_command_queue_gen7.c                |    2 +
+ src/cl_driver.h                            |   17 +++----
+ src/cl_kernel.c                            |   20 ++++++++
+ src/cl_kernel.h                            |    2 +
+ src/intel/intel_driver.c                   |    7 ++-
+ src/intel/intel_gpgpu.c                    |   27 ++---------
+ 23 files changed, 383 insertions(+), 106 deletions(-)
+ create mode 100644 backend/src/ir/image.cpp
+ create mode 100644 backend/src/ir/image.hpp
+
+Index: beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/CMakeLists.txt	2013-05-14 20:09:16.362021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt	2013-05-14 20:09:28.634021436 +0200
+@@ -63,6 +63,8 @@
+     ir/constant.hpp
+     ir/sampler.cpp
+     ir/sampler.hpp
++    ir/image.cpp
++    ir/image.hpp
+     ir/instruction.cpp
+     ir/instruction.hpp
+     ir/liveness.cpp
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_context.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_context.cpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_context.cpp	2013-05-14 20:09:28.634021436 +0200
+@@ -287,47 +287,36 @@
+   void GenContext::emitSampleInstruction(const SelectionInstruction &insn) {
+     const GenRegister dst = ra->genReg(insn.dst(0));
+     const GenRegister msgPayload = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_F);
+-    const GenRegister bti = ra->genReg(insn.src(4));
+-    const GenRegister sampler = ra->genReg(insn.src(5));
+-    const GenRegister ucoord = ra->genReg(insn.src(6));
+-    const GenRegister vcoord = ra->genReg(insn.src(7));
+-    const GenRegister wcoord = ra->genReg(insn.src(8));
+-    const GenRegister temp = GenRegister::ud1grf(msgPayload.nr, msgPayload.subnr/sizeof(float) + 4);
+-    const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
++    const unsigned char bti = insn.extra.function;
++    const unsigned char sampler = insn.extra.elem;
++    const GenRegister ucoord = ra->genReg(insn.src(4));
++    const GenRegister vcoord = ra->genReg(insn.src(5));
++    const GenRegister wcoord = ra->genReg(insn.src(6));
+     uint32_t simdWidth = p->curr.execWidth;
+     p->push();
+     const uint32_t nr = msgPayload.nr;
+     // prepare mesg desc and move to a0.0.
+     // desc = bti | (sampler << 8) | (0 << 12) | (2 << 16) | (0 << 18) | (0 << 19) | (4 << 20) | (1 << 25) | (0 < 29) | (0 << 31)
+-    p->curr.execWidth = 1;
+-    p->MOV(a0_0, GenRegister::immud((GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE << 12) | (2 << 17)
+-                                    | ((4 * (simdWidth/8)) << 20)
+-                                    | ((2 * (simdWidth/8)) << 25)));
+-    p->SHL(temp, GenRegister::ud1grf(sampler.nr, sampler.subnr/sizeof(float)), GenRegister::immud(8));
+-    p->OR(a0_0, a0_0, temp);
+-    p->OR(a0_0, a0_0, GenRegister::ud1grf(bti.nr, bti.subnr/sizeof(float)));
+-    p->curr.execWidth = simdWidth;
+     /* Prepare message payload. */
+     p->MOV(GenRegister::f8grf(nr , 0), ucoord);
+     p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord);
+     if (insn.src(8).reg() != 0)
+       p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord);
+-    p->SAMPLE(dst, msgPayload, a0_0, -1, 0);
++    p->SAMPLE(dst, msgPayload, false, bti, sampler, simdWidth, -1, 0);
+ 
+     p->pop();
+   }
+ 
+   void GenContext::emitTypedWriteInstruction(const SelectionInstruction &insn) {
+     const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_UD);
+-    const GenRegister bti = ra->genReg(insn.src(0 + insn.extra.elem));
+-    const GenRegister ucoord = ra->genReg(insn.src(1 + insn.extra.elem));
+-    const GenRegister vcoord = ra->genReg(insn.src(2 + insn.extra.elem));
+-    const GenRegister wcoord = ra->genReg(insn.src(3 + insn.extra.elem));
+-    const GenRegister R = ra->genReg(insn.src(4 + insn.extra.elem));
+-    const GenRegister G = ra->genReg(insn.src(5 + insn.extra.elem));
+-    const GenRegister B = ra->genReg(insn.src(6 + insn.extra.elem));
+-    const GenRegister A = ra->genReg(insn.src(7 + insn.extra.elem));
+-    const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
++    const GenRegister ucoord = ra->genReg(insn.src(insn.extra.elem));
++    const GenRegister vcoord = ra->genReg(insn.src(1 + insn.extra.elem));
++    const GenRegister wcoord = ra->genReg(insn.src(2 + insn.extra.elem));
++    const GenRegister R = ra->genReg(insn.src(3 + insn.extra.elem));
++    const GenRegister G = ra->genReg(insn.src(4 + insn.extra.elem));
++    const GenRegister B = ra->genReg(insn.src(5 + insn.extra.elem));
++    const GenRegister A = ra->genReg(insn.src(6 + insn.extra.elem));
++    const unsigned char bti = insn.extra.function;
+ 
+     p->push();
+     uint32_t simdWidth = p->curr.execWidth;
+@@ -339,8 +328,6 @@
+ 
+     // prepare mesg desc and move to a0.0.
+     // desc = bti | (msg_type << 14) | (header_present << 19))
+-    p->MOV(a0_0, GenRegister::immud((GEN_TYPED_WRITE << 14) | (1 << 19) | (9 << 25)));
+-    p->OR(a0_0, a0_0, GenRegister::ud1grf(bti.nr, bti.subnr/sizeof(float)));
+     // prepare header, we need to enable all the 8 planes.
+     p->MOV(GenRegister::ud8grf(nr, 7), GenRegister::immud(0xff));
+     // Typed write only support SIMD8.
+@@ -368,7 +355,7 @@
+       QUARTER_MOV1(nr + 7, B);
+       QUARTER_MOV1(nr + 8, A);
+ #undef QUARTER_MOV
+-      p->TYPED_WRITE(header, a0_0);
++      p->TYPED_WRITE(header, true, bti);
+     }
+ 
+     p->pop();
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_encoder.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_encoder.cpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_encoder.cpp	2013-05-14 20:09:28.634021436 +0200
+@@ -166,6 +166,39 @@
+   }
+ #endif
+ 
++  static void setSamplerMessage(GenEncoder *p,
++                                GenInstruction *insn,
++                                unsigned char bti,
++                                unsigned char sampler,
++                                uint32_t msg_type,
++                                uint32_t response_length,
++                                uint32_t msg_length,
++                                bool header_present,
++                                uint32_t simd_mode,
++                                uint32_t return_format)
++  {
++     const GenMessageTarget sfid = GEN_SFID_SAMPLER;
++     setMessageDescriptor(p, insn, sfid, msg_length, response_length);
++     insn->bits3.sampler_gen7.bti = bti;
++     insn->bits3.sampler_gen7.sampler = sampler;
++     insn->bits3.sampler_gen7.msg_type = msg_type;
++     insn->bits3.sampler_gen7.simd_mode = simd_mode;
++  }
++
++
++  static void setTypedWriteMessage(GenEncoder *p,
++                                   GenInstruction *insn,
++                                   unsigned char bti,
++                                   unsigned char msg_type,
++                                   uint32_t msg_length,
++                                   bool header_present)
++  {
++     const GenMessageTarget sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
++     setMessageDescriptor(p, insn, sfid, msg_length, 0, header_present);
++     insn->bits3.gen7_typed_rw.bti = bti;
++     insn->bits3.gen7_typed_rw.msg_type = msg_type;
++  }
++
+   //////////////////////////////////////////////////////////////////////////
+   // Gen Emitter encoding class
+   //////////////////////////////////////////////////////////////////////////
+@@ -800,31 +833,44 @@
+   }
+ 
+   void GenEncoder::SAMPLE(GenRegister dest,
+-                          GenRegister src0,
+-                          GenRegister src1,
++                          GenRegister msg,
++                          bool header_present,
++                          unsigned char bti,
++                          unsigned char sampler,
++                          uint32_t simdWidth,
+                           uint32_t writemask,
+                           uint32_t return_format)
+   {
+      if (writemask == 0) return;
+-
++     uint32_t msg_type = (simdWidth == 16) ?
++                            GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE : GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE;
++     uint32_t response_length = (4 * (simdWidth / 8));
++     uint32_t msg_length = (2 * (simdWidth / 8));
++     if (header_present)
++       msg_length++;
++     uint32_t simd_mode = (simdWidth == 16) ?
++                            GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
+      GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+      insn->header.predicate_control = 0; /* XXX */
+      this->setHeader(insn);
+      this->setDst(insn, dest);
+-     this->setSrc0(insn, src0);
+-     this->setSrc1(insn, src1);
+-     insn->header.destreg_or_condmod = GEN_SFID_SAMPLER;
++     this->setSrc0(insn, msg);
++     setSamplerMessage(this, insn, bti, sampler, msg_type,
++                       response_length, msg_length,
++                       header_present,
++                       simd_mode, return_format);
+   }
+ 
+-  void GenEncoder::TYPED_WRITE(GenRegister header, GenRegister desc)
++  void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti)
+   {
+      GenInstruction *insn = this->next(GEN_OPCODE_SEND);
++     uint32_t msg_type = GEN_TYPED_WRITE;
++     uint32_t msg_length = header_present ? 9 : 8;
+      insn->header.predicate_control = 0; /* XXX */
+      this->setHeader(insn);
+      this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+-     this->setSrc0(insn, header);
+-     this->setSrc1(insn, desc);
+-     insn->header.destreg_or_condmod = GEN6_SFID_DATAPORT_RENDER_CACHE;
++     this->setSrc0(insn, msg);
++     setTypedWriteMessage(this, insn, bti, msg_type, msg_length, header_present);
+   }
+ 
+   void GenEncoder::EOT(uint32_t msg) {
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_encoder.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_encoder.hpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_encoder.hpp	2013-05-14 20:09:28.634021436 +0200
+@@ -140,14 +140,18 @@
+     void BYTE_SCATTER(GenRegister src, uint32_t bti, uint32_t elemSize);
+     /*! Send instruction for the sampler */
+     void SAMPLE(GenRegister dest,
+-                GenRegister src0,
+-                GenRegister src1,
++                GenRegister msg,
++                bool header_present,
++                unsigned char bti,
++                unsigned char sampler,
++                unsigned int simdWidth,
+                 uint32_t writemask,
+                 uint32_t return_format);
+ 
+     /*! TypedWrite instruction for texture */
+     void TYPED_WRITE(GenRegister header,
+-                     GenRegister desc);
++                     bool header_present,
++                     unsigned char bti);
+     /*! Extended math function (2 sources) */
+     void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1);
+     /*! Extended math function (1 source) */
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_insn_selection.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/gen_insn_selection.cpp	2013-05-14 20:09:25.126021592 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/gen_insn_selection.cpp	2013-05-14 20:09:28.638021436 +0200
+@@ -466,9 +466,9 @@
+     /*! Encode ternary instructions */
+     void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
+     /*! Encode sample instructions */
+-    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum);
++    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler);
+     /*! Encode typed write instructions */
+-    void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum);
++    void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum, uint32_t bti);
+     /*! Use custom allocators */
+     GBE_CLASS(Opaque);
+     friend class SelectionBlock;
+@@ -964,8 +964,11 @@
+       this->matchBasicBlock(insnNum);
+     });
+    }
+- /* XXX always 4 return values? */
+-  void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *src, uint32_t srcNum, GenRegister *msgPayloads, uint32_t msgNum) {
++
++  void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum,
++                                 GenRegister *src, uint32_t srcNum,
++                                 GenRegister *msgPayloads, uint32_t msgNum,
++                                 uint32_t bti, uint32_t sampler) {
+     SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum, msgNum + srcNum);
+     SelectionVector *dstVector = this->appendVector();
+     SelectionVector *msgVector = this->appendVector();
+@@ -987,6 +990,9 @@
+     msgVector->regNum = msgNum;
+     msgVector->isSrc = 1;
+     msgVector->reg = &insn->src(0);
++
++    insn->extra.function = bti;
++    insn->extra.elem = sampler;
+   }
+ 
+   ///////////////////////////////////////////////////////////////////////////
+@@ -999,7 +1005,8 @@
+   }
+ 
+   void Selection::Opaque::TYPED_WRITE(GenRegister *src, uint32_t srcNum,
+-                              GenRegister *msgs, uint32_t msgNum) {
++                                      GenRegister *msgs, uint32_t msgNum,
++                                      uint32_t bti) {
+     uint32_t elemID = 0;
+     uint32_t i;
+     SelectionInstruction *insn = this->appendInsn(SEL_OP_TYPED_WRITE, 0, msgNum + srcNum);
+@@ -1010,6 +1017,7 @@
+     for (i = 0; i < srcNum; ++i, ++elemID)
+       insn->src(elemID) = src[i];
+ 
++    insn->extra.function = bti;
+     insn->extra.elem = msgNum;
+     // Sends require contiguous allocation
+     msgVector->regNum = msgNum;
+@@ -1965,7 +1973,7 @@
+     {
+       using namespace ir;
+       GenRegister msgPayloads[4];
+-      GenRegister dst[insn.getDstNum()], src[insn.getSrcNum()];
++      GenRegister dst[insn.getDstNum()], src[insn.getSrcNum() - 2];
+ 
+       for( int i = 0; i < 4; ++i)
+         msgPayloads[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+@@ -1973,10 +1981,15 @@
+       for (uint32_t valueID = 0; valueID < insn.getDstNum(); ++valueID)
+         dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
+ 
+-      for (uint32_t valueID = 0; valueID < insn.getSrcNum(); ++valueID)
+-        src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
++      for (uint32_t valueID = 0; valueID < insn.getSrcNum() - 2; ++valueID)
++        src[valueID] = sel.selReg(insn.getSrc(valueID + 2), insn.getSrcType());
++
++      uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx
++                       (insn.getSrc(SampleInstruction::SURFACE_BTI));
++      uint32_t sampler = sel.ctx.getFunction().getSamplerSet()->getIdx
++                           (insn.getSrc(SampleInstruction::SAMPLER_BTI));
+ 
+-      sel.SAMPLE(dst, insn.getDstNum(), src, insn.getSrcNum(), msgPayloads, 4);
++      sel.SAMPLE(dst, insn.getDstNum(), src, insn.getSrcNum() - 2, msgPayloads, 4, bti, sampler);
+       return true;
+     }
+     DECL_CTOR(SampleInstruction, 1, 1);
+@@ -1998,17 +2011,16 @@
+       for(uint32_t i = 0; i < msgNum; i++)
+         msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+ 
+-      // bti always uses TYPE_U32.
+-      src[valueID] = sel.selReg(insn.getSrc(valueID), TYPE_U32);
+-      valueID++;
+       // u, v, w coords should use coord type.
+       for (; valueID < 1 + coordNum; ++valueID)
+-        src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getCoordType());
++        src[valueID] = sel.selReg(insn.getSrc(valueID + 1), insn.getCoordType());
+ 
+-      for (; valueID < insn.getSrcNum(); ++valueID)
+-        src[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
++      for (; (valueID + 1) < insn.getSrcNum(); ++valueID)
++        src[valueID] = sel.selReg(insn.getSrc(valueID + 1), insn.getSrcType());
+ 
+-      sel.TYPED_WRITE(src, insn.getSrcNum(), msgs, msgNum);
++      uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx
++                       (insn.getSrc(TypedWriteInstruction::SURFACE_BTI));
++      sel.TYPED_WRITE(src, insn.getSrcNum() - 1, msgs, msgNum, bti);
+       return true;
+     }
+     DECL_CTOR(TypedWriteInstruction, 1, 1);
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.cpp	2013-05-14 20:09:16.366021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.cpp	2013-05-14 20:09:28.638021436 +0200
+@@ -49,11 +49,12 @@
+ namespace gbe {
+ 
+   Kernel::Kernel(const std::string &name) :
+-    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL), samplerSet(NULL)
++    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL), samplerSet(NULL), imageSet(NULL)
+   {}
+   Kernel::~Kernel(void) {
+     if(ctx) GBE_DELETE(ctx);
+     if(samplerSet) GBE_DELETE(samplerSet);
++    if(imageSet) GBE_DELETE(imageSet);
+     GBE_SAFE_DELETE_ARRAY(args);
+   }
+   int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
+@@ -92,6 +93,7 @@
+       const std::string &name = pair.first;
+       Kernel *kernel = this->compileKernel(unit, name);
+       kernel->setSamplerSet(pair.second->getSamplerSet());
++      kernel->setImageSet(pair.second->getImageSet());
+       kernels.insert(std::make_pair(name, kernel));
+     }
+     return true;
+@@ -264,6 +266,27 @@
+     kernel->getSamplerData(samplers);
+   }
+ 
++  static size_t kernelGetImageSize(gbe_kernel gbeKernel) { // Number of images in the kernel's image set
++    if (gbeKernel == NULL) return 0; // no kernel: report zero images
++    const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel;
++    return kernel->getImageSize();
++  }
++
++  static void kernelGetImageData(gbe_kernel gbeKernel, ImageInfo *images) { // Copy the kernel's ImageInfo entries into images
++    if (gbeKernel == NULL) return; // no kernel: images is left untouched
++    const gbe::Kernel *kernel = (const gbe::Kernel*) gbeKernel;
++    kernel->getImageData(images);
++  }
++
++  static uint32_t gbeImageBaseIndex = 0; // file-scope state: first index handed out to images, shared by every image set
++  static void setImageBaseIndex(uint32_t baseIdx) { // Exported through gbe_set_image_base_index
++     gbeImageBaseIndex = baseIdx;
++  }
++
++  static uint32_t getImageBaseIndex() { // Exported through gbe_get_image_base_index
++    return gbeImageBaseIndex;
++  }
++
+   static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
+     return 0u;
+   }
+@@ -293,6 +316,10 @@
+ GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
+ GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size = NULL;
+ GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data = NULL;
++GBE_EXPORT_SYMBOL gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size = NULL;
++GBE_EXPORT_SYMBOL gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data = NULL;
++GBE_EXPORT_SYMBOL gbe_set_image_base_index_cb *gbe_set_image_base_index = NULL;
++GBE_EXPORT_SYMBOL gbe_get_image_base_index_cb *gbe_get_image_base_index = NULL;
+ 
+ namespace gbe
+ {
+@@ -322,6 +349,10 @@
+       gbe_kernel_use_slm = gbe::kernelUseSLM;
+       gbe_kernel_get_sampler_size = gbe::kernelGetSamplerSize;
+       gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData;
++      gbe_kernel_get_image_size = gbe::kernelGetImageSize;
++      gbe_kernel_get_image_data = gbe::kernelGetImageData;
++      gbe_get_image_base_index = gbe::getImageBaseIndex;
++      gbe_set_image_base_index = gbe::setImageBaseIndex;
+       genSetupCallBacks();
+     }
+   };
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.h	2013-05-14 20:09:16.366021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.h	2013-05-14 20:09:28.638021436 +0200
+@@ -87,6 +87,31 @@
+   GBE_CONSTANT_BUFFER = 1 /* constant buffer argument location in curbe */
+ };
+ 
++typedef struct ImageInfo { /* Per-image record filled in by the compiler and read by the runtime */
++    int32_t arg_idx; /* ID of the kernel argument that holds the image */
++    int32_t idx; /* index assigned to the image: image base index + position in its set */
++    int32_t wSlot; /* NOTE(review): the six *Slot fields are only ever set to -1 in this patch; meaning to be confirmed */
++    int32_t hSlot;
++    int32_t depthSlot;
++    int32_t dataTypeSlot;
++    int32_t channelOrderSlot;
++    int32_t dimOrderSlot;
++} ImageInfo;
++
++typedef void (gbe_set_image_base_index_cb)(uint32_t base_idx);
++extern gbe_set_image_base_index_cb *gbe_set_image_base_index;
++
++typedef uint32_t (gbe_get_image_base_index_cb)();
++extern gbe_get_image_base_index_cb *gbe_get_image_base_index;
++
++/*! Get the size of defined images */
++typedef size_t (gbe_kernel_get_image_size_cb)(gbe_kernel gbeKernel);
++extern gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size;
++
++/*! Get the content of defined images */
++typedef void (gbe_kernel_get_image_data_cb)(gbe_kernel gbeKernel, ImageInfo *images);
++extern gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data;
++
+ /*! Create a new program from the given source code (zero terminated string) */
+ typedef gbe_program (gbe_program_new_from_source_cb)(const char *source,
+                                                      size_t stringSize,
+Index: beignet-0.1+git20130514+19e9c58/backend/src/backend/program.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/backend/program.hpp	2013-05-14 20:09:16.366021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/backend/program.hpp	2013-05-14 20:09:28.638021436 +0200
+@@ -118,6 +118,14 @@
+     size_t getSamplerSize(void) const { return samplerSet->getDataSize(); }
+     /*! Get defined sampler value array */
+     void getSamplerData(uint32_t *samplers) const { samplerSet->getData(samplers); }
++    /*! Set image set. The pointer is stored as is (no copy) and is deleted by ~Kernel. */
++    void setImageSet(ir::ImageSet * from) {
++      imageSet = from;
++    }
++    /*! Get the number of defined images. imageSet is dereferenced unchecked, so it must have been set. */
++    size_t getImageSize(void) const { return imageSet->getDataSize(); }
++    /*! Copy the defined ImageInfo entries into images. imageSet is dereferenced unchecked, so it must have been set. */
++    void getImageData(ImageInfo *images) const { imageSet->getData(images); }
+   protected:
+     friend class Context;      //!< Owns the kernels
+     const std::string name;    //!< Kernel name
+@@ -130,6 +138,7 @@
+     bool useSLM;               //!< SLM requires a special HW config
+     Context *ctx;              //!< Save context after compiler to alloc constant buffer curbe
+     ir::SamplerSet *samplerSet;//!< Copy from the corresponding function.
++    ir::ImageSet *imageSet;    //!< Copy from the corresponding function.
+     GBE_CLASS(Kernel);         //!< Use custom allocators
+   };
+ 
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.cpp	2013-05-14 20:09:16.366021983 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.cpp	2013-05-14 20:09:28.638021436 +0200
+@@ -47,6 +47,7 @@
+   {
+     initProfile(*this);
+     samplerSet = GBE_NEW(SamplerSet);
++    imageSet = GBE_NEW(ImageSet);
+   }
+ 
+   Function::~Function(void) {
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/function.hpp	2013-05-14 20:09:26.818021517 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/function.hpp	2013-05-14 20:09:28.638021436 +0200
+@@ -29,6 +29,7 @@
+ #include "ir/instruction.hpp"
+ #include "ir/profile.hpp"
+ #include "ir/sampler.hpp"
++#include "ir/image.hpp"
+ #include "sys/vector.hpp"
+ #include "sys/set.hpp"
+ #include "sys/map.hpp"
+@@ -302,6 +303,8 @@
+     INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; }
+     /*! Get sampler set in this function */
+     SamplerSet* getSamplerSet(void) const {return samplerSet; }
++    /*! Get image set in this function */
++    ImageSet* getImageSet(void) const {return imageSet; }
+   private:
+     friend class Context;           //!< Can freely modify a function
+     std::string name;               //!< Function name
+@@ -318,6 +321,7 @@
+     uint32_t simdWidth;             //!< 8 or 16 if forced, 0 otherwise
+     bool useSLM;                    //!< Is SLM required?
+     SamplerSet *samplerSet;          //!< samplers used in this function.
++    ImageSet* imageSet;              //!< Image set in this function's arguments..
+     GBE_CLASS(Function);            //!< Use custom allocator
+   };
+ 
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/image.cpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/image.cpp	2013-05-14 20:09:28.638021436 +0200
+@@ -0,0 +1,69 @@
++/*
++ * Copyright © 2012 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++
++/**
++ * \file image.cpp
++ *
++ */
++#include "image.hpp"
++#include "context.hpp"
++#include "ocl_common_defines.h"
++#include "backend/program.h"
++
++namespace gbe {
++namespace ir {
++
++  void ImageSet::append(Register imageReg, Context *ctx) // Register imageReg as an image argument and assign it an index
++  {
++    ir::FunctionArgument *arg =  ctx->getFunction().getArg(imageReg);
++    GBE_ASSERTM(arg && arg->type == ir::FunctionArgument::IMAGE, "Append an invalid reg to image set.");
++    GBE_ASSERTM(regMap.find(imageReg) == regMap.end(), "Append the same image reg twice.");
++
++    int32_t id = ctx->getFunction().getArgID(arg); // argument ID as reported by the enclosing function
++    struct ImageInfo *imageInfo = GBE_NEW(struct ImageInfo); // released in ~ImageSet
++    imageInfo->arg_idx = id;
++    imageInfo->idx = regMap.size() + gbe_get_image_base_index(); // position in the set, offset by the image base index
++    imageInfo->wSlot = -1; // NOTE(review): -1 presumably means "no slot assigned"; nothing in this file changes it
++    imageInfo->hSlot = -1;
++    imageInfo->depthSlot = -1;
++    imageInfo->dataTypeSlot = -1;
++    imageInfo->channelOrderSlot = -1;
++    imageInfo->dimOrderSlot = -1;
++
++    regMap.insert(std::make_pair(imageReg, imageInfo));
++  }
++
++  const uint32_t ImageSet::getIdx(const Register imageReg) const // Return the index assigned to imageReg by append()
++  {
++    auto it = regMap.find(imageReg);
++    GBE_ASSERT(it != regMap.end()); // imageReg must have been appended before
++    return it->second->idx;
++  }
++
++  void ImageSet::getData(struct ImageInfo *imageInfos) const { // Copy every ImageInfo into imageInfos, ordered by index
++      for(auto &it : regMap) // append() set idx = position + base index, so idx - base is the array slot
++        imageInfos[it.second->idx - gbe_get_image_base_index()] = *it.second; // assumes the base index is unchanged since append() and imageInfos holds getDataSize() entries
++  }
++
++  ImageSet::~ImageSet() { // Release every ImageInfo allocated by append()
++    for(auto &it : regMap)
++      GBE_DELETE(it.second);
++  }
++
++} /* namespace ir */
++} /* namespace gbe */
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/image.hpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/image.hpp	2013-05-14 20:09:28.638021436 +0200
+@@ -0,0 +1,65 @@
++/*
++ * Copyright © 2012 Intel Corporation
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
++ *
++ */
++
++/**
++ * \file image.hpp
++ *
++ */
++#ifndef __GBE_IR_IMAGE_HPP__
++#define __GBE_IR_IMAGE_HPP__
++
++#include "ir/register.hpp"
++#include "sys/map.hpp"
++
++extern "C" {
++  struct ImageInfo;
++}
++
++namespace gbe {
++namespace ir {
++
++  class Context;
++  /*! An image set gathers the images that are defined in the kernel args
++   *  and allocates a unique index for each individual image. The backend
++   *  uses that index to identify the image's location. Every entry points
++   *  to an ImageInfo that the destructor deletes.
++   */
++  class ImageSet
++  {
++  public:
++    /*! Append an image argument. */
++    void append(Register imageReg, Context *ctx);
++    /*! Get the image's index (actual location). */
++    const uint32_t getIdx(const Register imageReg) const;
++    size_t getDataSize(void) { return regMap.size(); } //!< Number of images in the set
++    size_t getDataSize(void) const { return regMap.size(); } //!< Same, for const sets
++    void getData(struct ImageInfo *imageInfos) const; //!< Copy every ImageInfo into the caller's array
++    void operator = (const ImageSet& other) { // NOTE(review): copies the ImageInfo pointers, not the objects
++      regMap.insert(other.regMap.begin(), other.regMap.end());
++    }
++    ImageSet(const ImageSet& other) : regMap(other.regMap.begin(), other.regMap.end()) { } // NOTE(review): shallow copy as well
++    ImageSet() {}
++    ~ImageSet(); //!< Deletes every ImageInfo; NOTE(review): a copied set would delete them twice - confirm sets are never copied
++  private:
++    map<Register, struct ImageInfo *> regMap; //!< Image register -> its ImageInfo
++    GBE_CLASS(ImageSet);
++  };
++} /* namespace ir */
++} /* namespace gbe */
++
++#endif /* __GBE_IR_IMAGE_HPP__ */
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.hpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/instruction.hpp	2013-05-14 20:09:23.482021666 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.hpp	2013-05-14 20:09:28.642021436 +0200
+@@ -297,6 +297,9 @@
+   /*! Store data in an texture */
+   class TypedWriteInstruction : public Instruction {
+   public:
++    enum {
++     SURFACE_BTI = 0
++    };
+     /*! Return true if the given instruction is an instance of this class */
+     static bool isClassOf(const Instruction &insn);
+     Type getSrcType(void) const;
+@@ -306,6 +309,10 @@
+   /*! Load texels from a texture */
+   class SampleInstruction : public Instruction {
+   public:
++    enum {
++     SURFACE_BTI = 0,
++     SAMPLER_BTI = 1
++    };
+     /*! Return true if the given instruction is an instance of this class */
+     static bool isClassOf(const Instruction &insn);
+     Type getSrcType(void) const;
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/sampler.cpp	2013-05-14 20:09:26.818021517 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/sampler.cpp	2013-05-14 20:09:28.642021436 +0200
+@@ -36,12 +36,10 @@
+ 
+   void SamplerSet::appendReg(const Register reg, uint32_t key, Context *ctx) {
+     struct SamplerRegSlot samplerSlot;
+-    // This register is just used as a key.
+     samplerSlot.reg = reg;
+     samplerSlot.slot = samplerMap.size();
+     samplerMap.insert(std::make_pair(key, samplerSlot));
+     regMap.insert(std::make_pair(samplerSlot.reg, samplerSlot));
+-    ctx->LOADI(ir::TYPE_S32, samplerSlot.reg, ctx->newIntegerImmediate(samplerSlot.slot, ir::TYPE_S32));
+   }
+ 
+   Register SamplerSet::append(uint32_t samplerValue, Context *ctx)
+@@ -49,6 +47,7 @@
+     auto it = samplerMap.find(samplerValue);
+     if (it != samplerMap.end())
+         return it->second.reg;
++    // This register is just used as a key.
+     Register reg = ctx->reg(FAMILY_DWORD);
+     appendReg(reg, samplerValue, ctx);
+     return reg;
+Index: beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:26.818021517 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/llvm/llvm_gen_backend.cpp	2013-05-14 20:09:28.642021436 +0200
+@@ -914,6 +914,7 @@
+               break;
+               case ir::IMAGE:
+                 ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
++                ctx.getFunction().getImageSet()->append(reg, &ctx);
+               break;
+               break;
+               default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
+Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue.c	2013-05-14 20:09:26.818021517 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue.c	2013-05-14 20:09:28.642021436 +0200
+@@ -99,6 +99,21 @@
+ }
+ 
+ LOCAL cl_int
++cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) /* Bind every image listed in k->images; always returns CL_SUCCESS */
++{
++  uint32_t i;
++  for (i = 0; i < k->image_sz; i++) {
++    int id = k->images[i].arg_idx; /* kernel argument that holds this image */
++    assert(gbe_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE);
++    cl_gpgpu_bind_image(queue->gpgpu, k->images[i].idx, k->args[id].mem->bo, /* NOTE(review): mem is not NULL-checked - confirm the argument is always set here */
++                        k->args[id].mem->intel_fmt, k->args[id].mem->type,
++                        k->args[id].mem->w, k->args[id].mem->h,
++                        k->args[id].mem->pitch, k->args[id].mem->tiling);
++  }
++  return CL_SUCCESS;
++}
++
++LOCAL cl_int
+ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
+ {
+   /* Bind all user buffers (given by clSetKernelArg) */
+@@ -107,20 +122,10 @@
+   for (i = 0; i < k->arg_n; ++i) {
+     uint32_t offset; // location of the address in the curbe
+     arg_type = gbe_kernel_get_arg_type(k->opaque, i);
+-    if (arg_type != GBE_ARG_GLOBAL_PTR &&
+-        arg_type != GBE_ARG_IMAGE &&
+-        arg_type != GBE_ARG_SAMPLER)
++    if (arg_type != GBE_ARG_GLOBAL_PTR)
+       continue;
+     offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
+-    if (arg_type == GBE_ARG_IMAGE) {
+-      uint32_t *curbe_index = (uint32_t*)(k->curbe + offset);
+-      cl_gpgpu_bind_image(queue->gpgpu, curbe_index, k->args[i].mem->bo,
+-                          k->args[i].mem->intel_fmt, k->args[i].mem->type,
+-                          k->args[i].mem->w, k->args[i].mem->h,
+-                          k->args[i].mem->pitch, k->args[i].mem->tiling);
+-    } else if (arg_type == GBE_ARG_SAMPLER) {
+-    } else
+-      cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
++    cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
+   }
+ 
+   return CL_SUCCESS;
+Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue.h	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue.h	2013-05-14 20:09:28.642021436 +0200
+@@ -70,6 +70,9 @@
+ /* Bind all the surfaces in the GPGPU state */
+ extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
+ 
++/* Bind all the image surfaces in the GPGPU state */
++extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel);
++
+ /*update constant buffer to final curbe */
+ extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst);
+ #endif /* __CL_COMMAND_QUEUE_H__ */
+Index: beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_command_queue_gen7.c	2013-05-14 20:09:26.822021517 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_command_queue_gen7.c	2013-05-14 20:09:28.642021436 +0200
+@@ -224,6 +224,8 @@
+ 
+   /* Bind user buffers */
+   cl_command_queue_bind_surface(queue, ker);
++  /* Bind user images */
++  cl_command_queue_bind_image(queue, ker);
+   /* Bind all samplers */
+   cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, ker->sampler_sz);
+ 
+Index: beignet-0.1+git20130514+19e9c58/src/cl_driver.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_driver.h	2013-05-14 20:09:19.978021822 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_driver.h	2013-05-14 20:09:28.646021435 +0200
+@@ -116,14 +116,15 @@
+ 
+ /* Set a 2d texture */
+ typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state,
+-                                        uint32_t *curbe_index,
+-                                        cl_buffer obj_bo,
+-                                        uint32_t format,
+-                                        uint32_t type,
+-                                        int32_t w,
+-                                        int32_t h,
+-                                        int pitch,
+-                                        cl_gpgpu_tiling tiling);
++                                      uint32_t id,
++                                      cl_buffer obj_bo,
++                                      uint32_t format,
++                                      uint32_t type,
++                                      int32_t w,
++                                      int32_t h,
++                                      int pitch,
++                                      cl_gpgpu_tiling tiling);
++
+ extern cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image;
+ 
+ /* Setup a stack */
+Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.c	2013-05-14 20:09:26.822021517 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.c	2013-05-14 20:09:28.646021435 +0200
+@@ -56,6 +56,8 @@
+         cl_mem_delete(k->args[i].mem);
+     cl_free(k->args);
+   }
++  if (k->image_sz)
++    cl_free(k->images);
+   k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
+   cl_free(k);
+ }
+@@ -208,6 +210,18 @@
+   assert(k->sampler_sz <= GEN_MAX_SAMPLERS);
+   if (k->sampler_sz > 0)
+     gbe_kernel_get_sampler_data(k->opaque, k->samplers);
++  /* Get image data & size; the image count must not exceed GEN_MAX_SURFACES */
++  k->image_sz = gbe_kernel_get_image_size(k->opaque);
++  assert(k->image_sz <= GEN_MAX_SURFACES); /* check the image count just read, not sampler_sz */
++  if (k->image_sz > 0) {
++    TRY_ALLOC_NO_ERR(k->images, cl_calloc(k->image_sz, sizeof(k->images[0])));
++    gbe_kernel_get_image_data(k->opaque, k->images);
++  } else
++    k->images = NULL;
++  return;
++error:
++  cl_buffer_unreference(k->bo);
++  k->bo = NULL;
+ }
+ 
+ LOCAL cl_kernel
+@@ -227,8 +241,14 @@
+   to->arg_n = from->arg_n;
+   to->curbe_sz = from->curbe_sz;
+   to->sampler_sz = from->sampler_sz;
++  to->image_sz = from->image_sz;
+   if (to->sampler_sz)
+     memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
++  if (to->image_sz) {
++    TRY_ALLOC_NO_ERR(to->images, cl_calloc(to->image_sz, sizeof(to->images[0])));
++    memcpy(to->images, from->images, to->image_sz * sizeof(to->images[0]));
++  } else
++    to->images = NULL;
+   TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
+   if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
+ 
+Index: beignet-0.1+git20130514+19e9c58/src/cl_kernel.h
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_kernel.h	2013-05-14 20:09:26.822021517 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_kernel.h	2013-05-14 20:09:28.646021435 +0200
+@@ -54,6 +54,8 @@
+   size_t curbe_sz;            /* Size of it */
+   uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel & kernel args */
+   size_t sampler_sz;          /* sampler size defined in kernel & kernel args. */
++  struct ImageInfo *images;   /* images defined in kernel args */
++  size_t image_sz;            /* image count in kernel args */
+   cl_argument *args;          /* To track argument setting */
+   uint32_t arg_n:31;          /* Number of arguments */
+   uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
+Index: beignet-0.1+git20130514+19e9c58/src/intel/intel_driver.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/intel/intel_driver.c	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/src/intel/intel_driver.c	2013-05-14 20:09:28.646021435 +0200
+@@ -369,14 +369,17 @@
+   intel_driver_terminate(driver);
+   intel_driver_delete(driver);
+ }
+-
++#include "program.h"
+ static intel_driver_t*
+ cl_intel_driver_new(cl_context_prop props)
+ {
+   intel_driver_t *driver = NULL;
+   TRY_ALLOC_NO_ERR (driver, intel_driver_new());
+   intel_driver_open(driver, props);
+-
++  /* The first 2 slots (0 and 1) are used for all the buffers.
++   * Tell gbe about this base index so that it can avoid conflicts
++   * when it allocates slots for images. */
++  gbe_set_image_base_index(2);
+ exit:
+   return driver;
+ error:
+Index: beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c
+===================================================================
+--- beignet-0.1+git20130514+19e9c58.orig/src/intel/intel_gpgpu.c	2013-05-14 20:09:26.822021517 +0200
++++ beignet-0.1+git20130514+19e9c58/src/intel/intel_gpgpu.c	2013-05-14 20:09:28.646021435 +0200
+@@ -451,25 +451,6 @@
+   heap->binding_table[1] = sizeof(gen7_surface_state_t) + offsetof(surface_heap_t, surface);
+ }
+ 
+-static inline unsigned long
+-__fls(unsigned long x)
+-{
+-        asm("bsf %1,%0"
+-            : "=r" (x)
+-            : "rm" (x));
+-        return x;
+-}
+-
+-static int
+-intel_gpgpu_get_free_img_index(intel_gpgpu_t *gpgpu)
+-{
+-  int slot;
+-  assert(~gpgpu->img_bitmap != 0);
+-  slot = __fls(~gpgpu->img_bitmap);
+-  gpgpu->img_bitmap |= (1 << slot);
+-  return slot + gpgpu->img_index_base;
+-}
+-
+ static int
+ intel_get_surface_type(cl_mem_object_type type)
+ {
+@@ -490,7 +471,7 @@
+ 
+ static void
+ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
+-                              uint32_t *curbe_index,
++                              uint32_t index,
+                               dri_bo* obj_bo,
+                               uint32_t format,
+                               cl_mem_object_type type,
+@@ -499,7 +480,6 @@
+                               int32_t pitch,
+                               int32_t tiling)
+ {
+-  int32_t index = intel_gpgpu_get_free_img_index(gpgpu);
+   surface_heap_t *heap = gpgpu->surface_heap_b.bo->virtual;
+   gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
+ 
+@@ -521,7 +501,6 @@
+   }
+   ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+   intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo);
+-  *curbe_index = index;
+   gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
+ }
+ 
+@@ -544,7 +523,7 @@
+ 
+ static void
+ intel_gpgpu_bind_image(intel_gpgpu_t *gpgpu,
+-                       uint32_t *index,
++                       uint32_t index,
+                        cl_buffer *obj_bo,
+                        uint32_t format,
+                        cl_mem_object_type type,
+@@ -554,7 +533,7 @@
+                        cl_gpgpu_tiling tiling)
+ {
+   intel_gpgpu_bind_image_gen7(gpgpu, index, (drm_intel_bo*) obj_bo, format, type, w, h, pitch, tiling);
+-  assert(*index < GEN_MAX_SURFACES);
++  assert(index < GEN_MAX_SURFACES);
+ }
+ 
+ static void
diff --git a/debian/patches/clang-3.0 b/debian/patches/clang-3.0
deleted file mode 100644
index 393b379..0000000
--- a/debian/patches/clang-3.0
+++ /dev/null
@@ -1,20 +0,0 @@
-Description: Use clang 3.0 command line
-Author: Simon Richter <sjr at debian.org>
-Last-Update: 2013-04-19
-
-Index: beignet-0.1+git20130418+0546d2e/backend/src/backend/program.cpp
-===================================================================
---- beignet-0.1+git20130418+0546d2e.orig/backend/src/backend/program.cpp	2013-04-19 10:03:28.000000000 +0200
-+++ beignet-0.1+git20130418+0546d2e/backend/src/backend/program.cpp	2013-04-19 10:04:09.867084078 +0200
-@@ -118,11 +118,7 @@
-     fclose(clFile);
- 
-     // Now compile the code to llvm using clang
--#if LLVM_VERSION_MINOR <= 1
-     std::string compileCmd = "clang -x cl -fno-color-diagnostics -emit-llvm -O3 -ccc-host-triple ptx32 -c ";
--#else
--    std::string compileCmd = "clang -ffp-contract=off -emit-llvm -O3 -target nvptx -x cl -c ";
--#endif /* LLVM_VERSION_MINOR <= 1 */
-     compileCmd += clName;
-     compileCmd += " ";
-     if(options)
diff --git a/debian/patches/const64 b/debian/patches/const64
index 5ce8d29..79d9524 100644
--- a/debian/patches/const64
+++ b/debian/patches/const64
@@ -1,8 +1,8 @@
-Index: beignet-0.1+git20130419+9c11c18/backend/src/ir/instruction.cpp
+Index: beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.cpp
 ===================================================================
---- beignet-0.1+git20130419+9c11c18.orig/backend/src/ir/instruction.cpp	2013-04-19 10:28:49.000000000 +0200
-+++ beignet-0.1+git20130419+9c11c18/backend/src/ir/instruction.cpp	2013-04-19 19:17:43.329603279 +0200
-@@ -591,17 +591,17 @@
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/ir/instruction.cpp	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/ir/instruction.cpp	2013-05-14 20:07:24.282026980 +0200
+@@ -593,17 +593,17 @@
      static const Type madType[] = {TYPE_FLOAT};
      static const uint32_t madTypeNum = ARRAY_ELEM_NUM(madType);
  
diff --git a/debian/patches/implement-gefa b/debian/patches/implement-gefa
deleted file mode 100644
index 09c6362..0000000
--- a/debian/patches/implement-gefa
+++ /dev/null
@@ -1,21 +0,0 @@
-commit 08dadaf3f345f3feceefaacebf5045c72821b449
-Author: Simon Richter <Simon.Richter at hogyros.de>
-Date:   Tue Apr 2 15:11:01 2013 +0200
-
-    "Implement" clGetExtensionFunctionAddress()
-    
-    This function can legally return NULL in the current implementation.
-
-Index: beignet-0.1+git20130418+0546d2e/src/cl_api.c
-===================================================================
---- beignet-0.1+git20130418+0546d2e.orig/src/cl_api.c	2013-04-18 05:21:35.000000000 +0200
-+++ beignet-0.1+git20130418+0546d2e/src/cl_api.c	2013-04-18 11:52:29.225428977 +0200
-@@ -1006,7 +1006,7 @@
-                   cl_event *         event,
-                   cl_int *           errcode_ret)
- {
--  NOT_IMPLEMENTED;
-+  /* No extensions supported at present */
-   return NULL;
- }
- 
diff --git a/debian/patches/khronos b/debian/patches/khronos
index a191933..37ae544 100644
--- a/debian/patches/khronos
+++ b/debian/patches/khronos
@@ -2,10 +2,10 @@ Description: Use Khronos Group headers
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-04-01
 
-Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_ext.h
+Index: beignet-0.1+git20130514+19e9c58/include/CL/cl_ext.h
 ===================================================================
---- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl_ext.h	2013-04-12 08:13:48.000000000 +0200
-+++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_ext.h	2013-04-15 18:25:01.036323041 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl_ext.h	2013-05-14 20:04:50.338033844 +0200
++++ beignet-0.1+git20130514+19e9c58/include/CL/cl_ext.h	2013-05-14 20:04:53.670033695 +0200
 @@ -1,251 +1 @@
 -/*******************************************************************************
 - * Copyright (c) 2008 - 2012 The Khronos Group Inc.
@@ -259,10 +259,10 @@ Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_ext.h
 -
 -#endif /* __CL_EXT_H */
 +#include_next <CL/cl_ext.h>
-Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/opencl.h
+Index: beignet-0.1+git20130514+19e9c58/include/CL/opencl.h
 ===================================================================
---- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/opencl.h	2013-04-12 08:13:48.000000000 +0200
-+++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/opencl.h	2013-04-15 18:25:01.036323041 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/include/CL/opencl.h	2013-05-14 20:04:50.338033844 +0200
++++ beignet-0.1+git20130514+19e9c58/include/CL/opencl.h	2013-05-14 20:04:53.674033695 +0200
 @@ -1,54 +1 @@
 -/*******************************************************************************
 - * Copyright (c) 2008-2012 The Khronos Group Inc.
@@ -319,10 +319,10 @@ Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/opencl.h
 -#endif  /* __OPENCL_H   */
 -
 +#include_next <CL/opencl.h>
-Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_d3d10.h
+Index: beignet-0.1+git20130514+19e9c58/include/CL/cl_d3d10.h
 ===================================================================
---- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl_d3d10.h	2013-04-12 08:13:48.000000000 +0200
-+++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_d3d10.h	2013-04-15 18:25:01.036323041 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl_d3d10.h	2013-05-14 20:04:50.338033844 +0200
++++ beignet-0.1+git20130514+19e9c58/include/CL/cl_d3d10.h	2013-05-14 20:04:53.674033695 +0200
 @@ -1,126 +1 @@
 -/**********************************************************************************
 - * Copyright (c) 2008-2012 The Khronos Group Inc.
@@ -451,10 +451,10 @@ Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_d3d10.h
 -#endif  // __OPENCL_CL_D3D10_H
 -
 +#include_next <CL/cl_d3d10.h>
-Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl.h
+Index: beignet-0.1+git20130514+19e9c58/include/CL/cl.h
 ===================================================================
---- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl.h	2013-04-12 08:13:48.000000000 +0200
-+++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl.h	2013-04-15 18:25:01.036323041 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl.h	2013-05-14 20:04:50.338033844 +0200
++++ beignet-0.1+git20130514+19e9c58/include/CL/cl.h	2013-05-14 20:04:53.674033695 +0200
 @@ -1,1214 +1 @@
 -/*******************************************************************************
 - * Copyright (c) 2008 - 2012 The Khronos Group Inc.
@@ -1671,10 +1671,10 @@ Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl.h
 -#endif  /* __OPENCL_CL_H */
 -
 +#include_next <CL/cl.h>
-Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_platform.h
+Index: beignet-0.1+git20130514+19e9c58/include/CL/cl_platform.h
 ===================================================================
---- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl_platform.h	2013-04-12 08:13:48.000000000 +0200
-+++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_platform.h	2013-04-15 18:25:01.036323041 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl_platform.h	2013-05-14 20:04:50.338033844 +0200
++++ beignet-0.1+git20130514+19e9c58/include/CL/cl_platform.h	2013-05-14 20:04:53.678033695 +0200
 @@ -1,1254 +1 @@
 -/**********************************************************************************
 - * Copyright (c) 2008-2012 The Khronos Group Inc.
@@ -2931,10 +2931,10 @@ Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_platform.h
 -
 -#endif  /* __CL_PLATFORM_H  */
 +#include_next <CL/cl_platform.h>
-Index: beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_gl.h
+Index: beignet-0.1+git20130514+19e9c58/include/CL/cl_gl.h
 ===================================================================
---- beignet-0.0.0+git2013.04.11+e6b503e.orig/include/CL/cl_gl.h	2013-04-12 08:13:48.000000000 +0200
-+++ beignet-0.0.0+git2013.04.11+e6b503e/include/CL/cl_gl.h	2013-04-15 18:25:01.036323041 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/include/CL/cl_gl.h	2013-05-14 20:04:50.338033844 +0200
++++ beignet-0.1+git20130514+19e9c58/include/CL/cl_gl.h	2013-05-14 20:04:53.678033695 +0200
 @@ -1,161 +1 @@
 -/**********************************************************************************
 - * Copyright (c) 2008 - 2012 The Khronos Group Inc.
diff --git a/debian/patches/missing-header b/debian/patches/missing-header
index dab28b7..09276d5 100644
--- a/debian/patches/missing-header
+++ b/debian/patches/missing-header
@@ -2,10 +2,10 @@ Description: Fix missing include
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-04-03
 
-Index: beignet-0.1+git20130418+0546d2e/backend/src/sys/alloc.hpp
+Index: beignet-0.1+git20130514+19e9c58/backend/src/sys/alloc.hpp
 ===================================================================
---- beignet-0.1+git20130418+0546d2e.orig/backend/src/sys/alloc.hpp	2013-04-18 05:21:35.000000000 +0200
-+++ beignet-0.1+git20130418+0546d2e/backend/src/sys/alloc.hpp	2013-04-18 11:53:00.793430809 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/sys/alloc.hpp	2013-05-14 20:04:48.810033912 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/sys/alloc.hpp	2013-05-14 20:05:43.974031452 +0200
 @@ -27,6 +27,7 @@
  #include "sys/platform.hpp"
  #include "sys/assert.hpp"
diff --git a/debian/patches/respect-flags b/debian/patches/respect-flags
index c7c84ff..1b17e6c 100644
--- a/debian/patches/respect-flags
+++ b/debian/patches/respect-flags
@@ -2,10 +2,10 @@ Description: Respect CFLAGS/CXXFLAGS from Debian
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-04-16
 
-Index: beignet-0.1+git20130419+9c11c18/CMakeLists.txt
+Index: beignet-0.1+git20130514+19e9c58/CMakeLists.txt
 ===================================================================
---- beignet-0.1+git20130419+9c11c18.orig/CMakeLists.txt	2013-04-19 10:28:49.000000000 +0200
-+++ beignet-0.1+git20130419+9c11c18/CMakeLists.txt	2013-04-19 19:05:35.000000000 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/CMakeLists.txt	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/CMakeLists.txt	2013-05-14 20:06:02.014030648 +0200
 @@ -18,7 +18,6 @@
  
  INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
@@ -14,21 +14,10 @@ Index: beignet-0.1+git20130419+9c11c18/CMakeLists.txt
  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/CMake/")
  SET(EMULATE_IVB false CACHE BOOL "To emulate IVB")
  SET(EMULATE_SNB false CACHE BOOL "To emulate SNB")
-@@ -55,8 +54,8 @@
-   ADD_DEFINITIONS(-DUSE_FULSIM=0)
- ENDIF (USE_FULSIM)
- 
--SET(CMAKE_CXX_FLAGS "-Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ")
--SET(CMAKE_C_FLAGS "-Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1")
-+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-invalid-offsetof -mfpmath=sse -fno-rtti -Wcast-align -std=c++0x -msse2 -msse3 -mssse3 -msse4.1 ")
-+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -mfpmath=sse -msse2 -Wcast-align -msse2 -msse3 -mssse3 -msse4.1")
- 
- # Front end stuff we need
- #INCLUDE(CMake/FindLLVM.cmake)
-Index: beignet-0.1+git20130419+9c11c18/backend/CMakeLists.txt
+Index: beignet-0.1+git20130514+19e9c58/backend/CMakeLists.txt
 ===================================================================
---- beignet-0.1+git20130419+9c11c18.orig/backend/CMakeLists.txt	2013-04-19 10:28:49.000000000 +0200
-+++ beignet-0.1+git20130419+9c11c18/backend/CMakeLists.txt	2013-04-19 19:09:27.509625385 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/backend/CMakeLists.txt	2013-05-08 11:55:52.000000000 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/CMakeLists.txt	2013-05-14 20:06:02.014030648 +0200
 @@ -45,39 +45,39 @@
  if (COMPILER STREQUAL "GCC")
    set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -funroll-loops -Wstrict-aliasing=2 -fstrict-aliasing -msse2 -msse3 -mssse3 -msse4.1 -fPIC -Wall")
diff --git a/debian/patches/series b/debian/patches/series
index d1dc3a1..2d64b61 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,8 +1,21 @@
 khronos
 verbose
-implement-gefa
 missing-header
 soname
 respect-flags
-clang-3.0
 const64
+0001-Generate-all-supported-as_-functions.patch
+0002-Define-all-convert_-functions.patch
+0003-Add-long-and-ulong-types-to-conversions.patch
+0004-Make-libgbm-optional-without-EGL-support.patch
+0005-Define-clamp-value-lower-upper.patch
+0006-Add-clGetDeviceInfo-.-CL_BUILT_IN_KERNELS.patch
+0007-Correct-type-of-device-properties.patch
+0008-Update-gitignore-files.patch
+0009-GBE-refine-the-sampler-implementation-to-comply-with.patch
+0010-CL-Support-kernel-side-defined-samplers.patch
+0011-utests-Add-one-test-cases-for-sampler-support.patch
+0012-GBE-remove-sampler-address-space.patch
+0013-GBE-add-scalar-register-support-in-loadImmInstructio.patch
+0014-GBE-concentrate-all-samplers-allocation-at-compile-t.patch
+0015-GBE-Runtime-Optimize-Sample-TypedWrite-instruction.patch
diff --git a/debian/patches/soname b/debian/patches/soname
index 32597fa..2cca305 100644
--- a/debian/patches/soname
+++ b/debian/patches/soname
@@ -2,10 +2,10 @@ Description: Use proper SONAME
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-04-15
 
-Index: beignet-0.1+git20130418+0546d2e/backend/src/CMakeLists.txt
+Index: beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt
 ===================================================================
---- beignet-0.1+git20130418+0546d2e.orig/backend/src/CMakeLists.txt	2013-04-18 05:21:35.000000000 +0200
-+++ beignet-0.1+git20130418+0546d2e/backend/src/CMakeLists.txt	2013-04-18 11:53:59.985434244 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/backend/src/CMakeLists.txt	2013-05-14 20:04:47.974033949 +0200
++++ beignet-0.1+git20130514+19e9c58/backend/src/CMakeLists.txt	2013-05-14 20:05:52.618031067 +0200
 @@ -114,7 +114,10 @@
                        ${LLVM_MODULE_LIBS}
                        ${CMAKE_THREAD_LIBS_INIT}
@@ -18,10 +18,10 @@ Index: beignet-0.1+git20130418+0546d2e/backend/src/CMakeLists.txt
  install (TARGETS gbe LIBRARY DESTINATION lib)
  install (FILES backend/program.h DESTINATION include/gen)
  
-Index: beignet-0.1+git20130418+0546d2e/src/CMakeLists.txt
+Index: beignet-0.1+git20130514+19e9c58/src/CMakeLists.txt
 ===================================================================
---- beignet-0.1+git20130418+0546d2e.orig/src/CMakeLists.txt	2013-04-18 05:21:35.000000000 +0200
-+++ beignet-0.1+git20130418+0546d2e/src/CMakeLists.txt	2013-04-18 11:53:59.985434244 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/src/CMakeLists.txt	2013-05-14 20:04:47.974033949 +0200
++++ beignet-0.1+git20130514+19e9c58/src/CMakeLists.txt	2013-05-14 20:05:52.618031067 +0200
 @@ -58,4 +58,8 @@
                        ${OPENGL_LIBRARIES}
                        ${OPTIONAL_EGL_LIBRARY}
diff --git a/debian/patches/verbose b/debian/patches/verbose
index 033a2be..ce1a8fc 100644
--- a/debian/patches/verbose
+++ b/debian/patches/verbose
@@ -2,10 +2,10 @@ Description: More verbose errors
 Author: Simon Richter <sjr at debian.org>
 Last-Update: 2013-04-01
 
-Index: beignet-0.0.0+git2013.04.11+e6b503e/src/cl_utils.h
+Index: beignet-0.1+git20130514+19e9c58/src/cl_utils.h
 ===================================================================
---- beignet-0.0.0+git2013.04.11+e6b503e.orig/src/cl_utils.h	2013-04-12 08:13:48.000000000 +0200
-+++ beignet-0.0.0+git2013.04.11+e6b503e/src/cl_utils.h	2013-04-15 18:28:17.640334450 +0200
+--- beignet-0.1+git20130514+19e9c58.orig/src/cl_utils.h	2013-05-14 20:04:49.846033866 +0200
++++ beignet-0.1+git20130514+19e9c58/src/cl_utils.h	2013-05-14 20:05:00.822033376 +0200
 @@ -80,6 +80,7 @@
  
  #define FATAL(...)                                          \

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git