[Pkg-opencl-devel] [beignet] 31/66: Imported Upstream version 0.1+git20130619+42967d2

Andreas Beckmann anbe at moszumanska.debian.org
Fri Oct 31 07:27:05 UTC 2014


This is an automated email from the git hooks/post-receive script.

anbe pushed a commit to branch master
in repository beignet.

commit b7629fb360f6c094b50c3d519cce32d6a834f85b
Author: Simon Richter <sjr at debian.org>
Date:   Wed Jun 19 20:48:03 2013 +0200

    Imported Upstream version 0.1+git20130619+42967d2
---
 backend/src/backend/gen_context.cpp                |    13 +-
 backend/src/backend/gen_context.hpp                |     1 +
 backend/src/backend/gen_defs.hpp                   |    14 +
 backend/src/backend/gen_encoder.cpp                |     9 +
 backend/src/backend/gen_encoder.hpp                |     2 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |     1 +
 backend/src/backend/gen_insn_scheduling.cpp        |    20 +-
 backend/src/backend/gen_insn_selection.cpp         |    19 +-
 backend/src/backend/gen_insn_selection.hxx         |     1 +
 backend/src/ocl_stdlib.h                           |    29 +-
 include/CL/cl.h                                    |   364 +-
 include/CL/cl.hpp                                  | 10360 ++-----------------
 include/CL/cl_d3d10.h                              |     4 +-
 include/CL/cl_d3d11.h                              |   126 -
 include/CL/cl_d3d9.h                               |    98 -
 include/CL/cl_dx9_media_sharing.h                  |   127 -
 include/CL/cl_ext.h                                |    48 +-
 include/CL/cl_gl.h                                 |    90 +-
 include/CL/cl_gl_ext.h                             |     4 +-
 include/CL/cl_platform.h                           |    80 +-
 include/CL/opencl.h                                |     2 +-
 kernels/builtin_global_size.cl                     |     3 +
 kernels/compiler_function_constant0.cl             |     2 +-
 kernels/compiler_local_memory_barrier_2.cl         |     7 +
 src/cl_api.c                                       |   109 +-
 src/cl_device_id.c                                 |     4 +
 src/cl_mem.h                                       |    19 +
 src/cl_mem_gl.c                                    |     4 +
 src/cl_program.c                                   |    70 +-
 src/cl_program.h                                   |     4 +-
 src/intel/intel_gpgpu.c                            |     7 +
 utests/CMakeLists.txt                              |     4 +-
 utests/builtin_global_size.cpp                     |   108 +
 utests/compiler_box_blur_image.cpp                 |    11 +-
 utests/compiler_copy_image.cpp                     |    10 +-
 utests/compiler_copy_image1.cpp                    |    18 +-
 utests/compiler_copy_image_3d.cpp                  |    11 +-
 utests/compiler_fill_gl_image.cpp                  |     2 +-
 utests/compiler_fill_image.cpp                     |     7 +-
 utests/compiler_fill_image0.cpp                    |     7 +-
 utests/compiler_fill_image_3d.cpp                  |     9 +-
 utests/compiler_fill_image_3d_2.cpp                |     9 +-
 utests/compiler_function_constant0.cpp             |     6 +-
 utests/compiler_get_image_info.cpp                 |    12 +-
 utests/compiler_local_memory_barrier_2.cpp         |    29 +
 utests/compiler_movforphi_undef.cpp                |    10 +-
 utests/utest_helper.hpp                            |    12 +
 47 files changed, 1675 insertions(+), 10231 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 055c8fc..70c5bcf 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -193,6 +193,12 @@ namespace gbe
     p->BARRIER(src);
   }
 
+  void GenContext::emitFenceInstruction(const SelectionInstruction &insn) {
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    p->FENCE(dst);
+    p->MOV(dst, dst);
+  }
+
   void GenContext::emitMathInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
@@ -217,7 +223,12 @@ namespace gbe
   }
 
   void GenContext::emitIndirectMoveInstruction(const SelectionInstruction &insn) {
-    const GenRegister src = GenRegister::unpacked_uw(ra->genReg(insn.src(0)).nr, 0);
+    GenRegister src = ra->genReg(insn.src(0));
+    if(isScalarReg(src.reg()))
+      src = GenRegister::retype(src, GEN_TYPE_UW);
+    else
+      src = GenRegister::unpacked_uw(src.nr, src.subnr / typeSize(GEN_TYPE_UW));
+
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister a0 = GenRegister::addr8(0);
     uint32_t simdWidth = p->curr.execWidth;
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 7c28bdf..1566cbb 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -85,6 +85,7 @@ namespace gbe
     void emitNoOpInstruction(const SelectionInstruction &insn);
     void emitWaitInstruction(const SelectionInstruction &insn);
     void emitBarrierInstruction(const SelectionInstruction &insn);
+    void emitFenceInstruction(const SelectionInstruction &insn);
     void emitMathInstruction(const SelectionInstruction &insn);
     void emitUntypedReadInstruction(const SelectionInstruction &insn);
     void emitUntypedWriteInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index c7a1581..f4e4938 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -765,6 +765,20 @@ struct GenInstruction
       uint32_t end_of_thread:1;
     } gen7_typed_rw;
 
+    /*! Memory fence */
+    struct {
+      uint32_t bti:8;
+      uint32_t ingored:5;
+      uint32_t commit_enable:1;
+      uint32_t msg_type:4;
+      uint32_t pad2:1;
+      uint32_t header_present:1;
+      uint32_t response_length:5;
+      uint32_t msg_length:4;
+      uint32_t pad3:2;
+      uint32_t end_of_thread:1;
+    } gen7_memory_fence;
+
     struct {
       uint32_t src1_subreg_nr_high:1;
       uint32_t src1_reg_nr:8;
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index b65cc94..859a1b9 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -707,6 +707,15 @@ namespace gbe
      insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG;
      insn->bits3.msg_gateway.notify = 0x1;
   }
+  void GenEncoder::FENCE(GenRegister dst) {
+    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    this->setHeader(insn);
+    this->setDst(insn, dst);
+    this->setSrc0(insn, dst);
+    setMessageDescriptor(this, insn, GEN_SFID_DATAPORT_DATA_CACHE, 1, 1, 1);
+    insn->bits3.gen7_memory_fence.msg_type = GEN_MEM_FENCE;
+    insn->bits3.gen7_memory_fence.commit_enable = 0x1;
+  }
 
   void GenEncoder::JMPI(GenRegister src) {
     alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 83d83d2..c98774f 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -118,6 +118,8 @@ namespace gbe
 #undef ALU3
     /*! Barrier message (to synchronize threads of a workgroup) */
     void BARRIER(GenRegister src);
+    /*! Memory fence message (to order loads and stores between threads) */
+    void FENCE(GenRegister dst);
     /*! Jump indexed instruction */
     void JMPI(GenRegister src);
     /*! Compare instructions */
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index a2c0fba..098d9ec 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -11,6 +11,7 @@ DECL_GEN7_SCHEDULE(NoOp,            20,        2,        2)
 DECL_GEN7_SCHEDULE(Wait,            20,        2,        2)
 DECL_GEN7_SCHEDULE(Math,            20,        4,        2)
 DECL_GEN7_SCHEDULE(Barrier,         80,        1,        1)
+DECL_GEN7_SCHEDULE(Fence,           80,        1,        1)
 DECL_GEN7_SCHEDULE(UntypedRead,     80,        1,        1)
 DECL_GEN7_SCHEDULE(UntypedWrite,    80,        1,        1)
 DECL_GEN7_SCHEDULE(ByteGather,      80,        1,        1)
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index 95eedfe..cb990be 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -305,7 +305,7 @@ namespace gbe
       return simdWidth == 8 ? physical.nr : physical.nr / 2;
     }
     // We use virtual registers since allocation is not done yet
-    else 
+    else
       return reg.value.reg;
   }
 
@@ -345,7 +345,9 @@ namespace gbe
     }
 
     // Consider barriers and wait write to memory
-    if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+    if (insn.opcode == SEL_OP_BARRIER ||
+        insn.opcode == SEL_OP_FENCE ||
+        insn.opcode == SEL_OP_WAIT) {
       const uint32_t local = this->getIndex(0xfe);
       const uint32_t global = this->getIndex(0x00);
       this->nodes[local] = this->nodes[global] = node;
@@ -424,7 +426,9 @@ namespace gbe
       }
 
       // Consider barriers and wait are reading memory (local and global)
-      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+    if (insn.opcode == SEL_OP_BARRIER ||
+        insn.opcode == SEL_OP_FENCE ||
+        insn.opcode == SEL_OP_WAIT) {
         const uint32_t local = tracker.getIndex(0xfe);
         const uint32_t global = tracker.getIndex(0x00);
         tracker.addDependency(node, local);
@@ -450,7 +454,9 @@ namespace gbe
       }
 
       // Consider barriers and wait are writing memory (local and global)
-      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+    if (insn.opcode == SEL_OP_BARRIER ||
+        insn.opcode == SEL_OP_FENCE ||
+        insn.opcode == SEL_OP_WAIT) {
         const uint32_t local = tracker.getIndex(0xfe);
         const uint32_t global = tracker.getIndex(0x00);
         tracker.addDependency(node, local);
@@ -482,7 +488,9 @@ namespace gbe
       }
 
       // Consider barriers and wait are reading memory (local and global)
-      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+      if (insn.opcode == SEL_OP_BARRIER ||
+          insn.opcode == SEL_OP_FENCE ||
+          insn.opcode == SEL_OP_WAIT) {
         const uint32_t local = tracker.getIndex(0xfe);
         const uint32_t global = tracker.getIndex(0x00);
         tracker.addDependency(local, node);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 88f9e94..4e7cebd 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -431,6 +431,8 @@ namespace gbe
 #undef ALU3
     /*! Encode a barrier instruction */
     void BARRIER(GenRegister src);
+    /*! Encode a memory fence instruction */
+    void FENCE(GenRegister dst);
     /*! Encode a label instruction */
     void LABEL(ir::LabelIndex label);
     /*! Jump indexed instruction */
@@ -682,6 +684,11 @@ namespace gbe
     insn->src(0) = src;
   }
 
+  void Selection::Opaque::FENCE(GenRegister dst) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_FENCE, 1, 0);
+    insn->dst(0) = dst;
+  }
+
   void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1);
     insn->src(0) = src;
@@ -1607,17 +1614,21 @@ namespace gbe
     INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
     {
       using namespace ir;
-      const uint32_t params = insn.getParameters();
-      GBE_ASSERTM(params == syncLocalBarrier,
-                  "Only barrier(CLK_LOCAL_MEM_FENCE) is supported right now "
-                  "for the synchronization primitives");
       const ir::Register reg = sel.reg(FAMILY_DWORD);
 
+      const uint32_t params = insn.getParameters();
+      // TODO: double-check whether a local barrier also needs a fence
+      if(params == syncGlobalBarrier) {
+        const ir::Register fenceDst = sel.reg(FAMILY_DWORD);
+        sel.FENCE(sel.selReg(fenceDst, ir::TYPE_U32));
+      }
+
       sel.push();
         sel.curr.predicate = GEN_PREDICATE_NONE;
         sel.curr.execWidth = 8;
         sel.curr.physicalFlag = 0;
         sel.curr.noMask = 1;
+
         sel.SHL(GenRegister::ud8grf(reg),
                 GenRegister::ud1grf(ocl::threadn),
                 GenRegister::immud(0x9));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 455bb92..789c81c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -29,6 +29,7 @@ DECL_SELECTION_IR(NOP, NoOpInstruction)
 DECL_SELECTION_IR(WAIT, WaitInstruction)
 DECL_SELECTION_IR(MATH, MathInstruction)
 DECL_SELECTION_IR(BARRIER, BarrierInstruction)
+DECL_SELECTION_IR(FENCE, FenceInstruction)
 DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
 DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
 DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 46b81e1..b95b7ed 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -374,19 +374,22 @@ DECL_INTERNAL_WORK_ITEM_FN(get_global_offset)
 DECL_INTERNAL_WORK_ITEM_FN(get_num_groups)
 #undef DECL_INTERNAL_WORK_ITEM_FN
 
-#define DECL_PUBLIC_WORK_ITEM_FN(NAME) \
-INLINE unsigned NAME(unsigned int dim) { \
-  if (dim == 0) return __gen_ocl_##NAME##0(); \
-  else if (dim == 1) return __gen_ocl_##NAME##1(); \
-  else if (dim == 2) return __gen_ocl_##NAME##2(); \
-  else return 0; \
-}
-DECL_PUBLIC_WORK_ITEM_FN(get_group_id)
-DECL_PUBLIC_WORK_ITEM_FN(get_local_id)
-DECL_PUBLIC_WORK_ITEM_FN(get_local_size)
-DECL_PUBLIC_WORK_ITEM_FN(get_global_size)
-DECL_PUBLIC_WORK_ITEM_FN(get_global_offset)
-DECL_PUBLIC_WORK_ITEM_FN(get_num_groups)
+#define DECL_PUBLIC_WORK_ITEM_FN(NAME, OTHER_RET)    \
+INLINE unsigned NAME(unsigned int dim) {             \
+  if (dim == 0) return __gen_ocl_##NAME##0();        \
+  else if (dim > 0 && dim < get_work_dim()) {        \
+    if (dim == 1) return __gen_ocl_##NAME##1();      \
+    else if (dim == 2) return __gen_ocl_##NAME##2(); \
+  }                                                  \
+  return OTHER_RET;                                  \
+}
+
+DECL_PUBLIC_WORK_ITEM_FN(get_group_id, 0)
+DECL_PUBLIC_WORK_ITEM_FN(get_local_id, 0)
+DECL_PUBLIC_WORK_ITEM_FN(get_local_size, 1)
+DECL_PUBLIC_WORK_ITEM_FN(get_global_size, 1)
+DECL_PUBLIC_WORK_ITEM_FN(get_global_offset, 0)
+DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1)
 #undef DECL_PUBLIC_WORK_ITEM_FN
 
 INLINE uint get_global_id(uint dim) {
diff --git a/include/CL/cl.h b/include/CL/cl.h
index 203c659..4355e74 100644
--- a/include/CL/cl.h
+++ b/include/CL/cl.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and/or associated documentation files (the
@@ -21,6 +21,8 @@
  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
  ******************************************************************************/
 
+/* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */
+
 #ifndef __OPENCL_CL_H
 #define __OPENCL_CL_H
 
@@ -56,10 +58,8 @@ typedef cl_uint             cl_device_mem_cache_type;
 typedef cl_uint             cl_device_local_mem_type;
 typedef cl_bitfield         cl_device_exec_capabilities;
 typedef cl_bitfield         cl_command_queue_properties;
-typedef intptr_t            cl_device_partition_property;
-typedef cl_bitfield         cl_device_affinity_domain;
 
-typedef intptr_t            cl_context_properties;
+typedef intptr_t			cl_context_properties;
 typedef cl_uint             cl_context_info;
 typedef cl_uint             cl_command_queue_info;
 typedef cl_uint             cl_channel_order;
@@ -67,7 +67,6 @@ typedef cl_uint             cl_channel_type;
 typedef cl_bitfield         cl_mem_flags;
 typedef cl_uint             cl_mem_object_type;
 typedef cl_uint             cl_mem_info;
-typedef cl_bitfield         cl_mem_migration_flags;
 typedef cl_uint             cl_image_info;
 typedef cl_uint             cl_buffer_create_type;
 typedef cl_uint             cl_addressing_mode;
@@ -76,43 +75,24 @@ typedef cl_uint             cl_sampler_info;
 typedef cl_bitfield         cl_map_flags;
 typedef cl_uint             cl_program_info;
 typedef cl_uint             cl_program_build_info;
-typedef cl_uint             cl_program_binary_type;
 typedef cl_int              cl_build_status;
 typedef cl_uint             cl_kernel_info;
-typedef cl_uint             cl_kernel_arg_info;
-typedef cl_uint             cl_kernel_arg_address_qualifier;
-typedef cl_uint             cl_kernel_arg_access_qualifier;
-typedef cl_bitfield         cl_kernel_arg_type_qualifier;
 typedef cl_uint             cl_kernel_work_group_info;
 typedef cl_uint             cl_event_info;
 typedef cl_uint             cl_command_type;
 typedef cl_uint             cl_profiling_info;
 
-
 typedef struct _cl_image_format {
     cl_channel_order        image_channel_order;
     cl_channel_type         image_channel_data_type;
 } cl_image_format;
 
-typedef struct _cl_image_desc {
-    cl_mem_object_type      image_type;
-    size_t                  image_width;
-    size_t                  image_height;
-    size_t                  image_depth;
-    size_t                  image_array_size;
-    size_t                  image_row_pitch;
-    size_t                  image_slice_pitch;
-    cl_uint                 num_mip_levels;
-    cl_uint                 num_samples;
-    cl_mem                  buffer;
-} cl_image_desc;
 
 typedef struct _cl_buffer_region {
     size_t                  origin;
     size_t                  size;
 } cl_buffer_region;
 
-
 /******************************************************************************/
 
 /* Error Codes */
@@ -131,11 +111,6 @@ typedef struct _cl_buffer_region {
 #define CL_MAP_FAILURE                              -12
 #define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
-#define CL_COMPILE_PROGRAM_FAILURE                  -15
-#define CL_LINKER_NOT_AVAILABLE                     -16
-#define CL_LINK_PROGRAM_FAILURE                     -17
-#define CL_DEVICE_PARTITION_FAILED                  -18
-#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
 
 #define CL_INVALID_VALUE                            -30
 #define CL_INVALID_DEVICE_TYPE                      -31
@@ -172,21 +147,14 @@ typedef struct _cl_buffer_region {
 #define CL_INVALID_MIP_LEVEL                        -62
 #define CL_INVALID_GLOBAL_WORK_SIZE                 -63
 #define CL_INVALID_PROPERTY                         -64
-#define CL_INVALID_IMAGE_DESCRIPTOR                 -65
-#define CL_INVALID_COMPILER_OPTIONS                 -66
-#define CL_INVALID_LINKER_OPTIONS                   -67
-#define CL_INVALID_DEVICE_PARTITION_COUNT           -68
 
 /* OpenCL Version */
 #define CL_VERSION_1_0                              1
 #define CL_VERSION_1_1                              1
-#define CL_VERSION_1_2                              1
 
 /* cl_bool */
 #define CL_FALSE                                    0
 #define CL_TRUE                                     1
-#define CL_BLOCKING                                 CL_TRUE
-#define CL_NON_BLOCKING                             CL_FALSE
 
 /* cl_platform_info */
 #define CL_PLATFORM_PROFILE                         0x0900
@@ -200,7 +168,6 @@ typedef struct _cl_buffer_region {
 #define CL_DEVICE_TYPE_CPU                          (1 << 1)
 #define CL_DEVICE_TYPE_GPU                          (1 << 2)
 #define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
-#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
 #define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
 
 /* cl_device_info */
@@ -254,7 +221,7 @@ typedef struct _cl_buffer_region {
 #define CL_DEVICE_VERSION                           0x102F
 #define CL_DEVICE_EXTENSIONS                        0x1030
 #define CL_DEVICE_PLATFORM                          0x1031
-#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
+/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
 /* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
 #define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
@@ -266,20 +233,6 @@ typedef struct _cl_buffer_region {
 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
 #define CL_DEVICE_OPENCL_C_VERSION                  0x103D
-#define CL_DEVICE_LINKER_AVAILABLE                  0x103E
-#define CL_DEVICE_BUILT_IN_KERNELS                  0x103F
-#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE             0x1040
-#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE              0x1041
-#define CL_DEVICE_PARENT_DEVICE                     0x1042
-#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES         0x1043
-#define CL_DEVICE_PARTITION_PROPERTIES              0x1044
-#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN         0x1045
-#define CL_DEVICE_PARTITION_TYPE                    0x1046
-#define CL_DEVICE_REFERENCE_COUNT                   0x1047
-#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC       0x1048
-#define CL_DEVICE_PRINTF_BUFFER_SIZE                0x1049
-#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT             0x104A
-#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT      0x104B
 
 /* cl_device_fp_config - bitfield */
 #define CL_FP_DENORM                                (1 << 0)
@@ -289,7 +242,6 @@ typedef struct _cl_buffer_region {
 #define CL_FP_ROUND_TO_INF                          (1 << 4)
 #define CL_FP_FMA                                   (1 << 5)
 #define CL_FP_SOFT_FLOAT                            (1 << 6)
-#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
 
 /* cl_device_mem_cache_type */
 #define CL_NONE                                     0x0
@@ -314,23 +266,8 @@ typedef struct _cl_buffer_region {
 #define CL_CONTEXT_PROPERTIES                       0x1082
 #define CL_CONTEXT_NUM_DEVICES                      0x1083
 
-/* cl_context_properties */
+/* cl_context_info + cl_context_properties */
 #define CL_CONTEXT_PLATFORM                         0x1084
-#define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
-    
-/* cl_device_partition_property */
-#define CL_DEVICE_PARTITION_EQUALLY                 0x1086
-#define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
-#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END      0x0
-#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN      0x1088
-    
-/* cl_device_affinity_domain */
-#define CL_DEVICE_AFFINITY_DOMAIN_NUMA                     (1 << 0)
-#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE                 (1 << 1)
-#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE                 (1 << 2)
-#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE                 (1 << 3)
-#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE                 (1 << 4)
-#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE       (1 << 5)
 
 /* cl_command_queue_info */
 #define CL_QUEUE_CONTEXT                            0x1090
@@ -345,14 +282,6 @@ typedef struct _cl_buffer_region {
 #define CL_MEM_USE_HOST_PTR                         (1 << 3)
 #define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
 #define CL_MEM_COPY_HOST_PTR                        (1 << 5)
-// reserved                                         (1 << 6)    
-#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
-#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
-#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
-
-/* cl_mem_migration_flags - bitfield */
-#define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
-#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED     (1 << 1)
 
 /* cl_channel_order */
 #define CL_R                                        0x10B0
@@ -368,8 +297,6 @@ typedef struct _cl_buffer_region {
 #define CL_Rx                                       0x10BA
 #define CL_RGx                                      0x10BB
 #define CL_RGBx                                     0x10BC
-#define CL_DEPTH                                    0x10BD
-#define CL_DEPTH_STENCIL                            0x10BE
 
 /* cl_channel_type */
 #define CL_SNORM_INT8                               0x10D0
@@ -387,16 +314,11 @@ typedef struct _cl_buffer_region {
 #define CL_UNSIGNED_INT32                           0x10DC
 #define CL_HALF_FLOAT                               0x10DD
 #define CL_FLOAT                                    0x10DE
-#define CL_UNORM_INT24                              0x10DF
 
 /* cl_mem_object_type */
 #define CL_MEM_OBJECT_BUFFER                        0x10F0
 #define CL_MEM_OBJECT_IMAGE2D                       0x10F1
 #define CL_MEM_OBJECT_IMAGE3D                       0x10F2
-#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
-#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
-#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
-#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
 
 /* cl_mem_info */
 #define CL_MEM_TYPE                                 0x1100
@@ -417,10 +339,6 @@ typedef struct _cl_buffer_region {
 #define CL_IMAGE_WIDTH                              0x1114
 #define CL_IMAGE_HEIGHT                             0x1115
 #define CL_IMAGE_DEPTH                              0x1116
-#define CL_IMAGE_ARRAY_SIZE                         0x1117
-#define CL_IMAGE_BUFFER                             0x1118
-#define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
-#define CL_IMAGE_NUM_SAMPLES                        0x111A
 
 /* cl_addressing_mode */
 #define CL_ADDRESS_NONE                             0x1130
@@ -443,7 +361,6 @@ typedef struct _cl_buffer_region {
 /* cl_map_flags - bitfield */
 #define CL_MAP_READ                                 (1 << 0)
 #define CL_MAP_WRITE                                (1 << 1)
-#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
 
 /* cl_program_info */
 #define CL_PROGRAM_REFERENCE_COUNT                  0x1160
@@ -453,20 +370,11 @@ typedef struct _cl_buffer_region {
 #define CL_PROGRAM_SOURCE                           0x1164
 #define CL_PROGRAM_BINARY_SIZES                     0x1165
 #define CL_PROGRAM_BINARIES                         0x1166
-#define CL_PROGRAM_NUM_KERNELS                      0x1167
-#define CL_PROGRAM_KERNEL_NAMES                     0x1168
 
 /* cl_program_build_info */
 #define CL_PROGRAM_BUILD_STATUS                     0x1181
 #define CL_PROGRAM_BUILD_OPTIONS                    0x1182
 #define CL_PROGRAM_BUILD_LOG                        0x1183
-#define CL_PROGRAM_BINARY_TYPE                      0x1184
-    
-/* cl_program_binary_type */
-#define CL_PROGRAM_BINARY_TYPE_NONE                 0x0
-#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT      0x1
-#define CL_PROGRAM_BINARY_TYPE_LIBRARY              0x2
-#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE           0x4
 
 /* cl_build_status */
 #define CL_BUILD_SUCCESS                            0
@@ -480,32 +388,6 @@ typedef struct _cl_buffer_region {
 #define CL_KERNEL_REFERENCE_COUNT                   0x1192
 #define CL_KERNEL_CONTEXT                           0x1193
 #define CL_KERNEL_PROGRAM                           0x1194
-#define CL_KERNEL_ATTRIBUTES                        0x1195
-
-/* cl_kernel_arg_info */
-#define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
-#define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
-#define CL_KERNEL_ARG_TYPE_NAME                     0x1198
-#define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
-#define CL_KERNEL_ARG_NAME                          0x119A
-
-/* cl_kernel_arg_address_qualifier */
-#define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
-#define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
-#define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
-#define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
-
-/* cl_kernel_arg_access_qualifier */
-#define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
-#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
-#define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
-#define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
-    
-/* cl_kernel_arg_type_qualifer */
-#define CL_KERNEL_ARG_TYPE_NONE                     0
-#define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
-#define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
-#define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
 
 /* cl_kernel_work_group_info */
 #define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
@@ -513,7 +395,6 @@ typedef struct _cl_buffer_region {
 #define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
 #define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
-#define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
 
 /* cl_event_info  */
 #define CL_EVENT_COMMAND_QUEUE                      0x11D0
@@ -544,17 +425,13 @@ typedef struct _cl_buffer_region {
 #define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
 #define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
 #define CL_COMMAND_USER                             0x1204
-#define CL_COMMAND_BARRIER                          0x1205
-#define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
-#define CL_COMMAND_FILL_BUFFER                      0x1207
-#define CL_COMMAND_FILL_IMAGE                       0x1208
 
 /* command execution status */
 #define CL_COMPLETE                                 0x0
 #define CL_RUNNING                                  0x1
 #define CL_SUBMITTED                                0x2
 #define CL_QUEUED                                   0x3
-
+  
 /* cl_buffer_create_type  */
 #define CL_BUFFER_CREATE_TYPE_REGION                0x1220
 
@@ -593,35 +470,22 @@ clGetDeviceInfo(cl_device_id    /* device */,
                 size_t          /* param_value_size */, 
                 void *          /* param_value */,
                 size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-    
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCreateSubDevices(cl_device_id                         /* in_device */,
-                   const cl_device_partition_property * /* properties */,
-                   cl_uint                              /* num_devices */,
-                   cl_device_id *                       /* out_devices */,
-                   cl_uint *                            /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
 
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
-    
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
-    
 /* Context APIs  */
 extern CL_API_ENTRY cl_context CL_API_CALL
 clCreateContext(const cl_context_properties * /* properties */,
-                cl_uint                 /* num_devices */,
-                const cl_device_id *    /* devices */,
+                cl_uint                       /* num_devices */,
+                const cl_device_id *          /* devices */,
                 void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
-                void *                  /* user_data */,
-                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+                void *                        /* user_data */,
+                cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_context CL_API_CALL
 clCreateContextFromType(const cl_context_properties * /* properties */,
-                        cl_device_type          /* device_type */,
+                        cl_device_type                /* device_type */,
                         void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
-                        void *                  /* user_data */,
-                        cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+                        void *                        /* user_data */,
+                        cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_int CL_API_CALL
 clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
@@ -656,6 +520,25 @@ clGetCommandQueueInfo(cl_command_queue      /* command_queue */,
                       void *                /* param_value */,
                       size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 
+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
+/* 
+ *  WARNING:
+ *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
+ *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
+ *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
+ *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
+ *
+ *  Software developers previously relying on this API are instructed to set the command queue 
+ *  properties when creating the queue, instead. 
+ */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetCommandQueueProperty(cl_command_queue              /* command_queue */,
+                          cl_command_queue_properties   /* properties */, 
+                          cl_bool                        /* enable */,
+                          cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
+#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
+
 /* Memory Object APIs */
 extern CL_API_ENTRY cl_mem CL_API_CALL
 clCreateBuffer(cl_context   /* context */,
@@ -672,12 +555,26 @@ clCreateSubBuffer(cl_mem                   /* buffer */,
                   cl_int *                 /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
 
 extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateImage(cl_context              /* context */,
-              cl_mem_flags            /* flags */,
-              const cl_image_format * /* image_format */,
-              const cl_image_desc *   /* image_desc */, 
-              void *                  /* host_ptr */,
-              cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+clCreateImage2D(cl_context              /* context */,
+                cl_mem_flags            /* flags */,
+                const cl_image_format * /* image_format */,
+                size_t                  /* image_width */,
+                size_t                  /* image_height */,
+                size_t                  /* image_row_pitch */, 
+                void *                  /* host_ptr */,
+                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+                        
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage3D(cl_context              /* context */,
+                cl_mem_flags            /* flags */,
+                const cl_image_format * /* image_format */,
+                size_t                  /* image_width */, 
+                size_t                  /* image_height */,
+                size_t                  /* image_depth */, 
+                size_t                  /* image_row_pitch */, 
+                size_t                  /* image_slice_pitch */, 
+                void *                  /* host_ptr */,
+                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
                         
 extern CL_API_ENTRY cl_int CL_API_CALL
 clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
@@ -712,7 +609,7 @@ clSetMemObjectDestructorCallback(  cl_mem /* memobj */,
                                     void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
                                     void * /*user_data */ )             CL_API_SUFFIX__VERSION_1_1;  
 
-/* Sampler APIs */
+/* Sampler APIs  */
 extern CL_API_ENTRY cl_sampler CL_API_CALL
 clCreateSampler(cl_context          /* context */,
                 cl_bool             /* normalized_coords */, 
@@ -750,13 +647,6 @@ clCreateProgramWithBinary(cl_context                     /* context */,
                           cl_int *                       /* binary_status */,
                           cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithBuiltInKernels(cl_context            /* context */,
-                                  cl_uint               /* num_devices */,
-                                  const cl_device_id *  /* device_list */,
-                                  const char *          /* kernel_names */,
-                                  cl_int *              /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-
 extern CL_API_ENTRY cl_int CL_API_CALL
 clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
 
@@ -772,30 +662,7 @@ clBuildProgram(cl_program           /* program */,
                void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_int CL_API_CALL
-clCompileProgram(cl_program           /* program */,
-                 cl_uint              /* num_devices */,
-                 const cl_device_id * /* device_list */,
-                 const char *         /* options */, 
-                 cl_uint              /* num_input_headers */,
-                 const cl_program *   /* input_headers */,
-                 const char **        /* header_include_names */,
-                 void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
-                 void *               /* user_data */) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_program CL_API_CALL
-clLinkProgram(cl_context           /* context */,
-              cl_uint              /* num_devices */,
-              const cl_device_id * /* device_list */,
-              const char *         /* options */, 
-              cl_uint              /* num_input_programs */,
-              const cl_program *   /* input_programs */,
-              void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
-              void *               /* user_data */,
-              cl_int *             /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
-
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
+clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetProgramInfo(cl_program         /* program */,
@@ -844,14 +711,6 @@ clGetKernelInfo(cl_kernel       /* kernel */,
                 size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelArgInfo(cl_kernel       /* kernel */,
-                   cl_uint         /* arg_indx */,
-                   cl_kernel_arg_info  /* param_name */,
-                   size_t          /* param_value_size */,
-                   void *          /* param_value */,
-                   size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
 clGetKernelWorkGroupInfo(cl_kernel                  /* kernel */,
                          cl_device_id               /* device */,
                          cl_kernel_work_group_info  /* param_name */,
@@ -859,7 +718,7 @@ clGetKernelWorkGroupInfo(cl_kernel                  /* kernel */,
                          void *                     /* param_value */,
                          size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
 
-/* Event Object APIs */
+/* Event Object APIs  */
 extern CL_API_ENTRY cl_int CL_API_CALL
 clWaitForEvents(cl_uint             /* num_events */,
                 const cl_event *    /* event_list */) CL_API_SUFFIX__VERSION_1_0;
@@ -891,7 +750,7 @@ clSetEventCallback( cl_event    /* event */,
                     void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
                     void *      /* user_data */) CL_API_SUFFIX__VERSION_1_1;
 
-/* Profiling APIs */
+/* Profiling APIs  */
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetEventProfilingInfo(cl_event            /* event */,
                         cl_profiling_info   /* param_name */,
@@ -912,7 +771,7 @@ clEnqueueReadBuffer(cl_command_queue    /* command_queue */,
                     cl_mem              /* buffer */,
                     cl_bool             /* blocking_read */,
                     size_t              /* offset */,
-                    size_t              /* size */, 
+                    size_t              /* cb */, 
                     void *              /* ptr */,
                     cl_uint             /* num_events_in_wait_list */,
                     const cl_event *    /* event_wait_list */,
@@ -922,8 +781,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL
 clEnqueueReadBufferRect(cl_command_queue    /* command_queue */,
                         cl_mem              /* buffer */,
                         cl_bool             /* blocking_read */,
-                        const size_t *      /* buffer_offset */,
-                        const size_t *      /* host_offset */, 
+                        const size_t *      /* buffer_origin */,
+                        const size_t *      /* host_origin */, 
                         const size_t *      /* region */,
                         size_t              /* buffer_row_pitch */,
                         size_t              /* buffer_slice_pitch */,
@@ -939,7 +798,7 @@ clEnqueueWriteBuffer(cl_command_queue   /* command_queue */,
                      cl_mem             /* buffer */, 
                      cl_bool            /* blocking_write */, 
                      size_t             /* offset */, 
-                     size_t             /* size */, 
+                     size_t             /* cb */, 
                      const void *       /* ptr */, 
                      cl_uint            /* num_events_in_wait_list */, 
                      const cl_event *   /* event_wait_list */, 
@@ -949,8 +808,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL
 clEnqueueWriteBufferRect(cl_command_queue    /* command_queue */,
                          cl_mem              /* buffer */,
                          cl_bool             /* blocking_write */,
-                         const size_t *      /* buffer_offset */,
-                         const size_t *      /* host_offset */, 
+                         const size_t *      /* buffer_origin */,
+                         const size_t *      /* host_origin */, 
                          const size_t *      /* region */,
                          size_t              /* buffer_row_pitch */,
                          size_t              /* buffer_slice_pitch */,
@@ -962,23 +821,12 @@ clEnqueueWriteBufferRect(cl_command_queue    /* command_queue */,
                          cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
                             
 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueFillBuffer(cl_command_queue   /* command_queue */,
-                    cl_mem             /* buffer */, 
-                    const void *       /* pattern */, 
-                    size_t             /* pattern_size */, 
-                    size_t             /* offset */, 
-                    size_t             /* size */, 
-                    cl_uint            /* num_events_in_wait_list */, 
-                    const cl_event *   /* event_wait_list */, 
-                    cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
-                            
-extern CL_API_ENTRY cl_int CL_API_CALL
 clEnqueueCopyBuffer(cl_command_queue    /* command_queue */, 
                     cl_mem              /* src_buffer */,
                     cl_mem              /* dst_buffer */, 
                     size_t              /* src_offset */,
                     size_t              /* dst_offset */,
-                    size_t              /* size */, 
+                    size_t              /* cb */, 
                     cl_uint             /* num_events_in_wait_list */,
                     const cl_event *    /* event_wait_list */,
                     cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
@@ -1025,16 +873,6 @@ clEnqueueWriteImage(cl_command_queue    /* command_queue */,
                     cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueFillImage(cl_command_queue   /* command_queue */,
-                   cl_mem             /* image */, 
-                   const void *       /* fill_color */, 
-                   const size_t *     /* origin[3] */, 
-                   const size_t *     /* region[3] */, 
-                   cl_uint            /* num_events_in_wait_list */, 
-                   const cl_event *   /* event_wait_list */, 
-                   cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
-                            
-extern CL_API_ENTRY cl_int CL_API_CALL
 clEnqueueCopyImage(cl_command_queue     /* command_queue */,
                    cl_mem               /* src_image */,
                    cl_mem               /* dst_image */, 
@@ -1073,7 +911,7 @@ clEnqueueMapBuffer(cl_command_queue /* command_queue */,
                    cl_bool          /* blocking_map */, 
                    cl_map_flags     /* map_flags */,
                    size_t           /* offset */,
-                   size_t           /* size */,
+                   size_t           /* cb */,
                    cl_uint          /* num_events_in_wait_list */,
                    const cl_event * /* event_wait_list */,
                    cl_event *       /* event */,
@@ -1102,15 +940,6 @@ clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
                         cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMigrateMemObjects(cl_command_queue       /* command_queue */,
-                           cl_uint                /* num_mem_objects */,
-                           const cl_mem *         /* mem_objects */,
-                           cl_mem_migration_flags /* flags */,
-                           cl_uint                /* num_events_in_wait_list */,
-                           const cl_event *       /* event_wait_list */,
-                           cl_event *             /* event */) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
 clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
                        cl_kernel        /* kernel */,
                        cl_uint          /* work_dim */,
@@ -1130,7 +959,7 @@ clEnqueueTask(cl_command_queue  /* command_queue */,
 
 extern CL_API_ENTRY cl_int CL_API_CALL
 clEnqueueNativeKernel(cl_command_queue  /* command_queue */,
-					  void (CL_CALLBACK * /*user_func*/)(void *), 
+					  void (CL_CALLBACK *user_func)(void *), 
                       void *            /* args */,
                       size_t            /* cb_args */, 
                       cl_uint           /* num_mem_objects */,
@@ -1141,17 +970,16 @@ clEnqueueNativeKernel(cl_command_queue  /* command_queue */,
                       cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
-                            cl_uint           /* num_events_in_wait_list */,
-                            const cl_event *  /* event_wait_list */,
-                            cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
+clEnqueueMarker(cl_command_queue    /* command_queue */,
+                cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
-                             cl_uint           /* num_events_in_wait_list */,
-                             const cl_event *  /* event_wait_list */,
-                             cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
+clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
+                       cl_uint          /* num_events */,
+                       const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
 
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
 
 /* Extension function access
  *
@@ -1160,51 +988,7 @@ clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
  * check to make sure the address is not NULL, before using or 
  * calling the returned function address.
  */
-extern CL_API_ENTRY void * CL_API_CALL 
-clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
-                                         const char *   /* func_name */) CL_API_SUFFIX__VERSION_1_2;
-    
-
-// Deprecated OpenCL 1.1 APIs
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateImage2D(cl_context              /* context */,
-                cl_mem_flags            /* flags */,
-                const cl_image_format * /* image_format */,
-                size_t                  /* image_width */,
-                size_t                  /* image_height */,
-                size_t                  /* image_row_pitch */, 
-                void *                  /* host_ptr */,
-                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateImage3D(cl_context              /* context */,
-                cl_mem_flags            /* flags */,
-                const cl_image_format * /* image_format */,
-                size_t                  /* image_width */, 
-                size_t                  /* image_height */,
-                size_t                  /* image_depth */, 
-                size_t                  /* image_row_pitch */, 
-                size_t                  /* image_slice_pitch */, 
-                void *                  /* host_ptr */,
-                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
-clEnqueueMarker(cl_command_queue    /* command_queue */,
-                cl_event *          /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
-clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
-                        cl_uint          /* num_events */,
-                        const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
-clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
-clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
-clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0;
 
 #ifdef __cplusplus
 }
diff --git a/include/CL/cl.hpp b/include/CL/cl.hpp
index 06448e2..99b86a6 100644
--- a/include/CL/cl.hpp
+++ b/include/CL/cl.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and/or associated documentation files (the
@@ -23,16 +23,13 @@
 
 /*! \file
  *
- *   \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and 
- *       OpenCL 1.2 (rev 15)    
- *   \author Benedict R. Gaster, Laurent Morichetti and Lee Howes
+ *   \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33)    
+ *   \author Benedict R. Gaster and Laurent Morichetti
  *   
- *   Additions and fixes from:
- *       Brian Cole, March 3rd 2010 and April 2012 
- *       Matt Gruenke, April 2012.
+ *   Additions and fixes from Brian Cole, March 3rd 2010.
  *   
- *   \version 1.2.4
- *   \date January 2013
+ *   \version 1.1
+ *   \date June 2010
  *
  *   Optional extension support
  *
@@ -144,21 +141,10 @@
 #define CL_HPP_
 
 #ifdef _WIN32
-
 #include <windows.h>
 #include <malloc.h>
-#include <iterator>
-#include <intrin.h>
-
-#if defined(__CL_ENABLE_EXCEPTIONS)
-#include <exception>
-#endif // #if defined(__CL_ENABLE_EXCEPTIONS)
-
-#pragma push_macro("max")
-#undef max
 #if defined(USE_DX_INTEROP)
 #include <CL/cl_d3d10.h>
-#include <CL/cl_dx9_media_sharing.h>
 #endif
 #endif // _WIN32
 
@@ -170,36 +156,16 @@
 #if defined(__APPLE__) || defined(__MACOSX)
 #include <OpenGL/OpenGL.h>
 #include <OpenCL/opencl.h>
-#include <libkern/OSAtomic.h>
 #else
 #include <GL/gl.h>
 #include <CL/opencl.h>
 #endif // !__APPLE__
 
-// To avoid accidentally taking ownership of core OpenCL types
-// such as cl_kernel constructors are made explicit
-// under OpenCL 1.2
-#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
-#define __CL_EXPLICIT_CONSTRUCTORS explicit
-#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
-#define __CL_EXPLICIT_CONSTRUCTORS 
-#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
-
-// Define deprecated prefixes and suffixes to ensure compilation
-// in case they are not pre-defined
-#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
-#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED  
-#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
-#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED)
-#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
-#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
-
 #if !defined(CL_CALLBACK)
 #define CL_CALLBACK
 #endif //CL_CALLBACK
 
 #include <utility>
-#include <limits>
 
 #if !defined(__NO_STD_VECTOR)
 #include <vector>
@@ -210,15 +176,11 @@
 #endif 
 
 #if defined(linux) || defined(__APPLE__) || defined(__MACOSX)
-#include <alloca.h>
-
-#include <emmintrin.h>
-#include <xmmintrin.h>
+# include <alloca.h>
 #endif // linux
 
 #include <cstring>
 
-
 /*! \namespace cl
  *
  * \brief The OpenCL C++ bindings are defined within this namespace.
@@ -226,12 +188,6 @@
  */
 namespace cl {
 
-class Memory;
-
-/**
- * Deprecated APIs for 1.2
- */
-#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) 
 #define __INIT_CL_EXT_FCN_PTR(name) \
     if(!pfn_##name) { \
         pfn_##name = (PFN_##name) \
@@ -239,17 +195,6 @@ class Memory;
         if(!pfn_##name) { \
         } \
     }
-#endif // #if defined(CL_VERSION_1_1)
-
-#if defined(CL_VERSION_1_2)
-#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \
-    if(!pfn_##name) { \
-        pfn_##name = (PFN_##name) \
-            clGetExtensionFunctionAddressForPlatform(platform, #name); \
-        if(!pfn_##name) { \
-        } \
-    }
-#endif // #if defined(CL_VERSION_1_1)
 
 class Program;
 class Device;
@@ -258,9 +203,9 @@ class CommandQueue;
 class Memory;
 
 #if defined(__CL_ENABLE_EXCEPTIONS)
-/*! \brief Exception class 
- * 
- *  This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined.
+#include <exception>
+/*! \class Error
+ * \brief Exception class
  */
 class Error : public std::exception
 {
@@ -268,14 +213,8 @@ private:
     cl_int err_;
     const char * errStr_;
 public:
-    /*! \brief Create a new CL error exception for a given error code
+    /*! Create a new CL error exception for a given error code
      *  and corresponding message.
-     * 
-     *  \param err error code value.
-     *
-     *  \param errStr a descriptive string that must remain in scope until
-     *                handling of the exception has concluded.  If set, it
-     *                will be returned by what().
      */
     Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr)
     {}
@@ -300,7 +239,7 @@ public:
      *
      *  \return The error code.
      */
-    cl_int err(void) const { return err_; }
+    const cl_int err(void) const { return err_; }
 };
 
 #define __ERR_STR(x) #x
@@ -308,33 +247,9 @@ public:
 #define __ERR_STR(x) NULL
 #endif // __CL_ENABLE_EXCEPTIONS
 
-
-namespace detail
-{
-#if defined(__CL_ENABLE_EXCEPTIONS)
-static inline cl_int errHandler (
-    cl_int err,
-    const char * errStr = NULL)
-{
-    if (err != CL_SUCCESS) {
-        throw Error(err, errStr);
-    }
-    return err;
-}
-#else
-static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
-{
-    (void) errStr; // suppress unused variable warning
-    return err;
-}
-#endif // __CL_ENABLE_EXCEPTIONS
-}
-
-
-
 //! \cond DOXYGEN_DETAIL
 #if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
-#define __GET_DEVICE_INFO_ERR               __ERR_STR(clGetDeviceInfo)
+#define __GET_DEVICE_INFO_ERR               __ERR_STR(clgetDeviceInfo)
 #define __GET_PLATFORM_INFO_ERR             __ERR_STR(clGetPlatformInfo)
 #define __GET_DEVICE_IDS_ERR                __ERR_STR(clGetDeviceIDs)
 #define __GET_PLATFORM_IDS_ERR              __ERR_STR(clGetPlatformIDs)
@@ -345,29 +260,20 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
 #define __GET_IMAGE_INFO_ERR                __ERR_STR(clGetImageInfo)
 #define __GET_SAMPLER_INFO_ERR              __ERR_STR(clGetSamplerInfo)
 #define __GET_KERNEL_INFO_ERR               __ERR_STR(clGetKernelInfo)
-#if defined(CL_VERSION_1_2)
-#define __GET_KERNEL_ARG_INFO_ERR               __ERR_STR(clGetKernelArgInfo)
-#endif // #if defined(CL_VERSION_1_2)
 #define __GET_KERNEL_WORK_GROUP_INFO_ERR    __ERR_STR(clGetKernelWorkGroupInfo)
 #define __GET_PROGRAM_INFO_ERR              __ERR_STR(clGetProgramInfo)
 #define __GET_PROGRAM_BUILD_INFO_ERR        __ERR_STR(clGetProgramBuildInfo)
 #define __GET_COMMAND_QUEUE_INFO_ERR        __ERR_STR(clGetCommandQueueInfo)
 
-#define __CREATE_CONTEXT_ERR                __ERR_STR(clCreateContext)
 #define __CREATE_CONTEXT_FROM_TYPE_ERR      __ERR_STR(clCreateContextFromType)
 #define __GET_SUPPORTED_IMAGE_FORMATS_ERR   __ERR_STR(clGetSupportedImageFormats)
 
 #define __CREATE_BUFFER_ERR                 __ERR_STR(clCreateBuffer)
-#define __COPY_ERR                          __ERR_STR(cl::copy)
 #define __CREATE_SUBBUFFER_ERR              __ERR_STR(clCreateSubBuffer)
 #define __CREATE_GL_BUFFER_ERR              __ERR_STR(clCreateFromGLBuffer)
-#define __CREATE_GL_RENDER_BUFFER_ERR       __ERR_STR(clCreateFromGLBuffer)
 #define __GET_GL_OBJECT_INFO_ERR            __ERR_STR(clGetGLObjectInfo)
-#if defined(CL_VERSION_1_2)
-#define __CREATE_IMAGE_ERR                  __ERR_STR(clCreateImage)
-#define __CREATE_GL_TEXTURE_ERR             __ERR_STR(clCreateFromGLTexture)
-#define __IMAGE_DIMENSION_ERR               __ERR_STR(Incorrect image dimensions)
-#endif // #if defined(CL_VERSION_1_2)
+#define __CREATE_IMAGE2D_ERR                __ERR_STR(clCreateImage2D)
+#define __CREATE_IMAGE3D_ERR                __ERR_STR(clCreateImage3D)
 #define __CREATE_SAMPLER_ERR                __ERR_STR(clCreateSampler)
 #define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback)
 
@@ -380,14 +286,7 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
 #define __SET_KERNEL_ARGS_ERR               __ERR_STR(clSetKernelArg)
 #define __CREATE_PROGRAM_WITH_SOURCE_ERR    __ERR_STR(clCreateProgramWithSource)
 #define __CREATE_PROGRAM_WITH_BINARY_ERR    __ERR_STR(clCreateProgramWithBinary)
-#if defined(CL_VERSION_1_2)
-#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR    __ERR_STR(clCreateProgramWithBuiltInKernels)
-#endif // #if defined(CL_VERSION_1_2)
 #define __BUILD_PROGRAM_ERR                 __ERR_STR(clBuildProgram)
-#if defined(CL_VERSION_1_2)
-#define __COMPILE_PROGRAM_ERR                  __ERR_STR(clCompileProgram)
-
-#endif // #if defined(CL_VERSION_1_2)
 #define __CREATE_KERNELS_IN_PROGRAM_ERR     __ERR_STR(clCreateKernelsInProgram)
 
 #define __CREATE_COMMAND_QUEUE_ERR          __ERR_STR(clCreateCommandQueue)
@@ -398,11 +297,9 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
 #define __ENQUEUE_WRITE_BUFFER_RECT_ERR     __ERR_STR(clEnqueueWriteBufferRect)
 #define __ENQEUE_COPY_BUFFER_ERR            __ERR_STR(clEnqueueCopyBuffer)
 #define __ENQEUE_COPY_BUFFER_RECT_ERR       __ERR_STR(clEnqueueCopyBufferRect)
-#define __ENQUEUE_FILL_BUFFER_ERR           __ERR_STR(clEnqueueFillBuffer)
 #define __ENQUEUE_READ_IMAGE_ERR            __ERR_STR(clEnqueueReadImage)
 #define __ENQUEUE_WRITE_IMAGE_ERR           __ERR_STR(clEnqueueWriteImage)
 #define __ENQUEUE_COPY_IMAGE_ERR            __ERR_STR(clEnqueueCopyImage)
-#define __ENQUEUE_FILL_IMAGE_ERR           __ERR_STR(clEnqueueFillImage)
 #define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR  __ERR_STR(clEnqueueCopyImageToBuffer)
 #define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR  __ERR_STR(clEnqueueCopyBufferToImage)
 #define __ENQUEUE_MAP_BUFFER_ERR            __ERR_STR(clEnqueueMapBuffer)
@@ -411,193 +308,76 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
 #define __ENQUEUE_NDRANGE_KERNEL_ERR        __ERR_STR(clEnqueueNDRangeKernel)
 #define __ENQUEUE_TASK_ERR                  __ERR_STR(clEnqueueTask)
 #define __ENQUEUE_NATIVE_KERNEL             __ERR_STR(clEnqueueNativeKernel)
-#if defined(CL_VERSION_1_2)
-#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR   __ERR_STR(clEnqueueMigrateMemObjects)
-#endif // #if defined(CL_VERSION_1_2)
+#define __ENQUEUE_MARKER_ERR                __ERR_STR(clEnqueueMarker)
+#define __ENQUEUE_WAIT_FOR_EVENTS_ERR       __ERR_STR(clEnqueueWaitForEvents)
+#define __ENQUEUE_BARRIER_ERR               __ERR_STR(clEnqueueBarrier)
 
 #define __ENQUEUE_ACQUIRE_GL_ERR            __ERR_STR(clEnqueueAcquireGLObjects)
 #define __ENQUEUE_RELEASE_GL_ERR            __ERR_STR(clEnqueueReleaseGLObjects)
 
+#define __UNLOAD_COMPILER_ERR               __ERR_STR(clUnloadCompiler)
 
-#define __RETAIN_ERR                        __ERR_STR(Retain Object)
-#define __RELEASE_ERR                       __ERR_STR(Release Object)
 #define __FLUSH_ERR                         __ERR_STR(clFlush)
 #define __FINISH_ERR                        __ERR_STR(clFinish)
-#define __VECTOR_CAPACITY_ERR               __ERR_STR(Vector capacity error)
 
-/**
- * CL 1.2 version that uses device fission.
- */
-#if defined(CL_VERSION_1_2)
-#define __CREATE_SUB_DEVICES                __ERR_STR(clCreateSubDevices)
-#else
 #define __CREATE_SUB_DEVICES                __ERR_STR(clCreateSubDevicesEXT)
-#endif // #if defined(CL_VERSION_1_2)
-
-/**
- * Deprecated APIs for 1.2
- */
-#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) 
-#define __ENQUEUE_MARKER_ERR                __ERR_STR(clEnqueueMarker)
-#define __ENQUEUE_WAIT_FOR_EVENTS_ERR       __ERR_STR(clEnqueueWaitForEvents)
-#define __ENQUEUE_BARRIER_ERR               __ERR_STR(clEnqueueBarrier)
-#define __UNLOAD_COMPILER_ERR               __ERR_STR(clUnloadCompiler)
-#define __CREATE_GL_TEXTURE_2D_ERR          __ERR_STR(clCreateFromGLTexture2D)
-#define __CREATE_GL_TEXTURE_3D_ERR          __ERR_STR(clCreateFromGLTexture3D)
-#define __CREATE_IMAGE2D_ERR                __ERR_STR(clCreateImage2D)
-#define __CREATE_IMAGE3D_ERR                __ERR_STR(clCreateImage3D)
-#endif // #if defined(CL_VERSION_1_1)
-
 #endif // __CL_USER_OVERRIDE_ERROR_STRINGS
 //! \endcond
 
-/**
- * CL 1.2 marker and barrier commands
- */
-#if defined(CL_VERSION_1_2)
-#define __ENQUEUE_MARKER_WAIT_LIST_ERR                __ERR_STR(clEnqueueMarkerWithWaitList)
-#define __ENQUEUE_BARRIER_WAIT_LIST_ERR               __ERR_STR(clEnqueueBarrierWithWaitList)
-#endif // #if defined(CL_VERSION_1_2)
-
-#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING)
-typedef std::string STRING_CLASS;
-#elif !defined(__USE_DEV_STRING) 
-
 /*! \class string
  * \brief Simple string class, that provides a limited subset of std::string
  * functionality but avoids many of the issues that come with that class.
- 
- *  \note Deprecated. Please use std::string as default or
- *  re-define the string class to match the std::string
- *  interface by defining STRING_CLASS
  */
-class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+class string
 {
 private:
     ::size_t size_;
     char * str_;
 public:
-    //! \brief Constructs an empty string, allocating no memory.
     string(void) : size_(0), str_(NULL)
     {
     }
 
-    /*! \brief Constructs a string populated from an arbitrary value of
-     *  specified size.
-     * 
-     *  An extra '\0' is added, in case none was contained in str.
-     *
-     *  \param str the initial value of the string instance.  Note that '\0'     
-     *             characters receive no special treatment.  If NULL,
-     *             the string is left empty, with a size of 0.
-     *
-     *  \param size the number of characters to copy from str.
-     */
-    string(const char * str, ::size_t size) :
+    string(char * str, ::size_t size) :
         size_(size),
         str_(NULL)
     {
-        if( size > 0 ) {
-            str_ = new char[size_+1];
-            if (str_ != NULL) {
-                memcpy(str_, str, size_  * sizeof(char));
-                str_[size_] = '\0';
-            }
-            else {
-                size_ = 0;
-            }
-        }
-    }
-
-    /*! \brief Constructs a string populated from a null-terminated value.
-     *
-     *  \param str the null-terminated initial value of the string instance.
-     *             If NULL, the string is left empty, with a size of 0.
-     */
-    string(const char * str) :
-        size_(0),
-        str_(NULL)
-    {
-        if( str ) {
-            size_= ::strlen(str);
+        str_ = new char[size_+1];
+        if (str_ != NULL) {
+            memcpy(str_, str, size_  * sizeof(char));
+            str_[size_] = '\0';
         }
-        if( size_ > 0 ) {
-            str_ = new char[size_ + 1];
-            if (str_ != NULL) {
-                memcpy(str_, str, (size_ + 1) * sizeof(char));
-            }
+        else {
+            size_ = 0;
         }
     }
 
-    void resize( ::size_t n )
+    string(char * str) :
+        str_(NULL)
     {
-        if( size_ == n ) {
-            return;
+        size_= ::strlen(str);
+        str_ = new char[size_ + 1];
+        if (str_ != NULL) {
+            memcpy(str_, str, (size_ + 1) * sizeof(char));
         }
-        if (n == 0) {
-            if( str_ ) {
-                delete [] str_;
-            }
-            str_ = NULL;
-            size_ = 0;
-        } 
         else {
-            char *newString = new char[n + 1];
-            int copySize = n;
-            if( size_ < n ) {
-                copySize = size_;
-            }
-            size_ = n;
-            
-            if(str_) {
-                memcpy(newString, str_, (copySize + 1) * sizeof(char));
-            }
-            if( copySize < size_ ) {
-                memset(newString + copySize, 0, size_ - copySize);
-            }
-            newString[size_] = '\0';
-
-            delete [] str_;
-            str_ = newString;
+            size_ = 0;
         }
     }
 
-    const char& operator[] ( ::size_t pos ) const
-    {
-        return str_[pos];
-    }
-
-    char& operator[] ( ::size_t pos )
-    {
-        return str_[pos];
-    }
-
-    /*! \brief Copies the value of another string to this one.
-     *
-     *  \param rhs the string to copy.
-     *
-     *  \returns a reference to the modified instance.
-     */
     string& operator=(const string& rhs)
     {
         if (this == &rhs) {
             return *this;
         }
 
-        if( str_ != NULL ) {
-            delete [] str_;
-            str_ = NULL;
-            size_ = 0;
-        }
-
         if (rhs.size_ == 0 || rhs.str_ == NULL) {
-            str_ = NULL;
             size_ = 0;
+            str_  = NULL;
         } 
         else {
-            str_ = new char[rhs.size_ + 1];
             size_ = rhs.size_;
-            
+            str_ = new char[size_ + 1];
             if (str_ != NULL) {
                 memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char));
             }
@@ -609,42 +389,37 @@ public:
         return *this;
     }
 
-    /*! \brief Constructs a string by copying the value of another instance.
-     *
-     *  \param rhs the string to copy.
-     */
-    string(const string& rhs) :
-        size_(0),
-        str_(NULL)
+    string(const string& rhs)
     {
         *this = rhs;
     }
 
-    //! \brief Destructor - frees memory used to hold the current value.
     ~string()
     {
-        delete[] str_;
-        str_ = NULL;
+        if (str_ != NULL) {
+            delete[] str_;
+        }
     }
-    
-    //! \brief Queries the length of the string, excluding any added '\0's.
-    ::size_t size(void) const   { return size_; }
 
-    //! \brief Queries the length of the string, excluding any added '\0's.
+    ::size_t size(void) const   { return size_; }
     ::size_t length(void) const { return size(); }
 
-    /*! \brief Returns a pointer to the private copy held by this instance,
-     *  or "" if empty/unset.
-     */
     const char * c_str(void) const { return (str_) ? str_ : "";}
 };
+
+#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING)
+#include <string>
+typedef std::string STRING_CLASS;
+#elif !defined(__USE_DEV_STRING) 
 typedef cl::string STRING_CLASS;
-#endif // #elif !defined(__USE_DEV_STRING) 
+#endif
 
 #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
+#include <vector>
 #define VECTOR_CLASS std::vector
 #elif !defined(__USE_DEV_VECTOR) 
 #define VECTOR_CLASS cl::vector 
+#endif
 
 #if !defined(__MAX_DEFAULT_VECTOR_SIZE)
 #define __MAX_DEFAULT_VECTOR_SIZE 10
@@ -652,279 +427,189 @@ typedef cl::string STRING_CLASS;
 
 /*! \class vector
  * \brief Fixed sized vector implementation that mirroring 
- *
- *  \note Deprecated. Please use std::vector as default or
- *  re-define the vector class to match the std::vector
- *  interface by defining VECTOR_CLASS
-
- *  \note Not recommended for use with custom objects as
- *  current implementation will construct N elements
- *
  * std::vector functionality.
- *  \brief Fixed sized vector compatible with std::vector.
- *
- *  \note
- *  This differs from std::vector<> not just in memory allocation,
- *  but also in terms of when members are constructed, destroyed,
- *  and assigned instead of being copy constructed.
- *
- *  \param T type of element contained in the vector.
- *
- *  \param N maximum size of the vector.
  */
 template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>
-class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+class vector
 {
 private:
     T data_[N];
     unsigned int size_;
-
+    bool empty_;
 public:
-    //! \brief Constructs an empty vector with no memory allocated.
-    vector() :  
-        size_(static_cast<unsigned int>(0))
+    vector() : 
+        size_(-1),
+        empty_(true)
     {}
 
-    //! \brief Deallocates the vector's memory and destroys all of its elements.
-    ~vector() 
-    {
-        clear();
-    }
+    ~vector() {}
 
-    //! \brief Returns the number of elements currently contained.
     unsigned int size(void) const
     {
-        return size_;
+        return size_ + 1;
     }
-    
-    /*! \brief Empties the vector of all elements.
-     *  \note
-     *  This does not deallocate memory but will invoke destructors
-     *  on contained elements.
-     */
+
     void clear()
     {
-        while(!empty()) {
-            pop_back();
-        }
+        size_ = -1;
+        empty_ = true;
     }
 
-    /*! \brief Appends an element after the last valid element.
-     * Calling this on a vector that has reached capacity will throw an 
-     * exception if exceptions are enabled.
-     */
     void push_back (const T& x)
     { 
-        if (size() < N) {    
-            new (&data_[size_]) T(x);
-            size_++;
-        } else {
-            detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR);
+        if (size() < N) {
+            size_++;  
+            data_[size_] = x;
+            empty_ = false;
         }
     }
 
-    /*! \brief Removes the last valid element from the vector.
-     * Calling this on an empty vector will throw an exception
-     * if exceptions are enabled.
-     */
     void pop_back(void)
     {
-        if (size_ != 0) {
-            --size_;
+        if (!empty_) {
             data_[size_].~T();
-        } else {
-            detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR);
+            size_--;
+            if (size_ == -1) {
+                empty_ = true;
+            }
         }
     }
   
-    /*! \brief Constructs with a value copied from another.
-     *
-     *  \param vec the vector to copy.
-     */
     vector(const vector<T, N>& vec) : 
-        size_(vec.size_)
+        size_(vec.size_),
+        empty_(vec.empty_)
     {
-        if (size_ != 0) {	
-            assign(vec.begin(), vec.end());
+        if (!empty_) {
+            memcpy(&data_[0], &vec.data_[0], size() * sizeof(T));
         }
     } 
 
-    /*! \brief Constructs with a specified number of initial elements.
-     *
-     *  \param size number of initial elements.
-     *
-     *  \param val value of initial elements.
-     */
     vector(unsigned int size, const T& val = T()) :
-        size_(0)
+        size_(-1),
+        empty_(true)
     {
         for (unsigned int i = 0; i < size; i++) {
             push_back(val);
         }
     }
 
-    /*! \brief Overwrites the current content with that copied from another
-     *         instance.
-     *
-     *  \param rhs vector to copy.
-     *
-     *  \returns a reference to this.
-     */
     vector<T, N>& operator=(const vector<T, N>& rhs)
     {
         if (this == &rhs) {
             return *this;
         }
 
-        if (rhs.size_ != 0) {	
-            assign(rhs.begin(), rhs.end());
-        } else {
-            clear();
+        size_  = rhs.size_;
+        empty_ = rhs.empty_;
+
+        if (!empty_) {	
+            memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T));
         }
     
         return *this;
     }
 
-    /*! \brief Tests equality against another instance.
-     *
-     *  \param vec the vector against which to compare.
-     */
     bool operator==(vector<T,N> &vec)
     {
+        if (empty_ && vec.empty_) {
+            return true;
+        }
+
         if (size() != vec.size()) {
             return false;
         }
 
-        for( unsigned int i = 0; i < size(); ++i ) {
-            if( operator[](i) != vec[i] ) {
-                return false;
-            }
-        }
-        return true;
+        return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false;
     }
   
-    //! \brief Conversion operator to T*.
     operator T* ()             { return data_; }
-
-    //! \brief Conversion operator to const T*.
     operator const T* () const { return data_; }
    
-    //! \brief Tests whether this instance has any elements.
     bool empty (void) const
     {
-        return size_==0;
+        return empty_;
     }
   
-    //! \brief Returns the maximum number of elements this instance can hold.
     unsigned int max_size (void) const
     {
         return N;
     }
 
-    //! \brief Returns the maximum number of elements this instance can hold.
     unsigned int capacity () const
     {
-        return N;
+        return sizeof(T) * N;
     }
 
-    /*! \brief Returns a reference to a given element.
-     *
-     *  \param index which element to access.     *
-     *  \note
-     *  The caller is responsible for ensuring index is >= 0 and < size().
-     */
     T& operator[](int index)
     {
         return data_[index];
     }
   
-    /*! \brief Returns a const reference to a given element.
-     *
-     *  \param index which element to access.
-     *
-     *  \note
-     *  The caller is responsible for ensuring index is >= 0 and < size().
-     */
-    const T& operator[](int index) const
+    T operator[](int index) const
     {
         return data_[index];
     }
   
-    /*! \brief Assigns elements of the vector based on a source iterator range.
-     *
-     *  \param start Beginning iterator of source range
-     *  \param end Enditerator of source range
-     *
-     *  \note
-     *  Will throw an exception if exceptions are enabled and size exceeded.
-     */
     template<class I>
     void assign(I start, I end)
     {
         clear();   
-        while(start != end) {
+        while(start < end) {
             push_back(*start);
             start++;
         }
     }
 
     /*! \class iterator
-     * \brief Const iterator class for vectors
+     * \brief Iterator class for vectors
      */
     class iterator
     {
     private:
-        const vector<T,N> *vec_;
+        vector<T,N> vec_;
         int index_;
-
-        /**
-         * Internal iterator constructor to capture reference
-         * to the vector it iterates over rather than taking 
-         * the vector by copy.
-         */
-        iterator (const vector<T,N> &vec, int index) :
-            vec_(&vec)
-        {            
-            if( !vec.empty() ) {
-                index_ = index;
-            } else {
-                index_ = -1;
-            }
-        }
-
+        bool initialized_;
     public:
         iterator(void) : 
             index_(-1),
-            vec_(NULL)
-        {
-        }
-
-        iterator(const iterator& rhs) :
-            vec_(rhs.vec_),
-            index_(rhs.index_)
+            initialized_(false)
         {
+            index_ = -1;
+            initialized_ = false;
         }
 
         ~iterator(void) {}
 
-        static iterator begin(const cl::vector<T,N> &vec)
+        static iterator begin(vector<T,N> &vec)
         {
-            iterator i(vec, 0);
+            iterator i;
+
+            if (!vec.empty()) {
+                i.index_ = 0;
+            }
 
+            i.vec_ = vec;
+            i.initialized_ = true;
             return i;
         }
 
-        static iterator end(const cl::vector<T,N> &vec)
+        static iterator end(vector<T,N> &vec)
         {
-            iterator i(vec, vec.size());
+            iterator i;
 
+            if (!vec.empty()) {
+                i.index_ = vec.size();
+            }
+            i.vec_ = vec;
+            i.initialized_ = true;
             return i;
         }
     
         bool operator==(iterator i)
         {
             return ((vec_ == i.vec_) && 
-                    (index_ == i.index_));
+                    (index_ == i.index_) && 
+                    (initialized_ == i.initialized_));
         }
 
         bool operator!=(iterator i)
@@ -932,35 +617,29 @@ public:
             return (!(*this==i));
         }
 
-        iterator& operator++()
+        void operator++()
         {
-            ++index_;
-            return *this;
+            index_++;
         }
 
-        iterator operator++(int)
+        void operator++(int x)
         {
-            iterator retVal(*this);
-            ++index_;
-            return retVal;
+            index_ += x;
         }
 
-        iterator& operator--()
+        void operator--()
         {
-            --index_;
-            return *this;
+            index_--;
         }
 
-        iterator operator--(int)
+        void operator--(int x)
         {
-            iterator retVal(*this);
-            --index_;
-            return retVal;
+            index_ -= x;
         }
 
-        const T& operator *() const
+        T operator *()
         {
-            return (*vec_)[index_];
+            return vec_[index_];
         }
     };
 
@@ -969,21 +648,11 @@ public:
         return iterator::begin(*this);
     }
 
-    iterator begin(void) const
-    {
-        return iterator::begin(*this);
-    }
-
     iterator end(void)
     {
         return iterator::end(*this);
     }
 
-    iterator end(void) const
-    {
-        return iterator::end(*this);
-    }
-
     T& front(void)
     {
         return data_[0];
@@ -1001,79 +670,17 @@ public:
 
     const T& back(void) const
     {
-        return data_[size_-1];
+        return data_[size_];
     }
 };  
-#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
-
-
-
-
-
-namespace detail {
-#define __DEFAULT_NOT_INITIALIZED 1 
-#define __DEFAULT_BEING_INITIALIZED 2
-#define __DEFAULT_INITIALIZED 4
-
-    /*
-     * Compare and exchange primitives are needed for handling of defaults
-    */
-    inline int compare_exchange(volatile int * dest, int exchange, int comparand)
-    {
-#ifdef _WIN32
-        return (int)(InterlockedCompareExchange(
-           (volatile long*)dest, 
-           (long)exchange, 
-           (long)comparand));
-#elif defined(__APPLE__) || defined(__MACOSX)
-		return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest);
-#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX)
-        return (__sync_val_compare_and_swap(
-            dest, 
-            comparand, 
-            exchange));
-#endif // !_WIN32
-    }
-
-    inline void fence() { _mm_mfence(); }
-}; // namespace detail
-
     
-/*! \brief class used to interface between C++ and
- *  OpenCL C calls that require arrays of size_t values, whose
- *  size is known statically.
+/*!
+ * \brief size_t class used to interface between C++ and
+ * OpenCL C calls that require arrays of size_t values, who's
+ * size is known statically.
  */
 template <int N>
-class size_t
-{ 
-private:
-    ::size_t data_[N];
-
-public:
-    //! \brief Initialize size_t to all 0s
-    size_t()
-    {
-        for( int i = 0; i < N; ++i ) {
-            data_[i] = 0;
-        }
-    }
-
-    ::size_t& operator[](int index)
-    {
-        return data_[index];
-    }
-
-    const ::size_t& operator[](int index) const
-    {
-        return data_[index];
-    }
-
-    //! \brief Conversion operator to T*.
-    operator ::size_t* ()             { return data_; }
-
-    //! \brief Conversion operator to const T*.
-    operator const ::size_t* () const { return data_; }
-};
+struct size_t : public cl::vector< ::size_t, N> { };
 
 namespace detail {
 
@@ -1111,28 +718,6 @@ struct GetInfoHelper<Func, VECTOR_CLASS<T> >
     }
 };
 
-template <typename Func>
-struct GetInfoHelper<Func, VECTOR_CLASS<cl::Device> >
-{
-    static cl_int get(Func f, cl_uint name, VECTOR_CLASS<cl::Device>* param)
-    {
-        ::size_t required;
-        cl_int err = f(name, 0, NULL, &required);
-        if (err != CL_SUCCESS) {
-            return err;
-        }
-
-        cl_device_id* value = (cl_device_id*) alloca(required);
-        err = f(name, required, value, NULL);
-        if (err != CL_SUCCESS) {
-            return err;
-        }
-
-        param->assign(&value[0], &value[required/sizeof(cl_device_id)]);
-        return CL_SUCCESS;
-    }
-};
-
 // Specialized for getInfo<CL_PROGRAM_BINARIES>
 template <typename Func>
 struct GetInfoHelper<Func, VECTOR_CLASS<char *> >
@@ -1141,11 +726,10 @@ struct GetInfoHelper<Func, VECTOR_CLASS<char *> >
     get(Func f, cl_uint name, VECTOR_CLASS<char *>* param)
     {
       cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL);
-
       if (err != CL_SUCCESS) {
         return err;
       }
-
+      
       return CL_SUCCESS;
     }
 };
@@ -1173,32 +757,6 @@ struct GetInfoHelper<Func, STRING_CLASS>
     }
 };
 
-// Specialized GetInfoHelper for cl::size_t params
-template <typename Func, ::size_t N>
-struct GetInfoHelper<Func, size_t<N> >
-{
-    static cl_int get(Func f, cl_uint name, size_t<N>* param)
-    {
-        ::size_t required;
-        cl_int err = f(name, 0, NULL, &required);
-        if (err != CL_SUCCESS) {
-            return err;
-        }
-
-        ::size_t* value = (::size_t*) alloca(required);
-        err = f(name, required, value, NULL);
-        if (err != CL_SUCCESS) {
-            return err;
-        }
-
-        for(int i = 0; i < N; ++i) {
-            (*param)[i] = value[i];
-        }
- 
-        return CL_SUCCESS;
-    }
-};
-
 #define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \
 namespace detail { \
 template <typename Func> \
@@ -1237,7 +795,7 @@ struct GetInfoHelper<Func, CPP_TYPE> \
     F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \
     F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \
     F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \
-    F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \
     F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \
     F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \
     F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \
@@ -1246,7 +804,7 @@ struct GetInfoHelper<Func, CPP_TYPE> \
     F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \
     F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \
     F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \
-    F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \
+    F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \
     F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \
     F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \
     F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \
@@ -1314,7 +872,7 @@ struct GetInfoHelper<Func, CPP_TYPE> \
     F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \
     F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \
     F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \
-    F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<Device>) \
+    F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<cl_device_id>) \
     F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \
     F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \
     F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS<char *>) \
@@ -1352,7 +910,6 @@ struct GetInfoHelper<Func, CPP_TYPE> \
     F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \
     F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \
     F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \
-    F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \
     \
     F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \
     F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \
@@ -1363,43 +920,19 @@ struct GetInfoHelper<Func, CPP_TYPE> \
     F(cl_event_info, CL_EVENT_CONTEXT, cl::Context)
 #endif // CL_VERSION_1_1
 
-    
-#if defined(CL_VERSION_1_2)
-#define __PARAM_NAME_INFO_1_2(F) \
-    F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \
-    F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \
-    \
-    F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \
-    \
-    F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \
-    \
-    F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \
-    F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \
-    F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \
-    F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \
-    \
-    F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \
-    F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS<cl_device_partition_property>) \
-    F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS<cl_device_partition_property>)  \
-    F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \
-    F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \
-    F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \
-    F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS)
-#endif // #if defined(CL_VERSION_1_2)
-
 #if defined(USE_CL_DEVICE_FISSION)
 #define __PARAM_NAME_DEVICE_FISSION(F) \
     F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \
-    F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
-    F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
-    F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \
-    F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS<cl_device_partition_property_ext>)
+	F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+	F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+	F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \
+	F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS<cl_device_partition_property_ext>)
 #endif // USE_CL_DEVICE_FISSION
 
 template <typename enum_type, cl_int Name>
 struct param_traits {};
 
-#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \
+#define __DECLARE_PARAM_TRAITS(token, param_name, T) \
 struct token;                                        \
 template<>                                           \
 struct param_traits<detail:: token,param_name>       \
@@ -1408,78 +941,16 @@ struct param_traits<detail:: token,param_name>       \
     typedef T param_type;                            \
 };
 
-__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS)
+__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS);
 #if defined(CL_VERSION_1_1)
-__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS)
-#endif // CL_VERSION_1_1
-#if defined(CL_VERSION_1_2)
-__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS)
+__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS);
 #endif // CL_VERSION_1_1
 
 #if defined(USE_CL_DEVICE_FISSION)
-__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS);
+__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS);
 #endif // USE_CL_DEVICE_FISSION
 
-#ifdef CL_PLATFORM_ICD_SUFFIX_KHR
-__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS)
-#endif
-
-#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong)
-#endif
-
-#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>)
-#endif
-#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint)
-#endif
-#ifdef CL_DEVICE_SIMD_WIDTH_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint)
-#endif
-#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint)
-#endif
-#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint)
-#endif
-#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint)
-#endif
-#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint)
-#endif
-#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint)
-#endif
-#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint)
-#endif
-#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint)
-#endif
-
-#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint)
-#endif
-#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint)
-#endif
-#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint)
-#endif
-#ifdef CL_DEVICE_WARP_SIZE_NV
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint)
-#endif
-#ifdef CL_DEVICE_GPU_OVERLAP_NV
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool)
-#endif
-#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool)
-#endif
-#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool)
-#endif
+#undef __DECLARE_PARAM_TRAITS
 
 // Convenience functions
 
@@ -1530,61 +1001,26 @@ template<typename T>
 struct ReferenceHandler
 { };
 
-#if defined(CL_VERSION_1_2)
-/**
- * OpenCL 1.2 devices do have retain/release.
- */
-template <>
-struct ReferenceHandler<cl_device_id>
-{
-    /**
-     * Retain the device.
-     * \param device A valid device created using createSubDevices
-     * \return 
-     *   CL_SUCCESS if the function executed successfully.
-     *   CL_INVALID_DEVICE if device was not a valid subdevice
-     *   CL_OUT_OF_RESOURCES
-     *   CL_OUT_OF_HOST_MEMORY
-     */
-    static cl_int retain(cl_device_id device)
-    { return ::clRetainDevice(device); }
-    /**
-     * Retain the device.
-     * \param device A valid device created using createSubDevices
-     * \return 
-     *   CL_SUCCESS if the function executed successfully.
-     *   CL_INVALID_DEVICE if device was not a valid subdevice
-     *   CL_OUT_OF_RESOURCES
-     *   CL_OUT_OF_HOST_MEMORY
-     */
-    static cl_int release(cl_device_id device)
-    { return ::clReleaseDevice(device); }
-};
-#else // #if defined(CL_VERSION_1_2)
-/**
- * OpenCL 1.1 devices do not have retain/release.
- */
 template <>
 struct ReferenceHandler<cl_device_id>
 {
     // cl_device_id does not have retain().
     static cl_int retain(cl_device_id)
-    { return CL_SUCCESS; }
+    { return CL_INVALID_DEVICE; }
     // cl_device_id does not have release().
     static cl_int release(cl_device_id)
-    { return CL_SUCCESS; }
+    { return CL_INVALID_DEVICE; }
 };
-#endif // #if defined(CL_VERSION_1_2)
 
 template <>
 struct ReferenceHandler<cl_platform_id>
 {
     // cl_platform_id does not have retain().
     static cl_int retain(cl_platform_id)
-    { return CL_SUCCESS; }
+    { return CL_INVALID_PLATFORM; }
     // cl_platform_id does not have release().
     static cl_int release(cl_platform_id)
-    { return CL_SUCCESS; }
+    { return CL_INVALID_PLATFORM; }
 };
 
 template <>
@@ -1662,8 +1098,6 @@ protected:
 public:
     Wrapper() : object_(NULL) { }
 
-    Wrapper(const cl_type &obj) : object_(obj) { }
-
     ~Wrapper()
     {
         if (object_ != NULL) { release(); }
@@ -1672,21 +1106,14 @@ public:
     Wrapper(const Wrapper<cl_type>& rhs)
     {
         object_ = rhs.object_;
-        if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+        if (object_ != NULL) { retain(); }
     }
 
     Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
     {
-        if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+        if (object_ != NULL) { release(); }
         object_ = rhs.object_;
-        if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
-        return *this;
-    }
-
-    Wrapper<cl_type>& operator = (const cl_type &rhs)
-    {
-        if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
-        object_ = rhs;
+        if (object_ != NULL) { retain(); }
         return *this;
     }
 
@@ -1707,138 +1134,39 @@ protected:
     }
 };
 
-template <>
-class Wrapper<cl_device_id>
+#if defined(__CL_ENABLE_EXCEPTIONS)
+static inline cl_int errHandler (
+    cl_int err,
+    const char * errStr = NULL) throw(Error)
 {
-public:
-    typedef cl_device_id cl_type;
-
-protected:
-    cl_type object_;
-    bool referenceCountable_;
-
-    static int getVersion(cl_device_id device)
-    {
-        ::size_t size = 0;
-        clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, 0, &size);
-        STRING_CLASS versionInfo;
-        versionInfo.resize(size + 1);
-        clGetDeviceInfo(device, CL_DEVICE_VERSION, size, &versionInfo[0],
-&size);
-        int highVersion = 0;
-        int lowVersion = 0;
-        int index = 7;
-        while(versionInfo[index] != '.' ) {
-            highVersion *= 10;
-            highVersion += versionInfo[index]-'0';
-            ++index;
-        }
-        ++index;
-        while(versionInfo[index] != ' ' ) {
-            lowVersion *= 10;
-            lowVersion += versionInfo[index]-'0';
-            ++index;
-        }
-        return (highVersion << 16) | lowVersion;
-    }
-
-    static bool isReferenceCountable(cl_device_id device)
-    {
-        bool retVal = false;
-        int version = getVersion(device);
-        if(version > ((1 << 16) + 1)) {
-            retVal = true;
-        }
-        return retVal;
-    }
-
-public:
-    Wrapper() : object_(NULL), referenceCountable_(false) 
-    { 
-    }
-    
-    Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) 
-    {
-        referenceCountable_ = isReferenceCountable(obj); 
-    }
-
-    ~Wrapper()
-    {
-        if (object_ != NULL) { release(); }
-    }
-    
-    Wrapper(const Wrapper<cl_type>& rhs)
-    {
-        object_ = rhs.object_;
-        referenceCountable_ = isReferenceCountable(object_); 
-        if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
-    }
-
-    Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
-    {
-        if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
-        object_ = rhs.object_;
-        referenceCountable_ = rhs.referenceCountable_;
-        if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
-        return *this;
-    }
-
-    Wrapper<cl_type>& operator = (const cl_type &rhs)
-    {
-        if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
-        object_ = rhs;
-        referenceCountable_ = isReferenceCountable(object_); 
-        return *this;
-    }
-
-    cl_type operator ()() const { return object_; }
-
-    cl_type& operator ()() { return object_; }
-
-protected:
-
-    cl_int retain() const
-    {
-        if( referenceCountable_ ) {
-            return ReferenceHandler<cl_type>::retain(object_);
-        }
-        else {
-            return CL_SUCCESS;
-        }
-    }
-
-    cl_int release() const
-    {
-        if( referenceCountable_ ) {
-            return ReferenceHandler<cl_type>::release(object_);
-        }
-        else {
-            return CL_SUCCESS;
-        }
+    if (err != CL_SUCCESS) {
+        throw Error(err, errStr);
     }
-};
+    return err;
+}
+#else
+static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
+{
+    return err;
+}
+#endif // __CL_ENABLE_EXCEPTIONS
 
 } // namespace detail
 //! \endcond
 
 /*! \stuct ImageFormat
- *  \brief Adds constructors and member functions for cl_image_format.
- *
- *  \see cl_image_format
+ * \brief ImageFormat interface for cl_image_format.
  */
 struct ImageFormat : public cl_image_format
 {
-    //! \brief Default constructor - performs no initialization.
     ImageFormat(){}
 
-    //! \brief Initializing constructor.
     ImageFormat(cl_channel_order order, cl_channel_type type)
     {
         image_channel_order = order;
         image_channel_data_type = type;
     }
 
-    //! \brief Assignment operator.
     ImageFormat& operator = (const ImageFormat& rhs)
     {
         if (this != &rhs) {
@@ -1849,41 +1177,18 @@ struct ImageFormat : public cl_image_format
     }
 };
 
-/*! \brief Class interface for cl_device_id.
- *
- *  \note Copies of these objects are inexpensive, since they don't 'own'
- *        any underlying resources or data structures.
- *
- *  \see cl_device_id
+/*! \class Device
+ * \brief Device interface for cl_device_id.
  */
 class Device : public detail::Wrapper<cl_device_id>
 {
 public:
-    //! \brief Default constructor - initializes to NULL.
+    Device(cl_device_id device) { object_ = device; }
+
     Device() : detail::Wrapper<cl_type>() { }
 
-    /*! \brief Copy constructor.
-     * 
-     *  This simply copies the device ID value, which is an inexpensive operation.
-     */
     Device(const Device& device) : detail::Wrapper<cl_type>(device) { }
 
-    /*! \brief Constructor from cl_device_id.
-     * 
-     *  This simply copies the device ID value, which is an inexpensive operation.
-     */
-    Device(const cl_device_id &device) : detail::Wrapper<cl_type>(device) { }
-
-    /*! \brief Returns the first device on the default context.
-     *
-     *  \see Context::getDefault()
-     */
-    static Device getDefault(cl_int * err = NULL);
-
-    /*! \brief Assignment operator from Device.
-     * 
-     *  This simply copies the device ID value, which is an inexpensive operation.
-     */
     Device& operator = (const Device& rhs)
     {
         if (this != &rhs) {
@@ -1892,17 +1197,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment operator from cl_device_id.
-     * 
-     *  This simply copies the device ID value, which is an inexpensive operation.
-     */
-    Device& operator = (const cl_device_id& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
-    //! \brief Wrapper for clGetDeviceInfo().
     template <typename T>
     cl_int getInfo(cl_device_info name, T* param) const
     {
@@ -1911,7 +1205,6 @@ public:
             __GET_DEVICE_INFO_ERR);
     }
 
-    //! \brief Wrapper for clGetDeviceInfo() that returns by value.
     template <cl_int name> typename
     detail::param_traits<detail::cl_device_info, name>::param_type
     getInfo(cl_int* err = NULL) const
@@ -1925,53 +1218,23 @@ public:
         return param;
     }
 
-    /**
-     * CL 1.2 version
-     */
-#if defined(CL_VERSION_1_2)
-    //! \brief Wrapper for clCreateSubDevicesEXT().
-    cl_int createSubDevices(
-        const cl_device_partition_property * properties,
-        VECTOR_CLASS<Device>* devices)
-    {
-        cl_uint n = 0;
-        cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n);
-        if (err != CL_SUCCESS) {
-            return detail::errHandler(err, __CREATE_SUB_DEVICES);
-        }
-
-        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
-        err = clCreateSubDevices(object_, properties, n, ids, NULL);
-        if (err != CL_SUCCESS) {
-            return detail::errHandler(err, __CREATE_SUB_DEVICES);
-        }
-
-        devices->assign(&ids[0], &ids[n]);
-        return CL_SUCCESS;
-    }
-#endif // #if defined(CL_VERSION_1_2)
-
-/**
- * CL 1.1 version that uses device fission.
- */
-#if defined(CL_VERSION_1_1)
 #if defined(USE_CL_DEVICE_FISSION)
-    cl_int createSubDevices(
-        const cl_device_partition_property_ext * properties,
-        VECTOR_CLASS<Device>* devices)
-    {
-        typedef CL_API_ENTRY cl_int 
-            ( CL_API_CALL * PFN_clCreateSubDevicesEXT)(
-                cl_device_id /*in_device*/,
+	cl_int createSubDevices(
+		const cl_device_partition_property_ext * properties,
+		VECTOR_CLASS<Device>* devices)
+	{
+		typedef CL_API_ENTRY cl_int 
+			( CL_API_CALL * PFN_clCreateSubDevicesEXT)(
+				cl_device_id /*in_device*/,
                 const cl_device_partition_property_ext * /* properties */,
                 cl_uint /*num_entries*/,
                 cl_device_id * /*out_devices*/,
                 cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
 
-        static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL;
-        __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT);
+		static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL;
+		__INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT);
 
-        cl_uint n = 0;
+		cl_uint n = 0;
         cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n);
         if (err != CL_SUCCESS) {
             return detail::errHandler(err, __CREATE_SUB_DEVICES);
@@ -1985,40 +1248,24 @@ public:
 
         devices->assign(&ids[0], &ids[n]);
         return CL_SUCCESS;
-    }
-#endif // #if defined(USE_CL_DEVICE_FISSION)
-#endif // #if defined(CL_VERSION_1_1)
+ 	}
+#endif
 };
 
-/*! \brief Class interface for cl_platform_id.
- *
- *  \note Copies of these objects are inexpensive, since they don't 'own'
- *        any underlying resources or data structures.
- *
- *  \see cl_platform_id
+/*! \class Platform
+ *  \brief Platform interface.
  */
 class Platform : public detail::Wrapper<cl_platform_id>
 {
 public:
-    //! \brief Default constructor - initializes to NULL.
+    static const Platform null();
+
+    Platform(cl_platform_id platform) { object_ = platform; }
+
     Platform() : detail::Wrapper<cl_type>()  { }
 
-    /*! \brief Copy constructor.
-     * 
-     *  This simply copies the platform ID value, which is an inexpensive operation.
-     */
     Platform(const Platform& platform) : detail::Wrapper<cl_type>(platform) { }
 
-    /*! \brief Constructor from cl_platform_id.
-     * 
-     *  This simply copies the platform ID value, which is an inexpensive operation.
-     */
-    Platform(const cl_platform_id &platform) : detail::Wrapper<cl_type>(platform) { }
-
-    /*! \brief Assignment operator from Platform.
-     * 
-     *  This simply copies the platform ID value, which is an inexpensive operation.
-     */
     Platform& operator = (const Platform& rhs)
     {
         if (this != &rhs) {
@@ -2027,17 +1274,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment operator from cl_platform_id.
-     * 
-     *  This simply copies the platform ID value, which is an inexpensive operation.
-     */
-    Platform& operator = (const cl_platform_id& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
-    //! \brief Wrapper for clGetPlatformInfo().
     cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const
     {
         return detail::errHandler(
@@ -2045,7 +1281,6 @@ public:
             __GET_PLATFORM_INFO_ERR);
     }
 
-    //! \brief Wrapper for clGetPlatformInfo() that returns by value.
     template <cl_int name> typename
     detail::param_traits<detail::cl_platform_info, name>::param_type
     getInfo(cl_int* err = NULL) const
@@ -2059,18 +1294,11 @@ public:
         return param;
     }
 
-    /*! \brief Gets a list of devices for this platform.
-     * 
-     *  Wraps clGetDeviceIDs().
-     */
     cl_int getDevices(
         cl_device_type type,
         VECTOR_CLASS<Device>* devices) const
     {
         cl_uint n = 0;
-        if( devices == NULL ) {
-            return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR);
-        }
         cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n);
         if (err != CL_SUCCESS) {
             return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
@@ -2125,12 +1353,8 @@ public:
             cl_device_id * devices,
             cl_uint* num_devices);
 
-        if( devices == NULL ) {
-            return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR);
-        }
-
         static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL;
-        __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR);
+        __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR);
 
         cl_uint n = 0;
         cl_int err = pfn_clGetDeviceIDsFromD3D10KHR(
@@ -2163,19 +1387,10 @@ public:
     }
 #endif
 
-    /*! \brief Gets a list of available platforms.
-     * 
-     *  Wraps clGetPlatformIDs().
-     */
     static cl_int get(
         VECTOR_CLASS<Platform>* platforms)
     {
         cl_uint n = 0;
-
-        if( platforms == NULL ) {
-            return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR);
-        }
-
         cl_int err = ::clGetPlatformIDs(0, NULL, &n);
         if (err != CL_SUCCESS) {
             return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
@@ -2191,128 +1406,17 @@ public:
         platforms->assign(&ids[0], &ids[n]);
         return CL_SUCCESS;
     }
+};
 
-    /*! \brief Gets the first available platform.
-     * 
-     *  Wraps clGetPlatformIDs(), returning the first result.
-     */
-    static cl_int get(
-        Platform * platform)
-    {
-        cl_uint n = 0;
-
-        if( platform == NULL ) {
-            return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR);
-        }
-
-        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
-        if (err != CL_SUCCESS) {
-            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
-        }
-
-        cl_platform_id* ids = (cl_platform_id*) alloca(
-            n * sizeof(cl_platform_id));
-        err = ::clGetPlatformIDs(n, ids, NULL);
-        if (err != CL_SUCCESS) {
-            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
-        }
-
-        *platform = ids[0];
-        return CL_SUCCESS;
-    }
-
-    /*! \brief Gets the first available platform, returning it by value.
-     * 
-     *  Wraps clGetPlatformIDs(), returning the first result.
-     */
-    static Platform get(
-        cl_int * errResult = NULL)
-    {
-        Platform platform;
-        cl_uint n = 0;
-        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
-        if (err != CL_SUCCESS) {
-            detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
-            if (errResult != NULL) {
-                *errResult = err;
-            }
-        }
-
-        cl_platform_id* ids = (cl_platform_id*) alloca(
-            n * sizeof(cl_platform_id));
-        err = ::clGetPlatformIDs(n, ids, NULL);
-
-        if (err != CL_SUCCESS) {
-            detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
-        }
-
-        if (errResult != NULL) {
-            *errResult = err;
-        }
-        
-        return ids[0];
-    }
-
-    static Platform getDefault( 
-        cl_int *errResult = NULL )
-    {
-        return get(errResult);
-    }
-
-    
-#if defined(CL_VERSION_1_2)
-    //! \brief Wrapper for clUnloadCompiler().
-    cl_int
-    unloadCompiler()
-    {
-        return ::clUnloadPlatformCompiler(object_);
-    }
-#endif // #if defined(CL_VERSION_1_2)
-}; // class Platform
-
-/**
- * Deprecated APIs for 1.2
- */
-#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2))
-/**
- * Unload the OpenCL compiler.
- * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead.
- */
-inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int
-UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-inline cl_int
+static inline cl_int
 UnloadCompiler()
 {
     return ::clUnloadCompiler();
 }
-#endif // #if defined(CL_VERSION_1_1)
 
-/*! \brief Class interface for cl_context.
- *
- *  \note Copies of these objects are shallow, meaning that the copy will refer
- *        to the same underlying cl_context as the original.  For details, see
- *        clRetainContext() and clReleaseContext().
- *
- *  \see cl_context
- */
-class Context 
-    : public detail::Wrapper<cl_context>
+class Context : public detail::Wrapper<cl_context>
 {
-private:
-    static volatile int default_initialized_;
-    static Context default_;
-    static volatile cl_int default_error_;
 public:
-    /*! \brief Destructor.
-     *
-     *  This calls clReleaseContext() on the value held by this instance.
-     */
-    ~Context() { }
-
-    /*! \brief Constructs a context including a list of specified devices.
-     *
-     *  Wraps clCreateContext().
-     */
     Context(
         const VECTOR_CLASS<Device>& devices,
         cl_context_properties* properties = NULL,
@@ -2325,26 +1429,19 @@ public:
         cl_int* err = NULL)
     {
         cl_int error;
-
-        ::size_t numDevices = devices.size();
-        cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
-        for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
-            deviceIDs[deviceIndex] = (devices[deviceIndex])();
-        }
-
         object_ = ::clCreateContext(
-            properties, (cl_uint) numDevices,
-            deviceIDs,
+            properties, (cl_uint) devices.size(),
+            (cl_device_id*) &devices.front(),
             notifyFptr, data, &error);
 
-        detail::errHandler(error, __CREATE_CONTEXT_ERR);
+        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
         if (err != NULL) {
             *err = error;
         }
     }
 
     Context(
-        const Device& device,
+        cl_device_type type,
         cl_context_properties* properties = NULL,
         void (CL_CALLBACK * notifyFptr)(
             const char *,
@@ -2355,134 +1452,19 @@ public:
         cl_int* err = NULL)
     {
         cl_int error;
+        object_ = ::clCreateContextFromType(
+            properties, type, notifyFptr, data, &error);
 
-        cl_device_id deviceID = device();
-
-        object_ = ::clCreateContext(
-            properties, 1,
-            &deviceID,
-            notifyFptr, data, &error);
-
-        detail::errHandler(error, __CREATE_CONTEXT_ERR);
+        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
         if (err != NULL) {
             *err = error;
         }
     }
 
-    /*! \brief Constructs a context including all devices of a specified type.
-     *
-     *  Wraps clCreateContextFromType().
-     */
-    Context(
-        cl_device_type type,
-        cl_context_properties* properties = NULL,
-        void (CL_CALLBACK * notifyFptr)(
-            const char *,
-            const void *,
-            ::size_t,
-            void *) = NULL,
-        void* data = NULL,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-
-#if !defined(__APPLE__) || !defined(__MACOS)
-        cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 };	
-        if (properties == NULL) {
-            prop[1] = (cl_context_properties)Platform::get(&error)();
-            if (error != CL_SUCCESS) {
-                detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
-                if (err != NULL) {
-                    *err = error;
-                    return;
-                }
-            }
-
-            properties = &prop[0];
-        }
-#endif
-        object_ = ::clCreateContextFromType(
-            properties, type, notifyFptr, data, &error);
-
-        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT.
-     *
-     *  \note All calls to this function return the same cl_context as the first.
-     */
-    static Context getDefault(cl_int * err = NULL) 
-    {
-        int state = detail::compare_exchange(
-            &default_initialized_, 
-            __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED);
-        
-        if (state & __DEFAULT_INITIALIZED) {
-            if (err != NULL) {
-                *err = default_error_;
-            }
-            return default_;
-        }
-
-        if (state & __DEFAULT_BEING_INITIALIZED) {
-              // Assume writes will propagate eventually...
-              while(default_initialized_ != __DEFAULT_INITIALIZED) {
-                  detail::fence();
-              }
-
-            if (err != NULL) {
-                *err = default_error_;
-            }
-            return default_;
-        }
-
-        cl_int error;
-        default_ = Context(
-            CL_DEVICE_TYPE_DEFAULT,
-            NULL,
-            NULL,
-            NULL,
-            &error);
-
-        detail::fence();
-
-        default_error_ = error;
-        // Assume writes will propagate eventually...
-        default_initialized_ = __DEFAULT_INITIALIZED;
-
-        detail::fence();
-
-        if (err != NULL) {
-            *err = default_error_;
-        }
-        return default_;
-
-    }
-
-    //! \brief Default constructor - initializes to NULL.
     Context() : detail::Wrapper<cl_type>() { }
 
-    /*! \brief Copy constructor.
-     * 
-     *  This calls clRetainContext() on the parameter's cl_context.
-     */
     Context(const Context& context) : detail::Wrapper<cl_type>(context) { }
 
-    /*! \brief Constructor from cl_context - takes ownership.
-     * 
-     *  This effectively transfers ownership of a refcount on the cl_context
-     *  into the new Context object.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper<cl_type>(context) { }
-
-    /*! \brief Assignment operator from Context.
-     * 
-     *  This calls clRetainContext() on the parameter and clReleaseContext() on
-     *  the previous value held by this instance.
-     */
     Context& operator = (const Context& rhs)
     {
         if (this != &rhs) {
@@ -2491,18 +1473,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment operator from cl_context - takes ownership.
-     * 
-     *  This effectively transfers ownership of a refcount on the rhs and calls
-     *  clReleaseContext() on the value previously held by this instance.
-     */
-    Context& operator = (const cl_context& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
-    //! \brief Wrapper for clGetContextInfo().
     template <typename T>
     cl_int getInfo(cl_context_info name, T* param) const
     {
@@ -2511,7 +1481,6 @@ public:
             __GET_CONTEXT_INFO_ERR);
     }
 
-    //! \brief Wrapper for clGetContextInfo() that returns by value.
     template <cl_int name> typename
     detail::param_traits<detail::cl_context_info, name>::param_type
     getInfo(cl_int* err = NULL) const
@@ -2525,10 +1494,6 @@ public:
         return param;
     }
 
-    /*! \brief Gets a list of supported image formats.
-     *  
-     *  Wraps clGetSupportedImageFormats().
-     */
     cl_int getSupportedImageFormats(
         cl_mem_flags flags,
         cl_mem_object_type type,
@@ -2564,80 +1529,18 @@ public:
     }
 };
 
-inline Device Device::getDefault(cl_int * err)
-{
-    cl_int error;
-    Device device;
-
-    Context context = Context::getDefault(&error);
-    detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
-
-    if (error != CL_SUCCESS) {
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-    else {
-        device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
-        if (err != NULL) {
-            *err = CL_SUCCESS;
-        }
-    }
-
-    return device;
-}
-
-
-#ifdef _WIN32
-__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
-__declspec(selectany) Context Context::default_;
-__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS;
-#else
-__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
-__attribute__((weak)) Context Context::default_;
-__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS;
-#endif
-
 __GET_INFO_HELPER_WITH_RETAIN(cl::Context)
 
-/*! \brief Class interface for cl_event.
- *
- *  \note Copies of these objects are shallow, meaning that the copy will refer
- *        to the same underlying cl_event as the original.  For details, see
- *        clRetainEvent() and clReleaseEvent().
- *
- *  \see cl_event
+/*! \class Event
+ * \brief Event interface for cl_event.
  */
 class Event : public detail::Wrapper<cl_event>
 {
 public:
-    /*! \brief Destructor.
-     *
-     *  This calls clReleaseEvent() on the value held by this instance.
-     */
-    ~Event() { }
- 
-    //! \brief Default constructor - initializes to NULL.
     Event() : detail::Wrapper<cl_type>() { }
 
-    /*! \brief Copy constructor.
-     * 
-     *  This calls clRetainEvent() on the parameter's cl_event.
-     */
     Event(const Event& event) : detail::Wrapper<cl_type>(event) { }
 
-    /*! \brief Constructor from cl_event - takes ownership.
-     * 
-     *  This effectively transfers ownership of a refcount on the cl_event
-     *  into the new Event object.
-     */
-    Event(const cl_event& event) : detail::Wrapper<cl_type>(event) { }
-
-    /*! \brief Assignment operator from cl_event - takes ownership.
-     *
-     *  This effectively transfers ownership of a refcount on the rhs and calls
-     *  clReleaseEvent() on the value previously held by this instance.
-     */
     Event& operator = (const Event& rhs)
     {
         if (this != &rhs) {
@@ -2646,18 +1549,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment operator from cl_event.
-     * 
-     *  This calls clRetainEvent() on the parameter and clReleaseEvent() on
-     *  the previous value held by this instance.
-     */
-    Event& operator = (const cl_event& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
-    //! \brief Wrapper for clGetEventInfo().
     template <typename T>
     cl_int getInfo(cl_event_info name, T* param) const
     {
@@ -2666,7 +1557,6 @@ public:
             __GET_EVENT_INFO_ERR);
     }
 
-    //! \brief Wrapper for clGetEventInfo() that returns by value.
     template <cl_int name> typename
     detail::param_traits<detail::cl_event_info, name>::param_type
     getInfo(cl_int* err = NULL) const
@@ -2680,7 +1570,6 @@ public:
         return param;
     }
 
-    //! \brief Wrapper for clGetEventProfilingInfo().
     template <typename T>
     cl_int getProfilingInfo(cl_profiling_info name, T* param) const
     {
@@ -2689,7 +1578,6 @@ public:
             __GET_EVENT_PROFILE_INFO_ERR);
     }
 
-    //! \brief Wrapper for clGetEventProfilingInfo() that returns by value.
     template <cl_int name> typename
     detail::param_traits<detail::cl_profiling_info, name>::param_type
     getProfilingInfo(cl_int* err = NULL) const
@@ -2703,10 +1591,6 @@ public:
         return param;
     }
 
-    /*! \brief Blocks the calling thread until this event completes.
-     * 
-     *  Wraps clWaitForEvents().
-     */
     cl_int wait() const
     {
         return detail::errHandler(
@@ -2715,10 +1599,6 @@ public:
     }
 
 #if defined(CL_VERSION_1_1)
-    /*! \brief Registers a user callback function for a specific command execution status.
-     *
-     *  Wraps clSetEventCallback().
-     */
     cl_int setCallback(
         cl_int type,
         void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *),		
@@ -2734,10 +1614,6 @@ public:
     }
 #endif
 
-    /*! \brief Blocks the calling thread until every event specified is complete.
-     * 
-     *  Wraps clWaitForEvents().
-     */
     static cl_int
     waitForEvents(const VECTOR_CLASS<Event>& events)
     {
@@ -2751,17 +1627,12 @@ public:
 __GET_INFO_HELPER_WITH_RETAIN(cl::Event)
 
 #if defined(CL_VERSION_1_1)
-/*! \brief Class interface for user events (a subset of cl_event's).
- * 
- *  See Event for details about copy semantics, etc.
+/*! \class UserEvent
+ * \brief User event interface for cl_event.
  */
 class UserEvent : public Event
 {
 public:
-    /*! \brief Constructs a user event on a given context.
-     *
-     *  Wraps clCreateUserEvent().
-     */
     UserEvent(
         const Context& context,
         cl_int * err = NULL)
@@ -2777,13 +1648,10 @@ public:
         }
     }
 
-    //! \brief Default constructor - initializes to NULL.
     UserEvent() : Event() { }
 
-    //! \brief Copy constructor - performs shallow copy.
     UserEvent(const UserEvent& event) : Event(event) { }
 
-    //! \brief Assignment Operator - performs shallow copy.
     UserEvent& operator = (const UserEvent& rhs)
     {
         if (this != &rhs) {
@@ -2792,10 +1660,6 @@ public:
         return *this;
     }
 
-    /*! \brief Sets the execution status of a user event object.
-     *
-     *  Wraps clSetUserEventStatus().
-     */
     cl_int setStatus(cl_int status)
     {
         return detail::errHandler(
@@ -2805,10 +1669,6 @@ public:
 };
 #endif
 
-/*! \brief Blocks the calling thread until every event specified is complete.
- * 
- *  Wraps clWaitForEvents().
- */
 inline static cl_int
 WaitForEvents(const VECTOR_CLASS<Event>& events)
 {
@@ -2818,45 +1678,16 @@ WaitForEvents(const VECTOR_CLASS<Event>& events)
         __WAIT_FOR_EVENTS_ERR);
 }
 
-/*! \brief Class interface for cl_mem.
- *
- *  \note Copies of these objects are shallow, meaning that the copy will refer
- *        to the same underlying cl_mem as the original.  For details, see
- *        clRetainMemObject() and clReleaseMemObject().
- *
- *  \see cl_mem
+/*! \class Memory
+ * \brief Memory interface for cl_mem.
  */
 class Memory : public detail::Wrapper<cl_mem>
 {
 public:
- 
-    /*! \brief Destructor.
-     *
-     *  This calls clReleaseMemObject() on the value held by this instance.
-     */
-    ~Memory() {}
-
-    //! \brief Default constructor - initializes to NULL.
     Memory() : detail::Wrapper<cl_type>() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     * 
-     *  This calls clRetainMemObject() on the parameter's cl_mem.
-     */
     Memory(const Memory& memory) : detail::Wrapper<cl_type>(memory) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     * 
-     *  This effectively transfers ownership of a refcount on the cl_mem
-     *  into the new Memory object.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper<cl_type>(memory) { }
-
-    /*! \brief Assignment operator from Memory.
-     * 
-     *  This calls clRetainMemObject() on the parameter and clReleaseMemObject()
-     *  on the previous value held by this instance.
-     */
     Memory& operator = (const Memory& rhs)
     {
         if (this != &rhs) {
@@ -2865,18 +1696,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment operator from cl_mem - takes ownership.
-     *
-     *  This effectively transfers ownership of a refcount on the rhs and calls
-     *  clReleaseMemObject() on the value previously held by this instance.
-     */
-    Memory& operator = (const cl_mem& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
-    //! \brief Wrapper for clGetMemObjectInfo().
     template <typename T>
     cl_int getInfo(cl_mem_info name, T* param) const
     {
@@ -2885,7 +1704,6 @@ public:
             __GET_MEM_OBJECT_INFO_ERR);
     }
 
-    //! \brief Wrapper for clGetMemObjectInfo() that returns by value.
     template <cl_int name> typename
     detail::param_traits<detail::cl_mem_info, name>::param_type
     getInfo(cl_int* err = NULL) const
@@ -2900,19 +1718,6 @@ public:
     }
 
 #if defined(CL_VERSION_1_1)
-    /*! \brief Registers a callback function to be called when the memory object
-     *         is no longer needed.
-     *
-     *  Wraps clSetMemObjectDestructorCallback().
-     *
-     *  Repeated calls to this function, for a given cl_mem value, will append
-     *  to the list of functions called (in reverse order) when memory object's
-     *  resources are freed and the memory object is deleted.
-     *
-     *  \note
-     *  The registered callbacks are associated with the underlying cl_mem
-     *  value - not the Memory class instance.
-     */
     cl_int setDestructorCallback(
         void (CL_CALLBACK * pfn_notify)(cl_mem, void *),		
         void * user_data = NULL)
@@ -2930,30 +1735,12 @@ public:
 
 __GET_INFO_HELPER_WITH_RETAIN(cl::Memory)
 
-// Pre-declare copy functions
-class Buffer;
-template< typename IteratorType >
-cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer );
-template< typename IteratorType >
-cl_int copy( cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator );
-
-/*! \brief Class interface for Buffer Memory Objects.
- * 
- *  See Memory for details about copy semantics, etc.
- *
- *  \see Memory
+/*! \class Buffer
+ * \brief Memory buffer interface.
  */
 class Buffer : public Memory
 {
 public:
-
-    /*! \brief Constructs a Buffer in a specified context.
-     *
-     *  Wraps clCreateBuffer().
-     *
-     *  \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was
-     *                  specified.  Note alignment & exclusivity requirements.
-     */
     Buffer(
         const Context& context,
         cl_mem_flags flags,
@@ -2970,102 +1757,10 @@ public:
         }
     }
 
-    /*! \brief Constructs a Buffer in the default context.
-     *
-     *  Wraps clCreateBuffer().
-     *
-     *  \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was
-     *                  specified.  Note alignment & exclusivity requirements.
-     *
-     *  \see Context::getDefault()
-     */
-    Buffer(
-         cl_mem_flags flags,
-        ::size_t size,
-        void* host_ptr = NULL,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-
-        Context context = Context::getDefault(err);
-
-        object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);
-
-        detail::errHandler(error, __CREATE_BUFFER_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    /*!
-     * \brief Construct a Buffer from a host container via iterators.
-     * If useHostPtr is specified iterators must be random access.
-     */
-    template< typename IteratorType >
-    Buffer(
-        IteratorType startIterator,
-        IteratorType endIterator,
-        bool readOnly,
-        bool useHostPtr = false,
-        cl_int* err = NULL)
-    {
-        typedef typename std::iterator_traits<IteratorType>::value_type DataType;
-        cl_int error;
-
-        cl_mem_flags flags = 0;
-        if( readOnly ) {
-            flags |= CL_MEM_READ_ONLY;
-        }
-        else {
-            flags |= CL_MEM_READ_WRITE;
-        }
-        if( useHostPtr ) {
-            flags |= CL_MEM_USE_HOST_PTR;
-        }
-        
-        ::size_t size = sizeof(DataType)*(endIterator - startIterator);
-
-        Context context = Context::getDefault(err);
-
-        if( useHostPtr ) {
-            object_ = ::clCreateBuffer(context(), flags, size, static_cast<DataType*>(&*startIterator), &error);
-        } else {
-            object_ = ::clCreateBuffer(context(), flags, size, 0, &error);
-        }
-
-        detail::errHandler(error, __CREATE_BUFFER_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-
-        if( !useHostPtr ) {
-            error = cl::copy(startIterator, endIterator, *this);
-            detail::errHandler(error, __CREATE_BUFFER_ERR);
-            if (err != NULL) {
-                *err = error;
-            }
-        }
-    }
-
-    //! \brief Default constructor - initializes to NULL.
     Buffer() : Memory() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Buffer(const Buffer& buffer) : Memory(buffer) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { }
-
-    /*! \brief Assignment from Buffer - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Buffer& operator = (const Buffer& rhs)
     {
         if (this != &rhs) {
@@ -3074,21 +1769,7 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Buffer& operator = (const cl_mem& rhs)
-    {
-        Memory::operator=(rhs);
-        return *this;
-    }
-
 #if defined(CL_VERSION_1_1)
-    /*! \brief Creates a new buffer object from this.
-     *
-     *  Wraps clCreateSubBuffer().
-     */
     Buffer createSubBuffer(
         cl_mem_flags flags,
         cl_buffer_create_type buffer_create_type,
@@ -3110,19 +1791,11 @@ public:
         }
 
         return result;
-    }		
+	}		
 #endif
 };
 
 #if defined (USE_DX_INTEROP)
-/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's.
- *
- *  This is provided to facilitate interoperability with Direct3D.
- * 
- *  See Memory for details about copy semantics, etc.
- *
- *  \see Memory
- */
 class BufferD3D10 : public Buffer
 {
 public:
@@ -3130,11 +1803,6 @@ public:
     cl_context context, cl_mem_flags flags, ID3D10Buffer*  buffer,
     cl_int* errcode_ret);
 
-    /*! \brief Constructs a BufferD3D10, in a specified context, from a
-     *         given ID3D10Buffer.
-     *
-     *  Wraps clCreateFromD3D10BufferKHR().
-     */
     BufferD3D10(
         const Context& context,
         cl_mem_flags flags,
@@ -3142,20 +1810,7 @@ public:
         cl_int * err = NULL)
     {
         static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
-
-#if defined(CL_VERSION_1_2)
-        vector<cl_context_properties> props = context.getInfo<CL_CONTEXT_PROPERTIES>();
-        cl_platform platform = -1;
-        for( int i = 0; i < props.size(); ++i ) {
-            if( props[i] == CL_CONTEXT_PLATFORM ) {
-                platform = props[i+1];
-            }
-        }
-        __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR);
-#endif
-#if defined(CL_VERSION_1_1)
         __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);
-#endif
 
         cl_int error;
         object_ = pfn_clCreateFromD3D10BufferKHR(
@@ -3170,25 +1825,10 @@ public:
         }
     }
 
-    //! \brief Default constructor - initializes to NULL.
     BufferD3D10() : Buffer() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { }
-
-    /*! \brief Assignment from BufferD3D10 - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     BufferD3D10& operator = (const BufferD3D10& rhs)
     {
         if (this != &rhs) {
@@ -3196,35 +1836,15 @@ public:
         }
         return *this;
     }
-
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    BufferD3D10& operator = (const cl_mem& rhs)
-    {
-        Buffer::operator=(rhs);
-        return *this;
-    }
 };
 #endif
 
-/*! \brief Class interface for GL Buffer Memory Objects.
- *
- *  This is provided to facilitate interoperability with OpenGL.
- * 
- *  See Memory for details about copy semantics, etc.
- * 
- *  \see Memory
+/*! \class BufferGL
+ * \brief Memory buffer interface for GL interop.
  */
 class BufferGL : public Buffer
 {
 public:
-    /*! \brief Constructs a BufferGL in a specified context, from a given
-     *         GL buffer.
-     *
-     *  Wraps clCreateFromGLBuffer().
-     */
     BufferGL(
         const Context& context,
         cl_mem_flags flags,
@@ -3244,25 +1864,10 @@ public:
         }
     }
 
-    //! \brief Default constructor - initializes to NULL.
     BufferGL() : Buffer() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     BufferGL(const BufferGL& buffer) : Buffer(buffer) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { }
-
-    /*! \brief Assignment from BufferGL - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     BufferGL& operator = (const BufferGL& rhs)
     {
         if (this != &rhs) {
@@ -3271,17 +1876,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    BufferGL& operator = (const cl_mem& rhs)
-    {
-        Buffer::operator=(rhs);
-        return *this;
-    }
-
-    //! \brief Wrapper for clGetGLObjectInfo().
     cl_int getObjectInfo(
         cl_gl_object_type *type,
         GLuint * gl_object_name)
@@ -3292,22 +1886,12 @@ public:
     }
 };
 
-/*! \brief Class interface for GL Render Buffer Memory Objects.
- *
- *  This is provided to facilitate interoperability with OpenGL.
- * 
- *  See Memory for details about copy semantics, etc.
- * 
- *  \see Memory
+/*! \class BufferRenderGL
+ * \brief Memory buffer interface for GL interop with renderbuffer.
  */
 class BufferRenderGL : public Buffer
 {
 public:
-    /*! \brief Constructs a BufferRenderGL in a specified context, from a given
-     *         GL Renderbuffer.
-     *
-     *  Wraps clCreateFromGLRenderbuffer().
-     */
     BufferRenderGL(
         const Context& context,
         cl_mem_flags flags,
@@ -3321,31 +1905,16 @@ public:
             bufobj,
             &error);
 
-        detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR);
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
         if (err != NULL) {
             *err = error;
         }
     }
 
-    //! \brief Default constructor - initializes to NULL.
     BufferRenderGL() : Buffer() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { }
-
-    /*! \brief Assignment from BufferGL - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     BufferRenderGL& operator = (const BufferRenderGL& rhs)
     {
         if (this != &rhs) {
@@ -3354,17 +1923,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    BufferRenderGL& operator = (const cl_mem& rhs)
-    {
-        Buffer::operator=(rhs);
-        return *this;
-    }
-
-    //! \brief Wrapper for clGetGLObjectInfo().
     cl_int getObjectInfo(
         cl_gl_object_type *type,
         GLuint * gl_object_name)
@@ -3375,34 +1933,16 @@ public:
     }
 };
 
-/*! \brief C++ base class for Image Memory objects.
- *
- *  See Memory for details about copy semantics, etc.
- * 
- *  \see Memory
+/*! \class Image
+ * \brief Base class interface for all images.
  */
 class Image : public Memory
 {
 protected:
-    //! \brief Default constructor - initializes to NULL.
     Image() : Memory() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image(const Image& image) : Memory(image) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { }
-
-    /*! \brief Assignment from Image - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image& operator = (const Image& rhs)
     {
         if (this != &rhs) {
@@ -3410,19 +1950,7 @@ protected:
         }
         return *this;
     }
-
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Image& operator = (const cl_mem& rhs)
-    {
-        Memory::operator=(rhs);
-        return *this;
-    }
-
 public:
-    //! \brief Wrapper for clGetImageInfo().
     template <typename T>
     cl_int getImageInfo(cl_image_info name, T* param) const
     {
@@ -3430,8 +1958,7 @@ public:
             detail::getInfo(&::clGetImageInfo, object_, name, param),
             __GET_IMAGE_INFO_ERR);
     }
-    
-    //! \brief Wrapper for clGetImageInfo() that returns by value.
+
     template <cl_int name> typename
     detail::param_traits<detail::cl_image_info, name>::param_type
     getImageInfo(cl_int* err = NULL) const
@@ -3446,221 +1973,12 @@ public:
     }
 };
 
-#if defined(CL_VERSION_1_2)
-/*! \brief Class interface for 1D Image Memory objects.
- *
- *  See Memory for details about copy semantics, etc.
- * 
- *  \see Memory
+/*! \class Image2D
+ * \brief Image interface for 2D images.
  */
-class Image1D : public Image
+class Image2D : public Image
 {
 public:
-    /*! \brief Constructs a 1D Image in a specified context.
-     *
-     *  Wraps clCreateImage().
-     */
-    Image1D(
-        const Context& context,
-        cl_mem_flags flags,
-        ImageFormat format,
-        ::size_t width,
-        void* host_ptr = NULL,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-        cl_image_desc desc;
-        desc.image_type = CL_MEM_OBJECT_IMAGE1D;
-        desc.image_width = width;
-        desc.image_row_pitch = 0;
-        desc.num_mip_levels = 0;
-        desc.num_samples = 0;
-        desc.buffer = 0;
-        object_ = ::clCreateImage(
-            context(), 
-            flags, 
-            &format, 
-            &desc, 
-            host_ptr, 
-            &error);
-
-        detail::errHandler(error, __CREATE_IMAGE_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    //! \brief Default constructor - initializes to NULL.
-    Image1D() { }
-
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Image1D(const Image1D& image1D) : Image(image1D) { }
-
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { }
-
-    /*! \brief Assignment from Image1D - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Image1D& operator = (const Image1D& rhs)
-    {
-        if (this != &rhs) {
-            Image::operator=(rhs);
-        }
-        return *this;
-    }
-
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Image1D& operator = (const cl_mem& rhs)
-    {
-        Image::operator=(rhs);
-        return *this;
-    }
-};
-
-/*! \class Image1DBuffer
- * \brief Image interface for 1D buffer images.
- */
-class Image1DBuffer : public Image
-{
-public:
-    Image1DBuffer(
-        const Context& context,
-        cl_mem_flags flags,
-        ImageFormat format,
-        ::size_t width,
-        Buffer &buffer,
-        void* host_ptr = NULL,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-        cl_image_desc desc;
-        desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
-        desc.image_width = width;
-        desc.image_row_pitch = 0;
-        desc.num_mip_levels = 0;
-        desc.num_samples = 0;
-        desc.buffer = buffer();
-        object_ = ::clCreateImage(
-            context(), 
-            flags, 
-            &format, 
-            &desc, 
-            host_ptr, 
-            &error);
-
-        detail::errHandler(error, __CREATE_IMAGE_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    Image1DBuffer() { }
-
-    Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { }
-
-    __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { }
-
-    Image1DBuffer& operator = (const Image1DBuffer& rhs)
-    {
-        if (this != &rhs) {
-            Image::operator=(rhs);
-        }
-        return *this;
-    }
-
-    Image1DBuffer& operator = (const cl_mem& rhs)
-    {
-        Image::operator=(rhs);
-        return *this;
-    }
-};
-
-/*! \class Image1DArray
- * \brief Image interface for arrays of 1D images.
- */
-class Image1DArray : public Image
-{
-public:
-    Image1DArray(
-        const Context& context,
-        cl_mem_flags flags,
-        ImageFormat format,
-        ::size_t arraySize,
-        ::size_t width,
-        ::size_t rowPitch,
-        void* host_ptr = NULL,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-        cl_image_desc desc;
-        desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
-        desc.image_array_size = arraySize;
-        desc.image_width = width;
-        desc.image_row_pitch = rowPitch;
-        desc.num_mip_levels = 0;
-        desc.num_samples = 0;
-        desc.buffer = 0;
-        object_ = ::clCreateImage(
-            context(), 
-            flags, 
-            &format, 
-            &desc, 
-            host_ptr, 
-            &error);
-
-        detail::errHandler(error, __CREATE_IMAGE_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    Image1DArray() { }
-
-    Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { }
-
-    __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { }
-
-    Image1DArray& operator = (const Image1DArray& rhs)
-    {
-        if (this != &rhs) {
-            Image::operator=(rhs);
-        }
-        return *this;
-    }
-
-    Image1DArray& operator = (const cl_mem& rhs)
-    {
-        Image::operator=(rhs);
-        return *this;
-    }
-};
-#endif // #if defined(CL_VERSION_1_2)
-
-
-/*! \brief Class interface for 2D Image Memory objects.
- *
- *  See Memory for details about copy semantics, etc.
- * 
- *  \see Memory
- */
-class Image2D : public Image
-{
-public:
-    /*! \brief Constructs a 1D Image in a specified context.
-     *
-     *  Wraps clCreateImage().
-     */
     Image2D(
         const Context& context,
         cl_mem_flags flags,
@@ -3672,28 +1990,6 @@ public:
         cl_int* err = NULL)
     {
         cl_int error;
-#if defined(CL_VERSION_1_2)
-        cl_image_desc desc;
-        desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-        desc.image_width = width;
-        desc.image_height = height;
-        desc.image_row_pitch = row_pitch;
-        desc.num_mip_levels = 0;
-        desc.num_samples = 0;
-        desc.buffer = 0;
-        object_ = ::clCreateImage(
-            context(), 
-            flags, 
-            &format, 
-            &desc, 
-            host_ptr, 
-            &error);
-
-        detail::errHandler(error, __CREATE_IMAGE_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-#else
         object_ = ::clCreateImage2D(
             context(), flags,&format, width, height, row_pitch, host_ptr, &error);
 
@@ -3701,28 +1997,12 @@ public:
         if (err != NULL) {
             *err = error;
         }
-#endif // #if defined(CL_VERSION_1_2)
     }
 
-    //! \brief Default constructor - initializes to NULL.
     Image2D() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image2D(const Image2D& image2D) : Image(image2D) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { }
-
-    /*! \brief Assignment from Image2D - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image2D& operator = (const Image2D& rhs)
     {
         if (this != &rhs) {
@@ -3730,37 +2010,14 @@ public:
         }
         return *this;
     }
-
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Image2D& operator = (const cl_mem& rhs)
-    {
-        Image::operator=(rhs);
-        return *this;
-    }
 };
 
-
-#if !defined(CL_VERSION_1_2)
-/*! \brief Class interface for GL 2D Image Memory objects.
- *
- *  This is provided to facilitate interoperability with OpenGL.
- * 
- *  See Memory for details about copy semantics, etc.
- * 
- *  \see Memory
- *  \note Deprecated for OpenCL 1.2. Please use ImageGL instead.
+/*! \class Image2DGL
+ * \brief 2D image interface for GL interop.
  */
-class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D
+class Image2DGL : public Image2D
 {
 public:
-    /*! \brief Constructs an Image2DGL in a specified context, from a given
-     *         GL Texture.
-     *
-     *  Wraps clCreateFromGLTexture2D().
-     */
     Image2DGL(
         const Context& context,
         cl_mem_flags flags,
@@ -3778,32 +2035,16 @@ public:
             texobj,
             &error);
 
-        detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR);
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
         if (err != NULL) {
             *err = error;
         }
-
     }
-    
-    //! \brief Default constructor - initializes to NULL.
+
     Image2DGL() : Image2D() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image2DGL(const Image2DGL& image) : Image2D(image) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { }
-
-    /*! \brief Assignment from Image2DGL - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image2DGL& operator = (const Image2DGL& rhs)
     {
         if (this != &rhs) {
@@ -3811,98 +2052,14 @@ public:
         }
         return *this;
     }
-
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Image2DGL& operator = (const cl_mem& rhs)
-    {
-        Image2D::operator=(rhs);
-        return *this;
-    }
-};
-#endif // #if !defined(CL_VERSION_1_2)
-
-#if defined(CL_VERSION_1_2)
-/*! \class Image2DArray
- * \brief Image interface for arrays of 2D images.
- */
-class Image2DArray : public Image
-{
-public:
-    Image2DArray(
-        const Context& context,
-        cl_mem_flags flags,
-        ImageFormat format,
-        ::size_t arraySize,
-        ::size_t width,
-        ::size_t height,
-        ::size_t rowPitch,
-        ::size_t slicePitch,
-        void* host_ptr = NULL,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-        cl_image_desc desc;
-        desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
-        desc.image_array_size = arraySize;
-        desc.image_width = width;
-        desc.image_height = height;
-        desc.image_row_pitch = rowPitch;
-        desc.image_slice_pitch = slicePitch;
-        desc.num_mip_levels = 0;
-        desc.num_samples = 0;
-        desc.buffer = 0;
-        object_ = ::clCreateImage(
-            context(), 
-            flags, 
-            &format, 
-            &desc, 
-            host_ptr, 
-            &error);
-
-        detail::errHandler(error, __CREATE_IMAGE_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    Image2DArray() { }
-
-    Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { }
-
-    __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { }
-
-    Image2DArray& operator = (const Image2DArray& rhs)
-    {
-        if (this != &rhs) {
-            Image::operator=(rhs);
-        }
-        return *this;
-    }
-
-    Image2DArray& operator = (const cl_mem& rhs)
-    {
-        Image::operator=(rhs);
-        return *this;
-    }
 };
-#endif // #if defined(CL_VERSION_1_2)
 
-/*! \brief Class interface for 3D Image Memory objects.
- *
- *  See Memory for details about copy semantics, etc.
- * 
- *  \see Memory
+/*! \class Image3D
+ * \brief Image interface for 3D images.
  */
 class Image3D : public Image
 {
 public:
-    /*! \brief Constructs a 3D Image in a specified context.
-     *
-     *  Wraps clCreateImage().
-     */
     Image3D(
         const Context& context,
         cl_mem_flags flags,
@@ -3916,30 +2073,6 @@ public:
         cl_int* err = NULL)
     {
         cl_int error;
-#if defined(CL_VERSION_1_2)
-        cl_image_desc desc;
-        desc.image_type = CL_MEM_OBJECT_IMAGE3D;
-        desc.image_width = width;
-        desc.image_height = height;
-        desc.image_depth = depth;
-        desc.image_row_pitch = row_pitch;
-        desc.image_slice_pitch = slice_pitch;
-        desc.num_mip_levels = 0;
-        desc.num_samples = 0;
-        desc.buffer = 0;
-        object_ = ::clCreateImage(
-            context(), 
-            flags, 
-            &format, 
-            &desc, 
-            host_ptr, 
-            &error);
-
-        detail::errHandler(error, __CREATE_IMAGE_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-#else  // #if defined(CL_VERSION_1_2)
         object_ = ::clCreateImage3D(
             context(), flags, &format, width, height, depth, row_pitch,
             slice_pitch, host_ptr, &error);
@@ -3948,28 +2081,12 @@ public:
         if (err != NULL) {
             *err = error;
         }
-#endif // #if defined(CL_VERSION_1_2)
     }
 
-    //! \brief Default constructor - initializes to NULL.
     Image3D() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image3D(const Image3D& image3D) : Image(image3D) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { }
-
-    /*! \brief Assignment from Image3D - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image3D& operator = (const Image3D& rhs)
     {
         if (this != &rhs) {
@@ -3977,35 +2094,14 @@ public:
         }
         return *this;
     }
-
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Image3D& operator = (const cl_mem& rhs)
-    {
-        Image::operator=(rhs);
-        return *this;
-    }
 };
 
-#if !defined(CL_VERSION_1_2)
-/*! \brief Class interface for GL 3D Image Memory objects.
- *
- *  This is provided to facilitate interoperability with OpenGL.
- * 
- *  See Memory for details about copy semantics, etc.
- * 
- *  \see Memory
+/*! \class Image3DGL
+ * \brief 3D image interface for GL interop.
  */
 class Image3DGL : public Image3D
 {
 public:
-    /*! \brief Constructs an Image3DGL in a specified context, from a given
-     *         GL Texture.
-     *
-     *  Wraps clCreateFromGLTexture3D().
-     */
     Image3DGL(
         const Context& context,
         cl_mem_flags flags,
@@ -4023,31 +2119,16 @@ public:
             texobj,
             &error);
 
-        detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR);
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
         if (err != NULL) {
             *err = error;
         }
     }
 
-    //! \brief Default constructor - initializes to NULL.
     Image3DGL() : Image3D() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image3DGL(const Image3DGL& image) : Image3D(image) { }
 
-    /*! \brief Constructor from cl_mem - takes ownership.
-     *
-     *  See Memory for further details.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { }
-
-    /*! \brief Assignment from Image3DGL - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
     Image3DGL& operator = (const Image3DGL& rhs)
     {
         if (this != &rhs) {
@@ -4055,99 +2136,17 @@ public:
         }
         return *this;
     }
-
-    /*! \brief Assignment from cl_mem - performs shallow copy.
-     *
-     *  See Memory for further details.
-     */
-    Image3DGL& operator = (const cl_mem& rhs)
-    {
-        Image3D::operator=(rhs);
-        return *this;
-    }
 };
-#endif // #if !defined(CL_VERSION_1_2)
-
-#if defined(CL_VERSION_1_2)
-/*! \class ImageGL
- * \brief general image interface for GL interop.
- * We abstract the 2D and 3D GL images into a single instance here
- * that wraps all GL sourced images on the grounds that setup information
- * was performed by OpenCL anyway.
+
+/*! \class Sampler
+ * \brief Sampler interface for cl_sampler.
  */
-class ImageGL : public Image
+class Sampler : public detail::Wrapper<cl_sampler>
 {
 public:
-    ImageGL(
-        const Context& context,
-        cl_mem_flags flags,
-        GLenum target,
-        GLint  miplevel,
-        GLuint texobj,
-        cl_int * err = NULL)
-    {
-        cl_int error;
-        object_ = ::clCreateFromGLTexture(
-            context(), 
-            flags, 
-            target,
-            miplevel,
-            texobj,
-            &error);
-
-        detail::errHandler(error, __CREATE_GL_TEXTURE_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    ImageGL() : Image() { }
-
-    ImageGL(const ImageGL& image) : Image(image) { }
-
-    __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { }
-
-    ImageGL& operator = (const ImageGL& rhs)
-    {
-        if (this != &rhs) {
-            Image::operator=(rhs);
-        }
-        return *this;
-    }
-
-    ImageGL& operator = (const cl_mem& rhs)
-    {
-        Image::operator=(rhs);
-        return *this;
-    }
-};
-#endif // #if defined(CL_VERSION_1_2)
-
-/*! \brief Class interface for cl_sampler.
- *
- *  \note Copies of these objects are shallow, meaning that the copy will refer
- *        to the same underlying cl_sampler as the original.  For details, see
- *        clRetainSampler() and clReleaseSampler().
- *
- *  \see cl_sampler 
- */
-class Sampler : public detail::Wrapper<cl_sampler>
-{
-public:
-    /*! \brief Destructor.
-     *
-     *  This calls clReleaseSampler() on the value held by this instance.
-     */
-    ~Sampler() { }
-
-    //! \brief Default constructor - initializes to NULL.
-    Sampler() { }
-
-    /*! \brief Constructs a Sampler in a specified context.
-     *
-     *  Wraps clCreateSampler().
-     */
-    Sampler(
+    Sampler() { }
+
+    Sampler(
         const Context& context,
         cl_bool normalized_coords,
         cl_addressing_mode addressing_mode,
@@ -4168,24 +2167,8 @@ public:
         }
     }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     * 
-     *  This calls clRetainSampler() on the parameter's cl_sampler.
-     */
     Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
 
-    /*! \brief Constructor from cl_sampler - takes ownership.
-     * 
-     *  This effectively transfers ownership of a refcount on the cl_sampler
-     *  into the new Sampler object.
-     */
-    Sampler(const cl_sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
-
-    /*! \brief Assignment operator from Sampler.
-     * 
-     *  This calls clRetainSampler() on the parameter and clReleaseSampler()
-     *  on the previous value held by this instance.
-     */
     Sampler& operator = (const Sampler& rhs)
     {
         if (this != &rhs) {
@@ -4194,18 +2177,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment operator from cl_sampler - takes ownership.
-     *
-     *  This effectively transfers ownership of a refcount on the rhs and calls
-     *  clReleaseSampler() on the value previously held by this instance.
-     */
-    Sampler& operator = (const cl_sampler& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
-    //! \brief Wrapper for clGetSamplerInfo().
     template <typename T>
     cl_int getInfo(cl_sampler_info name, T* param) const
     {
@@ -4214,7 +2185,6 @@ public:
             __GET_SAMPLER_INFO_ERR);
     }
 
-    //! \brief Wrapper for clGetSamplerInfo() that returns by value.
     template <cl_int name> typename
     detail::param_traits<detail::cl_sampler_info, name>::param_type
     getInfo(cl_int* err = NULL) const
@@ -4235,7 +2205,9 @@ class Program;
 class CommandQueue;
 class Kernel;
 
-//! \brief Class interface for specifying NDRange values.
+/*! \class NDRange
+ * \brief NDRange interface
+ */
 class NDRange
 {
 private:
@@ -4243,51 +2215,41 @@ private:
     cl_uint dimensions_;
 
 public:
-    //! \brief Default constructor - resulting range has zero dimensions.
     NDRange()
         : dimensions_(0)
     { }
 
-    //! \brief Constructs one-dimensional range.
     NDRange(::size_t size0)
         : dimensions_(1)
     {
-        sizes_[0] = size0;
+        sizes_.push_back(size0);
     }
 
-    //! \brief Constructs two-dimensional range.
     NDRange(::size_t size0, ::size_t size1)
         : dimensions_(2)
     {
-        sizes_[0] = size0;
-        sizes_[1] = size1;
+        sizes_.push_back(size0);
+        sizes_.push_back(size1);
     }
 
-    //! \brief Constructs three-dimensional range.
     NDRange(::size_t size0, ::size_t size1, ::size_t size2)
         : dimensions_(3)
     {
-        sizes_[0] = size0;
-        sizes_[1] = size1;
-        sizes_[2] = size2;
+        sizes_.push_back(size0);
+        sizes_.push_back(size1);
+        sizes_.push_back(size2);
     }
 
-    /*! \brief Conversion operator to const ::size_t *.
-     *  
-     *  \returns a pointer to the size of the first dimension.
-     */
-    operator const ::size_t*() const { 
-        return (const ::size_t*) sizes_; 
-    }
-
-    //! \brief Queries the number of dimensions in the range.
+    operator const ::size_t*() const { return (const ::size_t*) sizes_; }
     ::size_t dimensions() const { return dimensions_; }
 };
 
-//! \brief A zero-dimensional range.
 static const NDRange NullRange;
 
-//! \brief Local address wrapper for use with Kernel::setArg
+/*!
+ * \struct LocalSpaceArg
+ * \brief Local address wrapper for use with Kernel::setArg
+ */
 struct LocalSpaceArg
 {
     ::size_t size_;
@@ -4312,12 +2274,6 @@ struct KernelArgumentHandler<LocalSpaceArg>
 } 
 //! \endcond
 
-/*! __local
- * \brief Helper function for generating LocalSpaceArg objects.
- * Deprecated. Replaced with Local.
- */
-inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg
-__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
 inline LocalSpaceArg
 __local(::size_t size)
 {
@@ -4325,58 +2281,20 @@ __local(::size_t size)
     return ret;
 }
 
-/*! Local
- * \brief Helper function for generating LocalSpaceArg objects.
- */
-inline LocalSpaceArg
-Local(::size_t size)
-{
-    LocalSpaceArg ret = { size };
-    return ret;
-}
-
-//class KernelFunctor;
+class KernelFunctor;
 
-/*! \brief Class interface for cl_kernel.
- *
- *  \note Copies of these objects are shallow, meaning that the copy will refer
- *        to the same underlying cl_kernel as the original.  For details, see
- *        clRetainKernel() and clReleaseKernel().
- *
- *  \see cl_kernel
+/*! \class Kernel
+ * \brief Kernel interface that implements cl_kernel
  */
 class Kernel : public detail::Wrapper<cl_kernel>
 {
 public:
     inline Kernel(const Program& program, const char* name, cl_int* err = NULL);
 
-    /*! \brief Destructor.
-     *
-     *  This calls clReleaseKernel() on the value held by this instance.
-     */
-    ~Kernel() { }
-
-    //! \brief Default constructor - initializes to NULL.
     Kernel() { }
 
-    /*! \brief Copy constructor - performs shallow copy.
-     * 
-     *  This calls clRetainKernel() on the parameter's cl_kernel.
-     */
     Kernel(const Kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
 
-    /*! \brief Constructor from cl_kernel - takes ownership.
-     * 
-     *  This effectively transfers ownership of a refcount on the cl_kernel
-     *  into the new Kernel object.
-     */
-    __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
-
-    /*! \brief Assignment operator from Kernel.
-     * 
-     *  This calls clRetainKernel() on the parameter and clReleaseKernel()
-     *  on the previous value held by this instance.
-     */
     Kernel& operator = (const Kernel& rhs)
     {
         if (this != &rhs) {
@@ -4385,17 +2303,6 @@ public:
         return *this;
     }
 
-    /*! \brief Assignment operator from cl_kernel - takes ownership.
-     *
-     *  This effectively transfers ownership of a refcount on the rhs and calls
-     *  clReleaseKernel() on the value previously held by this instance.
-     */
-    Kernel& operator = (const cl_kernel& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
     template <typename T>
     cl_int getInfo(cl_kernel_info name, T* param) const
     {
@@ -4417,29 +2324,6 @@ public:
         return param;
     }
 
-#if defined(CL_VERSION_1_2)
-    template <typename T>
-    cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const
-    {
-        return detail::errHandler(
-            detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param),
-            __GET_KERNEL_ARG_INFO_ERR);
-    }
-
-    template <cl_int name> typename
-    detail::param_traits<detail::cl_kernel_arg_info, name>::param_type
-    getArgInfo(cl_uint argIndex, cl_int* err = NULL) const
-    {
-        typename detail::param_traits<
-            detail::cl_kernel_arg_info, name>::param_type param;
-        cl_int result = getArgInfo(argIndex, name, &param);
-        if (err != NULL) {
-            *err = result;
-        }
-        return param;
-    }
-#endif // #if defined(CL_VERSION_1_2)
-
     template <typename T>
     cl_int getWorkGroupInfo(
         const Device& device, cl_kernel_work_group_info name, T* param) const
@@ -4481,6 +2365,17 @@ public:
             ::clSetKernelArg(object_, index, size, argPtr),
             __SET_KERNEL_ARGS_ERR);
     }
+
+    KernelFunctor bind(
+        const CommandQueue& queue,
+        const NDRange& offset,
+        const NDRange& global,
+        const NDRange& local);
+
+    KernelFunctor bind(
+        const CommandQueue& queue,
+        const NDRange& global,
+        const NDRange& local);
 };
 
 __GET_INFO_HELPER_WITH_RETAIN(cl::Kernel)
@@ -4495,109 +2390,6 @@ public:
     typedef VECTOR_CLASS<std::pair<const char*, ::size_t> > Sources;
 
     Program(
-        const STRING_CLASS& source,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-
-        const char * strings = source.c_str();
-        const ::size_t length  = source.size();
-
-        Context context = Context::getDefault(err);
-
-        object_ = ::clCreateProgramWithSource(
-            context(), (cl_uint)1, &strings, &length, &error);
-
-        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
-
-        if (error == CL_SUCCESS) {
-
-            error = ::clBuildProgram(
-                object_,
-                0,
-                NULL,
-                "",
-                NULL,
-                NULL);
-
-            detail::errHandler(error, __BUILD_PROGRAM_ERR);
-        }
-
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    Program(
-        const STRING_CLASS& source,
-		bool build,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-
-        const char * strings = source.c_str();
-        const ::size_t length  = source.size();
-
-        Context context = Context::getDefault(err);
-
-        object_ = ::clCreateProgramWithSource(
-            context(), (cl_uint)1, &strings, &length, &error);
-
-        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
-
-        if (error == CL_SUCCESS && build) {
-
-            error = ::clBuildProgram(
-                object_,
-                0,
-                NULL,
-                "",
-                NULL,
-                NULL);
-
-            detail::errHandler(error, __BUILD_PROGRAM_ERR);
-        }
-
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    Program(
-        const Context& context,
-        const STRING_CLASS& source,
-        bool build = false,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-
-        const char * strings = source.c_str();
-        const ::size_t length  = source.size();
-
-        object_ = ::clCreateProgramWithSource(
-            context(), (cl_uint)1, &strings, &length, &error);
-
-        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
-
-        if (error == CL_SUCCESS && build) {
-
-            error = ::clBuildProgram(
-                object_,
-                0,
-                NULL,
-                "",
-                NULL,
-                NULL);
-
-            detail::errHandler(error, __BUILD_PROGRAM_ERR);
-        }
-
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-
-    Program(
         const Context& context,
         const Sources& sources,
         cl_int* err = NULL)
@@ -4639,15 +2431,9 @@ public:
             lengths[i] = binaries[(int)i].second;
         }
 
-        ::size_t numDevices = devices.size();
-        cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
-        for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
-            deviceIDs[deviceIndex] = (devices[deviceIndex])();
-        }
-
         object_ = ::clCreateProgramWithBinary(
             context(), (cl_uint) devices.size(),
-            deviceIDs,
+            (cl_device_id*)&devices.front(),
             lengths, images, binaryStatus != NULL
                ? (cl_int*) &binaryStatus->front()
                : NULL, &error);
@@ -4658,47 +2444,10 @@ public:
         }
     }
 
-    
-#if defined(CL_VERSION_1_2)
-    /**
-     * Create program using builtin kernels.
-     * \param kernelNames Semi-colon separated list of builtin kernel names
-     */
-    Program(
-        const Context& context,
-        const VECTOR_CLASS<Device>& devices,
-        const STRING_CLASS& kernelNames,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-
-
-        ::size_t numDevices = devices.size();
-        cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
-        for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
-            deviceIDs[deviceIndex] = (devices[deviceIndex])();
-        }
-        
-        object_ = ::clCreateProgramWithBuiltInKernels(
-            context(), 
-            (cl_uint) devices.size(),
-            deviceIDs,
-            kernelNames.c_str(), 
-            &error);
-
-        detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR);
-        if (err != NULL) {
-            *err = error;
-        }
-    }
-#endif // #if defined(CL_VERSION_1_2)
-
     Program() { }
 
     Program(const Program& program) : detail::Wrapper<cl_type>(program) { }
 
-    __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper<cl_type>(program) { }
-
     Program& operator = (const Program& rhs)
     {
         if (this != &rhs) {
@@ -4707,73 +2456,24 @@ public:
         return *this;
     }
 
-    Program& operator = (const cl_program& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
     cl_int build(
         const VECTOR_CLASS<Device>& devices,
         const char* options = NULL,
         void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
         void* data = NULL) const
     {
-        ::size_t numDevices = devices.size();
-        cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
-        for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
-            deviceIDs[deviceIndex] = (devices[deviceIndex])();
-        }
-
         return detail::errHandler(
             ::clBuildProgram(
                 object_,
                 (cl_uint)
                 devices.size(),
-                deviceIDs,
-                options,
-                notifyFptr,
-                data),
-                __BUILD_PROGRAM_ERR);
-    }
-
-    cl_int build(
-        const char* options = NULL,
-        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
-        void* data = NULL) const
-    {
-        return detail::errHandler(
-            ::clBuildProgram(
-                object_,
-                0,
-                NULL,
+                (cl_device_id*)&devices.front(),
                 options,
                 notifyFptr,
                 data),
                 __BUILD_PROGRAM_ERR);
     }
 
-#if defined(CL_VERSION_1_2)
-	cl_int compile(
-        const char* options = NULL,
-        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
-        void* data = NULL) const
-    {
-        return detail::errHandler(
-            ::clCompileProgram(
-                object_,
-                0,
-                NULL,
-                options,
-				0,
-				NULL,
-				NULL,
-                notifyFptr,
-                data),
-                __COMPILE_PROGRAM_ERR);
-    }
-#endif
-
     template <typename T>
     cl_int getInfo(cl_program_info name, T* param) const
     {
@@ -4838,116 +2538,6 @@ public:
     }
 };
 
-#if defined(CL_VERSION_1_2)
-inline Program linkProgram(
-    Program input1,
-    Program input2,
-    const char* options = NULL,
-    void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
-    void* data = NULL,
-    cl_int* err = NULL) 
-{
-    cl_int err_local;
-
-    cl_program programs[2] = { input1(), input2() };
-
-    Context ctx = input1.getInfo<CL_PROGRAM_CONTEXT>();
-
-    cl_program prog = ::clLinkProgram(
-        ctx(),
-        0,
-        NULL,
-        options,
-        2,
-        programs,
-        notifyFptr,
-        data,
-        &err_local);
-
-    if (err_local != CL_SUCCESS) {
-        err_local = detail::errHandler(err_local,__COMPILE_PROGRAM_ERR);
-        if (err != NULL) {
-            *err = err_local;
-        }
-    }
-
-    return Program(prog);
-}
-
-inline Program linkProgram(
-    VECTOR_CLASS<Program> inputPrograms,
-    const char* options = NULL,
-    void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
-    void* data = NULL,
-    cl_int* err = NULL) 
-{
-    cl_int err_local;
-
-    if (inputPrograms.size() == 0) {
-        err_local = detail::errHandler(CL_INVALID_VALUE,__COMPILE_PROGRAM_ERR);
-        if (err != NULL) {
-            *err = err_local;
-        }
-        return Program();
-    }
-
-    cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program));
-
-    if (programs != NULL) {
-        for (unsigned int i = 0; i < inputPrograms.size(); i++) {
-          programs[i] = inputPrograms[i]();
-        }
-    } 
-    else {
-        err_local = detail::errHandler(CL_OUT_OF_HOST_MEMORY,__COMPILE_PROGRAM_ERR);
-        if (err != NULL) {
-            *err = err_local;
-        }
-        return Program();
-    }
-
-    cl_program prog = ::clLinkProgram(
-        Context::getDefault()(),
-        0,
-        NULL,
-        options,
-        (cl_uint)inputPrograms.size(),
-        programs,
-        notifyFptr,
-        data,
-        &err_local);
-
-    if (err_local != CL_SUCCESS) {
-        err_local = detail::errHandler(err_local,__COMPILE_PROGRAM_ERR);
-        if (err != NULL) {
-            *err = err_local;
-        }
-    }
-
-    return Program(prog);
-}
-#endif
-
-template<>
-inline VECTOR_CLASS<char *> cl::Program::getInfo<CL_PROGRAM_BINARIES>(cl_int* err) const
-{
-    VECTOR_CLASS< ::size_t> sizes = getInfo<CL_PROGRAM_BINARY_SIZES>();
-    VECTOR_CLASS<char *> binaries;
-    for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) 
-    {
-        char *ptr = NULL;
-        if (*s != 0) 
-            ptr = new char[*s];
-        binaries.push_back(ptr);
-    }
-    
-    cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries);
-    if (err != NULL) {
-        *err = result;
-    }
-    return binaries;
-}
-
 __GET_INFO_HELPER_WITH_RETAIN(cl::Program)
 
 inline Kernel::Kernel(const Program& program, const char* name, cl_int* err)
@@ -4968,38 +2558,7 @@ inline Kernel::Kernel(const Program& program, const char* name, cl_int* err)
  */
 class CommandQueue : public detail::Wrapper<cl_command_queue>
 {
-private:
-    static volatile int default_initialized_;
-    static CommandQueue default_;
-    static volatile cl_int default_error_;
 public:
-   CommandQueue(
-        cl_command_queue_properties properties,
-        cl_int* err = NULL)
-    {
-        cl_int error;
-
-        Context context = Context::getDefault(&error);
-        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
-
-        if (error != CL_SUCCESS) {
-            if (err != NULL) {
-                *err = error;
-            }
-        }
-        else {
-            Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
-
-            object_ = ::clCreateCommandQueue(
-                context(), device(), properties, &error);
-
-            detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
-            if (err != NULL) {
-                *err = error;
-            }
-        }
-    }
-
     CommandQueue(
         const Context& context,
         const Device& device,
@@ -5016,73 +2575,10 @@ public:
         }
     }
 
-    static CommandQueue getDefault(cl_int * err = NULL) 
-    {
-        int state = detail::compare_exchange(
-            &default_initialized_, 
-            __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED);
-        
-        if (state & __DEFAULT_INITIALIZED) {
-            if (err != NULL) {
-                *err = default_error_;
-            }
-            return default_;
-        }
-
-        if (state & __DEFAULT_BEING_INITIALIZED) {
-              // Assume writes will propagate eventually...
-              while(default_initialized_ != __DEFAULT_INITIALIZED) {
-                  detail::fence();
-              }
-
-            if (err != NULL) {
-                *err = default_error_;
-            }
-            return default_;
-        }
-
-        cl_int error;
-
-        Context context = Context::getDefault(&error);
-        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
-
-        if (error != CL_SUCCESS) {
-            if (err != NULL) {
-                *err = error;
-            }
-        }
-        else {
-            Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
-
-            default_ = CommandQueue(context, device, 0, &error);
-
-            detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
-            if (err != NULL) {
-                *err = error;
-            }
-        }
-
-        detail::fence();
-
-        default_error_ = error;
-        // Assume writes will propagate eventually...
-        default_initialized_ = __DEFAULT_INITIALIZED;
-
-        detail::fence();
-
-        if (err != NULL) {
-            *err = default_error_;
-        }
-        return default_;
-
-    }
-
     CommandQueue() { }
 
     CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
 
-    CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
-
     CommandQueue& operator = (const CommandQueue& rhs)
     {
         if (this != &rhs) {
@@ -5091,12 +2587,6 @@ public:
         return *this;
     }
 
-    CommandQueue& operator = (const cl_command_queue& rhs)
-    {
-        detail::Wrapper<cl_type>::operator=(rhs);
-        return *this;
-    }
-
     template <typename T>
     cl_int getInfo(cl_command_queue_info name, T* param) const
     {
@@ -5128,20 +2618,14 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueReadBuffer(
                 object_, buffer(), blocking, offset, size,
                 ptr,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_READ_BUFFER_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     cl_int enqueueWriteBuffer(
@@ -5153,20 +2637,14 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueWriteBuffer(
                 object_, buffer(), blocking, offset, size,
                 ptr,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
                 __ENQUEUE_WRITE_BUFFER_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     cl_int enqueueCopyBuffer(
@@ -5178,21 +2656,16 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueCopyBuffer(
                 object_, src(), dst(), src_offset, dst_offset, size,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQEUE_COPY_BUFFER_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
+#if defined(CL_VERSION_1_1)
     cl_int enqueueReadBufferRect(
         const Buffer& buffer,
         cl_bool blocking,
@@ -5207,8 +2680,7 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueReadBufferRect(
                 object_, 
                 buffer(), 
@@ -5223,15 +2695,11 @@ public:
                 ptr,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
                 __ENQUEUE_READ_BUFFER_RECT_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
+
     cl_int enqueueWriteBufferRect(
         const Buffer& buffer,
         cl_bool blocking,
@@ -5246,8 +2714,7 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueWriteBufferRect(
                 object_, 
                 buffer(), 
@@ -5262,13 +2729,8 @@ public:
                 ptr,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
                 __ENQUEUE_WRITE_BUFFER_RECT_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     cl_int enqueueCopyBufferRect(
@@ -5284,8 +2746,7 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueCopyBufferRect(
                 object_, 
                 src(), 
@@ -5299,51 +2760,10 @@ public:
                 dst_slice_pitch,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQEUE_COPY_BUFFER_RECT_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
-    }
-
-#if defined(CL_VERSION_1_2)
-    /**
-     * Enqueue a command to fill a buffer object with a pattern
-     * of a given size. The pattern is specified a as vector.
-     * \tparam PatternType The datatype of the pattern field. 
-     *     The pattern type must be an accepted OpenCL data type.
-     */
-    template<typename PatternType>
-    cl_int enqueueFillBuffer(
-        const Buffer& buffer,
-        PatternType pattern,
-        ::size_t offset,
-        ::size_t size,
-        const VECTOR_CLASS<Event>* events = NULL,
-        Event* event = NULL) const
-    {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
-            ::clEnqueueFillBuffer(
-                object_, 
-                buffer(),
-                static_cast<void*>(&pattern),
-                sizeof(PatternType), 
-                offset, 
-                size,
-                (events != NULL) ? (cl_uint) events->size() : 0,
-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
-                __ENQUEUE_FILL_BUFFER_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
-#endif // #if defined(CL_VERSION_1_2)
+#endif
 
     cl_int enqueueReadImage(
         const Image& image,
@@ -5356,20 +2776,14 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueReadImage(
                 object_, image(), blocking, (const ::size_t *) origin,
                 (const ::size_t *) region, row_pitch, slice_pitch, ptr,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_READ_IMAGE_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     cl_int enqueueWriteImage(
@@ -5383,20 +2797,14 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueWriteImage(
                 object_, image(), blocking, (const ::size_t *) origin,
                 (const ::size_t *) region, row_pitch, slice_pitch, ptr,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_WRITE_IMAGE_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     cl_int enqueueCopyImage(
@@ -5408,125 +2816,15 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueCopyImage(
                 object_, src(), dst(), (const ::size_t *) src_origin,
                 (const ::size_t *)dst_origin, (const ::size_t *) region,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_COPY_IMAGE_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
-    }
-
-#if defined(CL_VERSION_1_2)
-    /**
-     * Enqueue a command to fill an image object with a specified color.
-     * \param fillColor is the color to use to fill the image.
-     *     This is a four component RGBA floating-point color value if
-     *     the image channel data type is not an unnormalized signed or
-     *     unsigned data type.
-     */
-    cl_int enqueueFillImage(
-        const Image& image,
-        cl_float4 fillColor,
-        const size_t<3>& origin,
-        const size_t<3>& region,
-        const VECTOR_CLASS<Event>* events = NULL,
-        Event* event = NULL) const
-    {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
-            ::clEnqueueFillImage(
-                object_, 
-                image(),
-                static_cast<void*>(&fillColor), 
-                (const ::size_t *) origin, 
-                (const ::size_t *) region,
-                (events != NULL) ? (cl_uint) events->size() : 0,
-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
-                __ENQUEUE_FILL_IMAGE_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
-    }
-
-    /**
-     * Enqueue a command to fill an image object with a specified color.
-     * \param fillColor is the color to use to fill the image.
-     *     This is a four component RGBA signed integer color value if
-     *     the image channel data type is an unnormalized signed integer
-     *     type.
-     */
-    cl_int enqueueFillImage(
-        const Image& image,
-        cl_int4 fillColor,
-        const size_t<3>& origin,
-        const size_t<3>& region,
-        const VECTOR_CLASS<Event>* events = NULL,
-        Event* event = NULL) const
-    {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
-            ::clEnqueueFillImage(
-                object_, 
-                image(),
-                static_cast<void*>(&fillColor), 
-                (const ::size_t *) origin, 
-                (const ::size_t *) region,
-                (events != NULL) ? (cl_uint) events->size() : 0,
-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
-                __ENQUEUE_FILL_IMAGE_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
-    }
-
-    /**
-     * Enqueue a command to fill an image object with a specified color.
-     * \param fillColor is the color to use to fill the image.
-     *     This is a four component RGBA unsigned integer color value if
-     *     the image channel data type is an unnormalized unsigned integer
-     *     type.
-     */
-    cl_int enqueueFillImage(
-        const Image& image,
-        cl_uint4 fillColor,
-        const size_t<3>& origin,
-        const size_t<3>& region,
-        const VECTOR_CLASS<Event>* events = NULL,
-        Event* event = NULL) const
-    {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
-            ::clEnqueueFillImage(
-                object_, 
-                image(),
-                static_cast<void*>(&fillColor), 
-                (const ::size_t *) origin, 
-                (const ::size_t *) region,
-                (events != NULL) ? (cl_uint) events->size() : 0,
-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
-                __ENQUEUE_FILL_IMAGE_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
-#endif // #if defined(CL_VERSION_1_2)
 
     cl_int enqueueCopyImageToBuffer(
         const Image& src,
@@ -5537,20 +2835,14 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueCopyImageToBuffer(
                 object_, src(), dst(), (const ::size_t *) src_origin,
                 (const ::size_t *) region, dst_offset,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     cl_int enqueueCopyBufferToImage(
@@ -5562,20 +2854,14 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueCopyBufferToImage(
                 object_, src(), dst(), src_offset,
                 (const ::size_t *) dst_origin, (const ::size_t *) region,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     void* enqueueMapBuffer(
@@ -5638,129 +2924,24 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueUnmapMemObject(
                 object_, memory(), mapped_ptr,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
-            __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
-    }
-
-#if defined(CL_VERSION_1_2)
-    /**
-     * Enqueues a marker command which waits for either a list of events to complete, 
-     * or all previously enqueued commands to complete.
-     *
-     * Enqueues a marker command which waits for either a list of events to complete, 
-     * or if the list is empty it waits for all commands previously enqueued in command_queue 
-     * to complete before it completes. This command returns an event which can be waited on, 
-     * i.e. this event can be waited on to insure that all events either in the event_wait_list 
-     * or all previously enqueued commands, queued before this command to command_queue, 
-     * have completed.
-     */
-    cl_int enqueueMarkerWithWaitList(
-        const VECTOR_CLASS<Event> *events = 0,
-        Event *event = 0)
-    {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
-            ::clEnqueueMarkerWithWaitList(
-                object_,
-                (events != NULL) ? (cl_uint) events->size() : 0,
-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
-            __ENQUEUE_MARKER_WAIT_LIST_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
-    }
-
-    /**
-     * A synchronization point that enqueues a barrier operation.
-     *
-     * Enqueues a barrier command which waits for either a list of events to complete, 
-     * or if the list is empty it waits for all commands previously enqueued in command_queue 
-     * to complete before it completes. This command blocks command execution, that is, any 
-     * following commands enqueued after it do not execute until it completes. This command 
-     * returns an event which can be waited on, i.e. this event can be waited on to insure that 
-     * all events either in the event_wait_list or all previously enqueued commands, queued 
-     * before this command to command_queue, have completed.
-     */
-    cl_int enqueueBarrierWithWaitList(
-        const VECTOR_CLASS<Event> *events = 0,
-        Event *event = 0)
-    {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
-            ::clEnqueueBarrierWithWaitList(
-                object_,
-                (events != NULL) ? (cl_uint) events->size() : 0,
-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
-            __ENQUEUE_BARRIER_WAIT_LIST_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
-    }
-    
-    /**
-     * Enqueues a command to indicate with which device a set of memory objects
-     * should be associated.
-     */
-    cl_int enqueueMigrateMemObjects(
-        const VECTOR_CLASS<Memory> &memObjects,
-        cl_mem_migration_flags flags,
-        const VECTOR_CLASS<Event>* events = NULL,
-        Event* event = NULL
-        )
-    {
-        cl_event tmp;
-        
-        cl_mem* localMemObjects = static_cast<cl_mem*>(alloca(memObjects.size() * sizeof(cl_mem)));
-        for( int i = 0; i < (int)memObjects.size(); ++i ) {
-            localMemObjects[i] = memObjects[i]();
-        }
-
-
-        cl_int err = detail::errHandler(
-            ::clEnqueueMigrateMemObjects(
-                object_, 
-                (cl_uint)memObjects.size(), 
-                static_cast<const cl_mem*>(localMemObjects),
-                flags,
-                (events != NULL) ? (cl_uint) events->size() : 0,
-                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
-#endif // #if defined(CL_VERSION_1_2)
 
     cl_int enqueueNDRangeKernel(
         const Kernel& kernel,
         const NDRange& offset,
         const NDRange& global,
-        const NDRange& local = NullRange,
+        const NDRange& local,
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueNDRangeKernel(
                 object_, kernel(), (cl_uint) global.dimensions(),
                 offset.dimensions() != 0 ? (const ::size_t*) offset : NULL,
@@ -5768,13 +2949,8 @@ public:
                 local.dimensions() != 0 ? (const ::size_t*) local : NULL,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_NDRANGE_KERNEL_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     cl_int enqueueTask(
@@ -5782,23 +2958,17 @@ public:
         const VECTOR_CLASS<Event>* events = NULL,
         Event* event = NULL) const
     {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueTask(
                 object_, kernel(),
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_TASK_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
     cl_int enqueueNativeKernel(
-        void (CL_CALLBACK *userFptr)(void *),
+        void (*userFptr)(void *),
         std::pair<void*, ::size_t> args,
         const VECTOR_CLASS<Memory>* mem_objects = NULL,
         const VECTOR_CLASS<const void*>* mem_locs = NULL,
@@ -5815,8 +2985,7 @@ public:
             }
         }
 
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             ::clEnqueueNativeKernel(
                 object_, userFptr, args.first, args.second,
                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
@@ -5824,29 +2993,18 @@ public:
                 (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_NATIVE_KERNEL);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 
-/**
- * Deprecated APIs for 1.2
- */
-#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) 
-    CL_EXT_PREFIX__VERSION_1_1_DEPRECATED 
-    cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+    cl_int enqueueMarker(Event* event = NULL) const
     {
         return detail::errHandler(
             ::clEnqueueMarker(object_, (cl_event*) event),
             __ENQUEUE_MARKER_ERR);
     }
 
-    CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
-    cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+    cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const
     {
         return detail::errHandler(
             ::clEnqueueWaitForEvents(
@@ -5855,28 +3013,21 @@ public:
                 (const cl_event*) &events.front()),
             __ENQUEUE_WAIT_FOR_EVENTS_ERR);
     }
-#endif // #if defined(CL_VERSION_1_1)
 
     cl_int enqueueAcquireGLObjects(
          const VECTOR_CLASS<Memory>* mem_objects = NULL,
          const VECTOR_CLASS<Event>* events = NULL,
          Event* event = NULL) const
      {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+         return detail::errHandler(
              ::clEnqueueAcquireGLObjects(
                  object_,
                  (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
                  (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
                  (events != NULL) ? (cl_uint) events->size() : 0,
                  (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                 (event != NULL) ? &tmp : NULL),
+                 (cl_event*) event),
              __ENQUEUE_ACQUIRE_GL_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
      }
 
     cl_int enqueueReleaseGLObjects(
@@ -5884,21 +3035,15 @@ public:
          const VECTOR_CLASS<Event>* events = NULL,
          Event* event = NULL) const
      {
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+         return detail::errHandler(
              ::clEnqueueReleaseGLObjects(
                  object_,
                  (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
                  (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
                  (events != NULL) ? (cl_uint) events->size() : 0,
                  (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-                 (event != NULL) ? &tmp : NULL),
+                 (cl_event*) event),
              __ENQUEUE_RELEASE_GL_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
      }
 
 #if defined (USE_DX_INTEROP)
@@ -5915,33 +3060,19 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
          const VECTOR_CLASS<Memory>* mem_objects = NULL,
          const VECTOR_CLASS<Event>* events = NULL,
          Event* event = NULL) const
-    {
-        static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
-#if defined(CL_VERSION_1_2)
-        cl_context context = getInfo<CL_QUEUE_CONTEXT>();
-        cl::Device device(getInfo<CL_QUEUE_DEVICE>());
-        cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
-        __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR);
-#endif
-#if defined(CL_VERSION_1_1)
-        __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
-#endif
-        
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+     {
+         static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
+         __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
+		
+         return detail::errHandler(
              pfn_clEnqueueAcquireD3D10ObjectsKHR(
                  object_,
                  (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
                  (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
                  (events != NULL) ? (cl_uint) events->size() : 0,
                  (events != NULL) ? (cl_event*) &events->front() : NULL,
-                 (event != NULL) ? &tmp : NULL),
+                 (cl_event*) event),
              __ENQUEUE_ACQUIRE_GL_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
      }
 
     cl_int enqueueReleaseD3D10Objects(
@@ -5950,46 +3081,26 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
          Event* event = NULL) const
     {
         static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
-#if defined(CL_VERSION_1_2)
-        cl_context context = getInfo<CL_QUEUE_CONTEXT>();
-        cl::Device device(getInfo<CL_QUEUE_DEVICE>());
-        cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
-        __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR);
-#endif // #if defined(CL_VERSION_1_2)
-#if defined(CL_VERSION_1_1)
         __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);
-#endif // #if defined(CL_VERSION_1_1)
 
-        cl_event tmp;
-        cl_int err = detail::errHandler(
+        return detail::errHandler(
             pfn_clEnqueueReleaseD3D10ObjectsKHR(
                 object_,
                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
                 (events != NULL) ? (cl_uint) events->size() : 0,
                 (events != NULL) ? (cl_event*) &events->front() : NULL,
-                (event != NULL) ? &tmp : NULL),
+                (cl_event*) event),
             __ENQUEUE_RELEASE_GL_ERR);
-
-        if (event != NULL && err == CL_SUCCESS)
-            *event = tmp;
-
-        return err;
     }
 #endif
 
-/**
- * Deprecated APIs for 1.2
- */
-#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) 
-    CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
-    cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+    cl_int enqueueBarrier() const
     {
         return detail::errHandler(
             ::clEnqueueBarrier(object_),
             __ENQUEUE_BARRIER_ERR);
     }
-#endif // #if defined(CL_VERSION_1_1)
 
     cl_int flush() const
     {
@@ -6004,6125 +3115,823 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
 
 __GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue)
 
-#ifdef _WIN32
-__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
-__declspec(selectany) CommandQueue CommandQueue::default_;
-__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS;
-#else
-__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
-__attribute__((weak)) CommandQueue CommandQueue::default_;
-__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS;
-#endif
-
-inline cl_int enqueueReadBuffer(
-    const Buffer& buffer,
-    cl_bool blocking,
-    ::size_t offset,
-    ::size_t size,
-    void* ptr,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
+/*! \class KernelFunctor
+ * \brief Kernel functor interface
+ *
+ * \note Currently only functors of zero to fifteen arguments are supported. It
+ * is straightforward to add more and a more general solution, similar to
+ * Boost.Lambda could be followed if required in the future.
+ */
+class KernelFunctor
 {
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
+private:
+    Kernel kernel_;
+    CommandQueue queue_;
+    NDRange offset_;
+    NDRange global_;
+    NDRange local_;
 
-    if (error != CL_SUCCESS) {
-        return error;
-    }
+    cl_int err_;
+public:
+    KernelFunctor() : err_(CL_SUCCESS) { }  // default state: no enqueue attempted yet, so getError() reports CL_SUCCESS instead of an indeterminate value
+
+    KernelFunctor(
+        const Kernel& kernel,
+        const CommandQueue& queue,
+        const NDRange& offset,
+        const NDRange& global,
+        const NDRange& local) :
+            kernel_(kernel),
+            queue_(queue),
+            offset_(offset),
+            global_(global),
+            local_(local),
+            err_(CL_SUCCESS)
+    {}
 
-    return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event);
+    KernelFunctor& operator=(const KernelFunctor& rhs);
+
+    KernelFunctor(const KernelFunctor& rhs);
+
+    cl_int getError() { return err_; }
+
+    inline Event operator()(const VECTOR_CLASS<Event>* events = NULL);
+
+    template<typename A1>
+    inline Event operator()(
+        const A1& a1, 
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4, class A5>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4, class A5, class A6>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4,
+             class A5, class A6, class A7>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6, 
+        const A7& a7,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6, 
+        const A7& a7, 
+        const A8& a8,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6, 
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11, class A12>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11, 
+        const A12& a12,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11, class A12, class A13>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11, 
+        const A12& a12, 
+        const A13& a13,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11, class A12, class A13, class A14>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11,
+        const A12& a12, 
+        const A13& a13, 
+        const A14& a14,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11, class A12, class A13, class A14, class A15>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11,
+        const A12& a12, 
+        const A13& a13, 
+        const A14& a14, 
+        const A15& a15,
+        const VECTOR_CLASS<Event>* events = NULL);
+};
+
+inline KernelFunctor Kernel::bind(
+    const CommandQueue& queue,
+    const NDRange& offset,
+    const NDRange& global,
+    const NDRange& local)
+{   // Returns a KernelFunctor holding copies of this kernel, the queue and the three ranges.
+    return KernelFunctor(*this,queue,offset,global,local);
 }
 
-inline cl_int enqueueWriteBuffer(
-        const Buffer& buffer,
-        cl_bool blocking,
-        ::size_t offset,
-        ::size_t size,
-        const void* ptr,
-        const VECTOR_CLASS<Event>* events = NULL,
-        Event* event = NULL)
+inline KernelFunctor Kernel::bind(
+    const CommandQueue& queue,
+    const NDRange& global,
+    const NDRange& local)
 {
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
+    return KernelFunctor(*this,queue,NullRange,global,local);  // overload without an offset: NullRange is passed as the offset
+}
 
-    if (error != CL_SUCCESS) {
-        return error;
+inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs)
+{
+    if (this == &rhs) {    // self-assignment: nothing to copy
+        return *this;
     }
+    
+    kernel_ = rhs.kernel_;
+    queue_  = rhs.queue_;
+    offset_ = rhs.offset_;
+    global_ = rhs.global_;
+    local_  = rhs.local_;
+    err_    = rhs.err_;    // copy the stored status as well, so getError() agrees with rhs after assignment
+    return *this;
+}
 
-    return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event);
+inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) :
+    kernel_(rhs.kernel_),
+    queue_(rhs.queue_),
+    offset_(rhs.offset_),
+    global_(rhs.global_),
+    local_(rhs.local_), err_(rhs.err_)  // copy the stored status too, so getError() on the copy reports the source's value
+{
 }
 
-inline void* enqueueMapBuffer(
-        const Buffer& buffer,
-        cl_bool blocking,
-        cl_map_flags flags,
-        ::size_t offset,
-        ::size_t size,
-        const VECTOR_CLASS<Event>* events = NULL,
-        Event* event = NULL,
-        cl_int* err = NULL)
+Event KernelFunctor::operator()(const VECTOR_CLASS<Event>* events)
 {
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-    detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
-    if (err != NULL) {
-        *err = error;
-    }
+    Event event;    // receives the event of the enqueued command; returned to the caller
 
-    void * result = ::clEnqueueMapBuffer(
-            queue(), buffer(), blocking, flags, offset, size,
-            (events != NULL) ? (cl_uint) events->size() : 0,
-            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-            (cl_event*) event,
-            &error);
+    err_ = queue_.enqueueNDRangeKernel(    // the status is stored in err_ and read back through getError()
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // 'events' is NOT forwarded; no wait list is passed (bgaster_fixme - do we want to allow wait event lists?)
+        &event);
 
-    detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
-    if (err != NULL) {
-        *err = error;
-    }
-    return result;
+    return event;
 }
 
-inline cl_int enqueueUnmapMemObject(
-    const Memory& memory,
-    void* mapped_ptr,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
+template<typename A1>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const VECTOR_CLASS<Event>* events)
 {
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-    detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
-    if (error != CL_SUCCESS) {
-        return error;
-    }
+    Event event;    // receives the event of the enqueued command; returned to the caller
 
-    cl_event tmp;
-    cl_int err = detail::errHandler(
-        ::clEnqueueUnmapMemObject(
-            queue(), memory(), mapped_ptr,
-            (events != NULL) ? (cl_uint) events->size() : 0,
-            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
-            (event != NULL) ? &tmp : NULL),
-        __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+    kernel_.setArg(0,a1);    // binds a1 as kernel argument 0; the setArg return value is not checked here
 
-    if (event != NULL && err == CL_SUCCESS)
-        *event = tmp;
+    err_ = queue_.enqueueNDRangeKernel(    // the status is stored in err_ and read back through getError()
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // 'events' is NOT forwarded; no wait list is passed (bgaster_fixme - do we want to allow wait event lists?)
+        &event);
 
-    return err;
+    return event;
 }
 
-inline cl_int enqueueCopyBuffer(
-        const Buffer& src,
-        const Buffer& dst,
-        ::size_t src_offset,
-        ::size_t dst_offset,
-        ::size_t size,
-        const VECTOR_CLASS<Event>* events = NULL,
-        Event* event = NULL)
+template<typename A1, typename A2>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2,
+    const VECTOR_CLASS<Event>* events)
 {
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
+    Event event;
 
-    if (error != CL_SUCCESS) {
-        return error;
-    }
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
 
-    return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event);
-}
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
 
-/**
- * Blocking copy operation between iterators and a buffer.
- */
-template< typename IteratorType >
-inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer )
-{
-    typedef typename std::iterator_traits<IteratorType>::value_type DataType;
-    cl_int error;
-    
-    ::size_t length = endIterator-startIterator;
-    ::size_t byteLength = length*sizeof(DataType);
-
-    DataType *pointer = 
-        static_cast<DataType*>(enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error));
-    // if exceptions enabled, enqueueMapBuffer will throw
-    if( error != CL_SUCCESS ) {
-        return error;
-    }
-#if defined(_MSC_VER)
-    std::copy(
-        startIterator, 
-        endIterator, 
-        stdext::checked_array_iterator<DataType*>(
-            pointer, length));
-#else
-    std::copy(startIterator, endIterator, pointer);
-#endif
-    Event endEvent;
-    error = enqueueUnmapMemObject(buffer, pointer, 0, &endEvent);
-    // if exceptions enabled, enqueueUnmapMemObject will throw
-    if( error != CL_SUCCESS ) { 
-        return error;
-    }
-    endEvent.wait();
-    return CL_SUCCESS;
+    return event;
 }
 
-/**
- * Blocking copy operation between iterators and a buffer.
- */
-template< typename IteratorType >
-inline cl_int copy( cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator )
+template<typename A1, typename A2, typename A3>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3,
+    const VECTOR_CLASS<Event>* events)
 {
-    typedef typename std::iterator_traits<IteratorType>::value_type DataType;
-    cl_int error;
-        
-    ::size_t length = endIterator-startIterator;
-    ::size_t byteLength = length*sizeof(DataType);
-
-    DataType *pointer = 
-        static_cast<DataType*>(enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error));
-    // if exceptions enabled, enqueueMapBuffer will throw
-    if( error != CL_SUCCESS ) {
-        return error;
-    }
-    std::copy(pointer, pointer + length, startIterator);
-    Event endEvent;
-    error = enqueueUnmapMemObject(buffer, pointer, 0, &endEvent);
-    // if exceptions enabled, enqueueUnmapMemObject will throw
-    if( error != CL_SUCCESS ) { 
-        return error;
-    }
-    endEvent.wait();
-    return CL_SUCCESS;
-}
+    Event event;
 
-#if defined(CL_VERSION_1_1)
-inline cl_int enqueueReadBufferRect(
-    const Buffer& buffer,
-    cl_bool blocking,
-    const size_t<3>& buffer_offset,
-    const size_t<3>& host_offset,
-    const size_t<3>& region,
-    ::size_t buffer_row_pitch,
-    ::size_t buffer_slice_pitch,
-    ::size_t host_row_pitch,
-    ::size_t host_slice_pitch,
-    void *ptr,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.enqueueReadBufferRect(
-        buffer, 
-        blocking, 
-        buffer_offset, 
-        host_offset,
-        region,
-        buffer_row_pitch,
-        buffer_slice_pitch,
-        host_row_pitch,
-        host_slice_pitch,
-        ptr, 
-        events, 
-        event);
-}
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
 
-inline cl_int enqueueWriteBufferRect(
-    const Buffer& buffer,
-    cl_bool blocking,
-    const size_t<3>& buffer_offset,
-    const size_t<3>& host_offset,
-    const size_t<3>& region,
-    ::size_t buffer_row_pitch,
-    ::size_t buffer_slice_pitch,
-    ::size_t host_row_pitch,
-    ::size_t host_slice_pitch,
-    void *ptr,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.enqueueWriteBufferRect(
-        buffer, 
-        blocking, 
-        buffer_offset, 
-        host_offset,
-        region,
-        buffer_row_pitch,
-        buffer_slice_pitch,
-        host_row_pitch,
-        host_slice_pitch,
-        ptr, 
-        events, 
-        event);
-}
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
 
-inline cl_int enqueueCopyBufferRect(
-    const Buffer& src,
-    const Buffer& dst,
-    const size_t<3>& src_origin,
-    const size_t<3>& dst_origin,
-    const size_t<3>& region,
-    ::size_t src_row_pitch,
-    ::size_t src_slice_pitch,
-    ::size_t dst_row_pitch,
-    ::size_t dst_slice_pitch,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.enqueueCopyBufferRect(
-        src,
-        dst,
-        src_origin,
-        dst_origin,
-        region,
-        src_row_pitch,
-        src_slice_pitch,
-        dst_row_pitch,
-        dst_slice_pitch,
-        events, 
-        event);
+    return event;
 }
-#endif
 
-inline cl_int enqueueReadImage(
-    const Image& image,
-    cl_bool blocking,
-    const size_t<3>& origin,
-    const size_t<3>& region,
-    ::size_t row_pitch,
-    ::size_t slice_pitch,
-    void* ptr,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL) 
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.enqueueReadImage(
-        image,
-        blocking,
-        origin,
-        region,
-        row_pitch,
-        slice_pitch,
-        ptr,
-        events, 
-        event);
+template<typename A1, typename A2, typename A3, typename A4>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
 }
 
-inline cl_int enqueueWriteImage(
-    const Image& image,
-    cl_bool blocking,
-    const size_t<3>& origin,
-    const size_t<3>& region,
-    ::size_t row_pitch,
-    ::size_t slice_pitch,
-    void* ptr,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.enqueueWriteImage(
-        image,
-        blocking,
-        origin,
-        region,
-        row_pitch,
-        slice_pitch,
-        ptr,
-        events, 
-        event);
+template<typename A1, typename A2, typename A3, typename A4, typename A5>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
 }
 
-inline cl_int enqueueCopyImage(
-    const Image& src,
-    const Image& dst,
-    const size_t<3>& src_origin,
-    const size_t<3>& dst_origin,
-    const size_t<3>& region,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.enqueueCopyImage(
-        src,
-        dst,
-        src_origin,
-        dst_origin,
-        region,
-        events,
-        event);
+template<typename A1, typename A2, typename A3, typename A4, typename A5,
+         typename A6>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
 }
 
-inline cl_int enqueueCopyImageToBuffer(
-    const Image& src,
-    const Buffer& dst,
-    const size_t<3>& src_origin,
-    const size_t<3>& region,
-    ::size_t dst_offset,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.enqueueCopyImageToBuffer(
-        src,
-        dst,
-        src_origin,
-        region,
-        dst_offset,
-        events,
-        event);
+template<typename A1, typename A2, typename A3, typename A4,
+         typename A5, typename A6, typename A7>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6, 
+    const A7& a7,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
 }
 
-inline cl_int enqueueCopyBufferToImage(
-    const Buffer& src,
-    const Image& dst,
-    ::size_t src_offset,
-    const size_t<3>& dst_origin,
-    const size_t<3>& region,
-    const VECTOR_CLASS<Event>* events = NULL,
-    Event* event = NULL)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.enqueueCopyBufferToImage(
-        src,
-        dst,
-        src_offset,
-        dst_origin,
-        region,
-        events,
-        event);
+template<typename A1, typename A2, typename A3, typename A4, typename A5,
+         typename A6, typename A7, typename A8>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6, 
+    const A7& a7, 
+    const A8& a8,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
 }
 
-
-inline cl_int flush(void)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    }
-
-    return queue.flush();
+template<typename A1, typename A2, typename A3, typename A4, typename A5,
+         typename A6, typename A7, typename A8, typename A9>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5,
+    const A6& a6, 
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
 }
 
-inline cl_int finish(void)
-{
-    cl_int error;
-    CommandQueue queue = CommandQueue::getDefault(&error);
-
-    if (error != CL_SUCCESS) {
-        return error;
-    } 
-
-
-    return queue.finish();
+template<typename A1, typename A2, typename A3, typename A4, typename A5,
+         typename A6, typename A7, typename A8, typename A9, typename A10>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
 }
 
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
 
-// Kernel Functor support
-// New interface as of September 2011
-// Requires the C++11 std::tr1::function (note do not support TR1)
-// Visual Studio 2010 and GCC 4.2
-
-struct EnqueueArgs
-{
-    CommandQueue queue_;
-    const NDRange offset_;
-    const NDRange global_;
-    const NDRange local_;
-    VECTOR_CLASS<Event> events_;
-
-    EnqueueArgs(NDRange global) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(NullRange), 
-      global_(global),
-      local_(NullRange)
-    {
-
-    }
-
-    EnqueueArgs(NDRange global, NDRange local) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(NullRange), 
-      global_(global),
-      local_(local)
-    {
-
-    }
-
-    EnqueueArgs(NDRange offset, NDRange global, NDRange local) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(offset), 
-      global_(global),
-      local_(local)
-    {
-
-    }
-
-    EnqueueArgs(Event e, NDRange global) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(NullRange), 
-      global_(global),
-      local_(NullRange)
-    {
-        events_.push_back(e);
-    }
-
-    EnqueueArgs(Event e, NDRange global, NDRange local) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(NullRange), 
-      global_(global),
-      local_(local)
-    {
-        events_.push_back(e);
-    }
-
-    EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(offset), 
-      global_(global),
-      local_(local)
-    {
-        events_.push_back(e);
-    }
-
-    EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange global) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(NullRange), 
-      global_(global),
-      local_(NullRange),
-      events_(events)
-    {
-
-    }
-
-    EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange global, NDRange local) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(NullRange), 
-      global_(global),
-      local_(local),
-      events_(events)
-    {
-
-    }
-
-    EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange offset, NDRange global, NDRange local) : 
-      queue_(CommandQueue::getDefault()),
-      offset_(offset), 
-      global_(global),
-      local_(local),
-      events_(events)
-    {
-
-    }
-
-    EnqueueArgs(CommandQueue &queue, NDRange global) : 
-      queue_(queue),
-      offset_(NullRange), 
-      global_(global),
-      local_(NullRange)
-    {
-
-    }
-
-    EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : 
-      queue_(queue),
-      offset_(NullRange), 
-      global_(global),
-      local_(local)
-    {
-
-    }
-
-    EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : 
-      queue_(queue),
-      offset_(offset), 
-      global_(global),
-      local_(local)
-    {
-
-    }
-
-    EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : 
-      queue_(queue),
-      offset_(NullRange), 
-      global_(global),
-      local_(NullRange)
-    {
-        events_.push_back(e);
-    }
-
-    EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : 
-      queue_(queue),
-      offset_(NullRange), 
-      global_(global),
-      local_(local)
-    {
-        events_.push_back(e);
-    }
-
-    EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : 
-      queue_(queue),
-      offset_(offset), 
-      global_(global),
-      local_(local)
-    {
-        events_.push_back(e);
-    }
-
-    EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange global) : 
-      queue_(queue),
-      offset_(NullRange), 
-      global_(global),
-      local_(NullRange),
-      events_(events)
-    {
-
-    }
-
-    EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange global, NDRange local) : 
-      queue_(queue),
-      offset_(NullRange), 
-      global_(global),
-      local_(local),
-      events_(events)
-    {
-
-    }
-
-    EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange offset, NDRange global, NDRange local) : 
-      queue_(queue),
-      offset_(offset), 
-      global_(global),
-      local_(local),
-      events_(events)
-    {
-
-    }
-};
-
-namespace detail {
-
-class NullType {};
-
-template<int index, typename T0>
-struct SetArg
-{
-    static void set (Kernel kernel, T0 arg)
-    {
-        kernel.setArg(index, arg);
-    }
-};  
-
-
-template<int index>
-struct SetArg<index, NullType>
-{
-    static void set (Kernel, NullType)
-    { 
-    }
-};
-
-
-template <
-   typename T0,   typename T1,   typename T2,   typename T3,
-   typename T4,   typename T5,   typename T6,   typename T7,
-   typename T8,   typename T9,   typename T10,   typename T11,
-   typename T12,   typename T13,   typename T14,   typename T15,
-   typename T16,   typename T17,   typename T18,   typename T19,
-   typename T20,   typename T21,   typename T22,   typename T23,
-   typename T24,   typename T25,   typename T26,   typename T27,
-   typename T28,   typename T29,   typename T30,   typename T31
->
-class KernelFunctorGlobal
-{
-private:
-    Kernel kernel_;
-
-public:
-   KernelFunctorGlobal(
-        Kernel kernel) :
-            kernel_(kernel)
-    {}
-
-   KernelFunctorGlobal(
-        const Program& program,
-        const STRING_CLASS name,
-        cl_int * err = NULL) :
-            kernel_(program, name.c_str(), err)
-    {}
-
-    Event operator() (
-        const EnqueueArgs& args,
-        T0 t0,
-        T1 t1 = NullType(),
-        T2 t2 = NullType(),
-        T3 t3 = NullType(),
-        T4 t4 = NullType(),
-        T5 t5 = NullType(),
-        T6 t6 = NullType(),
-        T7 t7 = NullType(),
-        T8 t8 = NullType(),
-        T9 t9 = NullType(),
-        T10 t10 = NullType(),
-        T11 t11 = NullType(),
-        T12 t12 = NullType(),
-        T13 t13 = NullType(),
-        T14 t14 = NullType(),
-        T15 t15 = NullType(),
-        T16 t16 = NullType(),
-        T17 t17 = NullType(),
-        T18 t18 = NullType(),
-        T19 t19 = NullType(),
-        T20 t20 = NullType(),
-        T21 t21 = NullType(),
-        T22 t22 = NullType(),
-        T23 t23 = NullType(),
-        T24 t24 = NullType(),
-        T25 t25 = NullType(),
-        T26 t26 = NullType(),
-        T27 t27 = NullType(),
-        T28 t28 = NullType(),
-        T29 t29 = NullType(),
-        T30 t30 = NullType(),
-        T31 t31 = NullType()
-        )
-    {
-        Event event;
-        SetArg<0, T0>::set(kernel_, t0);
-        SetArg<1, T1>::set(kernel_, t1);
-        SetArg<2, T2>::set(kernel_, t2);
-        SetArg<3, T3>::set(kernel_, t3);
-        SetArg<4, T4>::set(kernel_, t4);
-        SetArg<5, T5>::set(kernel_, t5);
-        SetArg<6, T6>::set(kernel_, t6);
-        SetArg<7, T7>::set(kernel_, t7);
-        SetArg<8, T8>::set(kernel_, t8);
-        SetArg<9, T9>::set(kernel_, t9);
-        SetArg<10, T10>::set(kernel_, t10);
-        SetArg<11, T11>::set(kernel_, t11);
-        SetArg<12, T12>::set(kernel_, t12);
-        SetArg<13, T13>::set(kernel_, t13);
-        SetArg<14, T14>::set(kernel_, t14);
-        SetArg<15, T15>::set(kernel_, t15);
-        SetArg<16, T16>::set(kernel_, t16);
-        SetArg<17, T17>::set(kernel_, t17);
-        SetArg<18, T18>::set(kernel_, t18);
-        SetArg<19, T19>::set(kernel_, t19);
-        SetArg<20, T20>::set(kernel_, t20);
-        SetArg<21, T21>::set(kernel_, t21);
-        SetArg<22, T22>::set(kernel_, t22);
-        SetArg<23, T23>::set(kernel_, t23);
-        SetArg<24, T24>::set(kernel_, t24);
-        SetArg<25, T25>::set(kernel_, t25);
-        SetArg<26, T26>::set(kernel_, t26);
-        SetArg<27, T27>::set(kernel_, t27);
-        SetArg<28, T28>::set(kernel_, t28);
-        SetArg<29, T29>::set(kernel_, t29);
-        SetArg<30, T30>::set(kernel_, t30);
-        SetArg<31, T31>::set(kernel_, t31);
-        
-
-        args.queue_.enqueueNDRangeKernel(
-            kernel_,
-            args.offset_,
-            args.global_,
-            args.local_,
-            &args.events_,
-            &event);
-        
-
-        return event;
-    }
-
-};
-
-//------------------------------------------------------------------------------------------------------
-
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23,
-	typename T24,
-	typename T25,
-	typename T26,
-	typename T27,
-	typename T28,
-	typename T29,
-	typename T30,
-	typename T31>
-struct functionImplementation_
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		T28,
-		T29,
-		T30,
-		T31> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		T28,
-		T29,
-		T30,
-		T31);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23,
-		T24 arg24,
-		T25 arg25,
-		T26 arg26,
-		T27 arg27,
-		T28 arg28,
-		T29 arg29,
-		T30 arg30,
-		T31 arg31)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23,
-			arg24,
-			arg25,
-			arg26,
-			arg27,
-			arg28,
-			arg29,
-			arg30,
-			arg31);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23,
-	typename T24,
-	typename T25,
-	typename T26,
-	typename T27,
-	typename T28,
-	typename T29,
-	typename T30>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	T23,
-	T24,
-	T25,
-	T26,
-	T27,
-	T28,
-	T29,
-	T30,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		T28,
-		T29,
-		T30,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		T28,
-		T29,
-		T30);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23,
-		T24 arg24,
-		T25 arg25,
-		T26 arg26,
-		T27 arg27,
-		T28 arg28,
-		T29 arg29,
-		T30 arg30)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23,
-			arg24,
-			arg25,
-			arg26,
-			arg27,
-			arg28,
-			arg29,
-			arg30);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23,
-	typename T24,
-	typename T25,
-	typename T26,
-	typename T27,
-	typename T28,
-	typename T29>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	T23,
-	T24,
-	T25,
-	T26,
-	T27,
-	T28,
-	T29,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		T28,
-		T29,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		T28,
-		T29);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23,
-		T24 arg24,
-		T25 arg25,
-		T26 arg26,
-		T27 arg27,
-		T28 arg28,
-		T29 arg29)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23,
-			arg24,
-			arg25,
-			arg26,
-			arg27,
-			arg28,
-			arg29);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23,
-	typename T24,
-	typename T25,
-	typename T26,
-	typename T27,
-	typename T28>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	T23,
-	T24,
-	T25,
-	T26,
-	T27,
-	T28,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		T28,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		T28);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23,
-		T24 arg24,
-		T25 arg25,
-		T26 arg26,
-		T27 arg27,
-		T28 arg28)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23,
-			arg24,
-			arg25,
-			arg26,
-			arg27,
-			arg28);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23,
-	typename T24,
-	typename T25,
-	typename T26,
-	typename T27>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	T23,
-	T24,
-	T25,
-	T26,
-	T27,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		T27);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23,
-		T24 arg24,
-		T25 arg25,
-		T26 arg26,
-		T27 arg27)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23,
-			arg24,
-			arg25,
-			arg26,
-			arg27);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23,
-	typename T24,
-	typename T25,
-	typename T26>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	T23,
-	T24,
-	T25,
-	T26,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		T26);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23,
-		T24 arg24,
-		T25 arg25,
-		T26 arg26)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23,
-			arg24,
-			arg25,
-			arg26);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23,
-	typename T24,
-	typename T25>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	T23,
-	T24,
-	T25,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		T25);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23,
-		T24 arg24,
-		T25 arg25)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23,
-			arg24,
-			arg25);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23,
-	typename T24>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	T23,
-	T24,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		T24);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23,
-		T24 arg24)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23,
-			arg24);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22,
-	typename T23>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	T23,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		T23);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22,
-		T23 arg23)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22,
-			arg23);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21,
-	typename T22>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	T22,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		T22);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21,
-		T22 arg22)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21,
-			arg22);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20,
-	typename T21>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	T21,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		T21);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20,
-		T21 arg21)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20,
-			arg21);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19,
-	typename T20>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	T20,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		T20);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19,
-		T20 arg20)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19,
-			arg20);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18,
-	typename T19>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	T19,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		T19);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18,
-		T19 arg19)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18,
-			arg19);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17,
-	typename T18>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	T18,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		T18);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17,
-		T18 arg18)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17,
-			arg18);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16,
-	typename T17>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	T17,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		T17);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16,
-		T17 arg17)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16,
-			arg17);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15,
-	typename T16>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	T16,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		T16);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15,
-		T16 arg16)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15,
-			arg16);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14,
-	typename T15>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	T15,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		T15);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14,
-		T15 arg15)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14,
-			arg15);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13,
-	typename T14>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	T14,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		T14);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13,
-		T14 arg14)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13,
-			arg14);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12,
-	typename T13>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	T13,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		T13);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12,
-		T13 arg13)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12,
-			arg13);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11,
-	typename T12>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	T12,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		T12);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11,
-		T12 arg12)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11,
-			arg12);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10,
-	typename T11>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	T11,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		T11);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10,
-		T11 arg11)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10,
-			arg11);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9,
-	typename T10>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	T10,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		T10);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9,
-		T10 arg10)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9,
-			arg10);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8,
-	typename T9>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	T9,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		T9);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8,
-		T9 arg9)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8,
-			arg9);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7,
-	typename T8>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	T8,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		T8);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7,
-		T8 arg8)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7,
-			arg8);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6,
-	typename T7>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	T7,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		T7);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6,
-		T7 arg7)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6,
-			arg7);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5,
-	typename T6>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	T6,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		T6);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5,
-		T6 arg6)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5,
-			arg6);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4,
-	typename T5>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	T5,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		T5);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4,
-		T5 arg5)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4,
-			arg5);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3,
-	typename T4>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	T4,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		T4,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3,
-		T4);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3,
-		T4 arg4)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3,
-			arg4);
-	}
-
-
-};
-
-template<
-	typename T0,
-	typename T1,
-	typename T2,
-	typename T3>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	T3,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		T3,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
-    
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2,
-		T3);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2,
-		T3 arg3)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2,
-			arg3);
-	}
-
-
-};
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11, class A12>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11, 
+    const A12& a12,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+    kernel_.setArg(11,a12);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
 
-template<
-	typename T0,
-	typename T1,
-	typename T2>
-struct functionImplementation_
-<	T0,
-	T1,
-	T2,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		T2,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11, class A12, class A13>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11, 
+    const A12& a12, 
+    const A13& a13,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
     
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1,
-		T2);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1,
-		T2 arg2)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1,
-			arg2);
-	}
-
-
-};
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+    kernel_.setArg(11,a12);
+    kernel_.setArg(12,a13);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
 
-template<
-	typename T0,
-	typename T1>
-struct functionImplementation_
-<	T0,
-	T1,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		T1,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11, class A12, class A13, class A14>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11,
+    const A12& a12, 
+    const A13& a13, 
+    const A14& a14,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
     
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0,
-		T1);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0,
-		T1 arg1)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0,
-			arg1);
-	}
-
-
-};
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+    kernel_.setArg(11,a12);
+    kernel_.setArg(12,a13);
+    kernel_.setArg(13,a14);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
 
-template<
-	typename T0>
-struct functionImplementation_
-<	T0,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType,
-	NullType>
-{
-	typedef detail::KernelFunctorGlobal<
-		T0,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType,
-		NullType> FunctorType;
-
-    FunctorType functor_;
-
-    functionImplementation_(const FunctorType &functor) :
-        functor_(functor)
-    {
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11, class A12, class A13, class A14, class A15>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5,
+    const A6& a6, 
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11,
+    const A12& a12, 
+    const A13& a13, 
+    const A14& a14, 
+    const A15& a15,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
     
-        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1))
-        // Fail variadic expansion for dev11
-        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
-        #endif
-            
-    }
-
-	//! \brief Return type of the functor
-	typedef Event result_type;
-
-	//! \brief Function signature of kernel functor with no event dependency.
-	typedef Event type_(
-		const EnqueueArgs&,
-		T0);
-
-	Event operator()(
-		const EnqueueArgs& enqueueArgs,
-		T0 arg0)
-	{
-		return functor_(
-			enqueueArgs,
-			arg0);
-	}
-
-
-};
-
-
-
-
-
-} // namespace detail
-
-//----------------------------------------------------------------------------------------------
-
-template <
-   typename T0,   typename T1 = detail::NullType,   typename T2 = detail::NullType,
-   typename T3 = detail::NullType,   typename T4 = detail::NullType,
-   typename T5 = detail::NullType,   typename T6 = detail::NullType,
-   typename T7 = detail::NullType,   typename T8 = detail::NullType,
-   typename T9 = detail::NullType,   typename T10 = detail::NullType,
-   typename T11 = detail::NullType,   typename T12 = detail::NullType,
-   typename T13 = detail::NullType,   typename T14 = detail::NullType,
-   typename T15 = detail::NullType,   typename T16 = detail::NullType,
-   typename T17 = detail::NullType,   typename T18 = detail::NullType,
-   typename T19 = detail::NullType,   typename T20 = detail::NullType,
-   typename T21 = detail::NullType,   typename T22 = detail::NullType,
-   typename T23 = detail::NullType,   typename T24 = detail::NullType,
-   typename T25 = detail::NullType,   typename T26 = detail::NullType,
-   typename T27 = detail::NullType,   typename T28 = detail::NullType,
-   typename T29 = detail::NullType,   typename T30 = detail::NullType,
-   typename T31 = detail::NullType
->
-struct make_kernel :
-    public detail::functionImplementation_<
-               T0,   T1,   T2,   T3,
-               T4,   T5,   T6,   T7,
-               T8,   T9,   T10,   T11,
-               T12,   T13,   T14,   T15,
-               T16,   T17,   T18,   T19,
-               T20,   T21,   T22,   T23,
-               T24,   T25,   T26,   T27,
-               T28,   T29,   T30,   T31
-    >
-{
-public:
-	typedef detail::KernelFunctorGlobal<             
-		       T0,   T1,   T2,   T3,
-               T4,   T5,   T6,   T7,
-               T8,   T9,   T10,   T11,
-               T12,   T13,   T14,   T15,
-               T16,   T17,   T18,   T19,
-               T20,   T21,   T22,   T23,
-               T24,   T25,   T26,   T27,
-               T28,   T29,   T30,   T31
-    > FunctorType;
-
-    make_kernel(
-        const Program& program,
-        const STRING_CLASS name,
-        cl_int * err = NULL) :
-           detail::functionImplementation_<
-                    T0,   T1,   T2,   T3,
-                       T4,   T5,   T6,   T7,
-                       T8,   T9,   T10,   T11,
-                       T12,   T13,   T14,   T15,
-                       T16,   T17,   T18,   T19,
-                       T20,   T21,   T22,   T23,
-                       T24,   T25,   T26,   T27,
-                       T28,   T29,   T30,   T31
-           >(
-            FunctorType(program, name, err)) 
-    {}
-
-    make_kernel(
-        const Kernel kernel) :
-           detail::functionImplementation_<
-                    T0,   T1,   T2,   T3,
-                       T4,   T5,   T6,   T7,
-                       T8,   T9,   T10,   T11,
-                       T12,   T13,   T14,   T15,
-                       T16,   T17,   T18,   T19,
-                       T20,   T21,   T22,   T23,
-                       T24,   T25,   T26,   T27,
-                       T28,   T29,   T30,   T31
-           >(
-            FunctorType(kernel)) 
-    {}    
-};
-
-
-//----------------------------------------------------------------------------------------------------------------------
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+    kernel_.setArg(11,a12);
+    kernel_.setArg(12,a13);
+    kernel_.setArg(13,a14);
+    kernel_.setArg(14,a15);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
 
 #undef __ERR_STR
 #if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
@@ -12136,13 +3945,11 @@ public:
 #undef __GET_IMAGE_INFO_ERR
 #undef __GET_SAMPLER_INFO_ERR
 #undef __GET_KERNEL_INFO_ERR
-#undef __GET_KERNEL_ARG_INFO_ERR
 #undef __GET_KERNEL_WORK_GROUP_INFO_ERR
 #undef __GET_PROGRAM_INFO_ERR
 #undef __GET_PROGRAM_BUILD_INFO_ERR
 #undef __GET_COMMAND_QUEUE_INFO_ERR
 
-#undef __CREATE_CONTEXT_ERR
 #undef __CREATE_CONTEXT_FROM_TYPE_ERR
 #undef __GET_SUPPORTED_IMAGE_FORMATS_ERR
 
@@ -12156,7 +3963,6 @@ public:
 #undef __CREATE_USER_EVENT_ERR
 #undef __SET_USER_EVENT_STATUS_ERR
 #undef __SET_EVENT_CALLBACK_ERR
-#undef __SET_PRINTF_CALLBACK_ERR
 
 #undef __WAIT_FOR_EVENTS_ERR
 
@@ -12164,7 +3970,6 @@ public:
 #undef __SET_KERNEL_ARGS_ERR
 #undef __CREATE_PROGRAM_WITH_SOURCE_ERR
 #undef __CREATE_PROGRAM_WITH_BINARY_ERR
-#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR
 #undef __BUILD_PROGRAM_ERR
 #undef __CREATE_KERNELS_IN_PROGRAM_ERR
 
@@ -12188,36 +3993,19 @@ public:
 #undef __ENQUEUE_TASK_ERR
 #undef __ENQUEUE_NATIVE_KERNEL
 
-#undef __CL_EXPLICIT_CONSTRUCTORS
-
 #undef __UNLOAD_COMPILER_ERR
 #endif //__CL_USER_OVERRIDE_ERROR_STRINGS
 
 #undef __GET_INFO_HELPER_WITH_RETAIN
 
-#undef __CL_FUNCTION_TYPE
-
 // Extensions
-/**
- * Deprecated APIs for 1.2
- */
-#if defined(CL_VERSION_1_1)
 #undef __INIT_CL_EXT_FCN_PTR
-#endif // #if defined(CL_VERSION_1_1)
 #undef __CREATE_SUB_DEVICES
 
 #if defined(USE_CL_DEVICE_FISSION)
 #undef __PARAM_NAME_DEVICE_FISSION
 #endif // USE_CL_DEVICE_FISSION
 
-#undef __DEFAULT_NOT_INITIALIZED 
-#undef __DEFAULT_BEING_INITIALIZED 
-#undef __DEFAULT_INITIALIZED
-
 } // namespace cl
 
-#ifdef _WIN32
-#pragma pop_macro("max")
-#endif // _WIN32
-
 #endif // CL_HPP_
diff --git a/include/CL/cl_d3d10.h b/include/CL/cl_d3d10.h
index 81b0d37..ea9ab99 100644
--- a/include/CL/cl_d3d10.h
+++ b/include/CL/cl_d3d10.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and/or associated documentation files (the
@@ -113,7 +113,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
 typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
     cl_command_queue command_queue,
     cl_uint          num_objects,
-    const cl_mem *   mem_objects,
+    cl_mem *         mem_objects,
     cl_uint          num_events_in_wait_list,
     const cl_event * event_wait_list,
     cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
diff --git a/include/CL/cl_d3d11.h b/include/CL/cl_d3d11.h
deleted file mode 100644
index d3c8bdc..0000000
--- a/include/CL/cl_d3d11.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and/or associated documentation files (the
- * "Materials"), to deal in the Materials without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Materials, and to
- * permit persons to whom the Materials are furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Materials.
- *
- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
- **********************************************************************************/
-
-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
-
-#ifndef __OPENCL_CL_D3D11_H
-#define __OPENCL_CL_D3D11_H
-
-#include <d3d11.h>
-#include <CL/cl.h>
-#include <CL/cl_platform.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/******************************************************************************
- * cl_khr_d3d11_sharing                                                       */
-#define cl_khr_d3d11_sharing 1
-
-typedef cl_uint cl_d3d11_device_source_khr;
-typedef cl_uint cl_d3d11_device_set_khr;
-
-/******************************************************************************/
-
-// Error Codes
-#define CL_INVALID_D3D11_DEVICE_KHR                  -1006
-#define CL_INVALID_D3D11_RESOURCE_KHR                -1007
-#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR       -1008
-#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR           -1009
-
-// cl_d3d11_device_source
-#define CL_D3D11_DEVICE_KHR                          0x4019
-#define CL_D3D11_DXGI_ADAPTER_KHR                    0x401A
-
-// cl_d3d11_device_set
-#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR           0x401B
-#define CL_ALL_DEVICES_FOR_D3D11_KHR                 0x401C
-
-// cl_context_info
-#define CL_CONTEXT_D3D11_DEVICE_KHR                  0x401D
-#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
-
-// cl_mem_info
-#define CL_MEM_D3D11_RESOURCE_KHR                    0x401E
-
-// cl_image_info
-#define CL_IMAGE_D3D11_SUBRESOURCE_KHR               0x401F
-
-// cl_command_type
-#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR         0x4020
-#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR         0x4021
-
-/******************************************************************************/
-
-typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
-    cl_platform_id             platform,
-    cl_d3d11_device_source_khr d3d_device_source,
-    void *                     d3d_object,
-    cl_d3d11_device_set_khr    d3d_device_set,
-    cl_uint                    num_entries,
-    cl_device_id *             devices,
-    cl_uint *                  num_devices) CL_API_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
-    cl_context     context,
-    cl_mem_flags   flags,
-    ID3D11Buffer * resource,
-    cl_int *       errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
-    cl_context        context,
-    cl_mem_flags      flags,
-    ID3D11Texture2D * resource,
-    UINT              subresource,
-    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
-    cl_context        context,
-    cl_mem_flags      flags,
-    ID3D11Texture3D * resource,
-    UINT              subresource,
-    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
-    cl_command_queue command_queue,
-    cl_uint          num_objects,
-    const cl_mem *   mem_objects,
-    cl_uint          num_events_in_wait_list,
-    const cl_event * event_wait_list,
-    cl_event *       event) CL_API_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
-    cl_command_queue command_queue,
-    cl_uint          num_objects,
-    const cl_mem *   mem_objects,
-    cl_uint          num_events_in_wait_list,
-    const cl_event * event_wait_list,
-    cl_event *       event) CL_API_SUFFIX__VERSION_1_2;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // __OPENCL_CL_D3D11_H
-
diff --git a/include/CL/cl_d3d9.h b/include/CL/cl_d3d9.h
deleted file mode 100644
index babc611..0000000
--- a/include/CL/cl_d3d9.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/* 
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
- */
-
-/* $Revision$ on $Date$ */
-
-#ifndef __OPENCL_CL_D3D9_H
-#define __OPENCL_CL_D3D9_H
-
-#include <CL/cl_platform.h>
-#include <d3d9.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* cl_khr_d3d9_sharing extension    */
-#define cl_khr_d3d9_sharing 1
-
-/* cl_context_properties            */
-#define CL_CONTEXT_D3D9_DEVICE 0x1085
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromD3D9BufferKHR(
-    cl_context           /* context */,
-    cl_mem_flags         /* flags */,
-    IDirect3DResource9 * /* resource */,
-    HANDLE               /* shared_handle */,
-    cl_int *             /* errcode_ret */);
-
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromD3D9TextureKHR(
-    cl_context          /* context */,
-    cl_mem_flags        /* flags */,
-    IDirect3DTexture9 * /* texture */,
-    HANDLE              /* shared_handle */,
-    UINT                /* miplevel */,
-    cl_int *            /* errcode_ret */);
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromD3D9VolumeTextureKHR(
-    cl_context                /* context */,
-    cl_mem_flags              /* flags */,
-    IDirect3DVolumeTexture9 * /* resource */,
-    HANDLE                    /* shared_handle */,
-    UINT                      /* miplevel */,
-    cl_int *                  /* errcode_ret */);
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromD3D9CubeTextureKHR(
-    cl_context                /* context */,
-    cl_mem_flags              /* flags */,
-    IDirect3DCubeTexture9 *   /* resource */,                            
-    HANDLE                    /* shared_handle */,
-    D3DCUBEMAP_FACES Facetype /* face */,
-    UINT                      /* miplevel */,
-    cl_int *                  /* errcode_ret */);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireD3D9ObjectsKHR(
-    cl_command_queue /* command_queue */,
-    cl_uint          /* num_objects */,
-    const cl_mem *   /* mem_objects */,
-    cl_uint          /* num_events_in_wait_list */,
-    const cl_event * /* event_wait_list */,
-    cl_event *       /* event */);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseD3D9ObjectsKHR(
-    cl_command_queue /* command_queue */,
-    cl_uint          /* num_objects */,
-    const cl_mem *   /* mem_objects */,
-    cl_uint          /* num_events_in_wait_list */,
-    const cl_event * /* event_wait_list */,
-    cl_event *       /* event */);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* __OPENCL_CL_D3D9_H   */
-
diff --git a/include/CL/cl_dx9_media_sharing.h b/include/CL/cl_dx9_media_sharing.h
deleted file mode 100644
index 1ef543a..0000000
--- a/include/CL/cl_dx9_media_sharing.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and/or associated documentation files (the
- * "Materials"), to deal in the Materials without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Materials, and to
- * permit persons to whom the Materials are furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Materials.
- *
- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
- **********************************************************************************/
-
-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
-
-#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
-#define __OPENCL_CL_DX9_MEDIA_SHARING_H
-
-#include <CL/cl.h>
-#include <CL/cl_platform.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/******************************************************************************
-/* cl_khr_dx9_media_sharing                                                   */
-#define cl_khr_dx9_media_sharing 1
-
-typedef cl_uint             cl_dx9_media_adapter_type_khr;
-typedef cl_uint             cl_dx9_media_adapter_set_khr;
-    
-#if defined(_WIN32)
-#include <d3d9.h>
-typedef struct _cl_dx9_surface_info_khr
-{
-    IDirect3DSurface9 *resource;
-    HANDLE shared_handle;
-} cl_dx9_surface_info_khr;
-#endif
-
-
-/******************************************************************************/
-
-// Error Codes
-#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR                -1010
-#define CL_INVALID_DX9_MEDIA_SURFACE_KHR                -1011
-#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR       -1012
-#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR           -1013
-
-// cl_media_adapter_type_khr
-#define CL_ADAPTER_D3D9_KHR                              0x2020
-#define CL_ADAPTER_D3D9EX_KHR                            0x2021
-#define CL_ADAPTER_DXVA_KHR                              0x2022
-
-// cl_media_adapter_set_khr
-#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR   0x2023
-#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR         0x2024
-
-// cl_context_info
-#define CL_CONTEXT_ADAPTER_D3D9_KHR                      0x2025
-#define CL_CONTEXT_ADAPTER_D3D9EX_KHR                    0x2026
-#define CL_CONTEXT_ADAPTER_DXVA_KHR                      0x2027
-
-// cl_mem_info
-#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR                0x2028
-#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR                0x2029
-
-// cl_image_info
-#define CL_IMAGE_DX9_MEDIA_PLANE_KHR                     0x202A
-
-// cl_command_type
-#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR        0x202B
-#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR        0x202C
-
-/******************************************************************************/
-
-typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
-    cl_platform_id                   platform,
-    cl_uint                          num_media_adapters,
-    cl_dx9_media_adapter_type_khr *  media_adapter_type,
-    void *                           media_adapters,
-    cl_dx9_media_adapter_set_khr     media_adapter_set,
-    cl_uint                          num_entries,
-    cl_device_id *                   devices,
-    cl_uint *                        num_devices) CL_API_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
-    cl_context                    context,
-    cl_mem_flags                  flags,
-    cl_dx9_media_adapter_type_khr adapter_type,
-    void *                        surface_info,
-    cl_uint                       plane,                                                                          
-    cl_int *                      errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
-    cl_command_queue command_queue,
-    cl_uint          num_objects,
-    const cl_mem *   mem_objects,
-    cl_uint          num_events_in_wait_list,
-    const cl_event * event_wait_list,
-    cl_event *       event) CL_API_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
-    cl_command_queue command_queue,
-    cl_uint          num_objects,
-    const cl_mem *   mem_objects,
-    cl_uint          num_events_in_wait_list,
-    const cl_event * event_wait_list,
-    cl_event *       event) CL_API_SUFFIX__VERSION_1_2;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // __OPENCL_CL_DX9_MEDIA_SHARING_H
-
diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h
index 632cb21..4e92c7e 100644
--- a/include/CL/cl_ext.h
+++ b/include/CL/cl_ext.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and/or associated documentation files (the
@@ -40,6 +40,9 @@ extern "C" {
 	#include <CL/cl.h>
 #endif
 
+/* cl_khr_fp64 extension - no extension #define since it has no functions  */
+#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
+
 /* cl_khr_fp16 extension - no extension #define since it has no functions  */
 #define CL_DEVICE_HALF_FP_CONFIG                    0x1033
 
@@ -115,48 +118,6 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
     cl_uint *        /* num_platforms */);
 
 
-/* Extension: cl_khr_image2D_buffer
- *
- * This extension allows a 2D image to be created from a cl_mem buffer without a copy.
- * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
- * Both the sampler and sampler-less read_image built-in functions are supported for 2D images
- * and 2D images created from a buffer.  Similarly, the write_image built-ins are also supported
- * for 2D images created from a buffer.
- *
- * When the 2D image from buffer is created, the client must specify the width,
- * height, image format (i.e. channel order and channel data type) and optionally the row pitch
- *
- * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
- * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
- */
-    
-/*************************************
- * cl_khr_initalize_memory extension *
- *************************************/
-    
-#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x200E
-    
-    
-/**************************************
- * cl_khr_terminate_context extension *
- **************************************/
-    
-#define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x200F
-#define CL_CONTEXT_TERMINATE_KHR                    0x2010
-
-#define cl_khr_terminate_context 1
-extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
-
-typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
-    
-    
-/*
- * Extension: cl_khr_spir
- *
- * This extension adds support to create an OpenCL program object from a 
- * Standard Portable Intermediate Representation (SPIR) instance
- */
-
 /******************************************
 * cl_nv_device_attribute_query extension *
 ******************************************/
@@ -175,6 +136,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /
 *********************************/
 #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
 
+
 #ifdef CL_VERSION_1_1
    /***********************************
     * cl_ext_device_fission extension *
diff --git a/include/CL/cl_gl.h b/include/CL/cl_gl.h
index eb9ce5c..3b4fe06 100644
--- a/include/CL/cl_gl.h
+++ b/include/CL/cl_gl.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and/or associated documentation files (the
@@ -21,11 +21,20 @@
  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
  **********************************************************************************/
 
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+/*
+ * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have
+ * OpenGL dependencies. The application is responsible for #including
+ * OpenGL or OpenGL ES headers before #including cl_gl.h.
+ */
+
 #ifndef __OPENCL_CL_GL_H
 #define __OPENCL_CL_GL_H
 
 #ifdef __APPLE__
 #include <OpenCL/cl.h>
+#include <OpenGL/CGLDevice.h>
 #else
 #include <CL/cl.h>
 #endif	
@@ -39,20 +48,15 @@ typedef cl_uint     cl_gl_texture_info;
 typedef cl_uint     cl_gl_platform_info;
 typedef struct __GLsync *cl_GLsync;
 
-/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken           */
-#define CL_GL_OBJECT_BUFFER                     0x2000
-#define CL_GL_OBJECT_TEXTURE2D                  0x2001
-#define CL_GL_OBJECT_TEXTURE3D                  0x2002
-#define CL_GL_OBJECT_RENDERBUFFER               0x2003
-#define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
-#define CL_GL_OBJECT_TEXTURE1D                  0x200F
-#define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
-#define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
-
-/* cl_gl_texture_info           */
-#define CL_GL_TEXTURE_TARGET                    0x2004
-#define CL_GL_MIPMAP_LEVEL                      0x2005
+/* cl_gl_object_type */
+#define CL_GL_OBJECT_BUFFER             0x2000
+#define CL_GL_OBJECT_TEXTURE2D          0x2001
+#define CL_GL_OBJECT_TEXTURE3D          0x2002
+#define CL_GL_OBJECT_RENDERBUFFER       0x2003
 
+/* cl_gl_texture_info */
+#define CL_GL_TEXTURE_TARGET            0x2004
+#define CL_GL_MIPMAP_LEVEL              0x2005
 
 extern CL_API_ENTRY cl_mem CL_API_CALL
 clCreateFromGLBuffer(cl_context     /* context */,
@@ -61,13 +65,21 @@ clCreateFromGLBuffer(cl_context     /* context */,
                      int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLTexture(cl_context      /* context */,
-                      cl_mem_flags    /* flags */,
-                      cl_GLenum       /* target */,
-                      cl_GLint        /* miplevel */,
-                      cl_GLuint       /* texture */,
-                      cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-    
+clCreateFromGLTexture2D(cl_context      /* context */,
+                        cl_mem_flags    /* flags */,
+                        cl_GLenum       /* target */,
+                        cl_GLint        /* miplevel */,
+                        cl_GLuint       /* texture */,
+                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture3D(cl_context      /* context */,
+                        cl_mem_flags    /* flags */,
+                        cl_GLenum       /* target */,
+                        cl_GLint        /* miplevel */,
+                        cl_GLuint       /* texture */,
+                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
 extern CL_API_ENTRY cl_mem CL_API_CALL
 clCreateFromGLRenderbuffer(cl_context   /* context */,
                            cl_mem_flags /* flags */,
@@ -77,7 +89,7 @@ clCreateFromGLRenderbuffer(cl_context   /* context */,
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetGLObjectInfo(cl_mem                /* memobj */,
                   cl_gl_object_type *   /* gl_object_type */,
-                  cl_GLuint *           /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
+                  cl_GLuint *              /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
                   
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetGLTextureInfo(cl_mem               /* memobj */,
@@ -102,51 +114,33 @@ clEnqueueReleaseGLObjects(cl_command_queue      /* command_queue */,
                           const cl_event *      /* event_wait_list */,
                           cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
 
-
-// Deprecated OpenCL 1.1 APIs
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateFromGLTexture2D(cl_context      /* context */,
-                        cl_mem_flags    /* flags */,
-                        cl_GLenum       /* target */,
-                        cl_GLint        /* miplevel */,
-                        cl_GLuint       /* texture */,
-                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
-extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateFromGLTexture3D(cl_context      /* context */,
-                        cl_mem_flags    /* flags */,
-                        cl_GLenum       /* target */,
-                        cl_GLint        /* miplevel */,
-                        cl_GLuint       /* texture */,
-                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
 /* cl_khr_gl_sharing extension  */
-    
+
 #define cl_khr_gl_sharing 1
-    
+
 typedef cl_uint     cl_gl_context_info;
-    
+
 /* Additional Error Codes  */
 #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
-    
+
 /* cl_gl_context_info  */
 #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
 #define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
-    
+
 /* Additional cl_context_properties  */
 #define CL_GL_CONTEXT_KHR                       0x2008
 #define CL_EGL_DISPLAY_KHR                      0x2009
 #define CL_GLX_DISPLAY_KHR                      0x200A
 #define CL_WGL_HDC_KHR                          0x200B
 #define CL_CGL_SHAREGROUP_KHR                   0x200C
-    
+
 extern CL_API_ENTRY cl_int CL_API_CALL
 clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
                       cl_gl_context_info            /* param_name */,
                       size_t                        /* param_value_size */,
                       void *                        /* param_value */,
                       size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-    
+
 typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
     const cl_context_properties * properties,
     cl_gl_context_info            param_name,
@@ -158,4 +152,4 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
 }
 #endif
 
-#endif  /* __OPENCL_CL_GL_H */
+#endif  /* __OPENCL_CL_GL_H  */
diff --git a/include/CL/cl_gl_ext.h b/include/CL/cl_gl_ext.h
index 77d5353..26e4782 100644
--- a/include/CL/cl_gl_ext.h
+++ b/include/CL/cl_gl_ext.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and/or associated documentation files (the
@@ -41,7 +41,7 @@ extern "C" {
 
 /*
  * For each extension, follow this template
- *  cl_VEN_extname extension  */
+ * /* cl_VEN_extname extension  */
 /* #define cl_VEN_extname 1
  * ... define new types, if any
  * ... define new tokens, if any
diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h
index cf2b721..043b048 100644
--- a/include/CL/cl_platform.h
+++ b/include/CL/cl_platform.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and/or associated documentation files (the
@@ -46,75 +46,19 @@ extern "C" {
 #endif
 
 #ifdef __APPLE__
-    #define CL_EXTENSION_WEAK_LINK       __attribute__((weak_import))
-    #define CL_API_SUFFIX__VERSION_1_0                  AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
-    #define CL_EXT_SUFFIX__VERSION_1_0                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
-    #define CL_API_SUFFIX__VERSION_1_1                  AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
-    #define GCL_API_SUFFIX__VERSION_1_1                 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
-    #define CL_EXT_SUFFIX__VERSION_1_1                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
-    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED       CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
-    
-    #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
-        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
-        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
-        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
-        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
-        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
-    #else
-        #warning  This path should never happen outside of internal operating system development.  AvailabilityMacros do not function correctly here!
-        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
-        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
-        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
-        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
-    #endif
+    #define CL_EXTENSION_WEAK_LINK                  __attribute__((weak_import))       
+    #define CL_API_SUFFIX__VERSION_1_0              AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+    #define CL_EXT_SUFFIX__VERSION_1_0              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+    #define CL_API_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
+    #define CL_EXT_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
+    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
 #else
-    #define CL_EXTENSION_WEAK_LINK  
+    #define CL_EXTENSION_WEAK_LINK                         
     #define CL_API_SUFFIX__VERSION_1_0
     #define CL_EXT_SUFFIX__VERSION_1_0
     #define CL_API_SUFFIX__VERSION_1_1
     #define CL_EXT_SUFFIX__VERSION_1_1
-    #define CL_API_SUFFIX__VERSION_1_2
-    #define CL_EXT_SUFFIX__VERSION_1_2
-    
-    #ifdef __GNUC__
-        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
-            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
-            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
-        #else
-            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
-            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
-        #endif
-    
-        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
-            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED    
-            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
-        #else
-            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
-            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
-        #endif
-    #elif _WIN32
-        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
-            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED    
-            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED    
-        #else
-            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED 
-            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)     
-        #endif
-    
-        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
-            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
-            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED    
-        #else
-            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED 
-            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)     
-        #endif
-    #else
-        #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
-        #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
-    
-        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
-        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
-    #endif
+    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
 #endif
 
 #if (defined (_WIN32) && defined(_MSC_VER))
@@ -308,7 +252,7 @@ typedef double          cl_double   __attribute__((aligned(8)));
 
 #include <stddef.h>
 
-/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
+/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */
 typedef unsigned int cl_GLuint;
 typedef int          cl_GLint;
 typedef unsigned int cl_GLenum;
@@ -1226,13 +1170,13 @@ typedef union
 /* Macro to facilitate debugging 
  * Usage:
  *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. 
- *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO \"
+ *   The first line ends with:   CL_PROGRAM_STRING_BEGIN \"
  *   Each line thereafter of OpenCL C source must end with: \n\
  *   The last line ends in ";
  *
  *   Example:
  *
- *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
+ *   const char *my_program = CL_PROGRAM_STRING_BEGIN "\
  *   kernel void foo( int a, float * b )             \n\
  *   {                                               \n\
  *      // my comment                                \n\
diff --git a/include/CL/opencl.h b/include/CL/opencl.h
index 3f00524..26a6389 100644
--- a/include/CL/opencl.h
+++ b/include/CL/opencl.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and/or associated documentation files (the
diff --git a/kernels/builtin_global_size.cl b/kernels/builtin_global_size.cl
new file mode 100644
index 0000000..e6ddb2f
--- /dev/null
+++ b/kernels/builtin_global_size.cl
@@ -0,0 +1,3 @@
+kernel void builtin_global_size( __global int *ret, __global int *i_dim ) {
+  *ret = get_global_size( *i_dim);
+}
diff --git a/kernels/compiler_function_constant0.cl b/kernels/compiler_function_constant0.cl
index f6efcef..363d84e 100644
--- a/kernels/compiler_function_constant0.cl
+++ b/kernels/compiler_function_constant0.cl
@@ -2,5 +2,5 @@ __kernel void
 compiler_function_constant0(__constant short *c0, __constant char *c1, __global int *dst, int value)
 {
   int id = (int)get_global_id(0);
-  dst[id] = value + c0[id%69] + c1[15];
+  dst[id] = value + c0[id%69] + c1[0];
 }
diff --git a/kernels/compiler_local_memory_barrier_2.cl b/kernels/compiler_local_memory_barrier_2.cl
new file mode 100644
index 0000000..f6dd59d
--- /dev/null
+++ b/kernels/compiler_local_memory_barrier_2.cl
@@ -0,0 +1,7 @@
+__kernel void compiler_global_memory_barrier_2(__global int *dst, __local int *src) {
+  src[get_local_id(0)] = get_local_id(0);
+  src[get_local_size(0) + get_local_id(0)] = get_local_id(0);
+  barrier(CLK_LOCAL_MEM_FENCE);
+  dst[get_local_size(0) * (2 * get_group_id(0)) + get_local_id(0)] = src[get_local_size(0) - (get_local_id(0) + 1)];
+  dst[get_local_size(0) * (2 * get_group_id(0) + 1) + get_local_id(0)] = src[get_local_size(0) + get_local_size(0) - (get_local_id(0) + 1)];
+}
diff --git a/src/cl_api.c b/src/cl_api.c
index f14bee4..3c78243 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -37,6 +37,15 @@
 #include <string.h>
 #include <assert.h>
 
+#ifndef CL_VERSION_1_2
+#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
+#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
+#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
+#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
+#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
+typedef intptr_t cl_device_partition_property;
+#endif
+
 static cl_int
 cl_check_device_type(cl_device_type device_type)
 {
@@ -696,6 +705,7 @@ clBuildProgram(cl_program            program,
   INVALID_VALUE_IF (num_devices > 1);
   INVALID_VALUE_IF (num_devices == 0 && device_list != NULL);
   INVALID_VALUE_IF (num_devices != 0 && device_list == NULL);
+  INVALID_VALUE_IF (pfn_notify  == 0 && user_data   != NULL);
 
   /* Everything is easy. We only support one device anyway */
   if (num_devices != 0) {
@@ -706,7 +716,9 @@ clBuildProgram(cl_program            program,
   /* TODO support create program from binary */
   assert(program->source_type == FROM_LLVM ||
          program->source_type == FROM_SOURCE);
-  cl_program_build(program, options);
+  if((err = cl_program_build(program, options)) != CL_SUCCESS) {
+    goto error;
+  }
   program->is_built = CL_TRUE;
 
   if (pfn_notify) pfn_notify(program, user_data);
@@ -722,6 +734,19 @@ clUnloadCompiler(void)
   return 0;
 }
 
+#define FILL_AND_RET(TYPE, ELT, VAL, RET) \
+	do { \
+	  if (param_value && param_value_size < sizeof(TYPE)*ELT) \
+	      return CL_INVALID_VALUE;  \
+	  if (param_value) { \
+	      memcpy(param_value, (VAL), sizeof(TYPE)*ELT); \
+	  } \
+          \
+	  if (param_value_size_ret) \
+	      *param_value_size_ret = sizeof(TYPE)*ELT; \
+	  return RET; \
+	} while(0)
+
 cl_int
 clGetProgramInfo(cl_program       program,
                  cl_program_info  param_name,
@@ -729,8 +754,50 @@ clGetProgramInfo(cl_program       program,
                  void *           param_value,
                  size_t *         param_value_size_ret)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+  char * ret_str = "";
+
+  CHECK_PROGRAM (program);
+
+  if (param_name == CL_PROGRAM_REFERENCE_COUNT) {
+    cl_uint ref = program->ref_n;
+    FILL_AND_RET (cl_uint, 1, (&ref), CL_SUCCESS);
+  } else if (param_name == CL_PROGRAM_CONTEXT) {
+    cl_context context = program->ctx;
+    FILL_AND_RET (cl_context, 1, &context, CL_SUCCESS);
+  } else if (param_name == CL_PROGRAM_NUM_DEVICES) {
+    cl_uint num_dev = 1; // Just 1 dev now.
+    FILL_AND_RET (cl_uint, 1, &num_dev, CL_SUCCESS);
+  } else if (param_name == CL_PROGRAM_DEVICES) {
+    cl_device_id dev_id = program->ctx->device;
+    FILL_AND_RET (cl_device_id, 1, &dev_id, CL_SUCCESS);
+  } else if (param_name == CL_PROGRAM_SOURCE) {
+
+    if (!program->source)
+      FILL_AND_RET (char, 1, &ret_str, CL_SUCCESS);
+    FILL_AND_RET (char, (strlen(program->source) + 1),
+                   program->source, CL_SUCCESS);
+  } else if (param_name == CL_PROGRAM_BINARY_SIZES) {
+    FILL_AND_RET (size_t, 1, (&program->bin_sz), CL_SUCCESS);
+  } else if (param_name == CL_PROGRAM_BINARIES) {
+    if (!param_value)
+      return CL_SUCCESS;
+
+    /* param_value points to an array of n
+       pointers allocated by the caller */
+    if (program->bin_sz > 0) {
+      memcpy(*((void **)param_value), program->bin, program->bin_sz);
+    } else {
+      memcpy(*((void **)param_value), ret_str, 1);
+    }
+
+    return CL_SUCCESS;
+  } else {
+    return CL_INVALID_VALUE;
+  }
+
+error:
+    return err;
 }
 
 cl_int
@@ -741,10 +808,42 @@ clGetProgramBuildInfo(cl_program             program,
                       void *                 param_value,
                       size_t *               param_value_size_ret)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+  char * ret_str = "";
+
+  CHECK_PROGRAM (program);
+  INVALID_DEVICE_IF (device != program->ctx->device);
+
+  if (param_name == CL_PROGRAM_BUILD_STATUS) {
+    cl_build_status status;
+
+    if (!program->is_built)
+      status = CL_BUILD_NONE;
+    else if (program->ker_n > 0)
+      status = CL_BUILD_SUCCESS;
+    else
+      status = CL_BUILD_ERROR;
+    // TODO: Support CL_BUILD_IN_PROGRESS ?
+
+    FILL_AND_RET (cl_build_status, 1, &status, CL_SUCCESS);
+  } else if (param_name == CL_PROGRAM_BUILD_OPTIONS) {
+    if (program->is_built && program->build_opts)
+      ret_str = program->build_opts;
+
+    FILL_AND_RET (char, (strlen(ret_str)+1), ret_str, CL_SUCCESS);
+  } else if (param_name == CL_PROGRAM_BUILD_LOG) {
+    // TODO: need to add logs in backend when compiling.
+    FILL_AND_RET (char, (strlen(ret_str)+1), ret_str, CL_SUCCESS);
+  } else {
+    return CL_INVALID_VALUE;
+  }
+
+error:
+    return err;
 }
 
+#undef FILL_AND_RET
+
 cl_kernel
 clCreateKernel(cl_program   program,
                const char * kernel_name,
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 7669602..a2c3ed2 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -30,6 +30,10 @@
 #include <stdio.h>
 #include <string.h>
 
+#ifndef CL_VERSION_1_2
+#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
+#endif
+
 static struct _cl_device_id intel_ivb_gt2_device = {
   INIT_ICD(dispatch)
   .max_compute_unit = 128,
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 3ca6381..33ad174 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -24,6 +24,25 @@
 #include "cl_driver.h"
 #include "CL/cl.h"
 
+#ifndef CL_VERSION_1_2
+#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
+typedef struct _cl_image_desc {
+    cl_mem_object_type      image_type;
+    size_t                  image_width;
+    size_t                  image_height;
+    size_t                  image_depth;
+    size_t                  image_array_size;
+    size_t                  image_row_pitch;
+    size_t                  image_slice_pitch;
+    cl_uint                 num_mip_levels;
+    cl_uint                 num_samples;
+    cl_mem                  buffer;
+} cl_image_desc;
+#endif
+
 typedef enum cl_image_tiling {
   CL_NO_TILE = 0,
   CL_TILE_X  = 1,
diff --git a/src/cl_mem_gl.c b/src/cl_mem_gl.c
index 930107f..c2a5395 100644
--- a/src/cl_mem_gl.c
+++ b/src/cl_mem_gl.c
@@ -37,6 +37,10 @@
 #include "CL/cl_intel.h"
 #include "CL/cl_gl.h"
 
+#ifndef CL_VERSION_1_2
+#define CL_INVALID_IMAGE_DESCRIPTOR -65
+#endif
+
 static int cl_get_clformat_from_texture(GLint tex_format, cl_image_format * cl_format)
 {
   cl_int ret = CL_SUCCESS;
diff --git a/src/cl_program.c b/src/cl_program.c
index 6acf31f..7870514 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -36,13 +36,10 @@
 static void
 cl_program_release_sources(cl_program p)
 {
-  size_t i;
-  if (p->sources == NULL) return;
-  for (i = 0; i < p->src_n; ++i)
-    if (p->sources[i]) cl_free(p->sources[i]);
-  cl_free(p->sources);
-  p->sources = NULL;
-  p->src_n = 0;
+  if (p->source) {
+    cl_free(p->source);
+    p->source = NULL;
+  }
 }
 
 LOCAL void
@@ -59,6 +56,12 @@ cl_program_delete(cl_program p)
   /* Destroy the sources if still allocated */
   cl_program_release_sources(p);
 
+  /* Release the build options. */
+  if (p->build_opts) {
+    cl_free(p->build_opts);
+    p->build_opts = NULL;
+  }
+
   /* Remove it from the list */
   assert(p->ctx);
   pthread_mutex_lock(&p->ctx->program_lock);
@@ -234,27 +237,36 @@ cl_program_create_from_source(cl_context ctx,
   cl_program program = NULL;
   cl_int err = CL_SUCCESS;
   cl_uint i;
-
+  int32_t * lens = NULL;
+  int32_t len_total = 0;
   assert(ctx);
-
+  char * p = NULL;
   // the real compilation step will be done at build time since we do not have
   // yet the compilation options
   program = cl_program_new(ctx);
-  TRY_ALLOC (program->sources, cl_calloc(count, sizeof(char*)));
+  TRY_ALLOC (lens, cl_calloc(count, sizeof(int32_t)));
   for (i = 0; i < (int) count; ++i) {
     size_t len;
     if (lengths == NULL || lengths[i] == 0)
       len = strlen(strings[i]);
     else
       len = lengths[i];
-    TRY_ALLOC (program->sources[i], cl_calloc(len+1, sizeof(char)));
-    memcpy(program->sources[i], strings[i], len);
-    program->sources[i][len] = 0;
+    lens[i] = len;
+    len_total += len;
   }
-  program->src_n = count;
+  TRY_ALLOC(program->source, cl_calloc(len_total+1, sizeof(char)));
+  p = program->source;
+  for (i = 0; i < (int) count; ++i) {
+    memcpy(p, strings[i], lens[i]);
+    p += lens[i];
+  }
+  *p = '\0';
+
   program->source_type = FROM_SOURCE;
 
 exit:
+  cl_free(lens);
+  lens = NULL;
   if (errcode_ret)
     *errcode_ret = err;
   return program;
@@ -268,11 +280,21 @@ LOCAL cl_int
 cl_program_build(cl_program p, const char *options)
 {
   cl_int err = CL_SUCCESS;
+  int i = 0;
+  int copyed = 0;
+
+  if (options) {
+    if(p->build_opts) {
+      cl_free(p->build_opts);
+      p->build_opts = NULL;
+    }
+
+    TRY_ALLOC (p->build_opts, cl_calloc(strlen(options) + 1, sizeof(char)));
+    memcpy(p->build_opts, options, strlen(options));
+  }
 
   if (p->source_type == FROM_SOURCE) {
-    /* XXX support multiple sources later */
-    FATAL_IF (p->src_n != 1, "Only ONE source file supported");
-    p->opaque = gbe_program_new_from_source(p->sources[0], 0, options, NULL, NULL);
+    p->opaque = gbe_program_new_from_source(p->source, 0, options, NULL, NULL);
     if (UNLIKELY(p->opaque == NULL)) {
       err = CL_INVALID_PROGRAM;
       goto error;
@@ -283,6 +305,20 @@ cl_program_build(cl_program p, const char *options)
     p->source_type = FROM_LLVM;
   }
 
+  for (i = 0; i < p->ker_n; i ++) {
+    const gbe_kernel opaque = gbe_program_get_kernel(p->opaque, i);
+    p->bin_sz += gbe_kernel_get_code_size(opaque);
+  }
+
+  TRY_ALLOC (p->bin, cl_calloc(p->bin_sz, sizeof(char)));
+  for (i = 0; i < p->ker_n; i ++) {
+    const gbe_kernel opaque = gbe_program_get_kernel(p->opaque, i);
+    size_t sz = gbe_kernel_get_code_size(opaque);
+
+    memcpy(p->bin + copyed, gbe_kernel_get_code(opaque), sz);
+    copyed += sz;
+  }
+
   p->is_built = 1;
 error:
   return err;
diff --git a/src/cl_program.h b/src/cl_program.h
index 161d858..996a496 100644
--- a/src/cl_program.h
+++ b/src/cl_program.h
@@ -47,11 +47,11 @@ struct _cl_program {
   cl_context ctx;         /* Its parent context */
   char *bin;              /* The program copied verbatim */
   size_t bin_sz;          /* Its size in memory */
-  char **sources;         /* Program sources */
-  size_t src_n;           /* Number of sources */
+  char *source;           /* Program sources */
   uint32_t ker_n;         /* Number of declared kernels */
   uint32_t source_type:2; /* Built from binary, source or LLVM */
   uint32_t is_built:1;    /* Did we call clBuildProgram on it? */
+  char *build_opts;       /* The build options for this program */
 };
 
 /* Create a empty program */
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 19567dc..2791fbe 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -39,6 +39,13 @@
 #include "cl_utils.h"
 #include "cl_sampler.h"
 
+#ifndef CL_VERSION_1_2
+#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
+#endif
+
 #define GEN_CMD_MEDIA_OBJECT  (0x71000000)
 #define MO_TS_BIT             (1 << 24)
 #define MO_RETAIN_BIT         (1 << 28)
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index e5c03ee..108fa06 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -71,10 +71,11 @@ set (utests_sources
   compiler_insn_selection_min.cpp
   compiler_insn_selection_max.cpp
   compiler_insn_selection_masked_min_max.cpp
-#  compiler_global_memory_barrier.cpp
+  compiler_global_memory_barrier.cpp
   compiler_local_memory_two_ptr.cpp
   compiler_local_memory_barrier.cpp
   compiler_local_memory_barrier_wg64.cpp
+#  compiler_local_memory_barrier_2.cpp
   compiler_movforphi_undef.cpp
   compiler_volatile.cpp
   compiler_copy_image1.cpp
@@ -82,6 +83,7 @@ set (utests_sources
   compiler_vector_load_store.cpp
   compiler_cl_finish.cpp
   buildin_work_dim.cpp
+  builtin_global_size.cpp
   runtime_createcontext.cpp
   runtime_null_kernel_arg.cpp
   utest_assert.cpp
diff --git a/utests/builtin_global_size.cpp b/utests/builtin_global_size.cpp
new file mode 100644
index 0000000..094e019
--- /dev/null
+++ b/utests/builtin_global_size.cpp
@@ -0,0 +1,108 @@
+/*
+According to the OpenCL v1.1 & v1.2 specifications, chapter 6.11, the behavior of the function get_global_size should be as follows:
+
+  globals[0] = 3;
+  globals[1] = 4;
+  globals[2] = 5;
+
+#if defined(CL_VERSION_1_2) || defined(CL_VERSION_1_1):
+get_global_size(-1) = 1 (dimension:1)
+get_global_size(0) = 3 (dimension:1)
+get_global_size(1) = 1 (dimension:1)
+get_global_size(2) = 1 (dimension:1)
+
+get_global_size(-1) = 1 (dimension:2)
+get_global_size(0) = 3 (dimension:2)
+get_global_size(1) = 4 (dimension:2)
+get_global_size(2) = 1 (dimension:2)
+get_global_size(3) = 1 (dimension:2)
+
+get_global_size(-1) = 1 (dimension:3)
+get_global_size(0) = 3 (dimension:3)
+get_global_size(1) = 4 (dimension:3)
+get_global_size(2) = 5 (dimension:3)
+get_global_size(3) = 1 (dimension:3)
+get_global_size(4) = 1 (dimension:3)
+
+#ifdef CL_VERSION_1_0:
+get_global_size(-1) = 0 (dimension:1)
+get_global_size(0) = 3 (dimension:1)
+get_global_size(1) = 0 (dimension:1)
+get_global_size(2) = 0 (dimension:1)
+
+get_global_size(-1) = 0 (dimension:2)
+get_global_size(0) = 3 (dimension:2)
+get_global_size(1) = 4 (dimension:2)
+get_global_size(2) = 0 (dimension:2)
+get_global_size(3) = 0 (dimension:2)
+
+get_global_size(-1) = 0 (dimension:3)
+get_global_size(0) = 3 (dimension:3)
+get_global_size(1) = 4 (dimension:3)
+get_global_size(2) = 5 (dimension:3)
+get_global_size(3) = 0 (dimension:3)
+get_global_size(4) = 0 (dimension:3)
+
+*/
+#include "utest_helper.hpp"
+static void builtin_global_size(void)
+{
+
+  // Setup kernel and buffers
+  int dim, dim_arg_global, global_size, err;
+  OCL_CREATE_KERNEL("builtin_global_size");
+
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+  globals[0] = 3;
+  globals[1] = 4;
+  globals[2] = 5;
+  locals[0] = 1;
+  locals[1] = 1;
+  locals[2] = 1;
+
+  for( dim=1; dim <= 3; dim++ )
+  {
+
+    for( dim_arg_global = -1; dim_arg_global <= dim + 1; dim_arg_global++ )
+    {
+
+      err = clEnqueueWriteBuffer( queue, buf[1], CL_TRUE, 0, sizeof(int), &dim_arg_global, 0, NULL, NULL);
+      if (err != CL_SUCCESS)
+      {
+        printf("Error: Failed to write to source array!\n");
+        exit(1);
+      }
+
+      // Run the kernel
+      OCL_NDRANGE( dim );
+
+      err = clEnqueueReadBuffer( queue, buf[0], CL_TRUE, 0, sizeof(int), &global_size, 0, NULL, NULL);
+      if (err != CL_SUCCESS)
+      {
+        printf("Error: Failed to read output array! %d\n", err);
+        exit(1);
+      }
+
+      //printf("get_global_size(%d) = %d (dimension:%d)\n", dim_arg_global, global_size, dim);
+
+      if ( dim_arg_global >= 0 && dim_arg_global < dim)
+        OCL_ASSERT( global_size == dim_arg_global + 3);
+      else
+      {
+      #if defined(CL_VERSION_1_2) || defined(CL_VERSION_1_1)
+        OCL_ASSERT( global_size == 1);
+      #elif defined(CL_VERSION_1_0)
+        OCL_ASSERT( global_size == 0);
+      #else
+        OCL_ASSERT( global_size == 1);
+      #endif
+      }
+    }
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_global_size);
diff --git a/utests/compiler_box_blur_image.cpp b/utests/compiler_box_blur_image.cpp
index d94a97c..351f08e 100644
--- a/utests/compiler_box_blur_image.cpp
+++ b/utests/compiler_box_blur_image.cpp
@@ -4,7 +4,6 @@ static void compiler_box_blur_image()
 {
   int w, h;
   cl_image_format format = { };
-  cl_image_desc desc = { };
   size_t origin[3] = { };
   size_t region[3];
   int *src, *dst;
@@ -16,17 +15,11 @@ static void compiler_box_blur_image()
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNORM_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_depth = 1;
-  desc.image_row_pitch = w*sizeof(uint32_t);
 
   /* Run the kernel */
-  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, src);
+  OCL_CREATE_IMAGE2D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, w*sizeof(uint32_t), src);
   free(src);
-  desc.image_row_pitch = 0;
-  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE2D(buf[1], 0, &format, w, h, 0, NULL);
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
   OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
   globals[0] = w;
diff --git a/utests/compiler_copy_image.cpp b/utests/compiler_copy_image.cpp
index 04c9544..58827f2 100644
--- a/utests/compiler_copy_image.cpp
+++ b/utests/compiler_copy_image.cpp
@@ -5,7 +5,6 @@ static void compiler_copy_image(void)
   const size_t w = 512;
   const size_t h = 512;
   cl_image_format format;
-  cl_image_desc desc;
   cl_sampler sampler;
 
   // Setup kernel and images
@@ -17,14 +16,9 @@ static void compiler_copy_image(void)
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_row_pitch = w * sizeof(uint32_t);
-  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+  OCL_CREATE_IMAGE2D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, w * sizeof(uint32_t), buf_data[0]);
 
-  desc.image_row_pitch = 0;
-  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE2D(buf[1], 0, &format, w, h, 0, NULL);
   OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
   free(buf_data[0]);
   buf_data[0] = NULL;
diff --git a/utests/compiler_copy_image1.cpp b/utests/compiler_copy_image1.cpp
index a9ef3f4..d469fbd 100644
--- a/utests/compiler_copy_image1.cpp
+++ b/utests/compiler_copy_image1.cpp
@@ -5,7 +5,6 @@ static void compiler_copy_image1(void)
   const size_t w = 512;
   const size_t h = 512;
   cl_image_format format;
-  cl_image_desc desc;
   cl_sampler sampler;
 
   // Setup kernel and images
@@ -17,19 +16,14 @@ static void compiler_copy_image1(void)
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_row_pitch = w * sizeof(uint32_t);
-  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+  OCL_CREATE_IMAGE2D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, w * sizeof(uint32_t), buf_data[0]);
   OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
 
-  desc.image_row_pitch = 0;
-  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
-  OCL_CREATE_IMAGE(buf[2], 0, &format, &desc, NULL);
-  OCL_CREATE_IMAGE(buf[3], 0, &format, &desc, NULL);
-  OCL_CREATE_IMAGE(buf[4], 0, &format, &desc, NULL);
-  OCL_CREATE_IMAGE(buf[5], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE2D(buf[1], 0, &format, w, h, 0, NULL);
+  OCL_CREATE_IMAGE2D(buf[2], 0, &format, w, h, 0, NULL);
+  OCL_CREATE_IMAGE2D(buf[3], 0, &format, w, h, 0, NULL);
+  OCL_CREATE_IMAGE2D(buf[4], 0, &format, w, h, 0, NULL);
+  OCL_CREATE_IMAGE2D(buf[5], 0, &format, w, h, 0, NULL);
   free(buf_data[0]);
   buf_data[0] = NULL;
 
diff --git a/utests/compiler_copy_image_3d.cpp b/utests/compiler_copy_image_3d.cpp
index 9816fe4..5290090 100644
--- a/utests/compiler_copy_image_3d.cpp
+++ b/utests/compiler_copy_image_3d.cpp
@@ -6,7 +6,6 @@ static void compiler_copy_image_3d(void)
   const size_t h = 512;
   const size_t depth = 1;
   cl_image_format format;
-  cl_image_desc desc;
   cl_sampler sampler;
 
   // Setup kernel and images
@@ -19,14 +18,8 @@ static void compiler_copy_image_3d(void)
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE3D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_depth = depth;
-  desc.image_row_pitch = 0;
-  desc.image_slice_pitch = 0;
-  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
-  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE3D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, depth, 0, 0, buf_data[0]);
+  OCL_CREATE_IMAGE3D(buf[1], 0, &format, w, h, depth, 0, 0, NULL);
   OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
   free(buf_data[0]);
   buf_data[0] = NULL;
diff --git a/utests/compiler_fill_gl_image.cpp b/utests/compiler_fill_gl_image.cpp
index 166621a..b070b8f 100644
--- a/utests/compiler_fill_gl_image.cpp
+++ b/utests/compiler_fill_gl_image.cpp
@@ -42,7 +42,7 @@ static void compiler_fill_gl_image(void)
   glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, NULL);
 
   OCL_CREATE_KERNEL("test_fill_gl_image");
-  OCL_CREATE_GL_IMAGE(buf[0], 0, GL_TEXTURE_2D, 0, tex);
+  OCL_CREATE_GL_IMAGE2D(buf[0], 0, GL_TEXTURE_2D, 0, tex);
 
   // Run the kernel
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_fill_image.cpp b/utests/compiler_fill_image.cpp
index c9242b2..2f9fe3d 100644
--- a/utests/compiler_fill_image.cpp
+++ b/utests/compiler_fill_image.cpp
@@ -6,19 +6,14 @@ static void compiler_fill_image(void)
   const size_t h = 512;
   uint32_t color = 0x12345678;
   cl_image_format format;
-  cl_image_desc desc;
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_row_pitch = 0;
 
   // Setup kernel and images
   OCL_CREATE_KERNEL("test_fill_image");
 
-  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE2D(buf[0], 0, &format, w, h, 0, NULL);
 
   // Run the kernel
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_fill_image0.cpp b/utests/compiler_fill_image0.cpp
index 7c8f40e..1ab13be 100644
--- a/utests/compiler_fill_image0.cpp
+++ b/utests/compiler_fill_image0.cpp
@@ -5,19 +5,14 @@ static void compiler_fill_image0(void)
   const size_t w = 512;
   const size_t h = 512;
   cl_image_format format;
-  cl_image_desc desc;
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_row_pitch = 0;
 
   // Setup kernel and images
   OCL_CREATE_KERNEL("test_fill_image0");
 
-  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE2D(buf[0], 0, &format, w, h, 0, NULL);
 
   // Run the kernel
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_fill_image_3d.cpp b/utests/compiler_fill_image_3d.cpp
index 5d98b0b..4b3d4e3 100644
--- a/utests/compiler_fill_image_3d.cpp
+++ b/utests/compiler_fill_image_3d.cpp
@@ -7,21 +7,14 @@ static void compiler_fill_image_3d(void)
   const size_t depth = 1;
   uint32_t color = 0x12345678;
   cl_image_format format;
-  cl_image_desc desc;
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE3D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_depth = depth;
-  desc.image_row_pitch = 0;
-  desc.image_slice_pitch = 0;
 
   // Setup kernel and images
   OCL_CREATE_KERNEL("test_fill_image_3d");
 
-  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE3D(buf[0], 0, &format, w, h, depth, 0, 0, NULL);
 
   // Run the kernel
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_fill_image_3d_2.cpp b/utests/compiler_fill_image_3d_2.cpp
index 3b4edb9..8ecc3e3 100644
--- a/utests/compiler_fill_image_3d_2.cpp
+++ b/utests/compiler_fill_image_3d_2.cpp
@@ -6,21 +6,14 @@ static void compiler_fill_image_3d_2(void)
   const size_t h = 512;
   const size_t depth = 1;
   cl_image_format format;
-  cl_image_desc desc;
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE3D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_depth = depth;
-  desc.image_row_pitch = 0;
-  desc.image_slice_pitch = 0;
 
   // Setup kernel and images
   OCL_CREATE_KERNEL("test_fill_image_3d_2");
 
-  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE3D(buf[0], 0, &format, w, h, depth, 0, 0, NULL);
 
   // Run the kernel
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_function_constant0.cpp b/utests/compiler_function_constant0.cpp
index de564f3..c0a8a9d 100644
--- a/utests/compiler_function_constant0.cpp
+++ b/utests/compiler_function_constant0.cpp
@@ -8,7 +8,7 @@ void compiler_function_constant0(void)
   // Setup kernel and buffers
   OCL_CREATE_KERNEL("compiler_function_constant0");
   OCL_CREATE_BUFFER(buf[0], 0, 75 * sizeof(short), NULL);
-  OCL_CREATE_BUFFER(buf[1], 0, 256 * sizeof(char), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, 1 * sizeof(char), NULL);
   OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL);
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
   OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
@@ -21,9 +21,7 @@ void compiler_function_constant0(void)
   OCL_UNMAP_BUFFER(0);
 
   OCL_MAP_BUFFER(1);
-  for(uint32_t i = 0; i < 256; ++i)
-    ((char *)buf_data[1])[i] = 10;
-  ((char *)buf_data[1])[15] = 15;
+  ((char *)buf_data[1])[0] = 15;
   OCL_UNMAP_BUFFER(1);
 
   // Run the kernel
diff --git a/utests/compiler_get_image_info.cpp b/utests/compiler_get_image_info.cpp
index 3b9d132..4454d03 100644
--- a/utests/compiler_get_image_info.cpp
+++ b/utests/compiler_get_image_info.cpp
@@ -6,24 +6,14 @@ static void compiler_get_image_info(void)
   const size_t h = 512;
   const size_t depth = 3;
   cl_image_format format;
-  cl_image_desc desc;
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE3D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_depth = depth;
-  desc.image_row_pitch = 0;
-  desc.image_slice_pitch = 0;
-  desc.num_mip_levels = 0;
-  desc.num_samples = 0;
-  desc.buffer = NULL;
 
   // Setup kernel and images
   OCL_CREATE_KERNEL("test_get_image_info");
 
-  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE3D(buf[0], 0, &format, w, h, depth, 0, 0, NULL);
   OCL_CREATE_BUFFER(buf[1], 0, 32 * sizeof(int), NULL);
   OCL_CREATE_BUFFER(buf[2], 0, 32 * sizeof(int), NULL);
 
diff --git a/utests/compiler_local_memory_barrier_2.cpp b/utests/compiler_local_memory_barrier_2.cpp
new file mode 100644
index 0000000..d670654
--- /dev/null
+++ b/utests/compiler_local_memory_barrier_2.cpp
@@ -0,0 +1,29 @@
+#include "utest_helper.hpp"
+
+static void compiler_global_memory_barrier(void)
+{
+  const size_t n = 16*1024;
+
+  globals[0] = n/2;
+  locals[0] = 32;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_local_memory_barrier_2");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  //OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, locals[0] * 2 * sizeof(uint32_t), NULL);
+
+  // Run the kernel
+  OCL_NDRANGE(1);
+  OCL_MAP_BUFFER(0);
+
+  // Check results
+  uint32_t *dst = (uint32_t*)buf_data[0];
+  for (uint32_t i = 0; i < n; i+=locals[0])
+    for (uint32_t j = 0; j < locals[0]; ++j)
+        OCL_ASSERT(dst[i+j] == locals[0] - 1 -j);
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_global_memory_barrier);
diff --git a/utests/compiler_movforphi_undef.cpp b/utests/compiler_movforphi_undef.cpp
index 17bde8c..50526e8 100644
--- a/utests/compiler_movforphi_undef.cpp
+++ b/utests/compiler_movforphi_undef.cpp
@@ -6,7 +6,6 @@ static void compiler_movforphi_undef(void)
   const size_t h = 16;
   cl_sampler sampler;
   cl_image_format format;
-  cl_image_desc desc;
 
   // Setup kernel and images
   OCL_CREATE_KERNEL("test_movforphi_undef");
@@ -17,14 +16,9 @@ static void compiler_movforphi_undef(void)
 
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_UNSIGNED_INT8;
-  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-  desc.image_width = w;
-  desc.image_height = h;
-  desc.image_row_pitch = w * sizeof(uint32_t);
-  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+  OCL_CREATE_IMAGE2D(buf[0], CL_MEM_COPY_HOST_PTR, &format, w, h, w * sizeof(uint32_t), buf_data[0]);
 
-  desc.image_row_pitch = 0;
-  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+  OCL_CREATE_IMAGE2D(buf[1], 0, &format, w, h, 0, NULL);
   OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
   free(buf_data[0]);
   buf_data[0] = NULL;
diff --git a/utests/utest_helper.hpp b/utests/utest_helper.hpp
index dd98a1f..d642351 100644
--- a/utests/utest_helper.hpp
+++ b/utests/utest_helper.hpp
@@ -93,6 +93,12 @@ extern EGLSurface  eglSurface;
 #define OCL_CREATE_IMAGE(IMAGE, FLAGS, FORMAT, DESC, DATA) \
     OCL_CALL2(clCreateImage, IMAGE, ctx, FLAGS, FORMAT, DESC, DATA)
 
+#define OCL_CREATE_IMAGE2D(IMAGE, FLAGS, FORMAT, WIDTH, HEIGHT, PITCH, DATA) \
+    OCL_CALL2(clCreateImage2D, IMAGE, ctx, FLAGS, FORMAT, WIDTH, HEIGHT, PITCH, DATA)
+
+#define OCL_CREATE_IMAGE3D(IMAGE, FLAGS, FORMAT, WIDTH, HEIGHT, DEPTH, RPITCH, SPITCH, DATA) \
+    OCL_CALL2(clCreateImage3D, IMAGE, ctx, FLAGS, FORMAT, WIDTH, HEIGHT, DEPTH, RPITCH, SPITCH, DATA)
+
 #define OCL_READ_IMAGE(IMAGE, ORIGIN, REGION, DATA) \
     OCL_CALL(clEnqueueReadImage, queue, IMAGE, CL_TRUE, ORIGIN, REGION, 0, 0, DATA, 0, NULL, NULL)
 
@@ -102,6 +108,12 @@ extern EGLSurface  eglSurface;
 #define OCL_CREATE_GL_IMAGE(IMAGE, FLAGS, TARGET, LEVEL, TEXTURE) \
     OCL_CALL2(clCreateFromGLTexture, IMAGE, ctx, FLAGS, TARGET, LEVEL, TEXTURE)
 
+#define OCL_CREATE_GL_IMAGE2D(IMAGE, FLAGS, TARGET, LEVEL, TEXTURE) \
+    OCL_CALL2(clCreateFromGLTexture2D, IMAGE, ctx, FLAGS, TARGET, LEVEL, TEXTURE)
+
+#define OCL_CREATE_GL_IMAGE3D(IMAGE, FLAGS, TARGET, LEVEL, TEXTURE) \
+    OCL_CALL2(clCreateFromGLTexture3D, IMAGE, ctx, FLAGS, TARGET, LEVEL, TEXTURE)
+
 #define OCL_ENQUEUE_ACQUIRE_GL_OBJECTS(ID) \
     OCL_CALL(clEnqueueAcquireGLObjects, queue, 1, &buf[ID], 0, 0, 0)
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git



More information about the Pkg-opencl-devel mailing list